From 8e50e0e74d40cdbf39bf98df5d49b02267b4dc69 Mon Sep 17 00:00:00 2001 From: maniospas Date: Fri, 23 Aug 2024 01:42:00 +0300 Subject: [PATCH] docs 85% complete --- .../nodeClassification/Scripting.java | 17 +- JGNN/src/examples/tutorial/Quickstart.java | 2 + .../java/mklab/JGNN/adhoc/ModelBuilder.java | 58 +- .../mklab/JGNN/adhoc/parsers/Neuralang.java | 25 +- .../java/mklab/JGNN/nn/ModelTraining.java | 6 +- docs/index.html | 1317 +++++++---------- 6 files changed, 641 insertions(+), 784 deletions(-) diff --git a/JGNN/src/examples/nodeClassification/Scripting.java b/JGNN/src/examples/nodeClassification/Scripting.java index 7ac4849..2ec1d00 100644 --- a/JGNN/src/examples/nodeClassification/Scripting.java +++ b/JGNN/src/examples/nodeClassification/Scripting.java @@ -14,6 +14,7 @@ import mklab.JGNN.core.empy.EmptyTensor; import mklab.JGNN.nn.initializers.XavierNormal; import mklab.JGNN.nn.loss.CategoricalCrossEntropy; +import mklab.JGNN.nn.loss.report.VerboseLoss; import mklab.JGNN.nn.optimizers.Adam; /** @@ -31,18 +32,12 @@ fn classify(nodes, h, epochs: !3000, patience: !100, lr: !0.01) { return softmax(h[nodes], dim: "row"); } fn gcnlayer(A, h, hidden: 16, reg: 0.005) { - h = A@h@matrix(?, hidden, reg) + vector(hidden); - return h; + return A@h@matrix(?, hidden, reg) + vector(hidden); } fn gcn(A, h, classes: extern) { h = gcnlayer(A, h); h = dropout(relu(h), 0.5); - h = gcnlayer(A, h, hidden: classes); - return h; - } - fn ngcn(A, h, nodes) { - h = classify(nodes, gcn(A,h)); - return h; + return gcnlayer(A, h, hidden: classes); } """; @@ -55,14 +50,14 @@ fn ngcn(A, h, nodes) { .var("nodes") .config("classes", numClasses) .config("hidden", numClasses+2) - .out("ngcn(A,h, nodes)") + .out("classify(nodes, gcn(A,h))") .autosize(new EmptyTensor(numSamples)); + System.out.println("Preferred learning rate: "+modelBuilder.getConfig("lr")); ModelTraining trainer = new ModelTraining() .configFrom(modelBuilder) - .setVerbose(true) .setLoss(new CategoricalCrossEntropy()) - .setValidationLoss(new CategoricalCrossEntropy()); + .setValidationLoss(new VerboseLoss(new CategoricalCrossEntropy())); long tic = System.currentTimeMillis(); Slice nodes = dataset.samples().getSlice().shuffle(100); diff --git a/JGNN/src/examples/tutorial/Quickstart.java b/JGNN/src/examples/tutorial/Quickstart.java index 9a0bff3..7a236b5 100644 --- a/JGNN/src/examples/tutorial/Quickstart.java +++ b/JGNN/src/examples/tutorial/Quickstart.java @@ -42,6 +42,8 @@ public static void main(String[] args) throws Exception { .classify() .autosize(new EmptyTensor(numSamples)); + System.out.println(modelBuilder.getConfig("lr")); + ModelTraining trainer = new ModelTraining() .setOptimizer(new Adam(0.01)) .setEpochs(3000) diff --git a/JGNN/src/main/java/mklab/JGNN/adhoc/ModelBuilder.java b/JGNN/src/main/java/mklab/JGNN/adhoc/ModelBuilder.java index 4fea43e..ebbba9a 100644 --- a/JGNN/src/main/java/mklab/JGNN/adhoc/ModelBuilder.java +++ b/JGNN/src/main/java/mklab/JGNN/adhoc/ModelBuilder.java @@ -94,6 +94,13 @@ public ModelBuilder(Model model) { public Model getModel() { return model; } + + /** + * Serializes the model builder instance into a Path, such as + * Paths.get("example.jgnn"). + * @param path A serialized path. + * @return This builder's instance. 
+ */ public ModelBuilder save(Path path) { try(BufferedWriter writer = Files.newBufferedWriter(path)){ writer.write(this.getClass().getCanonicalName()+"\n"); @@ -140,6 +147,13 @@ public ModelBuilder save(Path path) { return this; } + /** + * Loads a ModelBuilder instance from the provided path, such as Paths.get("example.jgnn"). + * The instance may have been serialized with any class that extends the model builder. + * + * @param path The provided path. + * @return The loaded ModelBuilder instance. + */ public static ModelBuilder load(Path path) { ModelBuilder builder; try(BufferedReader reader = Files.newBufferedReader(path)){ @@ -350,14 +364,16 @@ public ModelBuilder param(String name, double regularization, Tensor value) { /** * Declares a configuration hyperparameter, which can be used to declare * matrix and vector parameters during {@link #operation(String)} expressions. - * For in-expression use of hyperparameters, delcare them with {@link #constant(String, double)}. + * For in-expression use of hyperparameters, declare them with {@link #constant(String, double)}. + * In Neuralang terms, this is implements the broadcasting operation. * @param name The name of the configuration hyperparameter. * @param value The value to be assigned to the hyperparameter. - * Typically, provide a long number. + * This may also be a long number. * @return The builder's instance. * @see #operation(String) * @see #param(String, Tensor) * @see #param(String, double, Tensor) + * @see #config(String, String) */ public ModelBuilder config(String name, double value) { if(name.equals("?")) @@ -366,19 +382,41 @@ public ModelBuilder config(String name, double value) { return this; } - + /** + * Applies {@link #config(String, double)} where the set value + * is obtained from another configuration hyperaparameter. + * @param name The name of the configuration hyperparameter to set. + * @param value The name of the configuration hyperparameter whose value should be copied. + * @return The builder's instance. + * @see #config(String, double) + */ public ModelBuilder config(String name, String value) { - Double val = configurations.get(value); + return config(name, getConfig(value)); + } + + /** + * Retrieves a configuration hyperparameter's value. + * @param name The configuration's name. + * @return The retrieved value; + * @throws RuntimeException If a no configuration with the given name was found. + * @see #getConfigOrDefault(String, double) + */ + public double getConfig(String name) { + Double val = configurations.get(name); if(val==null) - throw new RuntimeException("No configuration "+value+" found"); + throw new RuntimeException("No configuration "+name+" found"); this.configurations.put(name, val); - return this; - } - - public int getConfigOrDefault(String name, int defaultValue) { - return (int)(double)configurations.getOrDefault(name, (double) defaultValue); + return val; } + /** + * Retrieves a configuration hyperparameter's value. If no such configuration + * exists, a default value is returned instead. + * @param name The configuration's name. + * @param defaultValue The default to be retrieved if no such configuration was found. 
+ * @return The retrieved value; + * @see #getConfig(String) + */ public double getConfigOrDefault(String name, double defaultValue) { return configurations.getOrDefault(name, defaultValue); } diff --git a/JGNN/src/main/java/mklab/JGNN/adhoc/parsers/Neuralang.java b/JGNN/src/main/java/mklab/JGNN/adhoc/parsers/Neuralang.java index ef7c4fa..27b3ca4 100644 --- a/JGNN/src/main/java/mklab/JGNN/adhoc/parsers/Neuralang.java +++ b/JGNN/src/main/java/mklab/JGNN/adhoc/parsers/Neuralang.java @@ -3,11 +3,17 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; - import mklab.JGNN.adhoc.ModelBuilder; import mklab.JGNN.core.Tensor; +/** + * Extends the base {@link ModelBuilder} with the full capabilities of the Neuralang + * scripting language. + * + * @author Emmanouil Krasanakis + * @see #parse(String) + * @see #parse(Path) + */ public class Neuralang extends ModelBuilder { public Neuralang() { } @@ -15,6 +21,15 @@ public Neuralang config(String name, double value) { super.config(name, value); return this; } + /** + * Parses a Neuralang source code file. + * Reads a file like Paths.get("models.nn") + * from disk with {@link Files#readAllLines(Path)}, and parses + * the loaded String. + * @param path The source code file. + * @return The Neuralang builder's instance. + * @see #parse(String) + */ public Neuralang parse(Path path) { try { parse(String.join("\n", Files.readAllLines(path))); @@ -24,6 +39,12 @@ public Neuralang parse(Path path) { return this; } + /** + * Parses Neuralang source code by handling function declarations in addition to + * other expressions. + * @param text The source code to parse. + * @return The Neuralang builder's instance. + */ public Neuralang parse(String text) { int depth = 0; String progress = ""; diff --git a/JGNN/src/main/java/mklab/JGNN/nn/ModelTraining.java b/JGNN/src/main/java/mklab/JGNN/nn/ModelTraining.java index 6a6ea91..36cb60d 100644 --- a/JGNN/src/main/java/mklab/JGNN/nn/ModelTraining.java +++ b/JGNN/src/main/java/mklab/JGNN/nn/ModelTraining.java @@ -224,9 +224,9 @@ public void run() { } public ModelTraining configFrom(ModelBuilder modelBuilder) { setOptimizer(new Adam(modelBuilder.getConfigOrDefault("lr", 0.01))); - setEpochs(modelBuilder.getConfigOrDefault("epochs", epochs)); - numBatches = modelBuilder.getConfigOrDefault("batches", numBatches); - setPatience(modelBuilder.getConfigOrDefault("patience", patience)); + setEpochs((int)modelBuilder.getConfigOrDefault("epochs", epochs)); + numBatches = (int)modelBuilder.getConfigOrDefault("batches", numBatches); + setPatience((int)modelBuilder.getConfigOrDefault("patience", patience)); return this; } } diff --git a/docs/index.html b/docs/index.html index ccc9d02..e1e5d51 100644 --- a/docs/index.html +++ b/docs/index.html @@ -141,17 +141,17 @@ style="color: #777777;">3.1. ModelBuilder + - - - - - - + style="color: #777777;">3.4. Debugging + + + + @@ -166,9 +166,7 @@ - - - + @@ -176,37 +174,29 @@

JGNN

-

Graph Neural Networks (GNNs) are getting more and more popular, for example to - make predictions based on relational information, or to perform inference - on small datasets. JGNN provides cross-platform implementations of this machine - learning paradigm that do not require dedicated hardware or firmware. While - reading this guidebook, keep in mind that this is not a library for running - computationally intensive architectures; it has no GPU support and does not - plan to add any (unless such support becomes integrated in the Java virtual - machine). Instead, the goal is to provide highly portable solutions - that can run under smaller compute and available memory. So, while complex - architectures like gated attention networks with many layers and hidden dimensions - can be created using the libary, running them fastly may require compromises - in terms of the number of learned parameters or computational complexity. - The forte of JGNN lies is porting more lightweight counterparts in applications - grappling with limited resources.

+

Graph Neural Networks (GNNs) are getting more and more popular, for example to make predictions + based on relational information, or to perform inference on small datasets. JGNN is a library that + provides cross-platform implementations of this machine learning paradigm that do not require dedicated + hardware or firmware. The goal is to provide highly portable solutions that fit in + a few megabytes of memory. While reading this guidebook, keep in mind that this is not a library + for running computationally intensive architectures; it has no GPU support and does not plan to + add any (unless such support becomes integrated in the Java virtual machine). So, while complex + architectures like gated attention networks with many layers and hidden dimensions are supported, + running them fast on graphs with many nodes may require compromises in the number of learned + parameters or computational complexity. The main advantage of JGNN is its support for settings + with limited resources.

-

This guidebook is organized into several sections that detail - the library's most practical capabilities. After this brief introduction and +

This guidebook is organized into six sections that focus on + practical capabilities. After this brief introduction and instructions for including JGNN in Java projects, section 2 - gives a taste of what using the library looks like, and introduces various concepts - that paint the full picture, with details being left for later. Then, + gives a taste of what using the library looks like, with details being left for later. Then, section 3 describes how the library implements - the builder patter to facilitate the construction of GNN models. This construction + the builder pattern for constructing GNN models. Model construction includes symbolic expression parsing for machine learning operations, - drastically simplifying parts of the code. Symbolic parsing is - extended into the Neuralang scripting language for model - definition in section 4. This is parsed by a specific - model builders, and there are some hooks that synchronize the language with JGNN code. - Moving on, section 5 describes how raw data - can be created and manipulated, for example to construct model inputs and desired - outputs. Finally, section 6 describes + which drastically simplifies coding. Parsed expressions are + part of the Neuralang scripting language for model + definition. Finally, section 4 describes + interfaces for training and testing on automatically generated or customized data. It also takes a deep dive into obtaining raw model predictions, and using them in custom training and evaluation schemes.

@@ -219,15 +209,15 @@

JGNN

1. Setup

-

The simplest way to set up JGNN is to download it as JAR package from - the project's releases - and add it in a Java project's dependencies. However, those working with Maven - or Gradle can also add JGNN's latest nightly release as a dependency from the JitPack +

The simplest way to set up JGNN is to download it as a JAR package from + releases + and add it to your Java project's dependencies. Those working with Maven + or Gradle can instead add JGNN's latest nightly release as a dependency from the JitPack repository. Follow the link below for full instructions.
download JGNN

- For instance, the fields in the snippet below may be added in a maven .pom file + For example, the fields in the snippet below may be added in a Maven pom.xml file to work with the latest nightly release.

<repositories>
 	<repository>
@@ -239,7 +229,7 @@ 

1. Setup

<dependency> <groupId>com.github.MKLab-ITI</groupId> <artifactId>JGNN</artifactId> - <version>v1.3.24-nightly</version> + <version>SNAPSHOT</version> </dependency> </dependencies>
@@ -248,11 +238,10 @@

1. Setup

2. Quickstart

Here we demonstrate usage of JGNN for node classification. This is an inductive learning - task where node labels are predicted given a graph's structure, node features, and a few known - labels in the graph. - Classifying graphs is also supported, but it is a harder task to explain and set up. - GNN architectures for the chosen node classification task are typically written - as message-passing mechanisms; these diffuse node representations across edges, where + task that predicts node labels given a graph's structure, node features, and some already known + labels. Classifying graphs is also supported, although it is a harder task to explain and set up. + GNN architectures for node classification are typically written + as message passing mechanisms; they diffuse node representations across edges, where node neighbors pick up, aggregate (e.g., average), and transform incoming representations to update theirs. Alternatives that boast higher expressive power also exist and are supported, but simple architectures

2. Quickstart

practical problems [Krasanakis et al., 2024]. Simple architectures also enjoy reduced resource consumption.

-

The demonstration starts by loading the Cora dataset from those shipped - with the library for out-of-the-box testing. The first time this dataset is - constructed, it automatically downloads some data and stores them in a local downloads/ +

Our demonstration starts by loading the Cora dataset from those shipped + with the library for out-of-the-box testing. The first time an instance of this dataset is created, + it downloads its raw data from a web resource and stores them in a local downloads/ folder. The data are then loaded into a sparse graph adjacency matrix, a dense node feature matrix, and a dense node label matrix. Sparse and dense representations are interchangeable in terms of operations, - but sparse matrices can store graphs with many nodes but relatively smaller degrees with greater memory efficiency. + with the main difference being that sparse matrices are much more efficient when they contain lots of zeros. In the loaded matrices, each row contains the corresponding node's - neighbors, features, or one-hot encoding of labels. We also apply the renormalization trick and symmetric normalization on - the dataset using in-place operations. The first of these makes GNN computations numerically stable by adding self-loops - to all nodes, whereas the second is required by the model we impelement next. + neighbors, features, or one-hot encoding of labels. We apply the renormalization trick and + symmetric normalization on the dataset's adjacency matrix using in-place operations for minimal memory footprint; + the first of the two makes GNN computations numerically stable by adding self-loops + to all nodes, while symmetric normalization is required by spectral-based GNNs, such as + the model we implement next.

+ neighbors, features, or one-hot encoding of labels. We apply the renormalization trick and + symmetric normalization on the dataset's adjacency matrix using in-place operations for minimal memory footprint; + the first of the two makes GNN computations numerically stable by adding self-loops + to all nodes, while renormalization is required by spectral-based GNNs, such as + the model we implement next.

Dataset dataset = new Cora();
 dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();
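
To make the loaded representations concrete, the following minimal sketch prints their dimensions. It only assumes the dataset accessors used elsewhere in this guidebook (samples(), features(), labels()); the printed numbers depend on the dataset.

long numNodes = dataset.samples().getSlice().size();   // rows shared by all three matrices
long numFeats = dataset.features().getCols();          // feature dimensions per node
long numClasses = dataset.labels().getCols();          // one-hot label dimensions
System.out.println(numNodes+" nodes, "+numFeats+" features, "+numClasses+" classes");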
-

We now incrementally construct a trainable model using symbolic definitions resembling math - notation. Symbolic expressions are part of a scripting-like language, called Neuralang, - that is covered in section 4. However, for faster onboarding we stick to - the FastBuilder class for creating models. This class's constructor - creates two constants A and h0 from its two arguments, respectivel holding - the graph's adjacency matrix and node features. Other constants - and input variables can be set too, but more on this later. After instantiation, we use the - following model builder methods to constuct a model. Some of these methods parse symbolic expressions - to fastly declare machine learning components. +

We now incrementally create a trainable model using symbolic expressions that resemble math + notation. The expressions are part of a scripting language, called Neuralang, + that is covered in section 3.3. However, for faster onboarding, stick to + the FastBuilder class for creating models; this omits some of + the language's features in favor of providing programmatic shortcuts for boilerplate code. Its constructor + accepts two arguments A and h0, respectively holding + the graph's adjacency matrix and node features. These are internally set as constant symbols that + parsed expressions can use. Other constants and input variables can be set too, + but more on this later. After instantiation, use some + model builder methods to declare a model's dataflow. Some of these methods parse the aforementioned expressions.

- JGNN promotes method chains, where the modelBuilder instance is returned by each of + JGNN promotes method chains, where the builder's instance is returned by each of its methods to access the next one. Below we use this programming pattern to implement the Graph Convolutional Network (GCN) architecture [Kipf and Welling, 2017]. Details on the symbolic parts of definitions are presented later but, for the time being, we point to the matrix and vector Neuralang functions. These inline declarations of learnable parameter for - given dimensions and regularization. The builder stores internally a constructed model, and the latter - can be retrieved through modelBuilder.getModel(). + given dimensions and regularization. Access the builder's created model via modelBuilder.getModel().

long numSamples = dataset.samples().getSlice().size();
@@ -314,33 +305,34 @@ 

2. Quickstart

.autosize(new EmptyTensor(numSamples));
-

Training epochs for the constructed model can be implemented +

Training epochs for the created model can be implemented manually, by passing inputs, obtaining outputs, computing losses, and triggering backpropagation on an optimizer. As these steps may be complicated, JGNN automates common training patterns with a ModelTraining class. Instances of this class accept a method chain notation to set their parameters, like the number of epochs, patience for early stopping, the employed optimizer, and loss functions. An example is presented below, where Adam optimization with learning rate 0.01 is performed, and a verbose - variation of a validation loss prints the progress progress. To run a full training process, - the defined strategy is passed to the model alongside input data, corresponding output data, as well + variation of a validation loss prints the progress. To run a full training process, + pass the defined strategy to a model alongside input data, corresponding output data, as well as training and validation slices.

-

Notice how, before training is conducted, a parameter initializer is applied on the model for cold - start (as opposed to a warm start that continues on the outcome of previous training). - Selecting an initilizer is not part of training strategies +

In the example, a parameter initializer is applied on the model before training is conducted. + This is a cold start scenario, as opposed to a warm start that continues training already + trained parameters. + Selecting an initializer is not part of training strategies to signify its model-dependent nature; dense layers should maintain the expected input variances in the output before the first epoch, and therefore the initializer depends on the type of activation functions. Moreover, the graph's adjacency matrix and node features are already declared as constants by the FastBuilder constructor, as node classification takes place on the same graph - with fully known node features. Instead, what is considered as inputs and outputs in this case - are the node identifiers, which in the classify method above are used to gather - the outputs of respective nodes, and corresponding labels. Labels that are not known, for example because - they refer to test data, still need to have some value, so as a convention if your are working - with your own data leave the one-hot label encoding of test nodes as zeroes. Doing so in this - example would not affect the outcome either. To recap, our full dataset consists of all node - identifiers and corresponding labels. The last two arguments of the train method + with fully known node features. Architecture inputs are the node identifiers, which in the + classify method above are used to gather + the predictions on respective nodes, and desired outputs are the corresponding labels from + the dataset. Labels that are not known still need to have some value; as a convention when working + with your own data, leave the one-hot label encoding of test nodes as zeroes. Doing so in our + present example would not affect the outcome either. + The last two arguments of the train method then accept training and validation data slices. Slices are effectively lists of integer entries pointing to rows of inputs and outputs - find more later.

ModelTraining trainer = new ModelTraining()
 	.setOptimizer(new Adam(0.01))
@@ -360,7 +352,7 @@ 

2. Quickstart

nodes.range(0.6, 0.8) // validation slice );
-

Trained models and their generating builders can be saved and loaded. These next snippet demonstrates +

Trained models and their generating builders can be saved and loaded. The next snippet demonstrates how raw predictions can be made too. During this process, some matrix manipulation operations are employed to obtain transparent access to parts of input and output data of the dataset. @@ -384,51 +376,43 @@
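
Before that snippet, here is a minimal sketch of the save and load round trip; the file name is illustrative and the builder is the one constructed above. save(Path) serializes the builder together with its model, and the static ModelBuilder.load(Path) restores both.

modelBuilder.save(Paths.get("gcn_cora.jgnn"));                      // serialize builder and model
ModelBuilder restored = ModelBuilder.load(Paths.get("gcn_cora.jgnn"));
Model restoredModel = restored.getModel();                          // ready to make predictions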

2. Quickstart

3. GNN Builders

We already touched on the subject of model builders in the quickstart section, - where we saw one of them in action. There exist different kinds of - builders that offer kinds of convenience during model definition. - All of then support method chain notation. Currently implemented builders are: + where one of them was used to create a model. There exist different kinds of + builders that offer different conveniences. All of them support the method chain pattern.

    -
  • GNNBuilder - Parses simple Neuralang expressions. Prefer using FastBuilder, - which offers a superset of this one's functionalities.
  • +
  • GNNBuilder - Parses simple Neuralang expressions.
• FastBuilder - Extends the GNNBuilder class with methods that inject - boilerplate code often used in node classification. Use this builder of your want to keep track + boilerplate code for the inputs, outputs, and layers of node classification tasks. + Prefer this builder if you want to keep track of the whole model definition in one place within Java code.
  • Neuralang - Extends the GNNBuilder class so that it can parse all aspects - of the Neuralang language, especially the expressions responsible for handling configuration. + of the Neuralang language, such as functional declarations of machine learning modules, + where parts of function signatures manage configuration hyperparameters. Use this builder to maintain model definitions in one place (e.g., packed in one String - variable, or in one read file) to avoid weaving symbolic expressions in Java code.
  • + variable, or in one file) and avoid weaving symbolic expressions in Java code.
- In this section we cover the first two builder classes, and leave integration with Neuralang - for section 4. In its place, we cover debugging mechanisms that + In this section we cover these three builder classes and summarize debugging mechanisms that check the integrity of constructed models, visualize their data flow, and monitor specific data at runtime.

3.1. ModelBuilder

-

This is the base model builder class that is extended by others. We describe it separately - because it offers a wide breadth of functionalities that other builders inherit. Before looking - at how to use it, we need to see what JGNN models look like under the hood. Models are effectively collections - of NNOperation instances, each of which is an operation with specified inputs and outputs of - JGNN's Tensor type. Tensors will be covered later; for now, it suffices to know that they are - numerical vectors, which are sometimes endowed with matrix dimensions. - Models can be manually written using Java code only. Below is an example for computing the expression - y=log(2*x+1) without any trainable parameters.

-

This definition is still readable for simple - expressions, but can quickly become cumbersome to read and maintain once actual architectures are created - hence the need for - model builders that parse simple symbolic expressions. This guidebook does not list available NNOperation - classes, as they are rarely used directly. For more information visit the respective modules - in JGNN's Javadoc, namely +

This is the base model builder class; it offers a wide breadth of functionalities that other builders extend. + Before looking at how to use it, though, we need to see what JGNN models look like under the hood. + Models are collections of NNOperation instances, each representing a numerical computation with + specified inputs and outputs of + JGNN's Tensor type. Tensors will be covered later; for now, it suffices to think of them as + numerical vectors, which are sometimes endowed with matrix dimensions. This guidebook does not list operation classes, as they are rarely used directly and can be found in the Javadoc, namely nn.inputs, nn.activations, and nn.pooling. + Create models in pure Java like below. The example computes the expression + y=log(2*x+1) without any trainable parameters. + After defining models, run them with the method Tensor Model.predict(Tensor...). + This takes as input one or more comma-separated tensors that match the model's + inputs (in the same order) and computes a list of output tensors. If inputs are dynamically created, an overloaded version of the same method supports an array list of input tensors Tensor Model.predict(ArrayList&lt;Tensor&gt;). + The snippet below includes a prediction for an input that consists of one tensor of one element.

Variable x = new Variable();
@@ -448,18 +432,33 @@ 

3.1. ModelBuilder

System.out.println(model.predict(Tensor.fromDouble(2)));
-

Let us now recreate the above code using the base ModelBuilder class. - After creating the builder, we use a method chain to first declare an inpute variable - with the .var(String) method, declare a symbolic expression with the - .operation(String) method, and finally add one of the - symbols as the model's output with the .out(String) method. - The first and last methods accect only one symbol, whereas the operation parses a full expression - that involves the operations described next. The expression is typically an assignment. There may be - multiple operations, parsed through either multiple method calls or separate with a semicolon ; - in the expression. - Constructed models may also have multiple inputs and outputs. All methods need to find - previously declared symbols, so for example .out("symbol") throws an exception - if no operation previously declared what this symbol is. +

Judging by the fact that several lines of code are needed to declare even simple expressions, + pure Java code for creating full models tends to be cumbersome to read and maintain - hence the need for + builders that construct the models from concise symbolic expressions. Let us recreate the above example + with the ModelBuilder class. + After instantiating the builder, use a method chain to declare an input variable + with the .var(String) method, parse an expression with the + .operation(String) method, and finally declare which symbol holds + outputs with the .out(String) method. + The first and last of these methods can be called multiple times + to declare several inputs and outputs. Inputs need to be only one symbol, but a whole expression + for evaluation can be declared in outputs. +

+

The operation parses String expressions that are typically structured + as assignments to symbols; the right-hand side of assignments accepts several operators and functions that + are listed in the next table. Models allow multiple operations too, which are parsed through either multiple + method calls or by being separated with a semicolon ; within larger String expressions. + All methods need to use previously declared symbols. For example, parsing .out("symbol") + throws an exception if no operation has previously assigned a value to the symbol and it has not been declared as an input. For logic + safety, symbols cannot be overwritten or set to updated values outside of Neuralang functions. + Finally, the base model builder + class supports a roundabout declaration of Neuralang functions with expressions like this snippet taken from the Quickstart + section: + .function("gcnlayer", "(A,h){return A@(h@matrix(?, hidden, reg))+vector(?);}"). + In this, the first method argument is the declared function symbol's name, and the second must have the function's arguments enclosed in + parentheses and its body enclosed in curly brackets. Learn more about Neuralang functions in + section 3.3.

ModelBuilder modelBuilder = new ModelBuilder()
@@ -469,41 +468,45 @@ 

3.1. ModelBuilder

System.out.println(model.predict(Tensor.fromDouble(2)));
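
As a hedged illustration of the roundabout function declaration described above (the function name, symbols, and dimensions are made up for this sketch, and the input x is assumed to be a matrix with feats columns):

ModelBuilder fnBuilder = new ModelBuilder()
	.config("feats", 2)
	.config("hidden", 4)
	.config("reg", 0.005)
	.var("x")
	.function("affine", "(x){return x@matrix(feats, hidden, reg)+vector(hidden);}")
	.operation("h = relu(affine(x))")
	.out("h");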
-

Model definitions so far are too simple for practical machine learning needs. - Next, we show how to add training parameters that depend on hyperparameters, - as well as constants. Of these, trainagle parameters are constructed by - inlining the need to have them in parsed expressions with the matrix - and vector functions described in the next table. (There is also an equivalent Java - method .param(String, Tensor), but its usage is discouraged - to keep code simple.) - On the other hand, configuration hyperparameters and constants are declared - with the model builder's chain methods .config(String, double) - and .const(String, Tensor) respectively. - Constants refer to tensors that are unaffected by training - though you can edit them externally. - In fact, both numbers in the last snippet's symbolic definition are parsed into constants. - On the other hand, hyperparameters refer to symbols whose values affect the outcome of symbolic - parsing, for example by declaring dimension sizes for trainable parameters. +

Model definitions have so far been too simple to be employed in practice; + we need trainable parameters, which are created inline with the matrix + and vector functions. There is also an equivalent Java + method ModelBuilder.param(String, Tensor) that assigns an initialized Tensor + to a variable name, but its usage is discouraged to keep model definitions simple. + Additionally, there may be constants and configuration hyperparameters. Of these, constants reflect + untrainable tensors and are set with ModelBuilder.constant(String, Tensor), + whereas configuration hyperparameters are numerical values used by the parser and + set with ModelBuilder.config(String, double), or + ModelBuilder.config(String, String) if the second argument value + should be copied from another configuration. + Both numbers in the last snippet's symbolic definition are internally parsed into constants. + On the other hand, hyperparameters can be used as arguments for dimension sizes and regularization. + Retrieve previously set hyperparameters through double ModelBuilder.getConfig(String) + or double ModelBuilder.getConfigOrDefault(String, double); the latter + replaces the error with a default value if the configuration is not found. The usefulness of retrieving + configurations will become apparent later on.
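
The following hedged sketch gathers these declarations in one chain, together with hyperparameter retrieval; all names and numbers are illustrative, and the input x is assumed to be a matrix with feats columns.

ModelBuilder configured = new ModelBuilder()
	.config("feats", 8)                        // hyperparameters usable by parsed expressions
	.config("hidden", 16)
	.config("reg", 0.005)
	.config("width", "hidden")                 // copies the value of another configuration
	.var("x")
	.operation("h = relu(x@matrix(feats, width, reg) + vector(width))")
	.out("h");
double hidden = configured.getConfigOrDefault("hidden", 64);  // yields 16; 64 only if the configuration were missing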

Next is a table of available operations. Prefer using hyperparameters (set via .config(String, double)) - for matrix and vector creation, as these transfer their names to respective dimensions for error checking. - For dropout, matrix, and vector you can also use the short - names drop, mat, vec. + Next is a table of available operations that you can use in expressions. Standard + rules for operator priority and parentheses apply. + Prefer using configuration hyperparameters + for matrix and vector creation, as these transfer their names to respective dimensions for error + checking - more on this in section 3.4.

- + - + @@ -533,17 +536,27 @@

3.1. ModelBuilder

- + - + - + - + + + + + + + + + + + @@ -565,6 +578,11 @@

3.1. ModelBuilder

+ + + + + @@ -586,9 +604,19 @@

3.1. ModelBuilder

- + - + + + + + + + + + + + @@ -610,6 +638,16 @@

3.1. ModelBuilder

+ + + + + + + + + + @@ -620,25 +658,40 @@

3.1. ModelBuilder

+ + + + + + + + + +
Symbol TypeNumber of inputsDescription
x = expr OperatorAssign to variable x the outcome of executing expression expr.Assign to variable x the outcome of executing expression expr. This expression does not evaluate to anything.
x + y
x [y] OperatorGathers the rows of x with indexes y.Gathers the rows of x with indexes y. Indexes are still tensors, whose elements are cast to integers during this operation.
transpose(x)transpose(A) FunctionTransposes matrix x.Transposes matrix A.
log(x) FunctionApply logarithm on each tensor element.Applies a logarithm on each element of tensor x.
exp(x)FunctionExponentiates each element of tensor x.
nexp(x)FunctionExponentiates each non-zero element of tensor x. Typically used for neighbor attention (see below).
relu(x) Function Apply training dropout on tensor x with constant dropout rate hyperparameter rate.
drop(x, rate)FunctionShorthand notation dropout.
lrelu(x, slope) Function Apply a sum reduction on x, where dim is either dim:'row' (default) or dim:'col'.
sum(x, dim)mean(x, dim) FunctionApply a sum reduction on x, where dim is either dim:'row' (default) or dim:'col'.Apply a mean reduction on x, where dim is either dim:'row' (default) or dim:'col'.
L1(x, dim)FunctionApply an L1 normalization on x across dimension dim, where dim is either dim:'row' (default) or dim:'col'.
L2(x, dim)FunctionApply an L2 normalization on x across dimension dim, where dim is either dim:'row' (default) or dim:'col'.
max(x, dim) Function Generate a matrix parameter with respective hyperparameter dimensions, and L2 regularization hyperparameter reg.
mat(rows, cols)FunctionShorthand notation matrix.
mat(rows, cols, reg)FunctionShorthand notation matrix.
vector(len) Function Function Generate a vector with size hyperparameter len, and L2 regularization hyperparameter reg.
vec(len)FunctionShorthand notation vector.
vec(len, reg)FunctionShorthand notation vector.

3.2. FastBuilder

The FastBuilder class for building GNN architectures extends the generic - LayerBuilder with common graph neural network operations. The main difference - is that it is initialized with a square matrix A, which is typically expected to - be a normalization of the (sparse) adjacency matrix, and a feature matrix h0. - This parses the notation symbol{l}, - where the layer counter {l}. - It also offers a FastBuilder.layer(String) chained method that substitutes - the notation symbol{l+1} with the next layer's counter, parses the operation - and increments the layer counter by one. Example usage is shown below, where symbolic expressions - read similarly to what you would find in a paper. + ModelBuilder with common graph neural network operations. The main difference + is that it has two constructor arguments, namely a square matrix A that + is typically a normalization of the (sparse) adjacency matrix, + and a feature matrix h0. + This builder further supports the notation symbol{l}, + where the layer counter replaces the symbol part {l} with 0 for the first layer, + 1 for the second, and so on. Prefer the notation h{l} to refer to the node representation + matrix of the current layer; for the first layer, this is parsed as h0, which is the constant + set by the constructor. + FastBuilder instances also offer a FastBuilder.layer(String) + chain method to compute neural layer outputs. This is a variation of operation parsing, where + the symbol part {l+1} is substituted with the next layer's counter, + the expression is parsed, and the layer counter is incremented by one. Example usage is shown below, where + symbolic expressions read similarly to what you would find in a paper.

FastBuilder modelBuilder = new FastBuilder(adjacency, features)  // sets A, h0
-	.layer("h{l+1}=relu(A@(h{l}@matrix(features, hidden, reg))+vector(hidden))")  // parses h1 = ...
-	.layer("h{l+1}=A@(h{l}@matrix(hidden, classes, reg))+vector(classes)"); // parses h2 = ...
+	.layer("h{l+1}=relu(A@(h{l}@matrix(features, hidden, reg))+vector(hidden))")  // parses h1 = relu(A@(h0	@ ...
+	.layer("h{l+1}=A@(h{l}@matrix(hidden, classes, reg))+vector(classes)"); // parses h2 = A@(h1@ ...
 	

Before continuing, let us give some context for the above implementation. @@ -649,55 +702,55 @@

3.2. FastBuilder

weighted sum is compatible with spectral graph signal processing. The operation to perform one propagation can be written as .layer("h{l+1}=A @ h{l}"). The propagation's outcome is typically transformed further by passing through a dense - layer. Several practices that need more compute to gain marginal accuracy improvements can - be implemented, as shown below. However, it is recommended to stay away from these kinds - of complex architectures when learning from large graphs, as JGNN is designed to be lightweight and not - fast. Consider using other systems to learn complex GNNs in GPUs if 1-2% accuracy gains are worth the - incurred loss of portability.

+ layer.

+ +

Several practices have been proposed as improvements of this scheme. + However, they tend to incur marginal accuracy improvements at the cost of more compute. + Stay away from complex architectures when learning from large graphs, as JGNN is designed to + be lightweight but does not (and is not planned to) leverage GPUs. + The library still supports the improvements listed below, since they could be used when + running time is not a pressing issue (e.g., for transfer or stream learning that applies updates + for a few epochs), or to analyse smaller graphs:

  • Edge dropout - Applying dropout on the adjacency - matrix on each layer with .layer("h{l+1}=dropout(A,0.5) @ h{l}").
  • + matrix on each layer with .layer("h{l+1}=dropout(A,0.5) @ h{l}"). Usage of this operation seems innocuous, + but it disables a bunch of caching optimizations that occur under-the-hood.
  • Heterogeneity - Some rcent approaches explicitly account for high-pass frequency diffusion by accounting for the graph Laplacian too. Insert this into the - architecture as a normal constant like so: .constant("L", adjacency.negative().cast(Matrix.class).setMainDiagonal(1))
  • + architecture as a normal constant like so: .constant("L", adjacency.negative().cast(Matrix.class).setMainDiagonal(1))
  • Edge attention - Performs the dot product of edge nodes to create new edge weights per the mathematical formula A.(hTh), where - A is a sparse adjacency matrix, - . is the Hadamard product + A is a sparse adjacency matrix, the dot + . represents the Hadamard product (element-by-element multiplication), and - h is a dense matrix whose rows hold + h is a dense matrix whose rows hold respective node representations. JGNN efficiently implements this operation with the Neuralang function att(A, h). For example, weighted adjacency matrices for each layer of gated attention networks are implemented as .operation("A{l} = L1(nexp(att(A, h{l})))").
  • General message passing - JGNN also supports the the fully generized - message passing scheme between node neighbors to support - complex types of relational analysis + message passing scheme between node neighbors of more complex relational analysis [Velickovic, 2022]. In this generalization, each edge is responsible for appropriately - transforming and propagating representations to node neighbors. - To implement this, messages in GNNs can - be matrices whose rows correspond to edges and - columns to edge features. In the simplest scenario, - create such matrices by gathering the features of the edge - source and destination nodes by accessing the respective - elements of the previous layer's feature matrix - h{l}. To do this, first - obtain edge source indexes + transforming and propagating representations to node neighbors; + create message matrices whose rows correspond to edges and + columns to edge features by gathering the features of the edge + source and destination nodes. Programmatically,obtain edge source indexes src=from(A) and destination indexes - dst=to(A) where A is - an adjacency matrix. Then use the horizontal concatenation operation - | to concatenate node features. - One may also concatenate edge features. Given a constructed message, define any kind of ad-hoc + dst=to(A), where A is + the adjacency matrix. Then use the horizontal concatenation operation + | to concatenate node features. + One may also concatenate edge features. Given a constructed message, + define any kind of ad-hoc mechanism or neural processing of messages with traditional matrix operations (take care to define correct matrix sizes for dense transformations, e.g., twice the number of - columns as H{l} in the previous + columns as h{l} in the previous snippet). For any kind of LayeredBuilder, - don't forget that message{l} within + don't forget that message{l} within operations is needed to obtain a message from the representations - h{l} that is not accidentally shared with future layers. + h{l} that is not accidentally shared with future layers. Receiver mechanisms need to perform some kind of reduction on messages. JGNN implements summation reduction, given that this has the same @@ -705,7 +758,7 @@

    3.2. FastBuilder

    reduction but is easier to backpropagate through. Perform this like below. The sum is weighted per the values of - the adjacency matrix A. Thus, + the adjacency matrix A. Thus, perform adjacency matrix normalization only if you want such weighting to occur. @@ -723,14 +776,14 @@

    3.2. FastBuilder

    So far, we discussed the propagation mechanisms of GNNs, which consider the features of all nodes. However, in node classification settings, training data labels - are typically available only for certain nodes (even if all node - features are required to make any prediction). We thus - need a mechanism to retrieve the predictions of the top + are typically available only for certain nodes, despite + all node features being required to make any prediction. + We thus need a mechanism to retrieve the predictions of the top layer for those nodes, for example before applying a softmax. This is achieved in the snippet below, which uses the gather operations through brackets. Alternatively, chain the FastBuilder.classify() - method, which injects this code. + method, which injects this exact code.

    modelBuilder
     	.var("nodes")
    @@ -738,28 +791,211 @@ 

    3.2. FastBuilder

    .operation("output = h{l}[nodes]") .out("output");
    +

So far we tackled only + equivariant GNNs, whose outputs + follow any node permutations applied on their inputs. + In simple terms, if the order of node identifiers is modified + (both in the graph adjacency matrix and in node feature matrices), + the order of rows will be similarly modified for outputs. + Most operations described so far are equivariant (those that are not + are explicitly noted), so that their + composition is also equivariant. However, there + are cases where created GNNs should be invariant, + which means that they should create predictions that remain + the same despite any input permutations. Invariance is + the property to impose when classifying graphs, where + one prediction should be made for the whole graph.

    + +

    Imposing invariance is simple enough; take an equivariant + architecture and then apply an invariant operation on top. + You may want to perform further transformations (e.g., some + dense layers) afterwards, but the general idea remains + the same. JGNN offers two types of invariant operations, also + known as pooling: + reductions and sort-based pooling. Of these, reductions + are straightforward to implement + by taking a dimensionality reduction mechanism (min, + max, sum, mean) + applying it column-wise on the output feature matrix. + Recall that each row has the features of a different node, + so the result of reduction yields an one-dimensional vector that, + for each feature dimension, aggregates feature values across all nodes. +

    + +

    Reduction-based pooling is conceptually simple, but + could fail to distinguish between the structural positioning of + nodes to be pooled. One computationally light alternative, + which JGNN implements, is sorting nodes based on learned features + before concatenating their features into one vector for + each graph. This process is further simplified by + keeping the top reduced number of nodes to + concatenate their features, where the order is + determined by an arbitrarily selected feature (in our + implementation: the last one, with the previous feature + being used to break ties, and so on). + The idea is that the selected feature determines + important nodes whose information can be + adopted by others. To implement this scheme, JGNN + provides independent operations to sort nodes, gather + node latent representations, and reshape matrices into + row or column tensors with learnable transformations to + class outputs. These components are demonstrated in the + following code snippet: +

    + +
    long reduced = 5;  // input graphs need to have at least that many nodes
    +long hidden = 8;  // many latent dims reduce speed without GPU parallelization
    +
    +ModelBuilder builder = new LayeredBuilder()        
    +.var("A")  
    +.config("features", 1)
    +.config("classes", 2)
    +.config("reduced", reduced)
    +.config("hidden", hidden)
    +.layer("h{l+1}=relu(A@(h{l}@matrix(features, hidden))+vector(hidden))")
    +.layer("h{l+1}=relu(A@(h{l}@matrix(hidden, hidden))+vector(hidden))")
    +.concat(2) // concatenates the outputs of the last 2 layers
    +.config("hiddenReduced", hidden*2*reduced)  // 2* due to concatenation
    +.operation("z{l}=sort(h{l}, reduced)")  // z{l} are node indexes
    +.layer("h{l+1}=reshape(h{l}[z{l}], 1, hiddenReduced)")
    +.layer("h{l+1}=h{l}@matrix(hiddenReduced, classes)")
    +.layer("h{l+1}=softmax(h{l}, dim: 'row')")
    +.out("h{l}");
    + + +

    3.3. Neuralang

    + +

Neuralang scripts consist of functions that declare machine learning + components and their interactions using a syntax inspired by the + Mojo + language. Use a Rust highlighter to cover all keywords, though. + Before explaining how to use the Neuralang model builder, + let us get a sense of the language's syntax by presenting and analysing code that leads to a full + architecture definition. First, look at the classify + function, which for completeness is presented below. + This takes two tensor inputs: nodes that correspond to identifiers + indicating which nodes should be classified (the output has a number of rows equal to the + number of identifiers), and a node feature matrix h. + It then computes and returns a softmax for the features of the specified nodes. + Aside from main inputs, the function's + signature also has several configuration values, whose defaults + are indicated by a colon : (only configurations have defaults and, conversely, anything with a default is a configuration). + The same notation is used to + set/overwrite configurations when calling functions, as we do for softmax + to apply it row-wise. Think of configurations as keyword + arguments of typical programming languages, with the difference that + they control hyperparameters, like dimension sizes or regularization. + Write exact values for configurations, as no arithmetic is performed + on them for now. For example, a configuration + patience:2*50 creates an error.

    + +
    fn classify(nodes, h, epochs: !3000, patience: !100, lr: !0.01) {
    +	return softmax(h[nodes], dim: "row");
    +}
    + +

Exclamation marks ! before numbers broadcast values + to all subsequent function calls that have configurations with the same + name. The broadcasted defaults overwrite any already existing default values with the same + name, but all defaults are replaced by values explicitly set when calling functions. + For example, take advantage of this prioritization to set specific dimensions for some layers. Importantly, + broadcasted values are stored within JGNN's Neuralang model + builder too; this is useful for Java integration, for example to retrieve training hyperparameters + from the model. To sum up, configuration values have the following priority, from strongest to weakest:
    + 1. Arguments set during the function's call.
+ 2. Broadcasted configurations (the last broadcasted value, including configurations set by Java).
    + 3. Function signature defaults.
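
As a small hedged sketch of these rules (the function and configuration names below are made up), a broadcasted value overrides the signature defaults of later calls, while explicitly passed values override everything; the question mark lets JGNN infer the input dimension, as explained next:

fn dense(h, hidden: 16) {
	return relu(h@matrix(?, hidden) + vector(hidden));
}
fn encoder(h, hidden: !64) {
	h = dense(h);                 // uses the broadcasted hidden: 64 instead of the signature default 16
	return dense(h, hidden: 8);   // an explicit value at the call overrides the broadcast
}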
    +

    + +

Next, let us look at the gcnlayer function. This accepts + two parameters: an adjacency matrix A and input feature matrix h. + The configuration hidden: 64 in the function's signature + specifies the default number of hidden units, + whereas reg: 0.005 is the L2 regularization applied + during machine learning. The question mark ? + in matrix definitions lets the autosize feature of JGNN determine + dimension sizes based on a test run - if possible. + Finally, the function returns the output of a + GCN layer. Similarly, look at the gcn function. This declares + the GCN architecture and has as configuration the number of output classes. + The function basically consists of two gcnlayer layers, + where the second's hidden units are set to the value of output classes. + The number of classes is unknown as of writing the model, and thus is externally declared + with the extern keyword to signify that this value should always be provided + by Java's side of the implementation.

    + +
    fn gcnlayer(A, h, hidden: 64, reg: 0.005) {
    +	return A@h@matrix(?, hidden, reg) + vector(hidden);
    +}
    +fn gcn(A, h, classes: extern) {
    +	h = gcnlayer(A, h);
    +	h = dropout(relu(h), 0.5);
    +	return gcnlayer(A, h, hidden: classes);
    +}
    +
    + + +

We now move to parsing our declarations with the Neuralang + model builder and using them to create an architecture. To this end, save your Neuralang code + to a file and get it as a path Path architecture = Paths.get("filename.nn");, + or avoid external files by inlining the definition within Java code through + a multiline String per String architecture = """ ... """;. + Below, this string is parsed within a functional programming chain, where + each method call returns the modelBuilder instance to continue calling more methods.

    + + +

For our model builder, we set remaining hyperparameters and overwrite the default value + for "hidden" using the + .config(String, double) method. Now that + we know about broadcasts, this is the method that implements them. We also determine + which variables are constants, namely the adjacency matrix A and node + representation h, as well as that the node identifiers nodes form a variable that serves + as the architecture's input. There could be multiple inputs, so this distinction of what + is a constant and what is a variable depends mostly on which quantities change + during training. In the case of node classification, both the adjacency matrix and + node features remain constant, as we work in one graph. Finally, the definition + sets a Neuralang expression as the architecture's output + by calling the .out(String) method, + and applies the .autosize(Tensor...) method to infer hyperparameter + values denoted with ? from an example input. + For faster completion of this process, provide a dataless tensor of node identifiers as input, + like below.

    + +
    long numSamples = dataset.samples().getSlice().size();
    +long numClasses = dataset.labels().getCols();
    +ModelBuilder modelBuilder = new Neuralang()
    +	.parse(architecture)
    +	.constant("A", dataset.graph())
    +	.constant("h", dataset.features())
    +	.var("nodes")
    +	.config("classes", numClasses)
    +	.config("hidden", numClasses+2)  // custom number of hidden dimensions
    +	.out("classify(nodes, gcn(A,h))")  // expression to parse into a value
    +	.autosize(new EmptyTensor(numSamples));
    +
    +System.out.println("Preferred learning rate: "+modelBuilder.getConfig("lr"));
    -

    3.3. Debugging

    +

    3.4. Debugging

    JGNN offers high-level tools for debugging architectures. Here we cover what diagnostics to run, and how to make sense of error messages to fix erroneous - architectures. We already mention that model builder - variables should be assigned to variables before + architectures. We already mentioned that model builder + symbols should be assigned to before subsequent use. For example, consider a FastBuilder that tries to parse the expression .layer("h{l+1}=relu(hl@matrix(features, 32, reg)+vector(32))"), - where hl is a typographical error of - h{l}. In this case, an exception is thrown: + where hl is a typographical error of + h{l}. In this case, an exception is thrown: Exception in thread "main" java.lang.RuntimeException: Symbol hl not defined. -

    Internally, models are effectively directed acyclic graphs (DAGs) - that model builders construct. DAGs should not be confused with the graphs + Internally, models are effectively directed acyclic graphs (DAGs) + that model builders create. DAGs should not be confused with the graphs that GNNs architectures analyse; they are just an organization of data flow - NNComponents. During parsing, builders + between NNComponents. During parsing, builders may create temporary variables, which start with - the _tmp prefix and are followed by + the _tmp prefix and are followed by a number. Temporary variables often link - components to other that use them. + components to others that use them. The easiest way to understand execution DAGs is to look at them. The library provides two tools for this purpose: a .print() method @@ -771,12 +1007,9 @@

    3.3. Debugging

    Another error-checking procedure consists of - assert that all model operations eventually affect - some outputs defined by - .out(String). - Computational branches that lead nowhere mess up the - DAG traversal during backpropagation and should be checked - with the + an assertion that all model operations eventually affect + at least one output. Computational branches that lead nowhere mess up the + DAG traversal during backpropagation and should be checked with the method .assertBackwardValidity(). The latter throws an exception if an invalid model is found. Performing this assertion early on in @@ -794,10 +1027,11 @@


    at nodeClassification.APPNP.main(APPNP.java:45)


    Some tensor or matrix methods do not correspond to numerical operations but are only responsible for naming dimensions. Functionally, such methods are largely decorative, but they can improve debugging by throwing errors for incompatible non-null names. For example, adding two matrices with different dimension names will result in an error.


    Arithmetic operations, including matrix multiplication and copying, automatically infer dimension names in the result to ensure that only compatible data types are compared. Dimension names can be freely changed for any Tensor; the change is not backtracked to the data the tensor was derived from, even for see-through data types, such as the outcome of asTransposed(). Matrices effectively have three dimension names: one for their rows, one for their columns, and one for their inner data for as long as they are treated as one-dimensional tensors.


    Operation | Type | Comments
    Tensor setDimensionName(String name) | arithmetic | For naming tensor dimensions (of the 1D space tensors lie in).
    Tensor setRowName(String rowName) | arithmetic | For naming what kind of information matrix rows hold (e.g., "samples"). Defined only for matrices.
    Tensor setColName(String colName) | arithmetic | For naming what kind of information matrix columns hold (e.g., "features"). Defined only for matrices.
    Tensor setDimensionName(String rowName, String colName) | arithmetic | A shorthand of calling setRowName(rowName).setColName(colName). Defined only for matrices.
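    To make dimension naming concrete, here is a small hedged sketch; the sizes and names are arbitrary, DenseMatrix comes from the core.matrix package, and the casts are needed because the naming setters are declared on Tensor.

Matrix a = (Matrix) new DenseMatrix(2, 3).setDimensionName("samples", "features");
Matrix b = (Matrix) new DenseMatrix(2, 3).setDimensionName("samples", "hidden");
a.add(b);  // expected to throw an exception, since the column names "features" and "hidden" differ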

    There are two main mechanisms for identifying logical errors within architectures: a) mismatched dimension sizes, and b) mismatched dimension names. Of the two, dimension sizes are easier to comprehend, since they just mean that operations are mathematically invalid. On the other hand, dimension names need to be determined for starting data, such as model inputs and parameters, and are automatically inferred from operations on such primitives.


    For some operations or layers, dimension names are copied from any hyperparameters. Therefore, for easier debugging, prefer using functional expressions that declare hyperparameters, like below.


new ModelBuilder()
	.config("features", 7)
	.config("hidden", 64)
	.var("x")
	.operation("h = x@matrix(features, hidden)");
    instead of the simpler:

new ModelBuilder().var("x").operation("h = x@matrix(features, hidden)")

    Both mismatched dimensions and mismatched dimension names throw runtime exceptions. The beginning of their console error traces should be examined to locate the issue.


    Here, the architecture encounters mismatched matrix sizes when trying to multiply a 3327x32 SparseMatrix with a 64x6 dense matrix. Understanding the exact error is easy: the inner dimensions of the matrix multiplication do not agree. However, we need to find the error within our architecture to fix it. To do this, the error message states where the problem was encountered.


    builder.operation("h = relu(monitor(x@matrix(features, 64)) + vector(64))")

4. Neuralang

The language

Neuralang scripts consist of functions that declare machine learning components and their interactions, using a syntax inspired by the Mojo language. Use a Rust highlighter to cover all keywords. Before explaining how to use the Neuralang model builder, we first analyse some of the functions of the Quickstart section's code to explain the language's syntax. First, let us look at the classify function, which takes two inputs: nodes, which correspond to the node identifiers to classify, and a node feature matrix h. A softmax is returned for the features of the specified nodes. The function's signature also has several configuration values, whose defaults are indicated by a colon :. The same notation is used to set or overwrite configurations when calling functions, as we do for softmax to apply it row-wise. Think of configurations as keyword arguments of typical programming languages, with the difference that they control hyperparameters, like dimension sizes or regularization. Write exact values for configurations, since for now no arithmetic takes place on them. For example, a configuration patience: 2*50 creates an error.

fn classify(nodes, h, epochs: !3000, patience: !100, lr: !0.01) {
	return softmax(h[nodes], dim: "row");
}

Exclamation marks ! before numbers broadcast values to all subsequent function calls that have configurations with the same name. The broadcasted defaults overwrite the functions' own defaults. Importantly, broadcasted values are also retrievable from JGNN's Neuralang model builder, which is useful for Java integration; in this case, values for the training process are obtained (see below). Configuration values are determined with the following priority:
1. function call arguments
2. broadcasted configurations (the last broadcasted value, including configurations set by Java)
3. function signature defaults
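For instance, assuming the script above is stored in a String named architecture, the broadcasted training values can be read back on the Java side and reused, as in the following sketch.

ModelBuilder builder = new Neuralang().parse(architecture);
double lr = builder.getConfig("lr");              // 0.01, broadcasted by lr: !0.01
int epochs = (int) builder.getConfig("epochs");   // 3000, broadcasted by epochs: !3000
ModelTraining trainer = new ModelTraining()
	.setOptimizer(new Adam(lr))
	.setEpochs(epochs);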

Next, let us look at the gcnlayer function. This accepts two parameters: an adjacency matrix A and an input feature matrix h. The configuration hidden: 64 in the function's signature specifies the default number of hidden units, whereas reg: 0.005 is the L2 regularization applied during machine learning. The question mark ? in matrix definitions lets the autosize feature of JGNN determine dimension sizes based on a test run, if possible. Finally, the function returns the activated output of a GCN layer.

fn gcnlayer(A, h, hidden: 64, reg: 0.005) {
	return A@h@matrix(?, hidden, reg) + vector(hidden);
}

The last function we tackle, gcn, declares the popular Graph Convolutional Network (GCN) architecture and has the number of output classes as a configuration. The function first applies a gcnlayer, and then applies another layer of the same type with the hidden units configuration set to the value of classes, so that the output matches the number of classes. The classes configuration is declared with the extern keyword, which signifies that it has no default and should be provided by the Java side of the implementation.

Java-side integration

We now need to generate a trainable model by incrementally constructing model builders. These differ by which Neuralang capabilities they can parse, mainly on whether they go beyond supporting simple expressions that involve only operators. The builder chosen here supports the whole language, but in turn misses out on some Java-side methods that help fill parts of the architecture with boilerplate patterns. To use the selected builder, save the first Neuralang snippet to a file and read it into a String, for example with Files.readString(Paths.get("filename.nn")). (Use a Rust language highlighter for visual assistance when writing in Neuralang.) Alternatively, avoid external files by inlining the definition within Java code through a multiline string, per String architecture = """ ... """;. This string is parsed within a functional programming chain, where each method call returns the modelBuilder instance so that more methods can be called.
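As a minimal sketch of the file-based route (the file name is illustrative and java.nio.file imports are assumed):

String architecture = Files.readString(Paths.get("filename.nn"));
ModelBuilder modelBuilder = new Neuralang().parse(architecture);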



4. Training


Here we describe how to train a JGNN model created per the previous section's instructions. In general, training a machine learning architecture consists of using some training data and employing some optimization scheme to adjust trainable parameter values based on those data. We start by describing generic patterns for creating graph and node feature data, and then move to the specific data organization of node classification and graph classification tasks. For these, we show how to set up both automated and custom training schemes.


4.1. Create data


JGNN provides dataset classes that can be used out-of-the-box by automatically downloading their data; these can be found in the adhoc.datasets Javadoc. In practice, though, you will want to use your own data. We thus describe how to manually fill in data, as well as operations that manipulate those data. Data manipulation is needed to preprocess neural inputs, post-process learning outcomes, create custom parameters, contribute to the library with more components, or make derivative works based on native Java vector and matrix arithmetics.


5.1. Fill in data

In the simplest case, both the number of nodes or data samples and the number of feature dimensions are known beforehand. If so, create dense feature matrices with the following code.
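A minimal sketch of this preallocation pattern, with illustrative variable names (DenseMatrix lives in the core.matrix package):

Matrix features = new DenseMatrix(numNodes, numFeatures);
for(long node = 0; node < numNodes; node++)
	features.put(node, 0, 1);  // for example, set the first feature of every node to 1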

Creating adjacency matrices is similar to creating preallocated feature matrices. When in doubt, use the sparse format for adjacency matrices, as the allocated memory of dense counterparts scales quadratically with the number of nodes. Note that many GNNs consider bidirectional (i.e., non-directed) edges, in which case both directions should be added to the adjacency matrix. Use the following snippet as a template. Recall that JGNN follows a function chain notation, so each modification returns the matrix instance. Don't forget to normalize or apply the renormalization trick (self-edges) on matrices if these are needed by your architecture, for instance by calling adjacency.setMainDiagonal(1).setToSymmetricNormalization(); after matrix creation.


Matrix adjacency = new SparseMatrix(numNodes, numNodes);
 for(Entry<Long, Long> edge : edges)
	adjacency
 		.put(edge.getKey(), edge.getValue(), 1)
 		.put(edge.getValue(), edge.getKey(), 1);

All tensor operations can be viewed in the core.tensor and core.matrix Javadoc. Of those, the Matrix class extends the concept of tensors with additional operations, like transposition, matrix multiplication, and row and column access. Under the hood, matrices linearly store elements and transform (row, col) positions into the corresponding positions of that linear storage. The outcome of some methods inherited from tensors may need to be typecast back into a matrix (e.g., for all in-place operations).

Operations can be split into arithmetics that combine the values of two tensors to create a new one (e.g., Tensor add(Tensor)), in-place arithmetics that alter a tensor without creating a new one (e.g., Tensor selfAdd(Tensor)), summary statistics that output simple numeric values (e.g., double Tensor.sum()), and element getters and setters. In-place arithmetics follow the same naming conventions as base arithmetics and begin with a "self" prefix for pairwise operations, or with a "setTo" prefix for unary operations. Prefer in-place arithmetics for intermediate calculation steps, as these do not allocate new memory. For example, the following code can be used for creating and normalizing a tensor of ones without using any additional memory.

+ +
Tensor normalized = new DenseTensor(10)
+	.setToOnes()
+	.setToNormalized();
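Relatedly, and as mentioned above, chaining the same in-place calls on a matrix may require typecasting the outcome back into a Matrix, since these methods are declared on Tensor; a small sketch with illustrative sizes:

Matrix normalizedOnes = (Matrix) new DenseMatrix(3, 3)
	.setToOnes()          // inherited in-place Tensor operation
	.setToNormalized();   // returns a Tensor, hence the cast back to Matrix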

Initialize a dense or sparse tensor with its number of elements. If there are many zero elements expected, prefer using a sparse tensor. For example, one-hot encodings for classification problems can be generated with the following code. This creates a dense tensor with numClasses elements and puts at element classId the value 1:

int classId = ...;
+int numClasses = ...;
+Tensor oneHotEncoding = new mklab.JGNN.core.tensor.DenseTensor(numClasses).set(classId, 1);
+ -

5.2. Identifiers

The above snippets all make use of numerical node identifiers. To manage these, JGNN provides an IdConverter class. You can convert hashable objects (such as strings) to such identifiers.


Tensor prediction = labels.accessRow(nodeId);
long predictedClassId = prediction.argmax();
System.out.println(classIds.get(predictedClassId));
- -

5.3. Tensor Operations

-

Tensor operations are performed element-by-element and can be split into the following categories:

- -
    -
  • arithmetic - combine the values of two tensors to create a new one
  • in-place arithmetic - combine the values of two tensors to alter the first one
  • summary statistics - output simple numeric values
  • element access - manipulation of specific values
- -

In-place arithmetics follow the same naming conventions as base arithmetics and begin with a "self" prefix for pairwise operations, or a "setTo" prefix for unary operations. First we present commonly used operations applicable to all tensors, whose functionality can be inferred from their names and argument types.

Operation | Type | Comments
Tensor copy() | arithmetic |
Tensor zeroCopy() | arithmetic | Zero copies share the same type with the tensor and comprise only zeros.
Tensor add(Tensor) | arithmetic |
Tensor subtract(Tensor) | arithmetic |
Tensor multiply(Tensor) | arithmetic | Multiplication is performed element-by-element.
Tensor multiply(double) | arithmetic |
Tensor normalized() | arithmetic | Division with L2 norm (if non-zero).
Tensor toProbability() | arithmetic | Division with the sum (if non-zero).
Tensor setToZero() | in-place arithmetic |
Tensor selfAdd(Tensor) | in-place arithmetic |
Tensor selfSubtract(Tensor) | in-place arithmetic |
Tensor selfMultiply(Tensor) | in-place arithmetic |
Tensor selfMultiply(double) | in-place arithmetic |
Tensor setToRandom() | in-place arithmetic | Each element is selected from the uniform distribution in the range [0,1].
Tensor setToOnes() | in-place arithmetic |
Tensor setToNormalized() | in-place arithmetic | Division with L2 norm (if non-zero).
Tensor setToProbability() | in-place arithmetic | Division with the sum (if non-zero).
double dot(Tensor) | summary statistics |
double norm() | summary statistics | The L2 norm.
double sum() | summary statistics |
double max() | summary statistics |
double min() | summary statistics |
long argmax() | summary statistics |
long argmin() | summary statistics |
double toDouble() | summary statistics | Converts a tensor with exactly one element to a double (throws an exception if there are more elements).
Tensor set(long position, double value) | element access | Is in-place.
double get(long position) | element access |
Iterator getNonZeroElements() | element access | Traverses all elements for dense tensors, but skips zero elements for sparse tensors. (Guarantee: there is no non-zero element that is not traversed.) Returns element positions. To write code that accommodates both dense and sparse tensors, ensure that iteration over element positions is performed with this method.
String describe() | summary statistics | Description of type and dimensions.
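As a brief illustration, a few of the operations listed above can be combined like in the following sketch (sizes are arbitrary).

Tensor a = new DenseTensor(3).setToRandom();
Tensor b = new DenseTensor(3).setToOnes();
double similarity = a.dot(b);              // summary statistic
long top = a.argmax();                     // position of the largest element of a
Tensor mean = a.add(b).selfMultiply(0.5);  // new tensor from add, then scaled in place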

Initialize a dense or sparse tensor with its number of elements. If there are many zero elements expected, prefer using a sparse tensor.

+ -
long size = ...;
Tensor denseTensor = new mklab.JGNN.core.tensor.DenseTensor(size);
Tensor sparseTensor = new mklab.JGNN.core.tensor.SparseTensor(size);

4.2. Node classification

[This section is under construction]


4.3. Graph classification

-

Dense tensors are serialized with their String toString() method and can be deserialized into new tensors with the constructor mklab.JGNN.core.tensor.DenseTensor(String).
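For example, a serialization round trip per the above could look like this sketch:

Tensor original = new DenseTensor(5).setToRandom();
String serialized = original.toString();
Tensor restored = new mklab.JGNN.core.tensor.DenseTensor(serialized);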

-

The Matrix class extends the concept of tensors with additional operations. Under the hood, matrices linearly store elements and transform (row, col) positions into the corresponding positions of that linear storage. The outcome of some methods inherited from tensors may need to be typecast back into a matrix (e.g., for all in-place operations).

Operation | Type | Comments
Matrix onesMask() | arithmetic | Copy of a matrix with elements set to one.
Matrix transposed() | arithmetic | There is no method for in-place transposition.
Matrix asTransposed() | arithmetic | Shares data with the original.
Tensor getRow(long) | arithmetic | Shares data with the original.
Tensor getCol(long) | arithmetic | Shares data with the original.
Tensor transform(Tensor x) | arithmetic | Outputs a dense tensor that holds the linear transformation of the given tensor (using it as a column vector) by multiplying it with the matrix.
Matrix matmul(Matrix with) | arithmetic | Outputs the matrix multiplication this * with. There is no in-place matrix multiplication.
Matrix matmul(Matrix with, boolean transposeSelf, boolean transposeWith) | arithmetic | Does not perform memory allocation to compute transpositions.
Matrix external(Tensor horizontal, Tensor vertical) | static method | External product of two tensors. Is a dense matrix.
Matrix symmetricNormalization() | in-place arithmetic | The symmetrically normalized matrix.
Matrix setToSymmetricNormalization() | in-place arithmetic | The symmetrically normalized matrix.
Matrix setMainDiagonal(double value) | in-place arithmetic | Sets diagonal elements.
Matrix setDiagonal(long diagonal, double value) | in-place arithmetic | Sets diagonal elements.
Matrix put(long row, long col, double value) | element access | Is in-place.
Iterable<Entry<Long, Long>> getNonZeroEntries() | element access | Similar to getNonZeroElements() but iterates through (row, col) pairs.
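As a short hedged illustration of some of this table's rows (sizes are illustrative; the cast is needed because setToRandom is inherited from Tensor):

Matrix W = (Matrix) new DenseMatrix(3, 2).setToRandom();
Tensor x = new DenseTensor(2).setToOnes();
Tensor y = W.transform(x);                // y = W x, a tensor with 3 elements
Matrix WWt = W.matmul(W.asTransposed());  // 3x3 product that avoids allocating the transpose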

6. Training

Most neural network architectures are designed with the idea of learning to classify nodes or samples. However, GNNs also provide the capability to classify entire graphs.


    // ...
    optimizer.updateAll();
}

Sort Pooling

Up to now, the example code performs a naive mean pooling across all graph node features. However, this can prove insufficient for the top layers, and more sophisticated pooling mechanisms can be deployed to let GNNs differentiate between the structural positioning of nodes to be pooled.

One computationally light approach to pooling, which JGNN implements, is to sort nodes based on learned features before concatenating their features into one vector per graph. This process is further simplified by keeping only a reduced number of top nodes whose features are concatenated; the order is determined by an arbitrarily selected feature (in our implementation: the last one, with the previous feature used to break ties, and so on).

The idea is that the selected feature determines important nodes whose information can be adopted by others. To apply the above operations, JGNN provides independent operations to sort nodes, gather node latent representations, and reshape matrices into row or column tensors with learnable transformations to class outputs. These components are demonstrated in the following code snippet:

long reduced = 5;   // input graphs need to have at least that many nodes
long hidden = 8;    // many latent dims reduce speed without GPU parallelization

ModelBuilder builder = new LayeredBuilder()
    .var("A")
    .config("features", 1)
    .config("classes", 2)
    .config("reduced", reduced)
    .config("hidden", hidden)
    .layer("h{l+1}=relu(A@(h{l}@matrix(features, hidden))+vector(hidden))")
    .layer("h{l+1}=relu(A@(h{l}@matrix(hidden, hidden))+vector(hidden))")
    .concat(2)  // concatenates the outputs of the last 2 layers
    .config("hiddenReduced", hidden*2*reduced)  // 2* due to concatenation
    .operation("z{l}=sort(h{l}, reduced)")
    .layer("h{l+1}=reshape(h{l}[z{l}], 1, hiddenReduced)")
    .layer("h{l+1}=h{l}@matrix(hiddenReduced, classes)")
    .layer("h{l+1}=softmax(h{l}, dim: 'row')")
    .out("h{l}");

Parallelized Training


To speed up graph classification, you can use JGNN's parallelization capabilities to calculate gradients across multiple threads. Parallelization for node classification holds little meaning, as the same propagation mechanism needs to be run on the same graph in parallel. However, this process yields substantial speedup for the graph classification problem.


Parallelization can make use of JGNN's thread pooling to compute gradients, wait for the conclusion of submitted tasks, and then apply all gradient updates. This is achieved by declaring a batch optimizer to gather all the gradients. The entire process is detailed in the following example:

for(int epoch=0; epoch<500; epoch++) {
     // gradient update
        // ...
        System.out.println("iter = " + epoch + " " + acc/dtest.adjucency.size());
    }
}