new verbosity mechanism with loss wrappers
maniospas committed Aug 18, 2024
1 parent fd0034d commit cb305a4
Showing 13 changed files with 939 additions and 416 deletions.
33 changes: 33 additions & 0 deletions JGNN/gcn_cora.jgnn

Large diffs are not rendered by default.

43 changes: 26 additions & 17 deletions JGNN/src/examples/nodeClassification/GCN.java
@@ -14,7 +14,9 @@
import mklab.JGNN.core.Tensor;
import mklab.JGNN.core.empy.EmptyTensor;
import mklab.JGNN.nn.initializers.XavierNormal;
import mklab.JGNN.nn.loss.Accuracy;
import mklab.JGNN.nn.loss.CategoricalCrossEntropy;
import mklab.JGNN.nn.loss.report.VerboseLoss;
import mklab.JGNN.nn.optimizers.Adam;

/**
@@ -27,36 +29,43 @@ public static void main(String[] args) throws Exception {
Dataset dataset = new Cora();
dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();

long numSamples = dataset.samples().getSlice().size();
long numClasses = dataset.labels().getCols();
ModelBuilder modelBuilder = new FastBuilder(dataset.graph(), dataset.features())
.config("reg", 0.005)
.config("classes", numClasses)
.config("hidden", 64)
.function("gcnlayer", "(A,h){z=dropout(A, 0.5)@(h@matrix(?, hidden, reg))+vector(?);return z}")
.layer("h{l+1}=relu(gcnlayer(A, h{l}))")
.config("hidden", "classes")
.layer("h{l+1}=gcnlayer(A, h{l})")
.classify()
.autosize(new EmptyTensor(dataset.samples().getSlice().size()));
.config("reg", 0.005)
.config("classes", numClasses)
.config("hidden", 64)
.function("gcnlayer", "(A,h){Adrop = dropout(A, 0.5); return Adrop@(h@matrix(?, hidden, reg))+vector(?);}")
.layer("h{l+1}=relu(gcnlayer(A, h{l}))")
.config("hidden", "classes") // reassigns the output gcnlayer's "hidden" to be the number of "classes"
.layer("h{l+1}=gcnlayer(A, h{l})")
.classify()
.autosize(new EmptyTensor(numSamples));

ModelTraining trainer = new ModelTraining()
.setOptimizer(new Adam(0.01))
.setEpochs(20)
.setEpochs(3000)
.setPatience(100)
.setVerbose(true)
.setLoss(new CategoricalCrossEntropy())
.setValidationLoss(new CategoricalCrossEntropy());
.setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10));

long tic = System.currentTimeMillis();
Slice nodes = dataset.samples().getSlice().shuffle(100);
Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
Matrix inputData = Tensor.fromRange(nodes.size()).asColumn(); // each node has its identifier as an input
Model model = modelBuilder.getModel()
.init(new XavierNormal())
.train(trainer,
Tensor.fromRange(nodes.size()).asColumn(),
dataset.labels(), nodes.range(0, 0.6), nodes.range(0.6, 0.8));
.train(trainer,
inputData,
dataset.labels(),
nodes.range(0, 0.6), // train slice
nodes.range(0.6, 0.8) // validation slice
);

System.out.println("Training time "+(System.currentTimeMillis()-tic)/1000.);
Matrix output = model.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
//modelBuilder.save(Paths.get("gcn_cora.jgnn"));

Model loadedModel = model;//ModelBuilder.load(Paths.get("gcn_cora.jgnn")).getModel();
Matrix output = loadedModel.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double acc = 0;
for(Long node : nodes.range(0.8, 1)) {
Matrix nodeLabels = dataset.labels().accessRow(node).asRow();
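Taken together, the new lines above replace the fixed-epoch, setVerbose-based training of the old example with early stopping and wrapper-based reporting. A minimal sketch of the resulting trainer configuration, using only classes imported in this file (the 10-epoch interval is simply the value chosen in this example):

ModelTraining trainer = new ModelTraining()
        .setOptimizer(new Adam(0.01))
        .setEpochs(3000)                 // upper bound; early stopping usually ends training sooner
        .setPatience(100)                // stop after 100 epochs without validation improvement
        .setLoss(new CategoricalCrossEntropy())
        .setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10)); // reports the wrapped accuracy every 10 epochs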
70 changes: 39 additions & 31 deletions JGNN/src/examples/tutorial/Quickstart.java
@@ -1,66 +1,74 @@
package tutorial;

import java.nio.file.Files;
import java.nio.file.Paths;

import mklab.JGNN.adhoc.Dataset;
import mklab.JGNN.adhoc.ModelBuilder;
import mklab.JGNN.adhoc.datasets.Citeseer;
import mklab.JGNN.adhoc.datasets.Cora;
import mklab.JGNN.adhoc.parsers.FastBuilder;
import mklab.JGNN.core.Matrix;
import mklab.JGNN.nn.Model;
import mklab.JGNN.nn.ModelTraining;
import mklab.JGNN.core.Slice;
import mklab.JGNN.core.Tensor;
import mklab.JGNN.core.empy.EmptyTensor;
import mklab.JGNN.nn.initializers.XavierNormal;
import mklab.JGNN.nn.loss.Accuracy;
import mklab.JGNN.nn.loss.CategoricalCrossEntropy;
import mklab.JGNN.nn.loss.report.VerboseLoss;
import mklab.JGNN.nn.optimizers.Adam;

/**
* Demonstrates classification with an architecture defined through the scripting engine.
* Demonstrates classification with the GCN architecture.
*
* @author Emmanouil Krasanakis
*/
public class Quickstart {
public static void main(String[] args) throws Exception {
Dataset dataset = new Cora();
Matrix adjacency = dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();
Matrix nodeFeatures = dataset.features();
Matrix nodeLabels = dataset.labels();
Slice nodes = dataset.samples().getSlice().shuffle(100);
long numClasses = nodeLabels.getCols();

ModelBuilder modelBuilder = new FastBuilder(adjacency, nodeFeatures)
.config("reg", 0.005)
.config("hidden", 16)
.config("classes", numClasses)
.layer("h{l+1}=relu(h{l}@matrix(features, hidden, reg)+vector(hidden))")
.layer("h{l+1}=h{l}@matrix(hidden, classes)+vector(classes)")
.rememberAs("0")
.constant("a", 0.9)
.layerRepeat("h{l+1} = a*(dropout(A, 0.5)@h{l})+(1-a)*h{0}", 10)
.classify();
dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();

long numSamples = dataset.samples().getSlice().size();
long numClasses = dataset.labels().getCols();
ModelBuilder modelBuilder = new FastBuilder(dataset.graph(), dataset.features())
.config("reg", 0.005)
.config("classes", numClasses)
.config("hidden", 64)
.function("gcnlayer", "(A,h){Adrop = dropout(A, 0.5); return Adrop@(h@matrix(?, hidden, reg))+vector(?);}")
.layer("h{l+1}=relu(gcnlayer(A, h{l}))")
.config("hidden", "classes") // reassigns the output gcnlayer's "hidden" to be the number of "classes"
.layer("h{l+1}=gcnlayer(A, h{l})")
.classify()
.autosize(new EmptyTensor(numSamples));

ModelTraining trainer = new ModelTraining()
.setOptimizer(new Adam(0.01))
.setEpochs(300)
.setEpochs(3000)
.setPatience(100)
.setLoss(new CategoricalCrossEntropy())
.setVerbose(true)
.setValidationLoss(new CategoricalCrossEntropy());

.setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10));

Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
Matrix inputData = Tensor.fromRange(nodes.size()).asColumn(); // each node has its identifier as an input
Model model = modelBuilder.getModel()
.init(new XavierNormal())
.train(trainer,
nodes.samplesAsFeatures(),
nodeLabels,
nodes.range(0, 0.6),
nodes.range(0.6, 0.8));

Matrix output = model.predict(nodes.samplesAsFeatures()).get(0).cast(Matrix.class);
.train(trainer,
inputData,
dataset.labels(),
nodes.range(0, 0.6), // train slice
nodes.range(0.6, 0.8) // validation slice
);

//modelBuilder.save(Paths.get("gcn_cora.jgnn"));

Model loadedModel = model;//ModelBuilder.load(Paths.get("gcn_cora.jgnn")).getModel();
Matrix output = loadedModel.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double acc = 0;
for(Long node : nodes.range(0.8, 1)) {
Matrix trueLabels = dataset.labels().accessRow(node).asRow();
Matrix nodeLabels = dataset.labels().accessRow(node).asRow();
Tensor nodeOutput = output.accessRow(node).asRow();
acc += nodeOutput.argmax()==trueLabels.argmax()?1:0;
acc += nodeOutput.argmax()==nodeLabels.argmax()?1:0;
}
System.out.println("Acc\t "+acc/nodes.range(0.8, 1).size());
}
4 changes: 4 additions & 0 deletions JGNN/src/main/java/mklab/JGNN/adhoc/ModelBuilder.java
@@ -360,6 +360,8 @@ public ModelBuilder param(String name, double regularization, Tensor value) {
* @see #param(String, double, Tensor)
*/
public ModelBuilder config(String name, double value) {
if(name.equals("?"))
throw new RuntimeException("The \"?\" config name is not allowed.");
this.configurations.put(name, value);
return this;
}
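A note on the guard above: the example architectures in this commit declare parameter shapes such as matrix(?, hidden, reg) and let autosize resolve the "?" dimensions, so "?" is now rejected as an ordinary configuration name. A small sketch of what the check rejects, assuming the Cora dataset setup from the examples:

ModelBuilder builder = new FastBuilder(dataset.graph(), dataset.features());
builder.config("hidden", 64); // fine: a named hyperparameter
builder.config("?", 64);      // throws RuntimeException("The \"?\" config name is not allowed.")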
@@ -568,6 +570,8 @@ public ModelBuilder operation(String desc) {
}
desc = desc.replaceAll("\\s\\=\\s+\\+\\s+MINUS\\_ONE", " = MINUS_ONE");
desc = desc.replaceAll("\\s+", " ");
if(desc.endsWith(";")) // hack to parse return correctly
desc = desc.substring(0, desc.length()-1);

boolean madeChanges = true;
while(madeChanges) {
4 changes: 2 additions & 2 deletions JGNN/src/main/java/mklab/JGNN/core/Slice.java
@@ -72,8 +72,8 @@ public Matrix samplesAsFeatures(){
return Tensor.fromRange(0, size()).asColumn();
}
/**
* Performs the {@link #range(double, double)} operation
* but replaces values of <code>from</code> and <code>end</code>
* Performs the {@link #range(int, int)} operation
* while replacing values of <code>from</code> and <code>end</code>
* with <code>(int)(from*size())</code> and <code>(int)(end*size())</code>
* so that fractional ranges can be obtained. For example,
* you can call <code>slice.shuffle().range(0.5, 1)</code> to obtain a
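This fractional form of range is what the updated examples use to split a single shuffled node slice into training, validation, and test portions; a short sketch, with the same split fractions as the examples (dataset is the Cora dataset from those files):

Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
Slice trainNodes = nodes.range(0, 0.6);   // first 60% of the shuffled identifiers
Slice validNodes = nodes.range(0.6, 0.8); // next 20%
Slice testNodes  = nodes.range(0.8, 1);   // final 20%, used for the accuracy report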
4 changes: 3 additions & 1 deletion JGNN/src/main/java/mklab/JGNN/nn/Loss.java
@@ -15,10 +15,12 @@ public abstract class Loss {
* lower values correspond to better predictions.
* @param output A model's estimation of true outputs.
* @param desired The expected outputs.
* @return A <code>double</code> value (could be negative too).
* @return A <code>double</code> value (may be negative, with smaller
* values still corresponding to better predictions).
* @see #derivative(Tensor, Tensor)
*/
public abstract double evaluate(Tensor output, Tensor desired);

/**
* Provides the derivative of a loss function at its evaluation point.
* @param output A model's estimation of true outputs.
70 changes: 69 additions & 1 deletion JGNN/src/main/java/mklab/JGNN/nn/ModelTraining.java
@@ -34,10 +34,28 @@ public class ModelTraining {

public ModelTraining() {
}

/**
* @param verbose Whether training progress messages will be printed.
* @deprecated This method was available in earlier JGNN versions but will be gradually phased out.
* Instead, wrap the validation loss within {@link mklab.JGNN.nn.loss.report.VerboseLoss} to replicate
* the same behavior.
*/
public ModelTraining setVerbose(boolean verbose) {
System.err.println("WARNING: The setVerbose method was available in earlier JGNN versions"
+ "\n but will be gradually phased out. Instead, wrap the validation"
+ "\n loss within a VerboseLoss instance to replicate the same"
+ "\n behavior. Look for more losses of the mklab.JGNN.nn.loss.report"
+ "\n package for more types of training feedback.");
this.verbose = verbose;
return this;
}
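In code, the migration that the warning describes moves the reporting responsibility from the trainer to the validation loss; a brief before/after sketch (trainer is a ModelTraining instance, and the interval value is illustrative):

// before (deprecated): the trainer prints progress
trainer.setVerbose(true)
       .setValidationLoss(new CategoricalCrossEntropy());

// after: a VerboseLoss wrapper prints the wrapped metric periodically
trainer.setValidationLoss(new VerboseLoss(new CategoricalCrossEntropy()).setInterval(10));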

/**
* Sets the {@link Loss} that training minimizes on training data.
* @param loss The desired training loss.
* @return <code>this</code> model training instance.
* @see #setValidationLoss(Loss)
*/
public ModelTraining setLoss(Loss loss) {
this.loss = loss;
return this;
@@ -46,23 +64,73 @@ public ModelTraining setValidationLoss(Loss loss) {
this.validationLoss = loss;
return this;
}

/**
* Sets an {@link Optimizer} instance that controls parameter updates during training.
* If the provided optimizer is not an instance of {@link BatchOptimizer},
* it is forcefully wrapped by the latter. Training calls the batch optimizer's
* update method after every batch.
* @param optimizer The desired optimizer.
* @return <code>this</code> model training instance.
* @see #train(Model, Matrix, Matrix, Slice, Slice)
*/
public ModelTraining setOptimizer(Optimizer optimizer) {
this.optimizer = new BatchOptimizer(optimizer);
if(optimizer instanceof BatchOptimizer)
this.optimizer = (BatchOptimizer) optimizer;
else
this.optimizer = new BatchOptimizer(optimizer);
return this;
}

/**
* Sets the number of batches training data slices should be split into.
* @param numBatches The desired number of batches. Default is 1.
* @return <code>this</code> model training instance.
* @see #setParallelizedStochasticGradientDescent(boolean)
*/
public ModelTraining setNumBatches(int numBatches) {
this.numBatches = numBatches;
return this;
}

/**
* Sets whether the training strategy should follow stochastic
* gradient descent by randomly sampling from the training dataset to obtain data samples.
* If <code>true</code>, this feature is enabled together with thread-based parallelization,
* which makes use of JGNN's {@link ThreadPool}.
* @param paralellization A boolean value indicating whether this feature is enabled.
* @return <code>this</code> model training instance.
* @see #setNumBatches(int)
* @see #train(Model, Matrix, Matrix, Slice, Slice)
*/
public ModelTraining setParallelizedStochasticGradientDescent(boolean paralellization) {
this.paralellization = paralellization;
this.stochasticGradientDescent = paralellization;
return this;
}

/**
* Sets the maximum number of epochs for which training runs.
* If no patience has been set, training runs for exactly this
* number of epochs.
* @param epochs The maximum number of epochs.
* @return <code>this</code> model training instance.
* @see #setPatience(int)
*/
public ModelTraining setEpochs(int epochs) {
this.epochs = epochs;
return this;
}

/**
* Sets the patience of the training strategy that performs early stopping.
* If training does not encounter a smaller validation loss for this number of
* epochs, it stops.
* @param patience The number of patience epochs. The default is Integer.MAX_VALUE, which effectively disables this
* feature and lets training always run for the set maximum number of epochs.
* @return <code>this</code> model training instance.
* @see #setEpochs(int)
*/
public ModelTraining setPatience(int patience) {
this.patience = patience;
return this;
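The batching and early-stopping setters documented in this file compose in the usual builder style; a hedged configuration sketch restricted to methods defined above (the numbers are illustrative, not tuned values):

ModelTraining trainer = new ModelTraining()
        .setOptimizer(new Adam(0.01))                    // wrapped into a BatchOptimizer if it is not one already
        .setNumBatches(10)                               // split training data slices into 10 batches
        .setParallelizedStochasticGradientDescent(true)  // sample batches randomly, run them on JGNN's ThreadPool
        .setEpochs(3000)                                 // maximum number of epochs
        .setPatience(100);                               // stop early after 100 epochs without validation improvement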
11 changes: 9 additions & 2 deletions JGNN/src/main/java/mklab/JGNN/nn/loss/Accuracy.java
@@ -4,12 +4,19 @@
import mklab.JGNN.nn.Loss;

/**
* Implements an accuracy {@link Loss} of row-by-row comparison.
* Implements an accuracy {@link Loss} of row-by-row comparisons.
* Each row of the output should have the same {@link Tensor#argmax()}
* value as the corresponding row of desired values.
* This comparison has no derivative.
*
* @author Emmanouil Krasanakis
*/
public class Accuracy extends Loss {
/**
* Instantiates a row-by-row accuracy loss.
* Instantiates a row-by-row {@link Accuracy} loss.
* For this loss, each row of the output should have the same {@link Tensor#argmax()}
* value as the corresponding row of desired values.
* This comparison has no derivative.
*/
public Accuracy() {
}
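Because the comparison has no derivative, Accuracy is suited to evaluation and validation reporting rather than use as a training loss; the examples in this commit wrap it in VerboseLoss for exactly that. A sketch of a direct call, assuming the model, nodes, and dataset variables from the examples above and that output rows are aligned with label rows (how evaluate aggregates the row-by-row comparison is left as described in the javadoc):

Matrix output = model.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double agreement = new Accuracy().evaluate(output, dataset.labels()); // row-by-row argmax comparison
System.out.println("Accuracy-style agreement: " + agreement);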
3 changes: 3 additions & 0 deletions JGNN/src/main/java/mklab/JGNN/nn/loss/BinaryCrossEntropy.java
@@ -14,6 +14,7 @@ public class BinaryCrossEntropy extends Loss {

/**
* Initializes binary cross entropy with 1.E-12 epsilon value.
* For more than one output dimension, use {@link CategoricalCrossEntropy#CategoricalCrossEntropy()}.
* @see #BinaryCrossEntropy(double)
*/
public BinaryCrossEntropy() {
@@ -22,7 +23,9 @@ public BinaryCrossEntropy() {
/**
* Initializes binary cross entropy with an epsilon value
* to bound its outputs in the range [log(epsilon), -log(epsilon)] instead of (-inf, inf).
* For more than one output dimension, use {@link CategoricalCrossEntropy#CategoricalCrossEntropy(double)}.
* @param epsilon A very small positive <code>double</code>.
* @see #BinaryCrossEntropy()
*/
public BinaryCrossEntropy(double epsilon) {
this.epsilon = epsilon;
JGNN/src/main/java/mklab/JGNN/nn/loss/CategoricalCrossEntropy.java
@@ -6,7 +6,7 @@

/**
* Implements a categorical cross-entropy {@link Loss}.<br>
* For binary classification of one output use {@link BinaryCrossEntropy}
* For binary classification of one output use {@link BinaryCrossEntropy}.
* @author Emmanouil Krasanakis
*/
public class CategoricalCrossEntropy extends Loss {
@@ -15,7 +15,8 @@ public class CategoricalCrossEntropy extends Loss {

/**
* Initializes categorical cross entropy with 1.E-12 epsilon value.
* @see #BinaryCrossEntropy(double)
* For binary classification of one output use {@link BinaryCrossEntropy#BinaryCrossEntropy()}.
* @see #CategoricalCrossEntropy(double)
*/
public CategoricalCrossEntropy() {
this(1.E-12);
@@ -24,7 +25,9 @@ public CategoricalCrossEntropy() {
/**
* Initializes categorical cross entropy with an epsilon value
* to bound its outputs in the range [log(epsilon), -log(epsilon)] instead of (-inf, inf).
* For binary classification of one output use {@link BinaryCrossEntropy#BinaryCrossEntropy(double)}.
* @param epsilon A very small positive <code>double</code>.
* @see #CategoricalCrossEntropy()
*/
public CategoricalCrossEntropy(double epsilon) {
this.epsilon = epsilon;
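The cross-references added in this commit position the two cross-entropy losses as complements: binary cross entropy for a single output column, categorical cross entropy for multi-class rows. A short selection sketch, assuming the trainer from the examples (the 1.E-7 epsilon is illustrative):

// multi-class node labels (one column per class), as in the Cora examples
trainer.setLoss(new CategoricalCrossEntropy());       // default 1.E-12 epsilon

// single-output binary classification, with a custom epsilon bound
trainer.setLoss(new BinaryCrossEntropy(1.E-7));       // outputs bounded in [log(epsilon), -log(epsilon)]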