new verbosity mechanism with loss wrappers
maniospas committed Aug 18, 2024
1 parent fd0034d commit cb305a4
Showing 13 changed files with 939 additions and 416 deletions.
33 changes: 33 additions & 0 deletions JGNN/gcn_cora.jgnn

Large diffs are not rendered by default.

43 changes: 26 additions & 17 deletions JGNN/src/examples/nodeClassification/GCN.java
@@ -14,7 +14,9 @@
import mklab.JGNN.core.Tensor;
import mklab.JGNN.core.empy.EmptyTensor;
import mklab.JGNN.nn.initializers.XavierNormal;
import mklab.JGNN.nn.loss.Accuracy;
import mklab.JGNN.nn.loss.CategoricalCrossEntropy;
import mklab.JGNN.nn.loss.report.VerboseLoss;
import mklab.JGNN.nn.optimizers.Adam;

/**
@@ -27,36 +29,43 @@ public static void main(String[] args) throws Exception {
Dataset dataset = new Cora();
dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();

long numSamples = dataset.samples().getSlice().size();
long numClasses = dataset.labels().getCols();
ModelBuilder modelBuilder = new FastBuilder(dataset.graph(), dataset.features())
.config("reg", 0.005)
.config("classes", numClasses)
.config("hidden", 64)
.function("gcnlayer", "(A,h){z=dropout(A, 0.5)@(h@matrix(?, hidden, reg))+vector(?);return z}")
.layer("h{l+1}=relu(gcnlayer(A, h{l}))")
.config("hidden", "classes")
.layer("h{l+1}=gcnlayer(A, h{l})")
.classify()
.autosize(new EmptyTensor(dataset.samples().getSlice().size()));
.config("reg", 0.005)
.config("classes", numClasses)
.config("hidden", 64)
.function("gcnlayer", "(A,h){Adrop = dropout(A, 0.5); return Adrop@(h@matrix(?, hidden, reg))+vector(?);}")
.layer("h{l+1}=relu(gcnlayer(A, h{l}))")
.config("hidden", "classes") // reassigns the output gcnlayer's "hidden" to be the number of "classes"
.layer("h{l+1}=gcnlayer(A, h{l})")
.classify()
.autosize(new EmptyTensor(numSamples));

ModelTraining trainer = new ModelTraining()
.setOptimizer(new Adam(0.01))
.setEpochs(20)
.setEpochs(3000)
.setPatience(100)
.setVerbose(true)
.setLoss(new CategoricalCrossEntropy())
.setValidationLoss(new CategoricalCrossEntropy());
.setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10));

long tic = System.currentTimeMillis();
Slice nodes = dataset.samples().getSlice().shuffle(100);
Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
Matrix inputData = Tensor.fromRange(nodes.size()).asColumn(); // each node has its identifier as an input
Model model = modelBuilder.getModel()
.init(new XavierNormal())
.train(trainer,
Tensor.fromRange(nodes.size()).asColumn(),
dataset.labels(), nodes.range(0, 0.6), nodes.range(0.6, 0.8));
.train(trainer,
inputData,
dataset.labels(),
nodes.range(0, 0.6), // train slice
nodes.range(0.6, 0.8) // validation slice
);

System.out.println("Training time "+(System.currentTimeMillis()-tic)/1000.);
Matrix output = model.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
//modelBuilder.save(Paths.get("gcn_cora.jgnn"));

Model loadedModel = model;//ModelBuilder.load(Paths.get("gcn_cora.jgnn")).getModel();
Matrix output = loadedModel.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double acc = 0;
for(Long node : nodes.range(0.8, 1)) {
Matrix nodeLabels = dataset.labels().accessRow(node).asRow();
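Taken together, the new lines above replace the fixed-epoch, setVerbose-based training of the old example with early stopping and wrapper-based reporting. A minimal sketch of the resulting trainer configuration, using only classes imported in this file (the 10-epoch interval is simply the value chosen in this example):

ModelTraining trainer = new ModelTraining()
        .setOptimizer(new Adam(0.01))
        .setEpochs(3000)                 // upper bound; early stopping usually ends training sooner
        .setPatience(100)                // stop after 100 epochs without validation improvement
        .setLoss(new CategoricalCrossEntropy())
        .setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10)); // reports the wrapped accuracy every 10 epochs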
70 changes: 39 additions & 31 deletions JGNN/src/examples/tutorial/Quickstart.java
@@ -1,66 +1,74 @@
package tutorial;

import java.nio.file.Files;
import java.nio.file.Paths;

import mklab.JGNN.adhoc.Dataset;
import mklab.JGNN.adhoc.ModelBuilder;
import mklab.JGNN.adhoc.datasets.Citeseer;
import mklab.JGNN.adhoc.datasets.Cora;
import mklab.JGNN.adhoc.parsers.FastBuilder;
import mklab.JGNN.core.Matrix;
import mklab.JGNN.nn.Model;
import mklab.JGNN.nn.ModelTraining;
import mklab.JGNN.core.Slice;
import mklab.JGNN.core.Tensor;
import mklab.JGNN.core.empy.EmptyTensor;
import mklab.JGNN.nn.initializers.XavierNormal;
import mklab.JGNN.nn.loss.Accuracy;
import mklab.JGNN.nn.loss.CategoricalCrossEntropy;
import mklab.JGNN.nn.loss.report.VerboseLoss;
import mklab.JGNN.nn.optimizers.Adam;

/**
* Demonstrates classification with an architecture defined through the scripting engine.
* Demonstrates classification with the GCN architecture.
*
* @author Emmanouil Krasanakis
*/
public class Quickstart {
public static void main(String[] args) throws Exception {
Dataset dataset = new Cora();
Matrix adjacency = dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();
Matrix nodeFeatures = dataset.features();
Matrix nodeLabels = dataset.labels();
Slice nodes = dataset.samples().getSlice().shuffle(100);
long numClasses = nodeLabels.getCols();

ModelBuilder modelBuilder = new FastBuilder(adjacency, nodeFeatures)
.config("reg", 0.005)
.config("hidden", 16)
.config("classes", numClasses)
.layer("h{l+1}=relu(h{l}@matrix(features, hidden, reg)+vector(hidden))")
.layer("h{l+1}=h{l}@matrix(hidden, classes)+vector(classes)")
.rememberAs("0")
.constant("a", 0.9)
.layerRepeat("h{l+1} = a*(dropout(A, 0.5)@h{l})+(1-a)*h{0}", 10)
.classify();
dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();

long numSamples = dataset.samples().getSlice().size();
long numClasses = dataset.labels().getCols();
ModelBuilder modelBuilder = new FastBuilder(dataset.graph(), dataset.features())
.config("reg", 0.005)
.config("classes", numClasses)
.config("hidden", 64)
.function("gcnlayer", "(A,h){Adrop = dropout(A, 0.5); return Adrop@(h@matrix(?, hidden, reg))+vector(?);}")
.layer("h{l+1}=relu(gcnlayer(A, h{l}))")
.config("hidden", "classes") // reassigns the output gcnlayer's "hidden" to be the number of "classes"
.layer("h{l+1}=gcnlayer(A, h{l})")
.classify()
.autosize(new EmptyTensor(numSamples));

ModelTraining trainer = new ModelTraining()
.setOptimizer(new Adam(0.01))
.setEpochs(300)
.setEpochs(3000)
.setPatience(100)
.setLoss(new CategoricalCrossEntropy())
.setVerbose(true)
.setValidationLoss(new CategoricalCrossEntropy());

.setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10));

Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
Matrix inputData = Tensor.fromRange(nodes.size()).asColumn(); // each node has its identifier as an input
Model model = modelBuilder.getModel()
.init(new XavierNormal())
.train(trainer,
nodes.samplesAsFeatures(),
nodeLabels,
nodes.range(0, 0.6),
nodes.range(0.6, 0.8));

Matrix output = model.predict(nodes.samplesAsFeatures()).get(0).cast(Matrix.class);
.train(trainer,
inputData,
dataset.labels(),
nodes.range(0, 0.6), // train slice
nodes.range(0.6, 0.8) // validation slice
);

//modelBuilder.save(Paths.get("gcn_cora.jgnn"));

Model loadedModel = model;//ModelBuilder.load(Paths.get("gcn_cora.jgnn")).getModel();
Matrix output = loadedModel.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double acc = 0;
for(Long node : nodes.range(0.8, 1)) {
Matrix trueLabels = dataset.labels().accessRow(node).asRow();
Matrix nodeLabels = dataset.labels().accessRow(node).asRow();
Tensor nodeOutput = output.accessRow(node).asRow();
acc += nodeOutput.argmax()==trueLabels.argmax()?1:0;
acc += nodeOutput.argmax()==nodeLabels.argmax()?1:0;
}
System.out.println("Acc\t "+acc/nodes.range(0.8, 1).size());
}
4 changes: 4 additions & 0 deletions JGNN/src/main/java/mklab/JGNN/adhoc/ModelBuilder.java
@@ -360,6 +360,8 @@ public ModelBuilder param(String name, double regularization, Tensor value) {
* @see #param(String, double, Tensor)
*/
public ModelBuilder config(String name, double value) {
if(name.equals("?"))
throw new RuntimeException("The \"?\" config name is not allowed.");
this.configurations.put(name, value);
return this;
}
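A note on the guard above: the example architectures in this commit declare parameter shapes such as matrix(?, hidden, reg) and let autosize resolve the "?" dimensions, so "?" is now rejected as an ordinary configuration name. A small sketch of what the check rejects, assuming the Cora dataset setup from the examples:

ModelBuilder builder = new FastBuilder(dataset.graph(), dataset.features());
builder.config("hidden", 64); // fine: a named hyperparameter
builder.config("?", 64);      // throws RuntimeException("The \"?\" config name is not allowed.")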
@@ -568,6 +570,8 @@ public ModelBuilder operation(String desc) {
}
desc = desc.replaceAll("\\s\\=\\s+\\+\\s+MINUS\\_ONE", " = MINUS_ONE");
desc = desc.replaceAll("\\s+", " ");
if(desc.endsWith(";")) // hack to parse return correctly
desc = desc.substring(0, desc.length()-1);

boolean madeChanges = true;
while(madeChanges) {
4 changes: 2 additions & 2 deletions JGNN/src/main/java/mklab/JGNN/core/Slice.java
@@ -72,8 +72,8 @@ public Matrix samplesAsFeatures(){
return Tensor.fromRange(0, size()).asColumn();
}
/**
* Performs the {@link #range(double, double)} operation
* but replaces values of <code>from</code> and <code>end</code>
* Performs the {@link #range(int, int)} operation
* while replacing values of <code>from</code> and <code>end</code>
* with <code>(int)(from*size())</code> and <code>(int)(end*size())</code>
* so that fractional ranges can be obtained. For example,
* you can call <code>slice.shuffle().range(0.5, 1)</code> to obtain a
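This fractional form of range is what the updated examples use to split a single shuffled node slice into training, validation, and test portions; a short sketch, with the same split fractions as the examples (dataset is the Cora dataset from those files):

Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
Slice trainNodes = nodes.range(0, 0.6);   // first 60% of the shuffled identifiers
Slice validNodes = nodes.range(0.6, 0.8); // next 20%
Slice testNodes  = nodes.range(0.8, 1);   // final 20%, used for the accuracy report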
4 changes: 3 additions & 1 deletion JGNN/src/main/java/mklab/JGNN/nn/Loss.java
@@ -15,10 +15,12 @@ public abstract class Loss {
* lower values correspond to better predictions.
* @param output A model's estimation of true outputs.
* @param desired The expected outputs.
* @return A <code>double</code> value (could be negative too).
* @return A <code>double</code> value (may be negative, with smaller
* values still corresponding to better predictions).
* @see #derivative(Tensor, Tensor)
*/
public abstract double evaluate(Tensor output, Tensor desired);

/**
* Provides the derivative of a loss function at its evaluation point.
* @param output A model's estimation of true outputs.
70 changes: 69 additions & 1 deletion JGNN/src/main/java/mklab/JGNN/nn/ModelTraining.java
@@ -34,10 +34,28 @@ public class ModelTraining {

public ModelTraining() {
}

/**
* @param verbose Whether training progress messages will be printed.
* @deprecated This method was available in earlier JGNN versions but will be gradually phased out.
* Instead, wrap the validation loss within {@link mklab.JGNN.nn.loss.report.VerboseLoss} to replicate
* the same behavior.
*/
public ModelTraining setVerbose(boolean verbose) {
System.err.println("WARNING: The setVerbose method was available in earlier JGNN versions"
+ "\n but will be gradually phased out. Instead, wrap the validation"
+ "\n loss within a VerboseLoss instance to replicate the same"
+ "\n behavior. Look for more losses of the mklab.JGNN.nn.loss.report"
+ "\n package for more types of training feedback.");
this.verbose = verbose;
return this;
}
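In code, the migration that the warning describes moves the reporting responsibility from the trainer to the validation loss; a brief before/after sketch (trainer is a ModelTraining instance, and the interval value is illustrative):

// before (deprecated): the trainer prints progress
trainer.setVerbose(true)
       .setValidationLoss(new CategoricalCrossEntropy());

// after: a VerboseLoss wrapper prints the wrapped metric periodically
trainer.setValidationLoss(new VerboseLoss(new CategoricalCrossEntropy()).setInterval(10));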

/**
* Sets the {@link Loss} that training minimizes on training data.
* @param loss The desired training loss.
* @return <code>this</code> model training instance.
* @see #setValidationLoss(Loss)
*/
public ModelTraining setLoss(Loss loss) {
this.loss = loss;
return this;
@@ -46,23 +64,73 @@ public ModelTraining setValidationLoss(Loss loss) {
this.validationLoss = loss;
return this;
}

/**
* Sets an {@link Optimizer} instance that controls parameter updates during training.
* If the provided optimizer is not an instance of {@link BatchOptimizer},
* it is forcefully wrapped by the latter. Training calls the batch optimizer's
* update method after every batch.
* @param optimizer The desired optimizer.
* @return <code>this</code> model training instance.
* @see #train(Model, Matrix, Matrix, Slice, Slice)
*/
public ModelTraining setOptimizer(Optimizer optimizer) {
this.optimizer = new BatchOptimizer(optimizer);
if(optimizer instanceof BatchOptimizer)
this.optimizer = (BatchOptimizer) optimizer;
else
this.optimizer = new BatchOptimizer(optimizer);
return this;
}

/**
* Sets the number of batches training data slices should be split into.
* @param numBatches The desired number of batches. Default is 1.
* @return <code>this</code> model training instance.
* @see #setParallelizedStochasticGradientDescent(boolean)
*/
public ModelTraining setNumBatches(int numBatches) {
this.numBatches = numBatches;
return this;
}

/**
* Sets whether the training strategy should follow stochastic
* gradient descent by randomly sampling from the training dataset to obtain data samples.
* If <code>true</code>, this feature is enabled together with thread-based parallelization,
* which makes use of JGNN's {@link ThreadPool}.
* @param paralellization A boolean value indicating whether this feature is enabled.
* @return <code>this</code> model training instance.
* @see #setNumBatches(int)
* @see #train(Model, Matrix, Matrix, Slice, Slice)
*/
public ModelTraining setParallelizedStochasticGradientDescent(boolean paralellization) {
this.paralellization = paralellization;
this.stochasticGradientDescent = paralellization;
return this;
}

/**
* Sets the maximum number of epochs for which training runs.
* If no patience has been set, training runs for exactly this
* number of epochs.
* @param epochs The maximum number of epochs.
* @return <code>this</code> model training instance.
* @see #setPatience(int)
*/
public ModelTraining setEpochs(int epochs) {
this.epochs = epochs;
return this;
}

/**
* Sets the patience of the training strategy that performs early stopping.
* If training does not encounter a smaller validation loss for this number of
* epochs, it stops.
* @param patience The number of patience epochs. The default is Integer.MAX_VALUE, which effectively disables this
* feature and lets training always run for the set maximum number of epochs.
* @return <code>this</code> model training instance.
* @see #setEpochs(int)
*/
public ModelTraining setPatience(int patience) {
this.patience = patience;
return this;
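The batching and early-stopping setters documented in this file compose in the usual builder style; a hedged configuration sketch restricted to methods defined above (the numbers are illustrative, not tuned values):

ModelTraining trainer = new ModelTraining()
        .setOptimizer(new Adam(0.01))                    // wrapped into a BatchOptimizer if it is not one already
        .setNumBatches(10)                               // split training data slices into 10 batches
        .setParallelizedStochasticGradientDescent(true)  // sample batches randomly, run them on JGNN's ThreadPool
        .setEpochs(3000)                                 // maximum number of epochs
        .setPatience(100);                               // stop early after 100 epochs without validation improvement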
11 changes: 9 additions & 2 deletions JGNN/src/main/java/mklab/JGNN/nn/loss/Accuracy.java
@@ -4,12 +4,19 @@
import mklab.JGNN.nn.Loss;

/**
* Implements an accuracy {@link Loss} of row-by-row comparison.
* Implements an accuracy {@link Loss} of row-by-row comparisons.
* Each row of the output should have the same {@link Tensor#argmax()}
* value as the corresponding row of desired values.
* This comparison has no derivative.
*
* @author Emmanouil Krasanakis
*/
public class Accuracy extends Loss {
/**
* Instantiates a row-by-row accuracy loss.
* Instantiates a row-by-row {@link Accuracy} loss.
* For this loss, each row of the output should have the same {@link Tensor#argmax()}
* value as the corresponding row of desired values.
* This comparison has no derivative.
*/
public Accuracy() {
}
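Because the comparison has no derivative, Accuracy is suited to evaluation and validation reporting rather than use as a training loss; the examples in this commit wrap it in VerboseLoss for exactly that. A sketch of a direct call, assuming the model, nodes, and dataset variables from the examples above and that output rows are aligned with label rows (how evaluate aggregates the row-by-row comparison is left as described in the javadoc):

Matrix output = model.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double agreement = new Accuracy().evaluate(output, dataset.labels()); // row-by-row argmax comparison
System.out.println("Accuracy-style agreement: " + agreement);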
3 changes: 3 additions & 0 deletions JGNN/src/main/java/mklab/JGNN/nn/loss/BinaryCrossEntropy.java
@@ -14,6 +14,7 @@ public class BinaryCrossEntropy extends Loss {

/**
* Initializes binary cross entropy with 1.E-12 epsilon value.
* For more than one output dimension, use {@link CategoricalCrossEntropy#CategoricalCrossEntropy()}.
* @see #BinaryCrossEntropy(double)
*/
public BinaryCrossEntropy() {
@@ -22,7 +23,9 @@ public BinaryCrossEntropy() {
/**
* Initializes binary cross entropy with an epsilon value
* to bound its outputs in the range [log(epsilon), -log(epsilon)] instead of (-inf, inf).
* For more than one output dimension, use {@link CategoricalCrossEntropy#CategoricalCrossEntropy(double)}.
* @param epsilon A very small positive <code>double</code>.
* @see #BinaryCrossEntropy()
*/
public BinaryCrossEntropy(double epsilon) {
this.epsilon = epsilon;
JGNN/src/main/java/mklab/JGNN/nn/loss/CategoricalCrossEntropy.java
@@ -6,7 +6,7 @@

/**
* Implements a categorical cross-entropy {@link Loss}.<br>
* For binary classification of one output use {@link BinaryCrossEntropy}
* For binary classification of one output use {@link BinaryCrossEntropy}.
* @author Emmanouil Krasanakis
*/
public class CategoricalCrossEntropy extends Loss {
@@ -15,7 +15,8 @@ public class CategoricalCrossEntropy extends Loss {

/**
* Initializes categorical cross entropy with 1.E-12 epsilon value.
* @see #BinaryCrossEntropy(double)
* For binary classification of one output use {@link BinaryCrossEntropy#BinaryCrossEntropy()}.
* @see #CategoricalCrossEntropy(double)
*/
public CategoricalCrossEntropy() {
this(1.E-12);
@@ -24,7 +25,9 @@ public CategoricalCrossEntropy() {
/**
* Initializes categorical cross entropy with an epsilon value
* to bound its outputs in the range [log(epsilon), -log(epsilon)] instead of (-inf, inf).
* For binary classification of one output use {@link BinaryCrossEntropy#BinaryCrossEntropy(double)}.
* @param epsilon A very small positive <code>double</code>.
* @see #CategoricalCrossEntropy()
*/
public CategoricalCrossEntropy(double epsilon) {
this.epsilon = epsilon;
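The cross-references added in this commit position the two cross-entropy losses as complements: binary cross entropy for a single output column, categorical cross entropy for multi-class rows. A short selection sketch, assuming the trainer from the examples (the 1.E-7 epsilon is illustrative):

// multi-class node labels (one column per class), as in the Cora examples
trainer.setLoss(new CategoricalCrossEntropy());       // default 1.E-12 epsilon

// single-output binary classification, with a custom epsilon bound
trainer.setLoss(new BinaryCrossEntropy(1.E-7));       // outputs bounded in [log(epsilon), -log(epsilon)]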