diff --git a/Amaigoma/AverageTransformer.cs b/Amaigoma/AverageTransformer.cs
index 5b511b1..d2ec901 100644
--- a/Amaigoma/AverageTransformer.cs
+++ b/Amaigoma/AverageTransformer.cs
@@ -1,6 +1,5 @@
 using System.Collections.Generic;
 using System.Collections.Immutable;
-using System.Diagnostics;
 using System.Linq;
 
 namespace Amaigoma
@@ -8,7 +7,7 @@ namespace Amaigoma
    // TODO Use Skia to add more advanced features ?
    public sealed record AverageTransformer // ncrunch: no coverage
    {
-      // TODO This should not be hardcoded here
+      // UNDONE This should not be hardcoded here
       public const int FeatureWindowSize = 17;
 
       private int WindowSize
@@ -38,21 +37,20 @@ public IEnumerable<double> ConvertAll(IEnumerable<double> list)
          double[] integral = list.ToArray();
          double sum;
 
-         // TODO These loops can be simplified (remove the -1 everywhere). But better to have a sturdy unit test before.
-         for (int y = 1; y <= (sizeY - WindowSize + 1); y += WindowSize)
+         for (int y = 0; y <= (sizeY - WindowSize); y += WindowSize)
          {
-            int topY = (y - 1);
-            int bottomY = (y + WindowSize - 1);
+            int topOffsetY = (width * y);
+            int bottomOffsetY = width * (y + WindowSize);
 
-            for (int x = 1; x <= (sizeX - WindowSize + 1); x += WindowSize)
+            for (int x = 0; x <= (sizeX - WindowSize); x += WindowSize)
             {
-               int leftX = x - 1;
-               int rightX = x + WindowSize - 1;
+               int rightX = x + WindowSize;
 
-               sum = integral[rightX + (width * bottomY)];
-               sum -= integral[leftX + (width * bottomY)];
-               sum -= integral[rightX + (width * topY)];
-               sum += integral[leftX + (width * topY)];
+               // UNDONE All these indices could be precomputed in the constructor. The loop would be a lot simpler.
+               sum = integral[rightX + bottomOffsetY];
+               sum -= integral[x + bottomOffsetY];
+               sum -= integral[rightX + topOffsetY];
+               sum += integral[x + topOffsetY];
 
                features = features.Add(sum * WindowSizeSquaredInverted);
             }
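The rewritten loop is the standard padded summed-area-table lookup: with a zero row and a zero column prepended to the integral image, the four corner reads need no `-1` adjustments, and the window average falls out of `sum * WindowSizeSquaredInverted`. Below is a minimal self-contained sketch of the same indexing, cross-checked against a naive average; the names (`BuildIntegral`, `WindowAverage`) are illustrative only, and it assumes `width` in the patch is already the padded row length, as the `rightX + bottomOffsetY` reads imply.

```csharp
using System;

static class IntegralSketch
{
   // Builds a summed-area table with one extra zero row and zero column, so
   // integral[(x + 1) + stride * (y + 1)] holds the sum of pixels [0..x, 0..y].
   static double[] BuildIntegral(double[] pixels, int width, int height)
   {
      int stride = width + 1;
      double[] integral = new double[(height + 1) * stride];

      for (int y = 0; y < height; y++)
      {
         double rowSum = 0;

         for (int x = 0; x < width; x++)
         {
            rowSum += pixels[x + (width * y)];
            // Current row prefix plus the cell directly above.
            integral[(x + 1) + (stride * (y + 1))] = rowSum + integral[(x + 1) + (stride * y)];
         }
      }

      return integral;
   }

   // Mirrors the new loop body: four reads, no -1 offsets anywhere.
   static double WindowAverage(double[] integral, int stride, int x, int y, int windowSize)
   {
      int topOffsetY = stride * y;
      int bottomOffsetY = stride * (y + windowSize);
      int rightX = x + windowSize;

      double sum = integral[rightX + bottomOffsetY];
      sum -= integral[x + bottomOffsetY];
      sum -= integral[rightX + topOffsetY];
      sum += integral[x + topOffsetY];

      return sum / (windowSize * windowSize);
   }

   static void Main()
   {
      const int width = 6;
      const int height = 6;
      const int windowSize = 3;
      double[] pixels = new double[width * height];

      for (int i = 0; i < pixels.Length; i++)
      {
         pixels[i] = i;
      }

      double[] integral = BuildIntegral(pixels, width, height);

      // Naive average over the window anchored at (0, 0) for comparison.
      double naive = 0;

      for (int y = 0; y < windowSize; y++)
      {
         for (int x = 0; x < windowSize; x++)
         {
            naive += pixels[x + (width * y)];
         }
      }

      naive /= windowSize * windowSize;

      // Prints True: both paths compute 7 for this window.
      Console.WriteLine(WindowAverage(integral, width + 1, 0, 0, windowSize) == naive);
   }
}
```

Precomputing `topOffsetY`/`bottomOffsetY` per row, as the new UNDONE suggests, would also hoist the remaining multiplications out of the inner loop; the sketch keeps them inline to match the patch.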
diff --git a/Amaigoma/SabotenCacheExtensions.cs b/Amaigoma/SabotenCacheExtensions.cs
index 281dd6a..57863aa 100644
--- a/Amaigoma/SabotenCacheExtensions.cs
+++ b/Amaigoma/SabotenCacheExtensions.cs
@@ -7,7 +7,7 @@ namespace Amaigoma
    public static class SabotenCacheExtensions
    {
-      // TODO This extension method could be moved inside a static method of sabotencache and get rid of this extension class
+      // UNDONE This extension method could be moved inside a static method of sabotencache and get rid of this extension class
       public static SabotenCache Prefetch(this SabotenCache sabotenCache, TanukiTransformers tanukiTransformers, IEnumerable data, int featureIndex)
       {
          // TODO Maybe the TanukiTransformers should be responsible to do the ET(L) on the data instead of getting its DataTransformer.
diff --git a/Amaigoma/TanukiTransformers.cs b/Amaigoma/TanukiTransformers.cs
index 849df94..770c3bb 100644
--- a/Amaigoma/TanukiTransformers.cs
+++ b/Amaigoma/TanukiTransformers.cs
@@ -81,7 +81,7 @@ public int ConvertAll(int id)
       }
    }
 
-   // TODO Rename to TanukiETL
+   // UNDONE Rename to TanukiETL
    public sealed record TanukiTransformers // ncrunch: no coverage
    {
      private readonly ImmutableList> dataTransformers = ImmutableList>.Empty;
diff --git a/AmaigomaTests/AmaigomaIntegrationTests.cs b/AmaigomaTests/AmaigomaIntegrationTests.cs
index 905c062..d62c331 100644
--- a/AmaigomaTests/AmaigomaIntegrationTests.cs
+++ b/AmaigomaTests/AmaigomaIntegrationTests.cs
@@ -14,6 +14,8 @@
 using Xunit;
 using Xunit.Abstractions;
 
+// UNDONE Bring back code coverage to 100%
+
 // TODO January 15th 2024: New algorithm idea. The strength of each node can be validated if, and only if, there are enough leaves under it to apply
 // the logic of swapping the node condition and validating the success rate on train data. For nodes which do not have enough leaves under, this process
 // will probably not give reliable results. The solution is probably to prune these nodes. This will force some leaves to have more than one class. So
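The idea in that comment can be made concrete: negating an internal node's condition is equivalent to swapping its two subtrees, so a node is "strong" when the swap costs training accuracy, and a node without enough leaves beneath it is a pruning candidate rather than a validation candidate. Here is a minimal sketch under those assumptions; every name in it (`SketchNode`, `RootStrength`, `MinimumLeaves`) is a hypothetical stand-in, not the Pakira API, and only the root case is shown since validating an arbitrary node would also require rebuilding the path down to it.

```csharp
using System.Collections.Generic;
using System.Linq;

// Hypothetical binary decision node; leaves carry a label, internal nodes a test.
public sealed record SketchNode(int FeatureIndex, double Threshold, SketchNode Left, SketchNode Right, int? Label)
{
   public bool IsLeaf => Label.HasValue;

   public int LeafCount() => IsLeaf ? 1 : Left.LeafCount() + Right.LeafCount();

   public int Predict(IReadOnlyList<double> sample)
   {
      if (IsLeaf)
      {
         return Label.Value;
      }

      return sample[FeatureIndex] <= Threshold ? Left.Predict(sample) : Right.Predict(sample);
   }
}

public static class NodeStrengthSketch
{
   // Below this leaf count the swap test is treated as unreliable (prune candidate).
   public const int MinimumLeaves = 4;

   public static double Accuracy(SketchNode tree, IReadOnlyList<(double[] Sample, int Label)> data)
   {
      return data.Count(d => tree.Predict(d.Sample) == d.Label) / (double)data.Count;
   }

   // How much train accuracy drops when the root condition is negated, which is
   // equivalent to swapping the two subtrees. Null means "not enough leaves".
   public static double? RootStrength(SketchNode tree, IReadOnlyList<(double[] Sample, int Label)> trainData)
   {
      if (tree.LeafCount() < MinimumLeaves)
      {
         return null;
      }

      SketchNode swapped = tree with { Left = tree.Right, Right = tree.Left };

      return Accuracy(tree, trainData) - Accuracy(swapped, trainData);
   }
}
```

A drop near zero (or negative) would suggest the condition carries little signal; the comment's proposal is to prune such nodes and accept leaves holding more than one class.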
@@ -58,13 +60,16 @@ public AverageWindowFeature(ImmutableDictionary positions, Buff
       public IEnumerable ConvertAll(int id)
       {
          Point position = Samples[id].Position;
-         List newSample = new(FeatureWindowSize * FeatureWindowSize);
+         List newSample = new((FeatureWindowSize + 1) * (FeatureWindowSize + 1));
          int top = position.Y + HalfFeatureWindowSize;
          int xPosition = position.X + HalfFeatureWindowSize;
 
          xPosition.ShouldBePositive();
 
+         // UNDONE I should get rid of the data extractors. Most of the time the data transformers don't need the full data sample, except in train mode,
+         // so it is slow for nothing. The data transformer could fetch only what it needs and back it up with a SabotenCache.
+
          // UNDONE Try to apply this solution to see if it is faster, although it will probably allocate more: https://github.com/SixLabors/ImageSharp/discussions/1666#discussioncomment-876494
          // +1 length to support first row of integral image
          for (int y2 = -HalfFeatureWindowSize; y2 <= HalfFeatureWindowSize + 1; y2++)
          {
@@ -334,16 +339,16 @@ public void UppercaseA_507484246(DataSet dataSet)
          dataTransformers += new AverageTransformer(3).ConvertAll;
          // dataTransformers += new AverageTransformer(1).ConvertAll;
 
-         AverageWindowFeature theDataExtractor = new AverageWindowFeature(trainPositions, integralImage, AverageTransformer.FeatureWindowSize);
+         AverageWindowFeature trainDataExtractor = new AverageWindowFeature(trainPositions, integralImage, AverageTransformer.FeatureWindowSize);
          AverageWindowFeature validationDataExtractor = new AverageWindowFeature(validationPositions, integralImage, AverageTransformer.FeatureWindowSize);
          AverageWindowFeature testDataExtractor = new AverageWindowFeature(testPositions, integralImage, AverageTransformer.FeatureWindowSize);
 
-         TanukiTransformers tanukiTransformers = new(trainPositions.Keys.First(), theDataExtractor.ConvertAll, dataTransformers, theDataExtractor.ExtractLabel);
+         TanukiTransformers trainTanukiTransformers = new(trainPositions.Keys.First(), trainDataExtractor.ConvertAll, dataTransformers, trainDataExtractor.ExtractLabel);
          TanukiTransformers validationTanukiTransformers = new(validationPositions.Keys.First(), validationDataExtractor.ConvertAll, dataTransformers, validationDataExtractor.ExtractLabel);
          TanukiTransformers testTanukiTransformers = new(testPositions.Keys.First(), testDataExtractor.ConvertAll, dataTransformers, testDataExtractor.ExtractLabel);
 
          PakiraDecisionTreeModel pakiraDecisionTreeModel = new();
 
-         pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { trainPositions.Keys.First() }, tanukiTransformers);
+         pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { trainPositions.Keys.First() }, trainTanukiTransformers);
 
          // TODO Evaluate the possibility of using shallow trees to serve as sub-routines. The features could be chosen based on the
          // best discrimination, like it was done a while ago. This will result in categories instead of a scalar so the leaves will need to be recombined
@@ -377,7 +382,7 @@ public void UppercaseA_507484246(DataSet dataSet)
            IEnumerable batchSamples = trainPositions.Keys.Skip(i).Take(batchSize);
            bool processBatch = true;
 
-            PakiraTreeWalker pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, tanukiTransformers);
+            PakiraTreeWalker pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, trainTanukiTransformers);
 
            // TODO The validation set should be used to identify the leaves which are not predicting correctly. Then find
            // some data in the train set to improve these leaves
@@ -394,8 +399,8 @@ public void UppercaseA_507484246(DataSet dataSet)
 
               if (resultLabels.Count() > 1 || !resultLabels.Contains(expectedLabel))
               {
-                  pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { id }, tanukiTransformers);
-                  pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, tanukiTransformers);
+                  pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { id }, trainTanukiTransformers);
+                  pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, trainTanukiTransformers);
 
                  IEnumerable labelValues = pakiraTreeWalker.PredictLeaf(id).LabelValues;
 
@@ -412,7 +417,7 @@ public void UppercaseA_507484246(DataSet dataSet)
            }
         }
 
-         trainAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, trainPositions.Keys, tanukiTransformers);
+         trainAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, trainPositions.Keys, trainTanukiTransformers);
          validationAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, validationPositions.Keys, validationTanukiTransformers);
          testAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, testPositions.Keys, testTanukiTransformers);
diff --git a/AmaigomaTests/AmaigomaTests.csproj b/AmaigomaTests/AmaigomaTests.csproj
index 2f9eeee..4d4faa1 100644
--- a/AmaigomaTests/AmaigomaTests.csproj
+++ b/AmaigomaTests/AmaigomaTests.csproj
@@ -18,7 +18,7 @@
-
+
       <PrivateAssets>all</PrivateAssets>
       <IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
diff --git a/AmaigomaTests/PakiraGeneratorTests.cs b/AmaigomaTests/PakiraGeneratorTests.cs
index 9a04011..d21d684 100644
--- a/AmaigomaTests/PakiraGeneratorTests.cs
+++ b/AmaigomaTests/PakiraGeneratorTests.cs
@@ -34,7 +34,7 @@ public static IEnumerable ConvertAll(IEnumerable list)
       }
    }
 
-   // TODO This should be done automatically upon initialization of each test
+   // UNDONE This should be done automatically upon initialization of each test
    public static PakiraDecisionTreeGenerator CreatePakiraGeneratorInstance()
    {
       PakiraDecisionTreeGenerator pakiraDecisionTreeGenerator = new();
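On that last UNDONE: xUnit already constructs a fresh instance of the test class before every test method, so the factory call can move into the constructor and each test gets its generator automatically. A minimal sketch, assuming the parameterless `new()` seen above is all the setup the factory performs; the class name and the `[Fact]` body are placeholders.

```csharp
using Xunit;

public class PakiraGeneratorTestsSketch
{
   private readonly PakiraDecisionTreeGenerator pakiraGenerator;

   // xUnit runs this constructor before each test method, replacing the
   // explicit CreatePakiraGeneratorInstance() call in every test body.
   public PakiraGeneratorTestsSketch()
   {
      pakiraGenerator = new PakiraDecisionTreeGenerator();
   }

   [Fact]
   public void GeneratorIsInitialized()
   {
      Assert.NotNull(pakiraGenerator);
   }
}
```

If some of the setup were expensive and safe to share, `IClassFixture<T>` would amortize it across tests; plain per-test construction matches what the comment asks for here.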