Commit cbff463

Updated tasks priority.
Small refactoring.
Small optimization.
Updated library.
rold2007 committed Dec 20, 2024
1 parent fd33a1a commit cbff463
Showing 6 changed files with 28 additions and 25 deletions.
24 changes: 11 additions & 13 deletions Amaigoma/AverageTransformer.cs
@@ -1,14 +1,13 @@
 using System.Collections.Generic;
 using System.Collections.Immutable;
-using System.Diagnostics;
 using System.Linq;
 
 namespace Amaigoma
 {
    // TODO Use Skia to add more advanced features ?
    public sealed record AverageTransformer // ncrunch: no coverage
    {
-      // TODO This should not be hardcoded here
+      // UNDONE This should not be hardcoded here
       public const int FeatureWindowSize = 17;
 
       private int WindowSize
@@ -38,21 +37,20 @@ public IEnumerable<double> ConvertAll(IEnumerable<double> list)
          double[] integral = list.ToArray();
          double sum;
 
-         // TODO These loops can be simplified (remove the -1 everywhere). But better to have a sturdy unit test before.
-         for (int y = 1; y <= (sizeY - WindowSize + 1); y += WindowSize)
+         for (int y = 0; y <= (sizeY - WindowSize); y += WindowSize)
          {
-            int topY = (y - 1);
-            int bottomY = (y + WindowSize - 1);
+            int topOffsetY = (width * y);
+            int bottomOffsetY = width * (y + WindowSize);
 
-            for (int x = 1; x <= (sizeX - WindowSize + 1); x += WindowSize)
+            for (int x = 0; x <= (sizeX - WindowSize); x += WindowSize)
             {
-               int leftX = x - 1;
-               int rightX = x + WindowSize - 1;
+               int rightX = x + WindowSize;
 
-               sum = integral[rightX + (width * bottomY)];
-               sum -= integral[leftX + (width * bottomY)];
-               sum -= integral[rightX + (width * topY)];
-               sum += integral[leftX + (width * topY)];
+               // UNDONE All these indices could be precomputed in the constructor. The loop would be a lot simpler.
+               sum = integral[rightX + bottomOffsetY];
+               sum -= integral[x + bottomOffsetY];
+               sum -= integral[rightX + topOffsetY];
+               sum += integral[x + topOffsetY];
 
                features = features.Add(sum * WindowSizeSquaredInverted);
             }
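
For context on the refactoring above: the four-corner lookup is the standard summed-area-table identity, sum(window) = I[right, bottom] - I[left, bottom] - I[right, top] + I[left, top]. Below is a minimal, self-contained sketch (not Amaigoma code); it assumes a row-major integral image with one extra leading row and column of zeros, which is what the removed -1 offsets suggest.

```csharp
using System;

public static class IntegralImageSketch
{
   public static void Main()
   {
      const int sizeX = 6;
      const int sizeY = 6;
      const int windowSize = 3;
      const int width = sizeX + 1; // one extra column of zeros on the left

      double[] pixels = new double[sizeX * sizeY];
      for (int i = 0; i < pixels.Length; i++) pixels[i] = i % 7;

      // integral[x + width * y] = sum of all pixels strictly above and left of (x, y)
      double[] integral = new double[width * (sizeY + 1)];
      for (int y = 1; y <= sizeY; y++)
      {
         for (int x = 1; x <= sizeX; x++)
         {
            integral[x + (width * y)] = pixels[(x - 1) + (sizeX * (y - 1))]
                                      + integral[(x - 1) + (width * y)]
                                      + integral[x + (width * (y - 1))]
                                      - integral[(x - 1) + (width * (y - 1))];
         }
      }

      // Four-corner lookup for the window whose top-left pixel is (0, 0),
      // mirroring the loop body in AverageTransformer.ConvertAll.
      int topOffsetY = width * 0;
      int bottomOffsetY = width * windowSize;
      int rightX = windowSize;

      double sum = integral[rightX + bottomOffsetY]
                 - integral[0 + bottomOffsetY]
                 - integral[rightX + topOffsetY]
                 + integral[0 + topOffsetY];

      // Brute-force sum over the same 3x3 window for comparison.
      double check = 0.0;
      for (int y = 0; y < windowSize; y++)
      {
         for (int x = 0; x < windowSize; x++)
         {
            check += pixels[x + (sizeX * y)];
         }
      }

      Console.WriteLine($"integral: {sum}, brute force: {check}"); // both print 21
   }
}
```

This also shows why the UNDONE note about precomputing indices is plausible: topOffsetY, bottomOffsetY, and rightX depend only on the window grid, not on the pixel values.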
2 changes: 1 addition & 1 deletion Amaigoma/SabotenCacheExtensions.cs
@@ -7,7 +7,7 @@ namespace Amaigoma
 
    public static class SabotenCacheExtensions
    {
-      // TODO This extension method could be moved inside a static method of sabotencache and get rid of this extension class
+      // UNDONE This extension method could be moved inside a static method of sabotencache and get rid of this extension class
       public static SabotenCache Prefetch(this SabotenCache sabotenCache, TanukiTransformers tanukiTransformers, IEnumerable<double> data, int featureIndex)
       {
          // TODO Maybe the TanukiTransformers should be responsible to do the ET(L) on the data instead of getting its DataTransformer.
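
The UNDONE above proposes turning the extension method into a member. A hypothetical before/after in miniature (the Cache record below is a stand-in, not Amaigoma's SabotenCache; member names are invented for illustration):

```csharp
using System;

public sealed record Cache(int LoadedUpTo)
{
   // After the proposed move, the former extension method is a plain member,
   // so the separate static extension class disappears.
   public Cache Prefetch(int featureIndex)
   {
      return this with { LoadedUpTo = Math.Max(LoadedUpTo, featureIndex + 1) };
   }
}

public static class Demo
{
   public static void Main()
   {
      Cache cache = new(0);
      cache = cache.Prefetch(5); // call sites keep the same shape as the extension form
      Console.WriteLine(cache);  // Cache { LoadedUpTo = 6 }
   }
}
```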
2 changes: 1 addition & 1 deletion Amaigoma/TanukiTransformers.cs
@@ -81,7 +81,7 @@ public int ConvertAll(int id)
       }
    }
 
-   // TODO Rename to TanukiETL
+   // UNDONE Rename to TanukiETL
    public sealed record TanukiTransformers // ncrunch: no coverage
    {
       private readonly ImmutableList<Tuple<Range, DataTransformer>> dataTransformers = ImmutableList<Tuple<Range, DataTransformer>>.Empty;
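
The dataTransformers field pairs a Range with each DataTransformer. A hedged sketch of how such a list can route a feature index to the transformer owning it (the dispatch logic below is an assumption for illustration; only the field's shape comes from this diff):

```csharp
using System;
using System.Collections.Generic;
using System.Collections.Immutable;

public delegate IEnumerable<double> DataTransformer(IEnumerable<double> list);

public static class RangeDispatchSketch
{
   // Returns the transformer whose half-open feature-index range contains featureIndex.
   public static DataTransformer Find(ImmutableList<Tuple<Range, DataTransformer>> dataTransformers, int featureIndex)
   {
      foreach (Tuple<Range, DataTransformer> entry in dataTransformers)
      {
         if (featureIndex >= entry.Item1.Start.Value && featureIndex < entry.Item1.End.Value)
         {
            return entry.Item2;
         }
      }

      throw new ArgumentOutOfRangeException(nameof(featureIndex));
   }
}
```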
21 changes: 13 additions & 8 deletions AmaigomaTests/AmaigomaIntegrationTests.cs
@@ -14,6 +14,8 @@
 using Xunit;
 using Xunit.Abstractions;
 
+// UNDONE Bring back code coverage to 100%
+
 // TODO January 15th 2024: New algorithm idea. The strength of each node can be validated if, and only if, there are enough leaves under it to apply
 // the logic of swapping the node condition and validating the success rate on train data. For nodes which do not have enough leaves under, this process
 // will probably not give reliable results. The solution is probably to prune these nodes. This will force some leaves to have more than one class. So
@@ -58,13 +60,16 @@ public AverageWindowFeature(ImmutableDictionary<int, SampleData> positions, Buff
       public IEnumerable<double> ConvertAll(int id)
       {
          Point position = Samples[id].Position;
-         List<double> newSample = new(FeatureWindowSize * FeatureWindowSize);
+         List<double> newSample = new((FeatureWindowSize + 1) * (FeatureWindowSize + 1));
 
          int top = position.Y + HalfFeatureWindowSize;
          int xPosition = position.X + HalfFeatureWindowSize;
 
          xPosition.ShouldBePositive();
 
+         // UNDONE I should get rid of the data extractors. Most of the time the data transformers don't need the full data sample, except in train mode,
+         // so it is slow for nothing. The data transformer could fetch only what it needs and back it up with a SabotenCache.
+         // UNDONE Try to apply this solution to see if it is faster, although it will probably allocate more: https://github.com/SixLabors/ImageSharp/discussions/1666#discussioncomment-876494
          // +1 length to support first row of integral image
          for (int y2 = -HalfFeatureWindowSize; y2 <= HalfFeatureWindowSize + 1; y2++)
          {
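
The capacity change on newSample matches the loop bounds above: y2 runs from -HalfFeatureWindowSize through HalfFeatureWindowSize + 1 inclusive, i.e. FeatureWindowSize + 1 values per axis (presumably mirrored on x), because an integral-image window needs one extra leading row and column. A quick check, assuming HalfFeatureWindowSize = FeatureWindowSize / 2 (its definition is outside this hunk):

```csharp
using System;

public static class CapacityCheck
{
   public static void Main()
   {
      const int featureWindowSize = 17;                        // AverageTransformer.FeatureWindowSize
      const int halfFeatureWindowSize = featureWindowSize / 2; // 8, assumed

      int samplesPerAxis = 0;
      for (int y2 = -halfFeatureWindowSize; y2 <= halfFeatureWindowSize + 1; y2++)
      {
         samplesPerAxis++;
      }

      Console.WriteLine(samplesPerAxis);                        // 18 = featureWindowSize + 1
      Console.WriteLine(samplesPerAxis * samplesPerAxis);       // 324 = the new capacity
      Console.WriteLine(featureWindowSize * featureWindowSize); // 289 = the old capacity, which forced the list to grow
   }
}
```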
@@ -334,16 +339,16 @@ public void UppercaseA_507484246(DataSet dataSet)
          dataTransformers += new AverageTransformer(3).ConvertAll;
          // dataTransformers += new AverageTransformer(1).ConvertAll;
 
-         AverageWindowFeature theDataExtractor = new AverageWindowFeature(trainPositions, integralImage, AverageTransformer.FeatureWindowSize);
+         AverageWindowFeature trainDataExtractor = new AverageWindowFeature(trainPositions, integralImage, AverageTransformer.FeatureWindowSize);
          AverageWindowFeature validationDataExtractor = new AverageWindowFeature(validationPositions, integralImage, AverageTransformer.FeatureWindowSize);
          AverageWindowFeature testDataExtractor = new AverageWindowFeature(testPositions, integralImage, AverageTransformer.FeatureWindowSize);
-         TanukiTransformers tanukiTransformers = new(trainPositions.Keys.First(), theDataExtractor.ConvertAll, dataTransformers, theDataExtractor.ExtractLabel);
+         TanukiTransformers trainTanukiTransformers = new(trainPositions.Keys.First(), trainDataExtractor.ConvertAll, dataTransformers, trainDataExtractor.ExtractLabel);
          TanukiTransformers validationTanukiTransformers = new(validationPositions.Keys.First(), validationDataExtractor.ConvertAll, dataTransformers, validationDataExtractor.ExtractLabel);
          TanukiTransformers testTanukiTransformers = new(testPositions.Keys.First(), testDataExtractor.ConvertAll, dataTransformers, testDataExtractor.ExtractLabel);
 
          PakiraDecisionTreeModel pakiraDecisionTreeModel = new();
 
-         pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { trainPositions.Keys.First() }, tanukiTransformers);
+         pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { trainPositions.Keys.First() }, trainTanukiTransformers);
 
          // TODO Evaluate the possibility of using shallow trees to serve as sub-routines. The features could be chosen based on the
          // best discrimination, like it was done a while ago. This will result in categories instead of a scalar so the leaves will need to be recombined
@@ -377,7 +382,7 @@ public void UppercaseA_507484246(DataSet dataSet)
             IEnumerable<int> batchSamples = trainPositions.Keys.Skip(i).Take(batchSize);
 
             bool processBatch = true;
-            PakiraTreeWalker pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, tanukiTransformers);
+            PakiraTreeWalker pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, trainTanukiTransformers);
 
             // TODO The validation set should be used to identify the leaves which are not predicting correctly. Then find
             // some data in the train set to improve these leaves
@@ -394,8 +399,8 @@
 
                if (resultLabels.Count() > 1 || !resultLabels.Contains(expectedLabel))
                {
-                  pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { id }, tanukiTransformers);
-                  pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, tanukiTransformers);
+                  pakiraDecisionTreeModel = pakiraGenerator.Generate(pakiraDecisionTreeModel, new[] { id }, trainTanukiTransformers);
+                  pakiraTreeWalker = new PakiraTreeWalker(pakiraDecisionTreeModel.Tree, trainTanukiTransformers);
 
                   IEnumerable<int> labelValues = pakiraTreeWalker.PredictLeaf(id).LabelValues;
 
@@ -412,7 +417,7 @@
             }
          }
 
-         trainAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, trainPositions.Keys, tanukiTransformers);
+         trainAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, trainPositions.Keys, trainTanukiTransformers);
          validationAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, validationPositions.Keys, validationTanukiTransformers);
          testAccuracyResult = ComputeAccuracy(pakiraDecisionTreeModel, testPositions.Keys, testTanukiTransformers);
 
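
Taken together, the renames make the training pattern in this test easier to follow: predict every sample in the batch, and regenerate the tree only with samples whose leaf is ambiguous or wrong. A self-contained sketch of that control flow (Pakira/Tanuki types are replaced with stand-in delegates; only the retrain condition mirrors the diff):

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

public static class IncrementalTrainSketch
{
   // Stand-in for pakiraTreeWalker.PredictLeaf(id).LabelValues.
   public delegate IEnumerable<int> PredictLeafLabels(int id);

   // Stand-in for pakiraGenerator.Generate(model, new[] { id }, trainTanukiTransformers).
   public delegate void GenerateWith(int id);

   public static int Train(IEnumerable<int> sampleIds, Func<int, int> expectedLabel,
                           PredictLeafLabels predict, GenerateWith generate)
   {
      int retrained = 0;

      foreach (int id in sampleIds)
      {
         List<int> resultLabels = predict(id).ToList();

         // Mirrors: resultLabels.Count() > 1 || !resultLabels.Contains(expectedLabel)
         if (resultLabels.Count > 1 || !resultLabels.Contains(expectedLabel(id)))
         {
            generate(id); // refine the tree with the misclassified sample
            retrained++;
         }
      }

      return retrained;
   }

   public static void Main()
   {
      HashSet<int> ambiguous = new() { 2, 4 };

      int retrained = Train(
         sampleIds: Enumerable.Range(0, 6),
         expectedLabel: id => id % 2,
         predict: id => ambiguous.Contains(id) ? new[] { 0, 1 } : new[] { id % 2 },
         generate: id => Console.WriteLine($"regenerating with sample {id}"));

      Console.WriteLine($"{retrained} samples triggered regeneration"); // 2
   }
}
```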
2 changes: 1 addition & 1 deletion AmaigomaTests/AmaigomaTests.csproj
@@ -18,7 +18,7 @@
     <PackageReference Include="Shouldly" Version="4.2.1" />
     <PackageReference Include="SixLabors.ImageSharp" Version="3.1.6" />
     <PackageReference Include="xunit" Version="2.9.2" />
-    <PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
+    <PackageReference Include="xunit.runner.visualstudio" Version="3.0.0">
       <PrivateAssets>all</PrivateAssets>
       <IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
     </PackageReference>
2 changes: 1 addition & 1 deletion AmaigomaTests/PakiraGeneratorTests.cs
@@ -34,7 +34,7 @@ public static IEnumerable<double> ConvertAll(IEnumerable<double> list)
       }
    }
 
-   // TODO This should be done automatically upon initialization of each test
+   // UNDONE This should be done automatically upon initialization of each test
    public static PakiraDecisionTreeGenerator CreatePakiraGeneratorInstance()
    {
       PakiraDecisionTreeGenerator pakiraDecisionTreeGenerator = new();
