From b504855838b611a04d732035d8ae418adbb00c66 Mon Sep 17 00:00:00 2001 From: Luca Rossetto Date: Fri, 21 Jan 2022 16:49:18 +0100 Subject: [PATCH] CLIP feature module (#246) * Added feature modules for OpenAI CLIP * Minor refactoring of utility classes * Refactored common image preprocessing logic into helper class Co-authored-by: Silvan Heller Co-authored-by: Florian Spiess Former-commit-id: c2315f028cc4980c5fd8fb307f32d425ea34b08d --- build.gradle | 2 +- .../vitrivr/cineast/api/util/QueryUtil.java | 2 +- .../vitrivr/cineast/core/data/Location.java | 2 +- .../org/vitrivr/cineast/core/data/Pair.java | 19 ++ .../AbstractQueryTermContainer.java | 2 +- .../db/cottontaildb/CottontailWrapper.java | 9 +- .../video/VideoOutputStreamContainer.java | 2 +- .../cineast/core/features/AverageHPCP.java | 2 +- .../vitrivr/cineast/core/features/CENS.java | 2 +- .../cineast/core/features/CLIPImage.java | 111 +++++++++ .../cineast/core/features/CLIPText.java | 149 ++++++++++++ .../vitrivr/cineast/core/features/EHD.java | 2 +- .../core/features/HOGMirflickr25K256.java | 2 +- .../core/features/HOGMirflickr25K512.java | 2 +- .../cineast/core/features/HPCPShingle.java | 2 +- .../core/features/InceptionResnetV2.java | 67 ++---- .../core/features/LightfieldFourier.java | 2 +- .../core/features/LightfieldZernike.java | 2 +- .../cineast/core/features/MFCCShingle.java | 2 +- .../cineast/core/features/MelodyEstimate.java | 2 +- .../core/features/MotionHistogram.java | 2 +- .../features/MotionHistogramBackground.java | 2 +- .../cineast/core/features/OCRSearch.java | 6 +- .../cineast/core/features/SegmentTags.java | 2 +- .../core/features/SphericalHarmonics.java | 2 +- .../core/features/SubDivMotionHistogram2.java | 2 +- .../core/features/SubDivMotionHistogram3.java | 2 +- .../core/features/SubDivMotionHistogram4.java | 2 +- .../core/features/SubDivMotionHistogram5.java | 2 +- .../SubDivMotionHistogramBackground2.java | 2 +- .../SubDivMotionHistogramBackground3.java | 2 +- .../SubDivMotionHistogramBackground4.java | 2 +- .../SubDivMotionHistogramBackground5.java | 2 +- .../neuralnet/tf/models/yolo/YOLO.java | 4 +- .../visualization/AudioSignalVisualizer.java | 2 +- .../util/images/ImagePreprocessingHelper.java | 129 ++++++++++ .../core/util/{ => math}/MathHelper.java | 2 +- .../cineast/core/util/text/ClipTokenizer.java | 225 ++++++++++++++++++ .../util/{ => text}/TextDetector_EAST.java | 2 +- .../util/{ => text}/TextRecognizer_CTC.java | 2 +- .../core/util/{ => text}/TextStream.java | 2 +- .../AssociatedLegendrePolynomialTest.java | 2 +- .../math/functions/RadialPolynomialTest.java | 2 +- .../functions/ZernikePolynomialsTest.java | 2 +- .../runtime/ContinuousQueryDispatcher.java | 2 +- externalFiles.csv | 1 + 46 files changed, 698 insertions(+), 94 deletions(-) create mode 100644 cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPImage.java create mode 100644 cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPText.java create mode 100644 cineast-core/src/main/java/org/vitrivr/cineast/core/util/images/ImagePreprocessingHelper.java rename cineast-core/src/main/java/org/vitrivr/cineast/core/util/{ => math}/MathHelper.java (99%) create mode 100644 cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/ClipTokenizer.java rename cineast-core/src/main/java/org/vitrivr/cineast/core/util/{ => text}/TextDetector_EAST.java (99%) rename cineast-core/src/main/java/org/vitrivr/cineast/core/util/{ => text}/TextRecognizer_CTC.java (99%) rename cineast-core/src/main/java/org/vitrivr/cineast/core/util/{ => text}/TextStream.java (99%) diff --git a/build.gradle b/build.gradle index 07b8a50b9..e57bb5562 100644 --- a/build.gradle +++ b/build.gradle @@ -11,7 +11,7 @@ allprojects { group = 'org.vitrivr' /* Our current version, on dev branch this should always be release+1-SNAPSHOT */ - version = '3.6.2' + version = '3.6.3' apply plugin: 'java-library' apply plugin: 'maven-publish' diff --git a/cineast-api/src/main/java/org/vitrivr/cineast/api/util/QueryUtil.java b/cineast-api/src/main/java/org/vitrivr/cineast/api/util/QueryUtil.java index 13217c6fc..9b43bebfb 100644 --- a/cineast-api/src/main/java/org/vitrivr/cineast/api/util/QueryUtil.java +++ b/cineast-api/src/main/java/org/vitrivr/cineast/api/util/QueryUtil.java @@ -26,7 +26,7 @@ import org.vitrivr.cineast.core.db.DBSelector; import org.vitrivr.cineast.core.db.dao.reader.TagReader; import org.vitrivr.cineast.core.features.SegmentTags; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.standalone.config.Config; import org.vitrivr.cineast.standalone.config.RetrievalRuntimeConfig; import org.vitrivr.cineast.standalone.util.ContinuousRetrievalLogic; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Location.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Location.java index b96120d96..77a4d8b7c 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Location.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Location.java @@ -5,7 +5,7 @@ import java.util.Objects; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class Location implements ReadableFloatVector { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Pair.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Pair.java index 6ff0aca18..1645d6fe6 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Pair.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/data/Pair.java @@ -1,5 +1,7 @@ package org.vitrivr.cineast.core.data; +import java.util.Objects; + public class Pair { public K first; @@ -10,4 +12,21 @@ public Pair(K first, V second) { this.second = second; } + @Override + public String toString() { + return "Pair(" + this.first + ", " + this.second + ")"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Pair pair = (Pair) o; + return Objects.equals(first, pair.first) && Objects.equals(second, pair.second); + } + + @Override + public int hashCode() { + return Objects.hash(first, second); + } } diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/data/query/containers/AbstractQueryTermContainer.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/data/query/containers/AbstractQueryTermContainer.java index a55fcff92..149224191 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/data/query/containers/AbstractQueryTermContainer.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/data/query/containers/AbstractQueryTermContainer.java @@ -5,7 +5,7 @@ import org.vitrivr.cineast.core.data.entities.MediaObjectDescriptor; import org.vitrivr.cineast.core.data.entities.MediaSegmentDescriptor; import org.vitrivr.cineast.core.data.segments.SegmentContainer; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; /** * An {@link AbstractQueryTermContainer} is the implementation of a {@link SegmentContainer} which is used in the online-phase (during retrieval). diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/db/cottontaildb/CottontailWrapper.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/db/cottontaildb/CottontailWrapper.java index 92b39465d..65a83585d 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/db/cottontaildb/CottontailWrapper.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/db/cottontaildb/CottontailWrapper.java @@ -51,9 +51,14 @@ public CottontailWrapper(DatabaseConfig config, boolean keepOpen) { } this.channel = builder.build(); this.client = new SimpleClient(this.channel); + + boolean pingSuccessful = this.client.ping(); watch.stop(); - LOGGER.info("Connected to Cottontail in {} ms at {}:{}", watch.getTime(TimeUnit.MILLISECONDS), - config.getHost(), config.getPort()); + if (pingSuccessful) { + LOGGER.info("Connected to Cottontail in {} ms at {}:{}", watch.getTime(TimeUnit.MILLISECONDS), config.getHost(), config.getPort()); + } else { + LOGGER.warn("Could not connect to Cottontail at {}:{}", config.getHost(), config.getPort()); + } } /** diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/extraction/decode/video/VideoOutputStreamContainer.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/extraction/decode/video/VideoOutputStreamContainer.java index ab8ce4688..11b148b1f 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/extraction/decode/video/VideoOutputStreamContainer.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/extraction/decode/video/VideoOutputStreamContainer.java @@ -19,7 +19,7 @@ import org.bytedeco.javacpp.avutil; import org.bytedeco.javacpp.swscale; import org.vitrivr.cineast.core.data.raw.images.MultiImage; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; class VideoOutputStreamContainer extends AbstractAVStreamContainer { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/AverageHPCP.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/AverageHPCP.java index 91a6d7c2e..e0fe4a3cf 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/AverageHPCP.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/AverageHPCP.java @@ -13,7 +13,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.StagedFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.audio.HPCP; import org.vitrivr.cineast.core.util.dsp.fft.FFTUtil; import org.vitrivr.cineast.core.util.dsp.fft.STFT; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CENS.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CENS.java index a592ba42f..2ea165257 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CENS.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CENS.java @@ -14,7 +14,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.StagedFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.audio.HPCP; import org.vitrivr.cineast.core.util.dsp.fft.FFTUtil; import org.vitrivr.cineast.core.util.dsp.fft.STFT; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPImage.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPImage.java new file mode 100644 index 000000000..3af7c1a1f --- /dev/null +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPImage.java @@ -0,0 +1,111 @@ +package org.vitrivr.cineast.core.features; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.tensorflow.SavedModelBundle; +import org.tensorflow.Tensor; +import org.tensorflow.ndarray.Shape; +import org.tensorflow.ndarray.buffer.DataBuffers; +import org.tensorflow.ndarray.buffer.FloatDataBuffer; +import org.tensorflow.types.TFloat16; +import org.vitrivr.cineast.core.config.QueryConfig; +import org.vitrivr.cineast.core.config.ReadableQueryConfig; +import org.vitrivr.cineast.core.data.FloatVectorImpl; +import org.vitrivr.cineast.core.data.frames.VideoFrame; +import org.vitrivr.cineast.core.data.score.ScoreElement; +import org.vitrivr.cineast.core.data.segments.SegmentContainer; +import org.vitrivr.cineast.core.features.abstracts.AbstractFeatureModule; +import org.vitrivr.cineast.core.util.images.ImagePreprocessingHelper; + +import java.awt.image.BufferedImage; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CLIPImage extends AbstractFeatureModule { + + private static final Logger LOGGER = LogManager.getLogger(); + + private static final int EMBEDDING_SIZE = 512; + private static final String TABLE_NAME = "features_clip"; + private static final ReadableQueryConfig.Distance DISTANCE = ReadableQueryConfig.Distance.cosine; + + private static final int IMAGE_SIZE = 224; + + private static final String RESOURCE_PATH = "resources/CLIP/"; + private static final String EMBEDDING_MODEL = "clip-image-vit-32-tf"; + + private static final String EMBEDDING_INPUT = "input"; + private static final String EMBEDDING_OUTPUT = "output"; + + private static final float[] MEAN = new float[]{0.48145466f, 0.4578275f, 0.40821073f}; + private static final float[] STD = new float[]{0.26862954f, 0.26130258f, 0.27577711f}; + + private SavedModelBundle model; + + public CLIPImage() { + super(TABLE_NAME, 1f, EMBEDDING_SIZE); + model = SavedModelBundle.load(RESOURCE_PATH + EMBEDDING_MODEL); + } + + @Override + public void processSegment(SegmentContainer shot) { + + if (shot.getMostRepresentativeFrame() == VideoFrame.EMPTY_VIDEO_FRAME) { + return; + } + + float[] embeddingArray = embedImage(shot.getMostRepresentativeFrame().getImage().getBufferedImage()); + this.persist(shot.getId(), new FloatVectorImpl(embeddingArray)); + + } + + @Override + protected ReadableQueryConfig setQueryConfig(ReadableQueryConfig qc) { + return QueryConfig.clone(qc).setDistance(DISTANCE); + } + + @Override + public List getSimilar(SegmentContainer sc, ReadableQueryConfig qc) { + + if (sc.getMostRepresentativeFrame() == VideoFrame.EMPTY_VIDEO_FRAME) { + return Collections.emptyList(); + } + + QueryConfig queryConfig = QueryConfig.clone(qc); + queryConfig.setDistance(DISTANCE); + + float[] embeddingArray = embedImage(sc.getMostRepresentativeFrame().getImage().getBufferedImage()); + + return getSimilar(embeddingArray, queryConfig); + } + + private float[] embedImage(BufferedImage img) { + + float[] rgb = prepareImage(img); + + try (TFloat16 imageTensor = TFloat16.tensorOf(Shape.of(1, 3, IMAGE_SIZE, IMAGE_SIZE), DataBuffers.of(rgb))) { + HashMap inputMap = new HashMap<>(); + inputMap.put(EMBEDDING_INPUT, imageTensor); + + Map resultMap = model.call(inputMap); + + try (TFloat16 encoding = (TFloat16) resultMap.get(EMBEDDING_OUTPUT)) { + + float[] embeddingArray = new float[EMBEDDING_SIZE]; + FloatDataBuffer floatBuffer = DataBuffers.of(embeddingArray); + encoding.read(floatBuffer); + + return embeddingArray; + + } + } + } + + private static float[] prepareImage(BufferedImage img) { + return ImagePreprocessingHelper.imageToCHWArray( + ImagePreprocessingHelper.squaredScaleCenterCrop(img, IMAGE_SIZE), + MEAN, STD); + } +} diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPText.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPText.java new file mode 100644 index 000000000..50ddae2e5 --- /dev/null +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/CLIPText.java @@ -0,0 +1,149 @@ +package org.vitrivr.cineast.core.features; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.tensorflow.SavedModelBundle; +import org.tensorflow.Tensor; +import org.tensorflow.ndarray.LongNdArray; +import org.tensorflow.ndarray.NdArrays; +import org.tensorflow.ndarray.Shape; +import org.tensorflow.ndarray.buffer.DataBuffers; +import org.tensorflow.ndarray.buffer.FloatDataBuffer; +import org.tensorflow.types.TFloat16; +import org.tensorflow.types.TInt64; +import org.vitrivr.cineast.core.config.QueryConfig; +import org.vitrivr.cineast.core.config.ReadableQueryConfig; +import org.vitrivr.cineast.core.data.CorrespondenceFunction; +import org.vitrivr.cineast.core.data.distance.DistanceElement; +import org.vitrivr.cineast.core.data.distance.SegmentDistanceElement; +import org.vitrivr.cineast.core.data.providers.primitive.FloatArrayTypeProvider; +import org.vitrivr.cineast.core.data.providers.primitive.PrimitiveTypeProvider; +import org.vitrivr.cineast.core.data.providers.primitive.StringTypeProvider; +import org.vitrivr.cineast.core.data.score.ScoreElement; +import org.vitrivr.cineast.core.data.segments.SegmentContainer; +import org.vitrivr.cineast.core.db.DBSelector; +import org.vitrivr.cineast.core.db.DBSelectorSupplier; +import org.vitrivr.cineast.core.db.setup.EntityCreator; +import org.vitrivr.cineast.core.features.retriever.Retriever; +import org.vitrivr.cineast.core.util.text.ClipTokenizer; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +import static org.vitrivr.cineast.core.util.CineastConstants.FEATURE_COLUMN_QUALIFIER; +import static org.vitrivr.cineast.core.util.CineastConstants.GENERIC_ID_COLUMN_QUALIFIER; + +public class CLIPText implements Retriever { + + private static final Logger LOGGER = LogManager.getLogger(); + + private static final int EMBEDDING_SIZE = 512; + private static final String TABLE_NAME = "features_clip"; + private static final ReadableQueryConfig.Distance DISTANCE = ReadableQueryConfig.Distance.cosine; + + private static final String RESOURCE_PATH = "resources/CLIP/"; + private static final String EMBEDDING_MODEL = "clip-text-vit-32-tf"; + + private static final String EMBEDDING_INPUT = "input"; + private static final String EMBEDDING_OUTPUT = "output"; + + private static final CorrespondenceFunction CORRESPONDENCE = CorrespondenceFunction.linear(1f); + + private static SavedModelBundle model; + + private DBSelector selector; + private ClipTokenizer ct = new ClipTokenizer(); + + private static void init() { + if (model == null) { + model = SavedModelBundle.load(RESOURCE_PATH + EMBEDDING_MODEL); + } + } + + public CLIPText() { + init(); + } + + @Override + public void initalizePersistentLayer(Supplier supply) { + supply.get().createFeatureEntity(TABLE_NAME, true, EMBEDDING_SIZE); + } + + @Override + public void dropPersistentLayer(Supplier supply) { + supply.get().dropEntity(TABLE_NAME); + } + + @Override + public void init(DBSelectorSupplier selectorSupply) { + this.selector = selectorSupply.get(); + this.selector.open(TABLE_NAME); + } + + @Override + public List getSimilar(SegmentContainer sc, ReadableQueryConfig qc) { + + String text = sc.getText(); + + if (text == null || text.isBlank()) { + return Collections.emptyList(); + } + + return getSimilar(new FloatArrayTypeProvider(embedText(text)), qc); + } + + private float[] embedText(String text) { + + long[] tokens = ct.clipTokenize(text); + + LongNdArray arr = NdArrays.ofLongs(Shape.of(1, tokens.length)); + for (int i = 0; i < tokens.length; i++) { + arr.setLong(tokens[i], 0, i); + } + + try (TInt64 textTensor = TInt64.tensorOf(arr)) { + + HashMap inputMap = new HashMap<>(); + inputMap.put(EMBEDDING_INPUT, textTensor); + + Map resultMap = model.call(inputMap); + + try (TFloat16 embedding = (TFloat16) resultMap.get(EMBEDDING_OUTPUT)) { + + float[] embeddingArray = new float[EMBEDDING_SIZE]; + FloatDataBuffer floatBuffer = DataBuffers.of(embeddingArray); + embedding.read(floatBuffer); + return embeddingArray; + + } + } + } + + @Override + public List getSimilar(String segmentId, ReadableQueryConfig qc) { + List list = this.selector.getFeatureVectorsGeneric(GENERIC_ID_COLUMN_QUALIFIER, new StringTypeProvider(segmentId), FEATURE_COLUMN_QUALIFIER); + if (list.isEmpty()) { + LOGGER.warn("No feature vector for shotId {} found, returning empty result-list", segmentId); + return Collections.emptyList(); + } + return getSimilar(list.get(0), qc); + } + + private List getSimilar(PrimitiveTypeProvider queryProvider, ReadableQueryConfig qc) { + ReadableQueryConfig qcc = QueryConfig.clone(qc).setDistance(DISTANCE); + List distances = this.selector.getNearestNeighboursGeneric(qc.getResultsPerModule(), queryProvider, FEATURE_COLUMN_QUALIFIER, SegmentDistanceElement.class, qcc); + CorrespondenceFunction function = qcc.getCorrespondenceFunction().orElse(CORRESPONDENCE); + return DistanceElement.toScore(distances, function); + } + + @Override + public void finish() { + if (this.selector != null) { + this.selector.close(); + this.selector = null; + } + } +} diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/EHD.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/EHD.java index 3ef126e12..35d4ea1f5 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/EHD.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/EHD.java @@ -16,7 +16,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.AbstractFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; /** * see Efficient Use of MPEG-7 Edge Histogram Descriptor by Won '02 see http://stackoverflow.com/questions/909542/opencv-edge-extraction diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K256.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K256.java index 1aa61d84a..0d8cf2946 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K256.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K256.java @@ -1,6 +1,6 @@ package org.vitrivr.cineast.core.features; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; /** * A Extraction and Retrieval module that uses HOG descriptors and a 256 word codebook based on Mirflickr 25K to obtain a histograms of codewords. These histograms ares used as feature-vectors. diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K512.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K512.java index 9f40fd862..afe0e82cf 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K512.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HOGMirflickr25K512.java @@ -1,6 +1,6 @@ package org.vitrivr.cineast.core.features; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; /** * A Extraction and Retrieval module that uses HOG descriptors and a 512 word codebook based on Mirflickr 25K to obtain a histograms of codewords. These histograms ares used as feature-vectors. diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HPCPShingle.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HPCPShingle.java index b0a4b2e35..fab74ec51 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HPCPShingle.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/HPCPShingle.java @@ -15,7 +15,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.StagedFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.audio.HPCP; import org.vitrivr.cineast.core.util.dsp.fft.FFTUtil; import org.vitrivr.cineast.core.util.dsp.fft.STFT; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/InceptionResnetV2.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/InceptionResnetV2.java index 4294b3475..35266766b 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/InceptionResnetV2.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/InceptionResnetV2.java @@ -1,13 +1,5 @@ package org.vitrivr.cineast.core.features; -import java.awt.image.BufferedImage; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import net.coobird.thumbnailator.Thumbnails; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.tensorflow.SavedModelBundle; @@ -25,6 +17,14 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.AbstractFeatureModule; +import org.vitrivr.cineast.core.util.images.ImagePreprocessingHelper; + +import java.awt.image.BufferedImage; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; public class InceptionResnetV2 extends AbstractFeatureModule { @@ -40,6 +40,12 @@ public class InceptionResnetV2 extends AbstractFeatureModule { public static final int IMAGE_WIDTH = 299; public static final int IMAGE_HEIGHT = 299; + /** + * mean and std for color values + */ + private static final float[] MEAN = new float[]{0.5f, 0.5f, 0.5f}; + private static final float[] STD = new float[]{0.5f, 0.5f, 0.5f}; + /** * Resource paths. */ @@ -202,49 +208,8 @@ public static float[] encodeVideo(List frames) { * @return Float array representation of the input image. */ public static float[] preprocessImage(BufferedImage image) { - if (image.getWidth() != IMAGE_WIDTH || image.getHeight() != IMAGE_HEIGHT) { - try { - image = Thumbnails.of(image).forceSize(IMAGE_WIDTH, IMAGE_HEIGHT).asBufferedImage(); - } catch (IOException e) { - LOGGER.error("Could not resize image", e); - } - } - int[] colors = image.getRGB(0, 0, IMAGE_WIDTH, IMAGE_HEIGHT, null, 0, IMAGE_WIDTH); - int[] rgb = colorsToRGB(colors); - return preprocessInput(rgb); - } - - /** - * Preprocesses input in a way equivalent to that performed in the Python TensorFlow library. - *

- * Maps all values from [0,255] to [-1, 1]. - */ - private static float[] preprocessInput(int[] colors) { - // x /= 127.5 - // x -= 1. - float[] processedColors = new float[colors.length]; - for (int i = 0; i < colors.length; i++) { - processedColors[i] = (colors[i] / 127.5f) - 1; - } - - return processedColors; - } - - /** - * Converts an integer colors array storing ARGB values in each integer into an integer array where each integer stores R, G or B value. - */ - private static int[] colorsToRGB(int[] colors) { - int[] rgb = new int[colors.length * 3]; - - for (int i = 0; i < colors.length; i++) { - // Start index for rgb array - int j = i * 3; - rgb[j] = (colors[i] >> 16) & 0xFF; // r - rgb[j + 1] = (colors[i] >> 8) & 0xFF; // g - rgb[j + 2] = colors[i] & 0xFF; // b - } - - return rgb; + BufferedImage img = ImagePreprocessingHelper.forceScale(image, IMAGE_WIDTH, IMAGE_HEIGHT); + return ImagePreprocessingHelper.imageToHWCArray(img, MEAN, STD); } private static void initializeModel() { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldFourier.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldFourier.java index 744287268..ee59f26a5 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldFourier.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldFourier.java @@ -11,7 +11,7 @@ import org.apache.commons.math3.transform.TransformType; import org.vitrivr.cineast.core.config.QueryConfig; import org.vitrivr.cineast.core.config.ReadableQueryConfig; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.images.ContourHelper; import org.vitrivr.cineast.core.util.math.MathConstants; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldZernike.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldZernike.java index 972e3fb84..7d81c2855 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldZernike.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/LightfieldZernike.java @@ -6,7 +6,7 @@ import org.apache.commons.math3.complex.Complex; import org.vitrivr.cineast.core.config.QueryConfig; import org.vitrivr.cineast.core.config.ReadableQueryConfig; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.images.ZernikeHelper; import org.vitrivr.cineast.core.util.math.MathConstants; import org.vitrivr.cineast.core.util.math.ZernikeMoments; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MFCCShingle.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MFCCShingle.java index b79f7be72..9af872f62 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MFCCShingle.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MFCCShingle.java @@ -15,7 +15,7 @@ import org.vitrivr.cineast.core.data.score.SegmentScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.StagedFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.audio.MFCC; import org.vitrivr.cineast.core.util.dsp.fft.FFTUtil; import org.vitrivr.cineast.core.util.dsp.fft.STFT; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MelodyEstimate.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MelodyEstimate.java index 3dcbc2931..c48e1211d 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MelodyEstimate.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MelodyEstimate.java @@ -11,7 +11,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.StagedFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.audio.pitch.Melody; import org.vitrivr.cineast.core.util.audio.pitch.Pitch; import org.vitrivr.cineast.core.util.audio.pitch.estimation.KLF0PitchEstimator; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogram.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogram.java index e58c6bad2..cb005a025 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogram.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogram.java @@ -14,7 +14,7 @@ import org.vitrivr.cineast.core.db.PersistentTuple; import org.vitrivr.cineast.core.db.setup.EntityCreator; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class MotionHistogram extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogramBackground.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogramBackground.java index cb29ef853..640d1b291 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogramBackground.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/MotionHistogramBackground.java @@ -16,7 +16,7 @@ import org.vitrivr.cineast.core.db.setup.AttributeDefinition.AttributeType; import org.vitrivr.cineast.core.db.setup.EntityCreator; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class MotionHistogramBackground extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/OCRSearch.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/OCRSearch.java index 717da2248..97e8edbec 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/OCRSearch.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/OCRSearch.java @@ -23,9 +23,9 @@ import org.vitrivr.cineast.core.util.HungarianAlgorithm; import org.vitrivr.cineast.core.util.MultiTracker; import org.vitrivr.cineast.core.util.NeedlemanWunschMerge; -import org.vitrivr.cineast.core.util.TextDetector_EAST; -import org.vitrivr.cineast.core.util.TextRecognizer_CTC; -import org.vitrivr.cineast.core.util.TextStream; +import org.vitrivr.cineast.core.util.text.TextDetector_EAST; +import org.vitrivr.cineast.core.util.text.TextRecognizer_CTC; +import org.vitrivr.cineast.core.util.text.TextStream; import org.vitrivr.cineast.core.util.ThreadLocalObjectCache; /** diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SegmentTags.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SegmentTags.java index 8f620255e..336fb05e6 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SegmentTags.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SegmentTags.java @@ -32,7 +32,7 @@ import org.vitrivr.cineast.core.db.setup.EntityCreator; import org.vitrivr.cineast.core.features.extractor.Extractor; import org.vitrivr.cineast.core.features.retriever.Retriever; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SegmentTags implements Extractor, Retriever { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SphericalHarmonics.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SphericalHarmonics.java index aeb034527..4e31cee01 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SphericalHarmonics.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SphericalHarmonics.java @@ -14,7 +14,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.StagedFeatureModule; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.math.functions.SphericalHarmonicsFunction; /** diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram2.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram2.java index 5cdb15599..038e97718 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram2.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram2.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogram2 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram3.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram3.java index 7a855a209..d4601e961 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram3.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram3.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogram3 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram4.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram4.java index 7fb8521c5..7364efe25 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram4.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram4.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogram4 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram5.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram5.java index 536d94f67..57aedabfb 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram5.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogram5.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogram5 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground2.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground2.java index ead12a7fc..acffc6672 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground2.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground2.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogramBackground2 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground3.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground3.java index e83c132c7..451668033 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground3.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground3.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogramBackground3 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground4.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground4.java index 58423ea1d..84180120d 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground4.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground4.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogramBackground4 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground5.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground5.java index ee054293e..3fb552088 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground5.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/SubDivMotionHistogramBackground5.java @@ -10,7 +10,7 @@ import org.vitrivr.cineast.core.data.score.ScoreElement; import org.vitrivr.cineast.core.data.segments.SegmentContainer; import org.vitrivr.cineast.core.features.abstracts.SubDivMotionHistogram; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class SubDivMotionHistogramBackground5 extends SubDivMotionHistogram { diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/neuralnet/tf/models/yolo/YOLO.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/neuralnet/tf/models/yolo/YOLO.java index 414932f83..fd87f0463 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/features/neuralnet/tf/models/yolo/YOLO.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/features/neuralnet/tf/models/yolo/YOLO.java @@ -28,8 +28,8 @@ import org.vitrivr.cineast.core.features.neuralnet.tf.models.yolo.util.BoxPosition; import org.vitrivr.cineast.core.features.neuralnet.tf.models.yolo.util.Recognition; import org.vitrivr.cineast.core.util.LogHelper; -import org.vitrivr.cineast.core.util.MathHelper; -import org.vitrivr.cineast.core.util.MathHelper.ArgMaxResult; +import org.vitrivr.cineast.core.util.math.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper.ArgMaxResult; /** diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/dsp/visualization/AudioSignalVisualizer.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/dsp/visualization/AudioSignalVisualizer.java index 50cf22b63..ab7343a4d 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/dsp/visualization/AudioSignalVisualizer.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/dsp/visualization/AudioSignalVisualizer.java @@ -3,7 +3,7 @@ import java.awt.Color; import java.awt.image.BufferedImage; import java.util.List; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.audio.CENS; import org.vitrivr.cineast.core.util.audio.HPCP; import org.vitrivr.cineast.core.util.dsp.fft.Spectrum; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/images/ImagePreprocessingHelper.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/images/ImagePreprocessingHelper.java new file mode 100644 index 000000000..d6156e6fa --- /dev/null +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/images/ImagePreprocessingHelper.java @@ -0,0 +1,129 @@ +package org.vitrivr.cineast.core.util.images; + +import net.coobird.thumbnailator.Thumbnails; +import net.coobird.thumbnailator.geometry.Positions; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.awt.image.BufferedImage; +import java.io.IOException; + +public class ImagePreprocessingHelper { + + private ImagePreprocessingHelper() { + } + + private static final Logger LOGGER = LogManager.getLogger(); + + /** + * Scales an input image to the specified dimensions, not preserving the original aspect ratio + */ + public static BufferedImage forceScale(BufferedImage img, int width, int height) { + if (img == null) { + return null; + } + if (img.getWidth() == width && img.getHeight() == height) { + return img; + } + try { + return Thumbnails.of(img).forceSize(width, height).asBufferedImage(); + } catch (IOException e) { + LOGGER.error("Could not resize image", e); + return null; + } + } + + /** + * Scales image to fit and crops center square + */ + public static BufferedImage squaredScaleCenterCrop(BufferedImage img, int size) { + + if (img == null) { + return null; + } + + try { + BufferedImage tmp; + + if (img.getWidth() > img.getHeight()) { + tmp = Thumbnails.of(img).height(size).asBufferedImage(); + } else { + tmp = Thumbnails.of(img).width(size).asBufferedImage(); + } + + return Thumbnails.of(tmp).crop(Positions.CENTER).size(size, size).asBufferedImage(); + } catch (IOException e) { + LOGGER.error("Error while preparing image", e); + return null; + } + + } + + /** + * Transforms provided image to a linearized color tensor of shape Height x Width x Color + */ + public static float[] imageToHWCArray(BufferedImage img, float[] mean, float[] std) { + + if (img == null) { + return new float[0]; + } + + if (mean == null || mean.length < 3) { + mean = new float[]{0f, 0f, 0f}; + } + + if (std == null || std.length < 3) { + std = new float[]{1f, 1f, 1f}; + } + + int[] colors = img.getRGB(0, 0, img.getWidth(), img.getHeight(), null, 0, img.getWidth()); + + float[] rgb = new float[img.getWidth() * img.getHeight() * 3]; + + for (int i = 0; i < colors.length; i++) { + int j = i * 3; + rgb[j] = ((((colors[i] >> 16) & 0xFF) / 255f) - mean[0]) / std[0]; // r + rgb[j + 1] = ((((colors[i] >> 8) & 0xFF) / 255f) - mean[1]) / std[1]; // g + rgb[j + 2] = (((colors[i] & 0xFF) / 255f) - mean[2]) / std[2]; // b + } + + return rgb; + + } + + /** + * Transforms provided image to a linearized color tensor of shape Color x Height x Width + */ + public static float[] imageToCHWArray(BufferedImage img, float[] mean, float[] std) { + + if (img == null) { + return new float[0]; + } + + if (mean == null || mean.length < 3) { + mean = new float[]{0f, 0f, 0f}; + } + + if (std == null || std.length < 3) { + std = new float[]{1f, 1f, 1f}; + } + + int[] colors = img.getRGB(0, 0, img.getWidth(), img.getHeight(), null, 0, img.getWidth()); + + final int gOffset = colors.length; + final int bOffset = 2 * gOffset; + + float[] rgb = new float[img.getWidth() * img.getHeight() * 3]; + + for (int i = 0; i < colors.length; i++) { + + rgb[i] = ((((colors[i] >> 16) & 0xFF) / 255f) - mean[0]) / std[0]; // r + rgb[i + gOffset] = ((((colors[i] >> 8) & 0xFF) / 255f) - mean[1]) / std[1]; // g + rgb[i + bOffset] = (((colors[i] & 0xFF) / 255f) - mean[2]) / std[2]; // b + } + + return rgb; + + } + +} diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/MathHelper.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/math/MathHelper.java similarity index 99% rename from cineast-core/src/main/java/org/vitrivr/cineast/core/util/MathHelper.java rename to cineast-core/src/main/java/org/vitrivr/cineast/core/util/math/MathHelper.java index f7a12dbd1..6c3992e9d 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/MathHelper.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/math/MathHelper.java @@ -1,4 +1,4 @@ -package org.vitrivr.cineast.core.util; +package org.vitrivr.cineast.core.util.math; import java.util.Arrays; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/ClipTokenizer.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/ClipTokenizer.java new file mode 100644 index 000000000..6e7a2cdf6 --- /dev/null +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/ClipTokenizer.java @@ -0,0 +1,225 @@ +package org.vitrivr.cineast.core.util.text; + +import org.apache.commons.text.StringEscapeUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.vitrivr.cineast.core.data.Pair; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Port of https://github.com/openai/CLIP/blob/573315e83f07b53a61ff5098757e8fc885f1703e/clip/simple_tokenizer.py + */ +public class ClipTokenizer { + + private static final Logger LOGGER = LogManager.getLogger(); + + private static HashMap bytes_to_unicode() { + //33 - 126, 161 - 172, 174 - 255 + char[] CHARS = new char[]{ + 'Ā', 'ā', 'Ă', 'ă', 'Ą', 'ą', 'Ć', 'ć', 'Ĉ', 'ĉ', 'Ċ', 'ċ', 'Č', 'č', 'Ď', 'ď', 'Đ', 'đ', 'Ē', 'ē', 'Ĕ', 'ĕ', 'Ė', 'ė', 'Ę', 'ę', 'Ě', 'ě', 'Ĝ', 'ĝ', 'Ğ', 'ğ', + 'Ġ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', '`', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 'ġ', 'Ģ', + 'ģ', 'Ĥ', 'ĥ', 'Ħ', 'ħ', 'Ĩ', 'ĩ', 'Ī', 'ī', 'Ĭ', 'ĭ', 'Į', 'į', 'İ', 'ı', 'IJ', 'ij', 'Ĵ', 'ĵ', 'Ķ', 'ķ', 'ĸ', 'Ĺ', 'ĺ', 'Ļ', 'ļ', 'Ľ', 'ľ', 'Ŀ', 'ŀ', 'Ł', 'ł', + '¡', '¢', '£', '¤', '¥', '¦', '§', '¨', '©', 'ª', '«', '¬', 'Ń', '®', '¯', '°', '±', '²', '³', '´', 'µ', '¶', '·', '¸', '¹', 'º', '»', '¼', '½', '¾', '¿', 'À', + 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×', 'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 'à', + 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ' + }; + HashMap map = new HashMap<>(); + + for (int i = 0; i < 256; ++i) { + map.put(i, CHARS[i]); + } + return map; + } + + private static Set> get_pairs(List word) { + HashSet> set = new HashSet<>(); + if (word.isEmpty()) { + return set; + } + + for (int i = 0; i < word.size() - 1; ++i) { + set.add(new Pair<>(word.get(i) + "", word.get(i + 1))); + } + + + return set; + } + + private static String whitespace_clean(String text) { + return text.replaceAll("\\s+", " ").strip(); + } + + private static String clean(String text) { + return StringEscapeUtils.unescapeHtml4(StringEscapeUtils.unescapeXml(text)); + } + + private static final HashMap byte_encoder = bytes_to_unicode(); + private static final HashMap byte_decoder = new HashMap<>(); + + static { + for (int i : byte_encoder.keySet()) { + byte_decoder.put(byte_encoder.get(i), i); + } + } + + private static final ArrayList vocab = new ArrayList<>(49500); + private static final HashMap, Integer> bpe_ranks = new HashMap<>(); + private static final HashMap encoder = new HashMap<>(); + + private static void init() { + + if (!vocab.isEmpty()) { + return; + } + + for (char c : byte_decoder.keySet()) { + vocab.add("" + c); + } + for (int i = 0; i < 256; ++i) { + vocab.add(vocab.get(i) + ""); + } + + try { + List f = Files.readAllLines(Path.of("resources/CLIP/bpe_simple_vocab_16e6.txt"), StandardCharsets.UTF_8); + for (int i = 1; i < 49152 - 256 - 2 + 1; ++i) { + + String s = f.get(i); + vocab.add(s.replaceAll(" ", "")); + + String[] split = s.split(" "); + bpe_ranks.put(new Pair<>(split[0], split[1]), i - 1); + + } + } catch (IOException e) { + LOGGER.error("Cannot load vocabulary {}", e); + } + + vocab.add("<|startoftext|>"); + vocab.add("<|endoftext|>"); + + for (int i = 0; i < vocab.size(); ++i) { + encoder.put(vocab.get(i), i); + } + + } + + + private HashMap cache = new HashMap<>(); + private Pattern pat = Pattern.compile("<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+", Pattern.CASE_INSENSITIVE); + + public ClipTokenizer() { + init(); + cache.put("<|startoftext|>", "<|startoftext|>"); + cache.put("<|endoftext|>", "<|endoftext|>"); + } + + String bpe(String token) { + + if (cache.containsKey(token)) { + return cache.get(token); + } + + ArrayList word = new ArrayList<>(token.length()); + for (int j = 0; j < token.length() - 1; j++) { + word.add(token.charAt(j) + ""); + } + word.add(token.charAt(token.length() - 1) + ""); + + Set> pairs = get_pairs(word); + + if (pairs.isEmpty()) { + return token + ""; + } + + while (true) { + + Pair bigram = pairs.stream().map(p -> new Pair<>(p, bpe_ranks.getOrDefault(p, Integer.MAX_VALUE))).min(Comparator.comparingInt(p -> p.second)).get().first; + + if (!bpe_ranks.containsKey(bigram)) { + break; + } + + String first = bigram.first, second = bigram.second; + + int i = 0; + + ArrayList new_word = new ArrayList<>(); + + while (i < word.size()) { + + int j = word.subList(i, word.size()).indexOf(first) + i; + if (j < i) { + new_word.addAll(word.subList(i, word.size())); + break; + } + new_word.addAll(word.subList(i, j)); + i = j; + + + if (word.get(i).equals(first) && i < word.size() - 1 && word.get(i + 1).equals(second)) { + new_word.add(first + second); + i += 2; + } else { + new_word.add(word.get(i)); + ++i; + } + } + + word = new_word; + if (word.size() == 1) { + break; + } + pairs = get_pairs(word); + + } + + String bpe = word.stream().reduce("", (a, b) -> a + " " + b).trim(); + cache.put(token, bpe); + return bpe; + + } + + public ArrayList encode(String text) { + ArrayList bpe_tokens = new ArrayList(); + String clean = whitespace_clean(clean(text)).toLowerCase(Locale.ROOT); + Matcher m = pat.matcher(clean); + while (m.find()) { + String t = m.group(); + StringBuilder sb = new StringBuilder(); + for (byte b : t.getBytes(StandardCharsets.UTF_8)) { + sb.append(byte_encoder.get((int) b)); + } + for (String s : bpe(sb.toString()).split(" ")) { + bpe_tokens.add(encoder.get(s)); + } + } + return bpe_tokens; + + } + + public long[] clipTokenize(String text) { + int start = encoder.get("<|startoftext|>"); + int end = encoder.get("<|endoftext|>"); + ArrayList tokens = encode(text); + long[] arr = new long[77]; + arr[0] = start; + int tokenCount = Math.min(tokens.size(), arr.length - 1); + for (int i = 0; i < tokenCount; ++i) { + arr[i + 1] = (long) tokens.get(i); + } + if (tokenCount < arr.length) { + arr[tokenCount] = end; + } + return arr; + } + +} diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextDetector_EAST.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextDetector_EAST.java similarity index 99% rename from cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextDetector_EAST.java rename to cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextDetector_EAST.java index 9c8cc4a00..8a5bb96c9 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextDetector_EAST.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextDetector_EAST.java @@ -1,4 +1,4 @@ -package org.vitrivr.cineast.core.util; +package org.vitrivr.cineast.core.util.text; import java.util.ArrayList; import java.util.List; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextRecognizer_CTC.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextRecognizer_CTC.java similarity index 99% rename from cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextRecognizer_CTC.java rename to cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextRecognizer_CTC.java index bb584037f..4f958e9c4 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextRecognizer_CTC.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextRecognizer_CTC.java @@ -1,4 +1,4 @@ -package org.vitrivr.cineast.core.util; +package org.vitrivr.cineast.core.util.text; import java.util.ArrayList; import java.util.Arrays; diff --git a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextStream.java b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextStream.java similarity index 99% rename from cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextStream.java rename to cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextStream.java index 963fec4b4..12ce7387c 100644 --- a/cineast-core/src/main/java/org/vitrivr/cineast/core/util/TextStream.java +++ b/cineast-core/src/main/java/org/vitrivr/cineast/core/util/text/TextStream.java @@ -1,4 +1,4 @@ -package org.vitrivr.cineast.core.util; +package org.vitrivr.cineast.core.util.text; import georegression.struct.shapes.Quadrilateral_F64; import java.util.ArrayList; diff --git a/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/AssociatedLegendrePolynomialTest.java b/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/AssociatedLegendrePolynomialTest.java index d717a607e..f8276b283 100644 --- a/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/AssociatedLegendrePolynomialTest.java +++ b/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/AssociatedLegendrePolynomialTest.java @@ -6,7 +6,7 @@ import org.apache.commons.math3.util.CombinatoricsUtils; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class AssociatedLegendrePolynomialTest { diff --git a/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/RadialPolynomialTest.java b/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/RadialPolynomialTest.java index 5c588d30e..1031bfbe6 100644 --- a/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/RadialPolynomialTest.java +++ b/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/RadialPolynomialTest.java @@ -5,7 +5,7 @@ import org.apache.commons.math3.analysis.polynomials.PolynomialFunction; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.math.functions.factories.PolynomialFunctionFactory; diff --git a/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/ZernikePolynomialsTest.java b/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/ZernikePolynomialsTest.java index d5577c020..b706a4f05 100644 --- a/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/ZernikePolynomialsTest.java +++ b/cineast-core/src/test/java/org/vitrivr/cineast/core/util/math/functions/ZernikePolynomialsTest.java @@ -7,7 +7,7 @@ import org.apache.commons.math3.util.FastMath; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; public class ZernikePolynomialsTest { diff --git a/cineast-runtime/src/main/java/org/vitrivr/cineast/standalone/runtime/ContinuousQueryDispatcher.java b/cineast-runtime/src/main/java/org/vitrivr/cineast/standalone/runtime/ContinuousQueryDispatcher.java index a1822be86..8caae348f 100644 --- a/cineast-runtime/src/main/java/org/vitrivr/cineast/standalone/runtime/ContinuousQueryDispatcher.java +++ b/cineast-runtime/src/main/java/org/vitrivr/cineast/standalone/runtime/ContinuousQueryDispatcher.java @@ -28,7 +28,7 @@ import org.vitrivr.cineast.core.features.retriever.Retriever; import org.vitrivr.cineast.core.features.retriever.RetrieverInitializer; import org.vitrivr.cineast.core.util.LogHelper; -import org.vitrivr.cineast.core.util.MathHelper; +import org.vitrivr.cineast.core.util.math.MathHelper; import org.vitrivr.cineast.core.util.ScoreFusion; import org.vitrivr.cineast.standalone.config.Config; import org.vitrivr.cineast.standalone.listener.RetrievalResultListener; diff --git a/externalFiles.csv b/externalFiles.csv index 67f8ecd95..78f49b198 100644 --- a/externalFiles.csv +++ b/externalFiles.csv @@ -13,3 +13,4 @@ http://data.vitrivr.org/VisualTextCoEmbedding/text-co-embedding.tar.gz,resources http://data.vitrivr.org/VisualTextCoEmbedding/visual-co-embedding.tar.gz,resources/VisualTextCoEmbedding/visual-co-embedding.tar.gz https://tfhub.dev/google/universal-sentence-encoder/4?tf-hub-format=compressed,resources/VisualTextCoEmbedding/universal-sentence-encoder_4.tar.gz http://data.vitrivr.org/VisualTextCoEmbedding/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.tar.gz,resources/VisualTextCoEmbedding/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.tar.gz +http://data.vitrivr.org/clip/clip.tar.gz,resources/CLIP.tar.gz