diff --git a/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java new file mode 100644 index 0000000000000..efd6d844a6907 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +/** + * This class is an abstraction of the {@link DocValuesConsumer} for the Star Tree index structure. + * It is responsible to consume various types of document values (numeric, binary, sorted, sorted numeric, + * and sorted set) for fields in the Star Tree index. + * + * @opensearch.experimental + */ +public class Lucene90DocValuesConsumerWrapper extends DocValuesConsumer { + + Lucene90DocValuesConsumer lucene90DocValuesConsumer; + + public Lucene90DocValuesConsumerWrapper( + SegmentWriteState state, + String dataCodec, + String dataExtension, + String metaCodec, + String metaExtension + ) throws IOException { + lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension); + } + + @Override + public void close() throws IOException { + lucene90DocValuesConsumer.close(); + } + + @Override + public void addNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addNumericField(fieldInfo, docValuesProducer); + } + + @Override + public void addBinaryField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addNumericField(fieldInfo, docValuesProducer); + } + + @Override + public void addSortedField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addSortedField(fieldInfo, docValuesProducer); + } + + @Override + public void addSortedNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addSortedNumericField(fieldInfo, docValuesProducer); + } + + @Override + public void addSortedSetField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException { + lucene90DocValuesConsumer.addSortedSetField(fieldInfo, docValuesProducer); + } +} diff --git a/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java new file mode 100644 index 0000000000000..669e85181082e --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; + +import java.io.IOException; + +/** + * This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure. + * It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric, + * and sorted set) for fields in the Star Tree index. + * + * @opensearch.experimental + */ +public class Lucene90DocValuesProducerWrapper extends DocValuesProducer { + + Lucene90DocValuesProducer lucene90DocValuesProducer; + SegmentReadState state; + + public Lucene90DocValuesProducerWrapper( + SegmentReadState state, + String dataCodec, + String dataExtension, + String metaCodec, + String metaExtension + ) throws IOException { + lucene90DocValuesProducer = new Lucene90DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension); + this.state = state; + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getNumeric(field); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getBinary(field); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSorted(field); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSortedNumeric(field); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSortedSet(field); + } + + @Override + public void checkIntegrity() throws IOException { + this.lucene90DocValuesProducer.checkIntegrity(); + } + + // returns the doc id set iterator based on field name + public SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException { + return this.lucene90DocValuesProducer.getSortedNumeric(state.fieldInfos.fieldInfo(fieldName)); + } + + @Override + public void close() throws IOException { + this.lucene90DocValuesProducer.close(); + } + +} diff --git a/server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java b/server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java new file mode 100644 index 0000000000000..f7759fcced284 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.index; + +import org.apache.lucene.util.Counter; + +/** + * A wrapper class for writing sorted numeric doc values. + *

+ * This class provides a convenient way to add sorted numeric doc values to a field + * and retrieve the corresponding {@link SortedNumericDocValues} instance. + * + * @opensearch.experimental + */ +public class SortedNumericDocValuesWriterWrapper { + + private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter; + + /** + * Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance. + * + * @param fieldInfo the field information for the field being written + * @param counter a counter for tracking memory usage + */ + public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) { + sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter); + } + + /** + * Adds a value to the sorted numeric doc values for the specified document. + * + * @param docID the document ID + * @param value the value to add + */ + public void addValue(int docID, long value) { + sortedNumericDocValuesWriter.addValue(docID, value); + } + + /** + * Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values + * + * @return the {@link SortedNumericDocValues} instance + */ + public SortedNumericDocValues getDocValues() { + return sortedNumericDocValuesWriter.getDocValues(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java index 216ed4f68f333..d4b0c0d1f4b80 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat { private final DocValuesFormat delegate; private final MapperService mapperService; + /** Data codec name for Composite Doc Values Format */ + public static final String DATA_CODEC_NAME = "Composite99FormatData"; + + /** Meta codec name for Composite Doc Values Format */ + public static final String META_CODEC_NAME = "Composite99FormatMeta"; + + /** Filename extension for the composite index data */ + public static final String DATA_EXTENSION = "cid"; + + /** Filename extension for the composite index meta */ + public static final String META_EXTENSION = "cim"; + + /** Data doc values codec name for Composite Doc Values Format */ + public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData"; + + /** Meta doc values codec name for Composite Doc Values Format */ + public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata"; + + /** Filename extension for the composite index data doc values */ + public static final String DATA_DOC_VALUES_EXTENSION = "cidvd"; + + /** Filename extension for the composite index meta doc values */ + public static final String META_DOC_VALUES_EXTENSION = "cidvm"; + + /** Initial version for the Composite90DocValuesFormat */ + public static final int VERSION_START = 0; + + /** Current version for the Composite90DocValuesFormat */ + public static final int VERSION_CURRENT = VERSION_START; + // needed for SPI public Composite99DocValuesFormat() { this(new Lucene90DocValuesFormat(), null); diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java index df5008a7f294e..795ae8066579a 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java @@ -8,19 +8,54 @@ package org.opensearch.index.codec.composite; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducerWrapper; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.MetricEntry; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.ReadDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.Tree; +import org.opensearch.index.mapper.CompositeMappedFieldType; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfoList; /** * Reader for star tree index and star tree doc values from the segments @@ -29,11 +64,153 @@ */ @ExperimentalApi public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader { - private DocValuesProducer delegate; + private static final Logger logger = LogManager.getLogger(Composite99DocValuesReader.class); + + private final DocValuesProducer delegate; + private IndexInput dataIn; + private ChecksumIndexInput metaIn; + private final Map compositeIndexInputMap = new LinkedHashMap<>(); + private final Map compositeIndexMetadataMap = new LinkedHashMap<>(); + private final List fields; + private Lucene90DocValuesProducerWrapper compositeDocValuesProducer; + private final List compositeFieldInfos = new ArrayList<>(); + private final SegmentReadState readState; - public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException { + public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException { this.delegate = producer; - // TODO : read star tree files + this.readState = readState; + this.fields = new ArrayList<>(); + + String metaFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + + String dataFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + + boolean success = false; + try { + + // initialize meta input + dataIn = readState.directory.openInput(dataFileName, readState.context); + CodecUtil.checkIndexHeader( + dataIn, + Composite99DocValuesFormat.DATA_CODEC_NAME, + Composite99DocValuesFormat.VERSION_START, + Composite99DocValuesFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + // initialize data input + metaIn = readState.directory.openChecksumInput(metaFileName, readState.context); + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + metaIn, + Composite99DocValuesFormat.META_CODEC_NAME, + Composite99DocValuesFormat.VERSION_START, + Composite99DocValuesFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + while (true) { + + // validate magic marker + long magicMarker = metaIn.readLong(); + if (magicMarker == -1) { + logger.info("EOF reached for composite index metadata"); + break; + } else if (magicMarker < 0) { + throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn); + } else if (COMPOSITE_FIELD_MARKER != magicMarker) { + logger.error("Invalid composite field magic marker"); + throw new IOException("Invalid composite field magic marker"); + } + + int version = metaIn.readVInt(); + if (VERSION != version) { + logger.error("Invalid composite field version"); + throw new IOException("Invalid composite field version"); + } + + // construct composite index metadata + String compositeFieldName = metaIn.readString(); + CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName( + metaIn.readString() + ); + + switch (compositeFieldType) { + case STAR_TREE: + StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType); + compositeFieldInfos.add(new CompositeIndexFieldInfo(compositeFieldName, compositeFieldType)); + + IndexInput starTreeIndexInput = dataIn.slice( + "star-tree data slice for respective star-tree fields", + starTreeMetadata.getDataStartFilePointer(), + starTreeMetadata.getDataLength() + ); + compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput); + compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata); + + List dimensionFields = starTreeMetadata.getDimensionFields(); + + // generating star tree unique fields (fully qualified name for dimension and metrics) + for (String dimensions : dimensionFields) { + fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions)); + } + + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + fields.add( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + compositeFieldName, + metricEntry.getMetricFieldName(), + metricEntry.getMetricStat().getTypeName() + ) + ); + } + + break; + default: + throw new CorruptIndexException("Invalid composite field type found in the file", dataIn); + } + } + + // populates the dummy list of field infos to fetch doc id set iterators for respective fields. + // the dummy field info is used to fetch the doc id set iterators for respective fields based on field name + FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields)); + SegmentReadState segmentReadState = new SegmentReadState( + readState.directory, + readState.segmentInfo, + fieldInfos, + readState.context + ); + + // initialize star-tree doc values producer + compositeDocValuesProducer = new Lucene90DocValuesProducerWrapper( + segmentReadState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + } catch (Throwable t) { + priorE = t; + } finally { + CodecUtil.checkFooter(metaIn, priorE); + } + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } } @Override @@ -64,24 +241,117 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { @Override public void checkIntegrity() throws IOException { delegate.checkIntegrity(); - // Todo : check integrity of composite index related [star tree] files + CodecUtil.checksumEntireFile(dataIn); } @Override public void close() throws IOException { delegate.close(); - // Todo: close composite index related files [star tree] files + boolean success = false; + try { + IOUtils.close(metaIn, dataIn); + IOUtils.close(compositeDocValuesProducer); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(metaIn, dataIn); + } + compositeIndexInputMap.clear(); + compositeIndexMetadataMap.clear(); + fields.clear(); + metaIn = null; + dataIn = null; + } } @Override public List getCompositeIndexFields() { - // todo : read from file formats and get the field names. - return new ArrayList<>(); + return compositeFieldInfos; } @Override public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException { - // TODO : read compositeIndexValues [starTreeValues] from star tree files - throw new UnsupportedOperationException(); + + switch (compositeIndexFieldInfo.getType()) { + case STAR_TREE: + // building star tree values + StarTreeMetadata starTreeMetadata = (StarTreeMetadata) compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()); + + // build skip star node dimensions + Set skipStarNodeCreationInDims = starTreeMetadata.getSkipStarNodeCreationInDims(); + + // build dimensions + List readDimensions = new ArrayList<>(); + for (String dimension : starTreeMetadata.getDimensionFields()) { + readDimensions.add(new ReadDimension(dimension)); + } + + // build metrics + Map starTreeMetricMap = new LinkedHashMap<>(); + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + String metricName = metricEntry.getMetricFieldName(); + Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); + metric.getMetrics().add(metricEntry.getMetricStat()); + } + List starTreeMetrics = new ArrayList<>(starTreeMetricMap.values()); + + // star-tree field + StarTreeField starTreeField = new StarTreeField( + starTreeMetadata.getCompositeFieldName(), + readDimensions, + starTreeMetrics, + new StarTreeFieldConfiguration( + starTreeMetadata.getMaxLeafDocs(), + skipStarNodeCreationInDims, + starTreeMetadata.getStarTreeBuildMode() + ) + ); + + // star-tree root node + IndexInput compositeIndexIn = compositeIndexInputMap.get(starTreeMetadata.getCompositeFieldName()); + Tree starTree = new StarTree(compositeIndexIn, starTreeMetadata); + StarTreeNode rootNode = starTree.getRoot(); + + // get doc id set iterators for metrics and dimensions + Map dimensionsDocIdSetIteratorMap = new LinkedHashMap<>(); + Map metricsDocIdSetIteratorMap = new LinkedHashMap<>(); + + // get doc id set iterators for dimensions + for (String dimension : starTreeMetadata.getDimensionFields()) { + dimensionsDocIdSetIteratorMap.put( + dimension, + compositeDocValuesProducer.getSortedNumeric( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues(starTreeField.getName(), dimension) + ) + ); + } + + // get doc id set iterators for metrics + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeField.getName(), + metricEntry.getMetricFieldName(), + metricEntry.getMetricStat().getTypeName() + ); + metricsDocIdSetIteratorMap.put(metricFullName, compositeDocValuesProducer.getSortedNumeric(metricFullName)); + } + + // create star-tree attributes map + Map starTreeAttributes = new HashMap<>(); + starTreeAttributes.put(SEGMENT_DOCS_COUNT, String.valueOf(starTreeMetadata.getSegmentAggregatedDocCount())); + + // return star-tree values + return new StarTreeValues( + starTreeField, + rootNode, + dimensionsDocIdSetIteratorMap, + metricsDocIdSetIteratorMap, + starTreeAttributes + ); + + default: + throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName()); + } + } } diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java index 3859d3c998573..6daa7a90112db 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java @@ -10,24 +10,30 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder; import org.opensearch.index.mapper.CompositeMappedFieldType; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; import java.io.IOException; -import java.util.Collections; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -49,12 +55,17 @@ public class Composite99DocValuesWriter extends DocValuesConsumer { AtomicReference mergeState = new AtomicReference<>(); private final Set compositeMappedFieldTypes; private final Set compositeFieldSet; + private DocValuesConsumer composite99DocValuesConsumer; + + public IndexOutput dataOut; + public IndexOutput metaOut; private final Set segmentFieldSet; private final Map fieldProducerMap = new HashMap<>(); private static final Logger logger = LogManager.getLogger(Composite99DocValuesWriter.class); - public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) { + public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) + throws IOException { this.delegate = delegate; this.state = segmentWriteState; @@ -70,6 +81,51 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState for (CompositeMappedFieldType type : compositeMappedFieldTypes) { compositeFieldSet.addAll(type.fields()); } + + boolean success = false; + try { + this.composite99DocValuesConsumer = new Lucene90DocValuesConsumerWrapper( + segmentWriteState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + + String dataFileName = IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = segmentWriteState.directory.createOutput(dataFileName, segmentWriteState.context); + CodecUtil.writeIndexHeader( + dataOut, + Composite99DocValuesFormat.DATA_CODEC_NAME, + Composite99DocValuesFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + + String metaFileName = IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = segmentWriteState.directory.createOutput(metaFileName, segmentWriteState.context); + CodecUtil.writeIndexHeader( + metaOut, + Composite99DocValuesFormat.META_CODEC_NAME, + Composite99DocValuesFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } } @Override @@ -104,6 +160,26 @@ public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) @Override public void close() throws IOException { delegate.close(); + boolean success = false; + try { + if (metaOut != null) { + metaOut.writeLong(-1); // write EOF marker + CodecUtil.writeFooter(metaOut); // write checksum + } + if (dataOut != null) { + CodecUtil.writeFooter(dataOut); // write checksum + } + + success = true; + } finally { + if (success) { + IOUtils.close(dataOut, metaOut, composite99DocValuesConsumer); + } else { + IOUtils.closeWhileHandlingException(dataOut, metaOut, composite99DocValuesConsumer); + } + metaOut = dataOut = null; + composite99DocValuesConsumer = null; + } } private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, FieldInfo field) throws IOException { @@ -128,9 +204,9 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) { // we have all the required fields to build composite fields if (compositeFieldSet.isEmpty()) { for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) { - if (mappedType.getCompositeIndexType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) { + if (mappedType instanceof StarTreeMapper.StarTreeFieldType) { try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService)) { - starTreesBuilder.build(fieldProducerMap); + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, composite99DocValuesConsumer); } } } @@ -147,6 +223,7 @@ public void merge(MergeState mergeState) throws IOException { /** * Merges composite fields from multiple segments + * * @param mergeState merge state */ private void mergeCompositeFields(MergeState mergeState) throws IOException { @@ -155,6 +232,7 @@ private void mergeCompositeFields(MergeState mergeState) throws IOException { /** * Merges star tree data fields from multiple segments + * * @param mergeState merge state */ private void mergeStarTreeFields(MergeState mergeState) throws IOException { @@ -177,7 +255,7 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { CompositeIndexValues compositeIndexValues = reader.getCompositeIndexValues(fieldInfo); if (compositeIndexValues instanceof StarTreeValues) { StarTreeValues starTreeValues = (StarTreeValues) compositeIndexValues; - List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), Collections.emptyList()); + List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), new ArrayList<>()); if (starTreeField == null) { starTreeField = starTreeValues.getStarTreeField(); } @@ -196,7 +274,7 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { } } try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService)) { - starTreesBuilder.buildDuringMerge(starTreeSubsPerField); + starTreesBuilder.buildDuringMerge(metaOut, dataOut, starTreeSubsPerField, composite99DocValuesConsumer); } } } diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/MetricEntry.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/MetricEntry.java new file mode 100644 index 0000000000000..ecc92f2eada83 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/MetricEntry.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite.datacube.startree.fileformats.meta; + +import org.opensearch.index.compositeindex.datacube.MetricStat; + +/** + * Holds the pair of metric name and it's associated stat + * + * @opensearch.experimental + */ +public class MetricEntry { + + private final String metricFieldName; + private final MetricStat metricStat; + + public MetricEntry(String metricFieldName, MetricStat metricStat) { + this.metricFieldName = metricFieldName; + this.metricStat = metricStat; + } + + public String getMetricFieldName() { + return metricFieldName; + } + + public MetricStat getMetricStat() { + return metricStat; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/StarTreeMetadata.java new file mode 100644 index 0000000000000..4f05f5b144672 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -0,0 +1,220 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats.meta; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.IndexInput; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Holds the associated metadata for the building of star-tree + * + * @opensearch.experimental + */ +public class StarTreeMetadata extends CompositeIndexMetadata { + private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class); + private final IndexInput meta; + private final String starTreeFieldName; + private final String starTreeFieldType; + private final List dimensionFields; + private final List metricEntries; + private final Integer segmentAggregatedDocCount; + private final Integer maxLeafDocs; + private final Set skipStarNodeCreationInDims; + private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; + private final long dataStartFilePointer; + private final long dataLength; + + public StarTreeMetadata(IndexInput meta, String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) + throws IOException { + super(compositeFieldName, compositeFieldType); + this.meta = meta; + try { + this.starTreeFieldName = this.getCompositeFieldName(); + this.starTreeFieldType = this.getCompositeFieldType().getName(); + this.dimensionFields = readStarTreeDimensions(); + this.metricEntries = readMetricEntries(); + this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); + this.maxLeafDocs = readMaxLeafDocs(); + this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); + this.starTreeBuildMode = readBuildMode(); + this.dataStartFilePointer = readDataStartFilePointer(); + this.dataLength = readDataLength(); + } catch (Exception e) { + logger.error("Unable to read star-tree metadata from the file"); + throw new CorruptIndexException("Unable to read star-tree metadata from the file", meta); + } + } + + private int readDimensionsCount() throws IOException { + return meta.readVInt(); + } + + private List readStarTreeDimensions() throws IOException { + int dimensionCount = readDimensionsCount(); + List dimensionFields = new ArrayList<>(); + + for (int i = 0; i < dimensionCount; i++) { + dimensionFields.add(meta.readString()); + } + + return dimensionFields; + } + + private int readMetricsCount() throws IOException { + return meta.readVInt(); + } + + private List readMetricEntries() throws IOException { + int metricCount = readMetricsCount(); + List metricEntries = new ArrayList<>(); + + for (int i = 0; i < metricCount; i++) { + String metricFieldName = meta.readString(); + int metricStatOrdinal = meta.readVInt(); + metricEntries.add(new MetricEntry(metricFieldName, MetricStat.fromMetricOrdinal(metricStatOrdinal))); + } + + return metricEntries; + } + + private int readSegmentAggregatedDocCount() throws IOException { + return meta.readVInt(); + } + + private int readMaxLeafDocs() throws IOException { + return meta.readVInt(); + } + + private int readSkipStarNodeCreationInDimsCount() throws IOException { + return meta.readVInt(); + } + + private Set readSkipStarNodeCreationInDims() throws IOException { + + int skipStarNodeCreationInDimsCount = readSkipStarNodeCreationInDimsCount(); + Set skipStarNodeCreationInDims = new HashSet<>(); + for (int i = 0; i < skipStarNodeCreationInDimsCount; i++) { + skipStarNodeCreationInDims.add(meta.readString()); + } + return skipStarNodeCreationInDims; + } + + private StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { + return StarTreeFieldConfiguration.StarTreeBuildMode.fromBuildModeOrdinal(meta.readByte()); + } + + private long readDataStartFilePointer() throws IOException { + return meta.readVLong(); + } + + private long readDataLength() throws IOException { + return meta.readVLong(); + } + + /** + * Returns the name of the star-tree field. + * + * @return star-tree field name + */ + public String getStarTreeFieldName() { + return starTreeFieldName; + } + + /** + * Returns the type of the star tree field. + * + * @return star-tree field type + */ + public String getStarTreeFieldType() { + return starTreeFieldType; + } + + /** + * Returns the list of dimension field numbers. + * + * @return star-tree dimension field numbers + */ + public List getDimensionFields() { + return dimensionFields; + } + + /** + * Returns the list of metric entries. + * + * @return star-tree metric entries + */ + public List getMetricEntries() { + return metricEntries; + } + + /** + * Returns the aggregated document count for the star-tree. + * + * @return the aggregated document count for the star-tree. + */ + public Integer getSegmentAggregatedDocCount() { + return segmentAggregatedDocCount; + } + + /** + * Returns the max leaf docs for the star-tree. + * + * @return the max leaf docs. + */ + public Integer getMaxLeafDocs() { + return maxLeafDocs; + } + + /** + * Returns the set of dimensions for which star node will not be created in the star-tree. + * + * @return the set of dimensions. + */ + public Set getSkipStarNodeCreationInDims() { + return skipStarNodeCreationInDims; + } + + /** + * Returns the build mode for the star-tree. + * + * @return the star-tree build mode. + */ + public StarTreeFieldConfiguration.StarTreeBuildMode getStarTreeBuildMode() { + return starTreeBuildMode; + } + + /** + * Returns the file pointer to the start of the star-tree data. + * + * @return start file pointer for star-tree data + */ + public long getDataStartFilePointer() { + return dataStartFilePointer; + } + + /** + * Returns the length of star-tree data + * + * @return star-tree length + */ + public long getDataLength() { + return dataLength; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/package-info.java new file mode 100644 index 0000000000000..314dd57864f20 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/meta/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Meta package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats.meta; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java new file mode 100644 index 0000000000000..a8becb5a02dca --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * File formats for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeDataWriter.java new file mode 100644 index 0000000000000..844921a05c47e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeDataWriter.java @@ -0,0 +1,144 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; +import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Utility class for serializing a star-tree data structure. + * + * @opensearch.experimental + */ +public class StarTreeDataWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeDataWriter.class); + + /** + * Writes the star-tree data structure. + * + * @param indexOutput the IndexOutput to write the star-tree data + * @param rootNode the root node of the star-tree + * @param numNodes the total number of nodes in the star-tree + * @param name the name of the star-tree field + * @return the total size in bytes of the serialized star-tree data + * @throws IOException if an I/O error occurs while writing the star-tree data + */ + public static long writeStarTree(IndexOutput indexOutput, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + long totalSizeInBytes = 0L; + totalSizeInBytes += computeStarTreeDataHeaderByteSize(); + totalSizeInBytes += (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; + + if (logger.isDebugEnabled()) { + logger.debug("Star tree data size in bytes : {} for star-tree field {}", totalSizeInBytes, name); + } + + writeStarTreeHeader(indexOutput, numNodes); + writeStarTreeNodes(indexOutput, rootNode); + return totalSizeInBytes; + } + + /** + * Computes the byte size of the star-tree data header. + * + * @return the byte size of the star-tree data header + */ + public static int computeStarTreeDataHeaderByteSize() { + // Magic marker (8), version (4) + int headerSizeInBytes = 12; + + // For number of nodes. + headerSizeInBytes += Integer.BYTES; + return headerSizeInBytes; + } + + /** + * Writes the star-tree data header. + * + * @param output the IndexOutput to write the header + * @param numNodes the total number of nodes in the star-tree + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeStarTreeHeader(IndexOutput output, int numNodes) throws IOException { + output.writeLong(COMPOSITE_FIELD_MARKER); + output.writeInt(VERSION); + output.writeInt(numNodes); + } + + /** + * Writes the star-tree nodes in a breadth-first order. + * + * @param output the IndexOutput to write the nodes + * @param rootNode the root node of the star-tree + * @throws IOException if an I/O error occurs while writing the nodes + */ + private static void writeStarTreeNodes(IndexOutput output, InMemoryTreeNode rootNode) throws IOException { + Queue queue = new LinkedList<>(); + queue.add(rootNode); + + int currentNodeId = 0; + while (!queue.isEmpty()) { + InMemoryTreeNode node = queue.remove(); + + if (node.children == null || node.children.isEmpty()) { + writeStarTreeNode(output, node, ALL, ALL); + } else { + + // Sort all children nodes based on dimension value + List sortedChildren = new ArrayList<>(node.children.values()); + sortedChildren.sort( + Comparator.comparingInt(InMemoryTreeNode::getNodeType).thenComparingLong(InMemoryTreeNode::getDimensionValue) + ); + + int firstChildId = currentNodeId + queue.size() + 1; + int lastChildId = firstChildId + sortedChildren.size() - 1; + writeStarTreeNode(output, node, firstChildId, lastChildId); + + queue.addAll(sortedChildren); + } + + currentNodeId++; + } + } + + /** + * Writes a single star-tree node + * + * @param output the IndexOutput to write the node + * @param node the star tree node to write + * @param firstChildId the ID of the first child node + * @param lastChildId the ID of the last child node + * @throws IOException if an I/O error occurs while writing the node + */ + private static void writeStarTreeNode(IndexOutput output, InMemoryTreeNode node, int firstChildId, int lastChildId) throws IOException { + output.writeInt(node.dimensionId); + output.writeLong(node.dimensionValue); + output.writeInt(node.startDocId); + output.writeInt(node.endDocId); + output.writeInt(node.aggregatedDocId); + output.writeByte(node.nodeType); + output.writeInt(firstChildId); + output.writeInt(lastChildId); + } + +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeMetaWriter.java new file mode 100644 index 0000000000000..c7a11b60a6a14 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeMetaWriter.java @@ -0,0 +1,163 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.List; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; + +/** + * The utility class for serializing the metadata of a star-tree data structure. + * The metadata includes information about the dimensions, metrics, and other relevant details + * related to the star tree. + * + * @opensearch.experimental + */ +public class StarTreeMetaWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeMetaWriter.class); + + /** + * Writes the star-tree metadata. + * + * @param metaOut the IndexOutput to write the metadata + * @param starTreeField the star-tree field + * @param writeState the segment write state + * @param metricAggregatorInfos the list of metric aggregator information + * @param segmentAggregatedCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star tree data + * @param dataFileLength the length of the star tree data file + * @throws IOException if an I/O error occurs while serializing the metadata + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + long initialMetaFilePointer = metaOut.getFilePointer(); + + writeMetaHeader(metaOut, CompositeMappedFieldType.CompositeFieldType.STAR_TREE, starTreeField.getName()); + writeMeta(metaOut, writeState, metricAggregatorInfos, starTreeField, segmentAggregatedCount, dataFilePointer, dataFileLength); + + if (logger.isDebugEnabled()) { + logger.debug( + "Star tree meta size in bytes : {} for star-tree field {}", + metaOut.getFilePointer() - initialMetaFilePointer, + starTreeField.getName() + ); + } + } + + /** + * Writes the star-tree metadata header. + * + * @param metaOut the IndexOutput to write the header + * @param compositeFieldType the composite field type of the star-tree field + * @param starTreeFieldName the name of the star-tree field + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeMetaHeader( + IndexOutput metaOut, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + String starTreeFieldName + ) throws IOException { + // magic marker for sanity + metaOut.writeLong(COMPOSITE_FIELD_MARKER); + + // version + metaOut.writeVInt(VERSION); + + // star tree field name + metaOut.writeString(starTreeFieldName); + + // star tree field type + metaOut.writeString(compositeFieldType.getName()); + } + + /** + * Writes the star-tree metadata. + * + * @param metaOut the IndexOutput to write the metadata + * @param writeState the segment write state + * @param metricAggregatorInfos the list of metric aggregator information + * @param starTreeField the star tree field + * @param segmentAggregatedDocCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star-tree data + * @param dataFileLength the length of the star-tree data file + * @throws IOException if an I/O error occurs while writing the metadata + */ + private static void writeMeta( + IndexOutput metaOut, + SegmentWriteState writeState, + List metricAggregatorInfos, + StarTreeField starTreeField, + Integer segmentAggregatedDocCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + // number of dimensions + metaOut.writeVInt(starTreeField.getDimensionsOrder().size()); + + // dimensions + for (Dimension dimension : starTreeField.getDimensionsOrder()) { + metaOut.writeString(dimension.getField()); + } + + // number of metrics + metaOut.writeVInt(metricAggregatorInfos.size()); + + // metric - metric stat pair + for (MetricAggregatorInfo metricAggregatorInfo : metricAggregatorInfos) { + metaOut.writeString(metricAggregatorInfo.getField()); + int metricStatOrdinal = metricAggregatorInfo.getMetricStat().getMetricOrdinal(); + metaOut.writeVInt(metricStatOrdinal); + } + + // segment aggregated document count + metaOut.writeVInt(segmentAggregatedDocCount); + + // max leaf docs + metaOut.writeVInt(starTreeField.getStarTreeConfig().maxLeafDocs()); + + // number of skip star node creation dimensions + metaOut.writeVInt(starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size()); + + // skip star node creations + for (String dimension : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + metaOut.writeString(dimension); + } + + // star tree build-mode + metaOut.writeByte(starTreeField.getStarTreeConfig().getBuildMode().getBuildModeOrdinal()); + + // star-tree data file pointer + metaOut.writeVLong(dataFilePointer); + + // star-tree data file length + metaOut.writeVLong(dataFileLength); + + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java new file mode 100644 index 0000000000000..12d9cde18dab3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/StarTreeWriter.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; + +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeWriter { + + private StarTreeWriter() {} + + /** + * Write star tree to index output stream + * + * @param dataOut data index output + * @param rootNode root star-tree node + * @param numNodes number of nodes in the tree + * @param name name of the star-tree field + * @return total size of the three + * @throws IOException when star-tree data serialization fails + */ + public static long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); + } + + /** + * Write star tree metadata to index output stream + * + * @param metaOut meta index output + * @param starTreeField star tree field + * @param writeState segment write state + * @param metricAggregatorInfos metric aggregator infos + * @param segmentAggregatedCount segment aggregated count + * @param dataFilePointer data file pointer + * @param dataFileLength data file length + * @throws IOException when star-tree data serialization fails + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + StarTreeMetaWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + segmentAggregatedCount, + dataFilePointer, + dataFileLength + ); + } + +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java new file mode 100644 index 0000000000000..c1635b9a3aaf1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/fileformats/writer/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Writer package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.codec.composite.datacube.startree.fileformats.writer; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java index 67808ad51289a..27793a6d5615c 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java @@ -7,6 +7,6 @@ */ /** - * classes responsible for handling all star tree structures and operations as part of codec + * Classes responsible for handling all star tree structures and operations as part of codec */ package org.opensearch.index.codec.composite.datacube.startree; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java new file mode 100644 index 0000000000000..a8f2c76353966 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +/** + * This class contains constants used in the Composite Index implementation. + */ +public class CompositeIndexConstants { + + /** + * The magic marker value used for sanity checks in the Composite Index implementation. + */ + public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field + + /** + * The version of the Composite Index implementation. + */ + public static final int VERSION = 0; + + /** + * Represents the key to fetch number of documents in a segment. + */ + public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount"; + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java new file mode 100644 index 0000000000000..6ba401afe0e6f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +import org.opensearch.index.mapper.CompositeMappedFieldType; + +/** + * This class represents the metadata of a Composite Index, which includes information about + * the composite field name, type, and the specific metadata for the type of composite field + * (e.g., Tree metadata). + * + * @opensearch.experimental + */ +public class CompositeIndexMetadata { + + private final String compositeFieldName; + private final CompositeMappedFieldType.CompositeFieldType compositeFieldType; + + /** + * Constructs a CompositeIndexMetadata object with the provided composite field name and type. + * + * @param compositeFieldName the name of the composite field + * @param compositeFieldType the type of the composite field + */ + public CompositeIndexMetadata(String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) { + this.compositeFieldName = compositeFieldName; + this.compositeFieldType = compositeFieldType; + } + + /** + * Returns the name of the composite field. + * + * @return the composite field name + */ + public String getCompositeFieldName() { + return compositeFieldName; + } + + /** + * Returns the type of the composite field. + * + * @return the composite field type + */ + public CompositeMappedFieldType.CompositeFieldType getCompositeFieldType() { + return compositeFieldType; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java index fbde296b15f7e..632f8de21d26d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java @@ -17,22 +17,28 @@ */ @ExperimentalApi public enum MetricStat { - COUNT("count"), - AVG("avg"), - SUM("sum"), - MIN("min"), - MAX("max"); + COUNT("count", 0), + AVG("avg", 1), + SUM("sum", 2), + MIN("min", 3), + MAX("max", 4); private final String typeName; + private final int metricOrdinal; - MetricStat(String typeName) { + MetricStat(String typeName, int metricOrdinal) { this.typeName = typeName; + this.metricOrdinal = metricOrdinal; } public String getTypeName() { return typeName; } + public int getMetricOrdinal() { + return metricOrdinal; + } + public static MetricStat fromTypeName(String typeName) { for (MetricStat metric : MetricStat.values()) { if (metric.getTypeName().equalsIgnoreCase(typeName)) { @@ -41,4 +47,13 @@ public static MetricStat fromTypeName(String typeName) { } throw new IllegalArgumentException("Invalid metric stat: " + typeName); } + + public static MetricStat fromMetricOrdinal(int metricOrdinal) { + for (MetricStat metric : MetricStat.values()) { + if (metric.getMetricOrdinal() == metricOrdinal) { + return metric; + } + } + throw new IllegalArgumentException("Invalid metric stat: " + metricOrdinal); + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java new file mode 100644 index 0000000000000..5b6a20b8d1078 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube; + +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.mapper.CompositeDataCubeFieldType; + +import java.io.IOException; +import java.util.Objects; + +/** + * Composite index merge dimension class + * + * @opensearch.experimental + */ +public class ReadDimension implements Dimension { + public static final String READ = "read"; + private final String field; + + public ReadDimension(String field) { + this.field = field; + } + + public String getField() { + return field; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + builder.field(CompositeDataCubeFieldType.NAME, field); + builder.field(CompositeDataCubeFieldType.TYPE, READ); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ReadDimension dimension = (ReadDimension) o; + return Objects.equals(field, dimension.getField()); + } + + @Override + public int hashCode() { + return Objects.hash(field); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java index 755c064c2c60a..d732a8598d711 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java @@ -56,19 +56,25 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @ExperimentalApi public enum StarTreeBuildMode { // TODO : remove onheap support unless this proves useful - ON_HEAP("onheap"), - OFF_HEAP("offheap"); + ON_HEAP("onheap", (byte) 0), + OFF_HEAP("offheap", (byte) 1); private final String typeName; + private final byte buildModeOrdinal; - StarTreeBuildMode(String typeName) { + StarTreeBuildMode(String typeName, byte buildModeOrdinal) { this.typeName = typeName; + this.buildModeOrdinal = buildModeOrdinal; } public String getTypeName() { return typeName; } + public byte getBuildModeOrdinal() { + return buildModeOrdinal; + } + public static StarTreeBuildMode fromTypeName(String typeName) { for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { if (starTreeBuildMode.getTypeName().equalsIgnoreCase(typeName)) { @@ -77,6 +83,16 @@ public static StarTreeBuildMode fromTypeName(String typeName) { } throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", typeName)); } + + public static StarTreeBuildMode fromBuildModeOrdinal(byte buildModeOrdinal) { + for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { + if (starTreeBuildMode.getBuildModeOrdinal() == buildModeOrdinal) { + return starTreeBuildMode; + } + } + throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", buildModeOrdinal)); + } + } public int maxLeafDocs() { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index 5390b6728b9b6..bedd6a4b374b4 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -46,7 +46,17 @@ public Long mergeAggregatedValueAndSegmentValue(Long value, Long segmentDocValue @Override public Long mergeAggregatedValues(Long value, Long aggregatedValue) { - return value + aggregatedValue; + + long totalCount = getIdentityMetricValue(); + if (value != null) { + totalCount += value; + } + + if (aggregatedValue != null) { + totalCount += aggregatedValue; + } + + return totalCount; } @Override @@ -68,4 +78,9 @@ public Long toLongValue(Long value) { public Long toStarTreeNumericTypeValue(Long value) { return value; } + + @Override + public long getIdentityMetricValue() { + return 0L; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java index a9209a38eca82..b4327122d6fb7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java @@ -79,7 +79,12 @@ public StarTreeNumericType getAggregatedValueType() { * @return field name with metric type and field */ public String toFieldName() { - return starFieldName + DELIMITER + field + DELIMITER + metricStat.getTypeName(); + return toFieldName(starFieldName, field, metricStat.getTypeName()); + + } + + public static String toFieldName(String starFieldName, String field, String metricName) { + return starFieldName + DELIMITER + field + DELIMITER + metricName; } @Override @@ -94,7 +99,7 @@ public boolean equals(Object obj) { } if (obj instanceof MetricAggregatorInfo) { MetricAggregatorInfo anotherPair = (MetricAggregatorInfo) obj; - return metricStat == anotherPair.metricStat && field.equals(anotherPair.field); + return metricStat.equals(anotherPair.metricStat) && field.equals(anotherPair.field); } return false; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index 385549216e4d6..730adae49fbed 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -63,7 +63,11 @@ public Double mergeAggregatedValueAndSegmentValue(Double value, Long segmentDocV public Double mergeAggregatedValues(Double value, Double aggregatedValue) { assert kahanSummation.value() == aggregatedValue; kahanSummation.reset(sum, compensation); - kahanSummation.add(value); + if (value != null) { + kahanSummation.add(value); + } else { + kahanSummation.add(getIdentityMetricValue()); + } compensation = kahanSummation.delta(); sum = kahanSummation.value(); return kahanSummation.value(); @@ -103,4 +107,9 @@ public Double toStarTreeNumericTypeValue(Long value) { throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); } } + + @Override + public long getIdentityMetricValue() { + return 0; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index 93230ed012b13..f6e1761a8a85f 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -61,4 +61,9 @@ public interface ValueAggregator { * Converts an aggregated value from a Long type. */ A toStarTreeNumericTypeValue(Long rawValue); + + /** + * Fetches a value that does not alter the result of aggregations + */ + long getIdentityMetricValue(); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 7187fade882ea..fdacc9c5b2498 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -9,14 +9,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; -import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedNumericDocValuesWriterWrapper; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Counter; +import org.apache.lucene.util.NumericUtils; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -25,8 +29,10 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; @@ -35,7 +41,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -43,8 +48,11 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; -import static org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; /** * Builder for star tree. Defines the algorithm to construct star-tree @@ -70,23 +78,34 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected int numStarTreeNodes; protected final int maxLeafDocuments; - protected final TreeNode rootNode = getNewNode(); + protected final InMemoryTreeNode rootNode = getNewNode(); private final StarTreeField starTreeField; private final MapperService mapperService; - private final SegmentWriteState state; - static String NUM_SEGMENT_DOCS = "numSegmentDocs"; + private final SegmentWriteState writeState; + + private final IndexOutput metaOut; + private final IndexOutput dataOut; /** * Reads all the configuration related to dimensions and metrics, builds a star-tree based on the different construction parameters. * - * @param starTreeField holds the configuration for the star tree - * @param state stores the segment write state - * @param mapperService helps to find the original type of the field + * @param starTreeField holds the configuration for the star tree + * @param writeState stores the segment write writeState + * @param mapperService helps to find the original type of the field */ - protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) { + protected BaseStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + MapperService mapperService + ) { logger.debug("Building star tree : {}", starTreeField.getName()); + this.metaOut = metaOut; + this.dataOut = dataOut; + this.starTreeField = starTreeField; StarTreeFieldConfiguration starTreeFieldSpec = starTreeField.getStarTreeConfig(); @@ -94,9 +113,9 @@ protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState sta this.numDimensions = dimensionsSplitOrder.size(); this.skipStarNodeCreationForDimensions = new HashSet<>(); - this.totalSegmentDocs = state.segmentInfo.maxDoc(); + this.totalSegmentDocs = writeState.segmentInfo.maxDoc(); this.mapperService = mapperService; - this.state = state; + this.writeState = writeState; Set skipStarNodeCreationForDimensions = starTreeFieldSpec.getSkipStarNodeCreationInDims(); @@ -141,6 +160,220 @@ public List generateMetricAggregatorInfos(MapperService ma return metricAggregatorInfos; } + /** + * Generates the configuration required to perform aggregation for all the metrics on a field + * + * @return list of MetricAggregatorInfo + */ + public List getMetricReaders(SegmentWriteState state, Map fieldProducerMap) + throws IOException { + + List metricReaders = new ArrayList<>(); + for (Metric metric : this.starTreeField.getMetrics()) { + for (MetricStat metricStat : metric.getMetrics()) { + SequentialDocValuesIterator metricReader; + FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); + if (metricStat != MetricStat.COUNT) { + if (metricFieldInfo == null) { + metricFieldInfo = StarTreeUtils.getFieldInfo(metric.getField(), 1); + } + metricReader = new SequentialDocValuesIterator( + fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) + ); + } else { + metricReader = new SequentialDocValuesIterator(); + } + + metricReaders.add(metricReader); + } + } + return metricReaders; + } + + /** + * Builds the star tree from the original segment documents + * + * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues + * @throws IOException when we are unable to build star-tree + */ + public void build( + Map fieldProducerMap, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + long startTime = System.currentTimeMillis(); + logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); + + List metricReaders = getMetricReaders(writeState, fieldProducerMap); + List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); + SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; + for (int i = 0; i < numDimensions; i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + FieldInfo dimensionFieldInfo = writeState.fieldInfos.fieldInfo(dimension); + if (dimensionFieldInfo == null) { + dimensionFieldInfo = StarTreeUtils.getFieldInfo(dimension, 0); + } + dimensionReaders[i] = new SequentialDocValuesIterator( + fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) + ); + } + Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); + logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + build(starTreeDocumentIterator, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); + logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + } + + /** + * Builds the star tree using sorted and aggregated star-tree Documents + * + * @param starTreeDocumentIterator contains the sorted and aggregated documents + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues + * @throws IOException when we are unable to build star-tree + */ + public void build( + Iterator starTreeDocumentIterator, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + int numSegmentStarTreeDocument = totalSegmentDocs; + + while (starTreeDocumentIterator.hasNext()) { + appendToStarTree(starTreeDocumentIterator.next()); + } + int numStarTreeDocument = numStarTreeDocs; + logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); + + if (numStarTreeDocs == 0) { + // serialize the star tree data + serializeStarTree(numStarTreeDocument); + return; + } + + constructStarTree(rootNode, 0, numStarTreeDocs); + int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; + logger.debug( + "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", + numStarTreeNodes, + numStarTreeDocumentUnderStarNode + ); + + createAggregatedDocs(rootNode); + int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; + logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); + + // Create doc values indices in disk + createSortedDocValuesIndices(starTreeDocValuesConsumer, fieldNumberAcrossStarTrees); + + // serialize star-tree + serializeStarTree(numStarTreeDocument); + } + + private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException { + // serialize the star tree data + long dataFilePointer = dataOut.getFilePointer(); + long totalStarTreeDataLength = StarTreeWriter.writeStarTree(dataOut, rootNode, numStarTreeNodes, starTreeField.getName()); + + // serialize the star tree meta + StarTreeWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + numSegmentStarTreeDocument, + dataFilePointer, + totalStarTreeDataLength + ); + } + + private void createSortedDocValuesIndices(DocValuesConsumer docValuesConsumer, AtomicInteger fieldNumberAcrossStarTrees) + throws IOException { + List dimensionWriters = new ArrayList<>(); + List metricWriters = new ArrayList<>(); + FieldInfo[] dimensionFieldInfoList = new FieldInfo[starTreeField.getDimensionsOrder().size()]; + FieldInfo[] metricFieldInfoList = new FieldInfo[metricAggregatorInfos.size()]; + for (int i = 0; i < dimensionFieldInfoList.length; i++) { + final FieldInfo fi = StarTreeUtils.getFieldInfo( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues( + starTreeField.getName(), + starTreeField.getDimensionsOrder().get(i).getField() + ), + fieldNumberAcrossStarTrees.getAndIncrement() + ); + dimensionFieldInfoList[i] = fi; + dimensionWriters.add(new SortedNumericDocValuesWriterWrapper(fi, Counter.newCounter())); + } + for (int i = 0; i < metricAggregatorInfos.size(); i++) { + + final FieldInfo fi = StarTreeUtils.getFieldInfo( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeField.getName(), + metricAggregatorInfos.get(i).getField(), + metricAggregatorInfos.get(i).getMetricStat().getTypeName() + ), + fieldNumberAcrossStarTrees.getAndIncrement() + ); + + metricFieldInfoList[i] = fi; + metricWriters.add(new SortedNumericDocValuesWriterWrapper(fi, Counter.newCounter())); + } + + for (int docId = 0; docId < numStarTreeDocs; docId++) { + StarTreeDocument starTreeDocument = getStarTreeDocumentForCreatingDocValues(docId); + for (int i = 0; i < starTreeDocument.dimensions.length; i++) { + Long val = starTreeDocument.dimensions[i]; + if (val != null) { + dimensionWriters.get(i).addValue(docId, val); + } + } + + for (int i = 0; i < starTreeDocument.metrics.length; i++) { + try { + switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { + case LONG: + if (starTreeDocument.metrics[i] != null) { + metricWriters.get(i).addValue(docId, (Long) starTreeDocument.metrics[i]); + } + break; + case DOUBLE: + if (starTreeDocument.metrics[i] != null) { + metricWriters.get(i) + .addValue(docId, NumericUtils.doubleToSortableLong((Double) starTreeDocument.metrics[i])); + } + break; + default: + throw new IllegalStateException("Unknown metric doc value type"); + } + } catch (IllegalArgumentException e) { + logger.info("could not parse the value, exiting creation of star tree"); + } + } + } + + addStarTreeDocValueFields(docValuesConsumer, dimensionWriters, dimensionFieldInfoList, starTreeField.getDimensionsOrder().size()); + addStarTreeDocValueFields(docValuesConsumer, metricWriters, metricFieldInfoList, metricAggregatorInfos.size()); + } + + private void addStarTreeDocValueFields( + DocValuesConsumer docValuesConsumer, + List docValuesWriters, + FieldInfo[] fieldInfoList, + int fieldCount + ) throws IOException { + for (int i = 0; i < fieldCount; i++) { + final int increment = i; + DocValuesProducer docValuesProducer = new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) { + return docValuesWriters.get(increment).getDocValues(); + } + }; + docValuesConsumer.addSortedNumericField(fieldInfoList[i], docValuesProducer); + } + } + /** * Adds a document to the star-tree. * @@ -158,6 +391,15 @@ public List generateMetricAggregatorInfos(MapperService ma */ public abstract StarTreeDocument getStarTreeDocument(int docId) throws IOException; + /** + * Returns the star-tree document for the given doc id while creating doc values + * + * @param docId document id + * @return star tree document + * @throws IOException if an I/O error occurs while fetching the star-tree document + */ + public abstract StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException; + /** * Retrieves the list of star-tree documents in the star-tree. * @@ -179,7 +421,7 @@ public List generateMetricAggregatorInfos(MapperService ma * aggregated star-tree documents. * * @param dimensionReaders List of docValues readers to read dimensions from the segment - * @param metricReaders List of docValues readers to read metrics from the segment + * @param metricReaders List of docValues readers to read metrics from the segment * @return Iterator for the aggregated star-tree document */ public abstract Iterator sortAndAggregateSegmentDocuments( @@ -200,7 +442,6 @@ public abstract Iterator generateStarTreeDocumentsForStarNode( /** * Returns the star-tree document from the segment based on the current doc id - * */ protected StarTreeDocument getSegmentStarTreeDocument( int currentDocId, @@ -290,7 +531,9 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( if (isMerge) { metrics[i] = metricValueAggregator.getInitialAggregatedValue(segmentDocument.metrics[i]); } else { - metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue(getLong(segmentDocument.metrics[i])); + metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue( + getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()) + ); } } catch (Exception e) { @@ -311,7 +554,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( } else { aggregatedSegmentDocument.metrics[i] = metricValueAggregator.mergeAggregatedValueAndSegmentValue( aggregatedSegmentDocument.metrics[i], - getLong(segmentDocument.metrics[i]) + getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()) ); } } catch (Exception e) { @@ -326,27 +569,24 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( /** * Safely converts the metric value of object type to long. * - * @param metric value of the metric + * @param metric value of the metric + * @param identityMetricValue value of the metric which does not really affect the aggregations * @return converted metric value to long */ - private static long getLong(Object metric) { + private static long getLong(Object metric, long identityMetricValue) { - Long metricValue = null; + long metricValue; try { if (metric instanceof Long) { metricValue = (long) metric; - } else if (metric != null) { - metricValue = Long.valueOf(String.valueOf(metric)); + } else { + logger.debug("metric value is null, returning identity metric value for the aggregator"); + return identityMetricValue; } } catch (Exception e) { throw new IllegalStateException("unable to cast segment metric", e); } - if (metricValue == null) { - return 0; - // TODO: handle this properly - // throw new IllegalStateException("unable to cast segment metric"); - } return metricValue; } @@ -390,128 +630,6 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum } } - /** - * Builds the star tree from the original segment documents - * - * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field - * - * @throws IOException when we are unable to build star-tree - */ - public void build(Map fieldProducerMap) throws IOException { - long startTime = System.currentTimeMillis(); - logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); - if (totalSegmentDocs == 0) { - logger.debug("No documents found in the segment"); - return; - } - List metricReaders = getMetricReaders(state, fieldProducerMap); - List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); - SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; - for (int i = 0; i < numDimensions; i++) { - String dimension = dimensionsSplitOrder.get(i).getField(); - FieldInfo dimensionFieldInfo = state.fieldInfos.fieldInfo(dimension); - if (dimensionFieldInfo == null) { - dimensionFieldInfo = getFieldInfo(dimension); - } - dimensionReaders[i] = new SequentialDocValuesIterator( - fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) - ); - } - Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); - logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - build(starTreeDocumentIterator); - logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - } - - private static FieldInfo getFieldInfo(String field) { - return new FieldInfo( - field, - 1, - false, - false, - false, - IndexOptions.NONE, - DocValuesType.SORTED_NUMERIC, - -1, - Collections.emptyMap(), - 0, - 0, - 0, - 0, - VectorEncoding.FLOAT32, - VectorSimilarityFunction.EUCLIDEAN, - false, - false - ); - } - - /** - * Generates the configuration required to perform aggregation for all the metrics on a field - * - * @return list of MetricAggregatorInfo - */ - public List getMetricReaders(SegmentWriteState state, Map fieldProducerMap) - throws IOException { - List metricReaders = new ArrayList<>(); - for (Metric metric : this.starTreeField.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { - FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); - if (metricFieldInfo == null) { - metricFieldInfo = getFieldInfo(metric.getField()); - } - // TODO - // if (metricStat != MetricStat.COUNT) { - // Need not initialize the metric reader for COUNT metric type - SequentialDocValuesIterator metricReader = new SequentialDocValuesIterator( - fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) - ); - // } - - metricReaders.add(metricReader); - } - } - return metricReaders; - } - - /** - * Builds the star tree using Star-Tree Document - * - * @param starTreeDocumentIterator contains the sorted and aggregated documents - * @throws IOException when we are unable to build star-tree - */ - void build(Iterator starTreeDocumentIterator) throws IOException { - int numSegmentStarTreeDocument = totalSegmentDocs; - - while (starTreeDocumentIterator.hasNext()) { - appendToStarTree(starTreeDocumentIterator.next()); - } - int numStarTreeDocument = numStarTreeDocs; - logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); - - if (numStarTreeDocs == 0) { - // TODO: Uncomment when segment codec and file formats is ready - // StarTreeBuilderUtils.serializeTree(indexOutput, rootNode, dimensionsSplitOrder, numNodes); - return; - } - - constructStarTree(rootNode, 0, numStarTreeDocs); - int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; - logger.debug( - "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", - numStarTreeNodes, - numStarTreeDocumentUnderStarNode - ); - - createAggregatedDocs(rootNode); - int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; - logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); - - // TODO: When StarTree Codec is ready - // Create doc values indices in disk - // Serialize and save in disk - // Write star tree metadata for off heap implementation - } - /** * Adds a document to star-tree * @@ -528,9 +646,9 @@ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOExcept * * @return return new star-tree node */ - private TreeNode getNewNode() { + private InMemoryTreeNode getNewNode() { numStarTreeNodes++; - return new TreeNode(); + return new InMemoryTreeNode(); } /** @@ -541,7 +659,7 @@ private TreeNode getNewNode() { * @param endDocId end document id * @throws IOException throws an exception if we are unable to construct the tree */ - private void constructStarTree(TreeNode node, int startDocId, int endDocId) throws IOException { + private void constructStarTree(InMemoryTreeNode node, int startDocId, int endDocId) throws IOException { int childDimensionId = node.dimensionId + 1; if (childDimensionId == numDimensions) { @@ -550,7 +668,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro // Construct all non-star children nodes node.childDimensionId = childDimensionId; - Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); + Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); node.children = children; // Construct star-node if required @@ -559,7 +677,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro } // Further split on child nodes if required - for (TreeNode child : children.values()) { + for (InMemoryTreeNode child : children.values()) { if (child.endDocId - child.startDocId > maxLeafDocuments) { constructStarTree(child, child.startDocId, child.endDocId); } @@ -575,16 +693,21 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro * @return root node with non-star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { - Map nodes = new HashMap<>(); + private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { + Map nodes = new HashMap<>(); int nodeStartDocId = startDocId; Long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); for (int i = startDocId + 1; i < endDocId; i++) { Long dimensionValue = getDimensionValue(i, dimensionId); if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { - TreeNode child = getNewNode(); + InMemoryTreeNode child = getNewNode(); child.dimensionId = dimensionId; - child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; + if (nodeDimensionValue == null) { + child.dimensionValue = ALL; + child.nodeType = StarTreeNodeType.NULL.getValue(); + } else { + child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; + } child.startDocId = nodeStartDocId; child.endDocId = i; nodes.put(nodeDimensionValue, child); @@ -593,7 +716,7 @@ private Map constructNonStarNodes(int startDocId, int endDocId, nodeDimensionValue = dimensionValue; } } - TreeNode lastNode = getNewNode(); + InMemoryTreeNode lastNode = getNewNode(); lastNode.dimensionId = dimensionId; lastNode.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; lastNode.startDocId = nodeStartDocId; @@ -611,11 +734,11 @@ private Map constructNonStarNodes(int startDocId, int endDocId, * @return root node with star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { - TreeNode starNode = getNewNode(); + private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { + InMemoryTreeNode starNode = getNewNode(); starNode.dimensionId = dimensionId; starNode.dimensionValue = ALL; - starNode.isStarNode = true; + starNode.nodeType = StarTreeNodeType.STAR.getValue(); starNode.startDocId = numStarTreeDocs; Iterator starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId); while (starTreeDocumentIterator.hasNext()) { @@ -632,7 +755,7 @@ private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId * @return aggregated star-tree documents * @throws IOException throws an exception upon failing to create new aggregated docs based on star tree */ - private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException { + private StarTreeDocument createAggregatedDocs(InMemoryTreeNode node) throws IOException { StarTreeDocument aggregatedStarTreeDocument = null; if (node.children == null) { @@ -659,8 +782,8 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException // For non-leaf node if (node.children.containsKey((long) ALL)) { // If it has star child, use the star child aggregated document directly - for (TreeNode child : node.children.values()) { - if (child.isStarNode) { + for (InMemoryTreeNode child : node.children.values()) { + if (child.nodeType == StarTreeNodeType.STAR.getValue()) { aggregatedStarTreeDocument = createAggregatedDocs(child); node.aggregatedDocId = child.aggregatedDocId; } else { @@ -670,12 +793,12 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException } else { // If no star child exists, aggregate all aggregated documents from non-star children if (node.children.values().size() == 1) { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); node.aggregatedDocId = child.aggregatedDocId; } } else { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); } if (null == aggregatedStarTreeDocument) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index 1599be2e76a56..194be5a2928ae 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -7,8 +7,10 @@ */ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; @@ -24,6 +26,9 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; /** * On heap single tree builder @@ -42,8 +47,14 @@ public class OnHeapStarTreeBuilder extends BaseStarTreeBuilder { * @param segmentWriteState segment write state * @param mapperService helps with the numeric type of field */ - public OnHeapStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService) { - super(starTreeField, segmentWriteState, mapperService); + public OnHeapStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + super(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); } @Override @@ -84,12 +95,16 @@ public Iterator sortAndAggregateSegmentDocuments( // TODO : we can save empty iterator for dimensions which are not part of segment starTreeDocuments[currentDocId] = getSegmentStarTreeDocument(currentDocId, dimensionReaders, metricReaders); } - return sortAndAggregateStarTreeDocuments(starTreeDocuments); + return sortAndAggregateStarTreeDocuments(starTreeDocuments, false); } @Override - public void build(List starTreeValuesSubs) throws IOException { - build(mergeStarTrees(starTreeValuesSubs)); + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + build(mergeStarTrees(starTreeValuesSubs), fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } /** @@ -127,10 +142,9 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); } - boolean endOfDoc = false; int currentDocId = 0; int numSegmentDocs = Integer.parseInt( - starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) + starTreeValues.getAttributes().getOrDefault(SEGMENT_DOCS_COUNT, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) ); while (currentDocId < numSegmentDocs) { Long[] dims = new Long[dimensionsSplitOrder.size()]; @@ -162,8 +176,9 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal return starTreeDocuments.toArray(starTreeDocumentsArr); } - Iterator sortAndAggregateStarTreeDocuments(StarTreeDocument[] starTreeDocuments) { - return sortAndAggregateStarTreeDocuments(starTreeDocuments, false); + @Override + public StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException { + return starTreeDocuments.get(docId); } /** diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java index 94c9c9f2efb18..e9d6b4f8dd741 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; @@ -16,6 +17,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; /** * A star-tree builder that builds a single star-tree. @@ -27,17 +29,29 @@ public interface StarTreeBuilder extends Closeable { /** * Builds the star tree from the original segment documents * - * @param fieldProducerMap contains the docValues producer to get docValues associated with each field + * @param fieldProducerMap contains the docValues producer to get docValues associated with each field + * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree + * @param starTreeDocValuesConsumer consumer of star-tree doc values * @throws IOException when we are unable to build star-tree */ - void build(Map fieldProducerMap) throws IOException; + void build( + Map fieldProducerMap, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException; /** - * Builds the star tree using StarTree values from multiple segments + * Builds the star tree using Tree values from multiple segments * * @param starTreeValuesSubs contains the star tree values from multiple segments + * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree + * @param starTreeDocValuesConsumer consumer of star-tree doc values * @throws IOException when we are unable to build star-tree */ - void build(List starTreeValuesSubs) throws IOException; + void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java index 6c3d476aa3a55..80cc8623c8acb 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java @@ -10,8 +10,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; @@ -25,6 +27,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; /** * Builder to construct star-trees based on multiple star-tree fields. @@ -39,6 +42,7 @@ public class StarTreesBuilder implements Closeable { private final List starTreeFields; private final SegmentWriteState state; private final MapperService mapperService; + private AtomicInteger fieldNumberAcrossStarTrees; public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mapperService) { List starTreeFields = new ArrayList<>(); @@ -58,12 +62,24 @@ public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mappe this.starTreeFields = starTreeFields; this.state = segmentWriteState; this.mapperService = mapperService; + this.fieldNumberAcrossStarTrees = new AtomicInteger(); } /** - * Builds the star-trees. + * Builds all star-trees for given star-tree fields. + * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data + * @param fieldProducerMap fetches iterators for the fields (dimensions and metrics) + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values + * @throws IOException when an error occurs while building the star-trees */ - public void build(Map fieldProducerMap) throws IOException { + public void build( + IndexOutput metaOut, + IndexOutput dataOut, + Map fieldProducerMap, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { if (starTreeFields.isEmpty()) { logger.debug("no star-tree fields found, returning from star-tree builder"); return; @@ -75,8 +91,8 @@ public void build(Map fieldProducerMap) throws IOExce // Build all star-trees for (StarTreeField starTreeField : starTreeFields) { - try (StarTreeBuilder starTreeBuilder = getSingleTreeBuilder(starTreeField, state, mapperService)) { - starTreeBuilder.build(fieldProducerMap); + try (StarTreeBuilder starTreeBuilder = getSingleTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService)) { + starTreeBuilder.build(fieldProducerMap, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } } logger.debug("Took {} ms to build {} star-trees with star-tree fields", System.currentTimeMillis() - startTime, numStarTrees); @@ -90,9 +106,17 @@ public void close() throws IOException { /** * Merges star tree fields from multiple segments * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data * @param starTreeValuesSubsPerField starTreeValuesSubs per field + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values */ - public void buildDuringMerge(final Map> starTreeValuesSubsPerField) throws IOException { + public void buildDuringMerge( + IndexOutput metaOut, + IndexOutput dataOut, + final Map> starTreeValuesSubsPerField, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { logger.debug("Starting merge of {} star-trees with star-tree fields", starTreeValuesSubsPerField.size()); long startTime = System.currentTimeMillis(); for (Map.Entry> entry : starTreeValuesSubsPerField.entrySet()) { @@ -102,8 +126,9 @@ public void buildDuringMerge(final Map> starTreeVal continue; } StarTreeField starTreeField = starTreeValuesList.get(0).getStarTreeField(); - StarTreeBuilder builder = getSingleTreeBuilder(starTreeField, state, mapperService); - builder.build(starTreeValuesList); + StarTreeBuilder builder = getSingleTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService); + builder.build(starTreeValuesList, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); + builder.close(); builder.close(); } logger.debug( @@ -116,11 +141,16 @@ public void buildDuringMerge(final Map> starTreeVal /** * Get star-tree builder based on build mode. */ - StarTreeBuilder getSingleTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) - throws IOException { + StarTreeBuilder getSingleTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState state, + MapperService mapperService + ) throws IOException { switch (starTreeField.getStarTreeConfig().getBuildMode()) { case ON_HEAP: - return new OnHeapStarTreeBuilder(starTreeField, state, mapperService); + return new OnHeapStarTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService); case OFF_HEAP: // TODO // return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java new file mode 100644 index 0000000000000..8159d2039121c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -0,0 +1,186 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.RandomAccessInput; + +import java.io.IOException; +import java.util.Iterator; + +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Fixed Length implementation of {@link StarTreeNode} + * + * @opensearch.experimental + */ +public class FixedLengthStarTreeNode implements StarTreeNode { + public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1; + public static final long SERIALIZABLE_DATA_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES + * NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES); + private static final int DIMENSION_ID_OFFSET = 0; + private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; + private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; + private static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; + private static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; + private static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; + private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; + private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + + public static final int INVALID_ID = -1; + + private final int nodeId; + private final int firstChildId; + + RandomAccessInput in; + + public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { + this.in = in; + this.nodeId = nodeId; + firstChildId = getInt(FIRST_CHILD_ID_OFFSET); + } + + private int getInt(int fieldOffset) throws IOException { + return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + private long getLong(int fieldOffset) throws IOException { + return in.readLong(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + private byte getByte(int fieldOffset) throws IOException { + return in.readByte(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + @Override + public int getDimensionId() throws IOException { + return getInt(DIMENSION_ID_OFFSET); + } + + @Override + public long getDimensionValue() throws IOException { + return getLong(DIMENSION_VALUE_OFFSET); + } + + @Override + public int getChildDimensionId() throws IOException { + if (firstChildId == INVALID_ID) { + return INVALID_ID; + } else { + return in.readInt(firstChildId * SERIALIZABLE_DATA_SIZE_IN_BYTES); + } + } + + @Override + public int getStartDocId() throws IOException { + return getInt(START_DOC_ID_OFFSET); + } + + @Override + public int getEndDocId() throws IOException { + return getInt(END_DOC_ID_OFFSET); + } + + @Override + public int getAggregatedDocId() throws IOException { + return getInt(AGGREGATE_DOC_ID_OFFSET); + } + + @Override + public int getNumChildren() throws IOException { + if (firstChildId == INVALID_ID) { + return 0; + } else { + return getInt(LAST_CHILD_ID_OFFSET) - firstChildId + 1; + } + } + + @Override + public boolean isLeaf() { + return firstChildId == INVALID_ID; + } + + @Override + public byte getStarTreeNodeType() throws IOException { + return getByte(STAR_NODE_TYPE_OFFSET); + } + + @Override + public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException { + // there will be no children for leaf nodes + if (isLeaf()) { + return null; + } + + // Specialize star node for performance + if (isStar) { + return handleStarNode(); + } + + return binarySearchChild(dimensionValue); + } + + private FixedLengthStarTreeNode handleStarNode() throws IOException { + FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); + if (firstNode.getDimensionValue() == ALL) { + return firstNode; + } else { + return null; + } + } + + private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + // Binary search to find child node + int low = firstChildId; + int high = getInt(LAST_CHILD_ID_OFFSET); + + while (low <= high) { + int mid = low + (high - low) / 2; + FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); + long midNodeDimensionValue = midNode.getDimensionValue(); + + if (midNodeDimensionValue == dimensionValue) { + return midNode; + } else if (midNodeDimensionValue < dimensionValue) { + low = mid + 1; + } else { + high = mid - 1; + } + } + return null; + } + + @Override + public Iterator getChildrenIterator() throws IOException { + return new Iterator<>() { + private int currentChildId = firstChildId; + private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); + + @Override + public boolean hasNext() { + return currentChildId <= lastChildId; + } + + @Override + public FixedLengthStarTreeNode next() { + try { + return new FixedLengthStarTreeNode(in, currentChildId++); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java similarity index 73% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java index 5cf737c61ab2d..7aa5406029ca8 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java @@ -5,12 +5,14 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.utils; +package org.opensearch.index.compositeindex.datacube.startree.node; import org.opensearch.common.annotation.ExperimentalApi; import java.util.Map; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + /** * /** * Represents a node in a tree data structure, specifically designed for a star-tree implementation. @@ -19,9 +21,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class TreeNode { - - public static final int ALL = -1; +public class InMemoryTreeNode { /** * The dimension id for the dimension (field) associated with this star-tree node. @@ -54,12 +54,20 @@ public class TreeNode { public long dimensionValue = ALL; /** - * A flag indicating whether this node is a star node (a node that represents an aggregation of all dimensions). + * A byte indicating whether the node is star node, null node or default node (with dimension value present). */ - public boolean isStarNode = false; + public byte nodeType = 0; /** * A map containing the child nodes of this star-tree node, keyed by their dimension id. */ - public Map children; + public Map children; + + public long getDimensionValue() { + return dimensionValue; + } + + public byte getNodeType() { + return nodeType; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java new file mode 100644 index 0000000000000..f2d4ce2fef83a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -0,0 +1,66 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeDataWriter; + +import java.io.IOException; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; + +/** + * Off heap implementation of the star-tree. + * + * @opensearch.experimental + */ +public class StarTree implements Tree { + private static final Logger logger = LogManager.getLogger(StarTree.class); + private final FixedLengthStarTreeNode root; + private final Integer numNodes; + + public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { + long magicMarker = data.readLong(); + if (COMPOSITE_FIELD_MARKER != magicMarker) { + logger.error("Invalid magic marker"); + throw new IOException("Invalid magic marker"); + } + int version = data.readInt(); + if (VERSION != version) { + logger.error("Invalid star tree version"); + throw new IOException("Invalid version"); + } + numNodes = data.readInt(); // num nodes + + RandomAccessInput in = data.randomAccessSlice( + StarTreeDataWriter.computeStarTreeDataHeaderByteSize(), + starTreeMetadata.getDataLength() - StarTreeDataWriter.computeStarTreeDataHeaderByteSize() + ); + root = new FixedLengthStarTreeNode(in, 0); + } + + @Override + public StarTreeNode getRoot() { + return root; + } + + /** + * Returns the number of nodes in star-tree + * + * @return number of nodes in te star-tree + */ + public Integer getNumNodes() { + return numNodes; + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index 59522ffa4be89..3c2acadff6a96 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -20,7 +20,6 @@ */ @ExperimentalApi public interface StarTreeNode { - long ALL = -1l; /** * Returns the dimension ID of the current star-tree node. @@ -86,12 +85,12 @@ public interface StarTreeNode { boolean isLeaf(); /** - * Checks if the current node is a star node. + * Checks if the current node is a star node, null node or a node with actual dimension value. * - * @return true if the node is a star node, false otherwise - * @throws IOException if an I/O error occurs while reading the star node status + * @return the node type value based on the star-tree node type + * @throws IOException if an I/O error occurs while reading the node type */ - boolean isStarNode() throws IOException; + byte getStarTreeNodeType() throws IOException; /** * Returns the child star-tree node for the given dimension value. @@ -100,7 +99,7 @@ public interface StarTreeNode { * @return the child node for the given dimension value or null if child is not present * @throws IOException if an I/O error occurs while retrieving the child node */ - StarTreeNode getChildForDimensionValue(long dimensionValue) throws IOException; + StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException; /** * Returns an iterator over the children of the current star-tree node. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java new file mode 100644 index 0000000000000..856fee072cd9e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Represents the different types of nodes in a StarTree data structure. + *

+ * In order to handle different node types, we use a byte value to represent the node type. + * This enum provides a convenient way to map byte values to their corresponding node types. + *

+ * Star and Null Nodes are represented as special cases. Default is the general case. + * Star and Null nodes are represented with negative ordinals so that first node is Star, second node is Null Node + * and the rest of the default nodes are sorted based on dimension values. + *

+ * By default, we want to consider nodes as default node. + * + * @opensearch.experimental + * @see StarTreeNode + */ +public enum StarTreeNodeType { + + /** + * Represents a star node type. + * + */ + STAR("star", (byte) -2), + + /** + * Represents a null node type. + */ + NULL("null", (byte) -1), + + /** + * Represents a default node type. + */ + DEFAULT("default", (byte) 0); + + private final String name; + private final byte value; + + /** + * Constructs a new StarTreeNodeType with the given name and value. + * + * @param name the name of the node type + * @param value the value associated with the node type + */ + StarTreeNodeType(String name, byte value) { + this.name = name; + this.value = value; + } + + /** + * Returns the name of the node type. + * + * @return the name of the node type + */ + public String getName() { + return name; + } + + /** + * Returns the value associated with the node type. + * + * @return the value associated with the node type + */ + public byte getValue() { + return value; + } + + /** + * Returns the StarTreeNodeType enum constant with the specified value. + * + * @param value the value of the enum constant to return + * @return the enum constant with the specified value, or null if no such constant exists + */ + public static StarTreeNodeType fromValue(byte value) { + for (StarTreeNodeType nodeType : StarTreeNodeType.values()) { + if (nodeType.getValue() == value) { + return nodeType; + } + } + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java new file mode 100644 index 0000000000000..811150da64bec --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Interface for star-tree. + * + * @opensearch.experimental + */ +public interface Tree { + + /** + * Fetches the root node of the star-tree. + * @return the root of the star-tree + */ + StarTreeNode getRoot(); + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java index 516d5b5a012ab..19d12bc6318d7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java @@ -8,5 +8,7 @@ /** * Holds classes associated with star tree node + * + * @opensearch.experimental */ package org.opensearch.index.compositeindex.datacube.startree.node; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java index 400d7a1c00104..bf1f1d52b4eee 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java @@ -9,6 +9,7 @@ package org.opensearch.index.compositeindex.datacube.startree.utils; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.opensearch.common.annotation.ExperimentalApi; @@ -28,6 +29,11 @@ public class SequentialDocValuesIterator { */ private final DocIdSetIterator docIdSetIterator; + /** + * The value associated with the latest document. + */ + private Long docValue; + /** * The id of the latest document. */ @@ -42,15 +48,50 @@ public SequentialDocValuesIterator(DocIdSetIterator docIdSetIterator) { this.docIdSetIterator = docIdSetIterator; } + /** + * Constructs a new SequentialDocValuesIterator instance with an empty sorted numeric. + * + */ + public SequentialDocValuesIterator() { + this.docIdSetIterator = DocValues.emptySortedNumeric(); + } + + /** + * Returns the value associated with the latest document. + * + * @return the value associated with the latest document + */ + public Long getDocValue() { + return docValue; + } + + /** + * Sets the value associated with the latest document. + * + * @param docValue the value to be associated with the latest document + */ + public void setDocValue(Long docValue) { + this.docValue = docValue; + } + /** * Returns the id of the latest document. * * @return the id of the latest document */ - int getDocId() { + public int getDocId() { return docId; } + /** + * Sets the id of the latest document. + * + * @param docId the ID of the latest document + */ + public void setDocId(int docId) { + this.docId = docId; + } + /** * Returns the DocIdSetIterator associated with this instance. * @@ -65,7 +106,7 @@ public int nextDoc(int currentDocId) throws IOException { if (docId >= currentDocId) { return docId; } - docId = this.docIdSetIterator.nextDoc(); + setDocId(this.docIdSetIterator.nextDoc()); return docId; } @@ -81,7 +122,12 @@ public Long value(int currentDocId) throws IOException { if (docId == DocIdSetIterator.NO_MORE_DOCS || docId != currentDocId) { return null; } - return sortedNumericDocValues.nextValue(); + if (docValue == null) { + docValue = sortedNumericDocValues.nextValue(); + } + Long nextValue = docValue; + docValue = null; + return nextValue; } else { throw new IllegalStateException("Unsupported Iterator requested for SequentialDocValuesIterator"); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java new file mode 100644 index 0000000000000..dc155df4eafca --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.util.Collections; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeUtils { + + private StarTreeUtils() {} + + public static final int ALL = -1; + + /** + * The suffix appended to dimension field names in the Star Tree index. + */ + public static final String DIMENSION_SUFFIX = "dim"; + + /** + * The suffix appended to metric field names in the Star Tree index. + */ + public static final String METRIC_SUFFIX = "metric"; + + /** + * Returns the full field name for a dimension in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param dimensionName name of the dimension + * @return full field name for the dimension in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeDimensionsDocValues(String starTreeFieldName, String dimensionName) { + return starTreeFieldName + "_" + dimensionName + "_" + DIMENSION_SUFFIX; + } + + /** + * Returns the full field name for a metric in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param fieldName name of the metric field + * @param metricName name of the metric + * @return full field name for the metric in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeMetricsDocValues(String starTreeFieldName, String fieldName, String metricName) { + return MetricAggregatorInfo.toFieldName(starTreeFieldName, fieldName, metricName) + "_" + METRIC_SUFFIX; + } + + /** + * Get field infos from field names + * + * @param fields field names + * @return field infos + */ + public static FieldInfo[] getFieldInfoList(List fields) { + FieldInfo[] fieldInfoList = new FieldInfo[fields.size()]; + + // field number is not really used. We depend on unique field names to get the desired iterator + int fieldNumber = 0; + + for (String fieldName : fields) { + fieldInfoList[fieldNumber] = getFieldInfo(fieldName, fieldNumber); + fieldNumber++; + } + return fieldInfoList; + } + + /** + * Get new field info instance for a given field name and field number + * @param fieldName name of the field + * @param fieldNumber number of the field + * @return new field info instance + */ + public static FieldInfo getFieldInfo(String fieldName, int fieldNumber) { + return new FieldInfo( + fieldName, + fieldNumber, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/package-info.java index 59f18efec26b1..9a88f88d9850a 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/package-info.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/package-info.java @@ -8,6 +8,7 @@ /** * Core classes for handling composite indices. - * @opensearch.experimental + * + * @opensearch.experimental */ package org.opensearch.index.compositeindex; diff --git a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java index 049d91bc42d9c..b01d239a99671 100644 --- a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java @@ -23,6 +23,7 @@ import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.Rounding; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; @@ -30,16 +31,27 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.MapperTestUtils; import org.opensearch.index.codec.composite.Composite99Codec; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; import org.opensearch.indices.IndicesModule; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; +import static org.opensearch.test.OpenSearchTestCase.randomFrom; /** * Star tree doc values Lucene tests @@ -68,7 +80,7 @@ protected Codec getCodec() { final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); try { - createMapperService(getExpandedMapping("status", "size")); + createMapperService(getExpandedMapping()); } catch (IOException e) { throw new RuntimeException(e); } @@ -76,6 +88,32 @@ protected Codec getCodec() { return codec; } + private StarTreeMapper.StarTreeFieldType getStarTreeFieldType() { + List m1 = new ArrayList<>(); + m1.add(MetricStat.MAX); + Metric metric = new Metric("sndv", m1); + List d1CalendarIntervals = new ArrayList<>(); + d1CalendarIntervals.add(Rounding.DateTimeUnit.HOUR_OF_DAY); + StarTreeField starTreeField = getStarTreeField(d1CalendarIntervals, metric); + + return new StarTreeMapper.StarTreeFieldType("star_tree", starTreeField); + } + + private static StarTreeField getStarTreeField(List d1CalendarIntervals, Metric metric1) { + DateDimension d1 = new DateDimension("field", d1CalendarIntervals); + NumericDimension d2 = new NumericDimension("dv"); + + List metrics = List.of(metric1); + List dims = List.of(d1, d2); + StarTreeFieldConfiguration config = new StarTreeFieldConfiguration( + 100, + Collections.emptySet(), + randomFrom(StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) // TODO : change it + ); + + return new StarTreeField("starTree", dims, metrics, config); + } + public void testStarTreeDocValues() throws IOException { Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(null); @@ -106,7 +144,7 @@ public void testStarTreeDocValues() throws IOException { directory.close(); } - private XContentBuilder getExpandedMapping(String dim, String metric) throws IOException { + private XContentBuilder getExpandedMapping() throws IOException { return topMapping(b -> { b.startObject("composite"); b.startObject("startree"); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index 76a7875919a8b..7cc0f578c3fab 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.DocValues; @@ -15,6 +16,7 @@ import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; @@ -24,11 +26,14 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Version; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.composite.Composite99DocValuesFormat; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.CompositeIndexConstants; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -36,8 +41,9 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.Mapper; @@ -61,8 +67,8 @@ import java.util.Queue; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; -import static org.opensearch.index.compositeindex.datacube.startree.builder.BaseStarTreeBuilder.NUM_SEGMENT_DOCS; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -76,11 +82,15 @@ public abstract class AbstractStarTreeBuilderTests extends OpenSearchTestCase { protected StarTreeField compositeField; protected Map fieldProducerMap; protected SegmentWriteState writeState; - private BaseStarTreeBuilder builder; + protected BaseStarTreeBuilder builder; + protected IndexOutput dataOut; + protected IndexOutput metaOut; + protected DocValuesConsumer docValuesConsumer; @Before public void setup() throws IOException { fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); + docValuesConsumer = mock(DocValuesConsumer.class); dimensionsOrder = List.of( new NumericDimension("field1"), @@ -130,6 +140,20 @@ public void setup() throws IOException { } writeState = getWriteState(5); + String dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + String metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); + mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); when(mapperService.documentMapper()).thenReturn(documentMapper); @@ -665,7 +689,7 @@ public void test_build_halfFloatMetrics() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -718,7 +742,7 @@ public void test_build_floatMetrics() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -772,7 +796,7 @@ public void test_build_longMetrics() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -821,7 +845,7 @@ public void test_build() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); @@ -944,13 +968,13 @@ public void test_build_starTreeDataset() throws IOException { dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments(); Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); Map> dimValueToDocIdMap = new HashMap<>(); - builder.rootNode.isStarNode = true; + builder.rootNode.nodeType = StarTreeNodeType.STAR.getValue(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); Map> expectedDimToValueMap = getExpectedDimToValueMap(); @@ -1055,7 +1079,7 @@ public void testFlushFlow() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(6), mapperService); SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimDvs, @@ -1126,7 +1150,7 @@ public void testFlushFlowBuild() throws IOException { DocValuesProducer d2vp = getDocValuesProducer(d2sndv); DocValuesProducer m1vp = getDocValuesProducer(m1sndv); Map fieldProducerMap = Map.of("field1", d1vp, "field3", d2vp, "field2", m1vp); - builder.build(fieldProducerMap); + builder.build(fieldProducerMap, new AtomicInteger(), docValuesConsumer); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] [0, 0] | [0.0] @@ -1209,7 +1233,7 @@ public void testMergeFlowWithSum() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] @@ -1259,7 +1283,7 @@ public void testMergeFlowWithCount() throws IOException { sf, "6" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(6), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(6), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1298,7 +1322,7 @@ private StarTreeValues getStarTreeValues( null, dimDocIdSetIterators, metricDocIdSetIterators, - Map.of("numSegmentDocs", number) + Map.of(CompositeIndexConstants.SEGMENT_DOCS_COUNT, number) ); return starTreeValues; } @@ -1336,7 +1360,7 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1396,7 +1420,7 @@ public void testMergeFlowWithMissingDocs() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1456,7 +1480,7 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(4), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1517,7 +1541,7 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { sf, "4" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1569,7 +1593,7 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { sf, "0" ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, getWriteState(0), mapperService); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, getWriteState(0), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1664,8 +1688,8 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); int count = 0; @@ -1774,8 +1798,8 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); /** 635 docs get generated @@ -1892,8 +1916,8 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); builder.close(); @@ -1991,8 +2015,8 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsWithField, sf ); - OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(sf, writeState, mapperService); - builder.build(List.of(starTreeValues, starTreeValues2)); + OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); + builder.build(List.of(starTreeValues, starTreeValues2), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); Map> dimValueToDocIdMap = new HashMap<>(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); @@ -2027,13 +2051,13 @@ private static StarTreeField getStarTreeField(int maxLeafDocs) { return sf; } - private void traverseStarTree(TreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { - TreeNode starTree = root; + private void traverseStarTree(InMemoryTreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { + InMemoryTreeNode starTree = root; // Use BFS to traverse the star tree - Queue queue = new ArrayDeque<>(); + Queue queue = new ArrayDeque<>(); queue.add(starTree); int currentDimensionId = -1; - TreeNode starTreeNode; + InMemoryTreeNode starTreeNode; List docIds = new ArrayList<>(); while ((starTreeNode = queue.poll()) != null) { int dimensionId = starTreeNode.dimensionId; @@ -2044,17 +2068,17 @@ private void traverseStarTree(TreeNode root, Map> di // store aggregated document of the node int docId = starTreeNode.aggregatedDocId; Map map = dimValueToDocIdMap.getOrDefault(dimensionId, new HashMap<>()); - if (starTreeNode.isStarNode) { + if (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()) { map.put(Long.MAX_VALUE, docId); } else { map.put(starTreeNode.dimensionValue, docId); } dimValueToDocIdMap.put(dimensionId, map); - if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.isStarNode)) { - Iterator childrenIterator = starTreeNode.children.values().iterator(); + if (starTreeNode.children != null && (!traverStarNodes || (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()))) { + Iterator childrenIterator = starTreeNode.children.values().iterator(); while (childrenIterator.hasNext()) { - TreeNode childNode = childrenIterator.next(); + InMemoryTreeNode childNode = childrenIterator.next(); queue.add(childNode); } } @@ -2171,7 +2195,7 @@ public void testMergeFlow() throws IOException { } Map getAttributes(int numSegmentDocs) { - return Map.of(String.valueOf(NUM_SEGMENT_DOCS), String.valueOf(numSegmentDocs)); + return Map.of(CompositeIndexConstants.SEGMENT_DOCS_COUNT, String.valueOf(numSegmentDocs)); } private static StarTreeField getStarTreeField(MetricStat count) { @@ -2246,6 +2270,8 @@ public void tearDown() throws Exception { if (builder != null) { builder.close(); } + metaOut.close(); + dataOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java index 51ebc02ea8243..42f195f41b1af 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java @@ -8,20 +8,24 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.common.settings.Settings; +import org.opensearch.index.codec.composite.Composite99DocValuesFormat; import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; @@ -51,6 +55,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -75,9 +80,12 @@ public class BaseStarTreeBuilderTests extends OpenSearchTestCase { private static List metrics; private static Directory directory; private static FieldInfo[] fieldsInfo; - private static SegmentWriteState state; + private static SegmentWriteState writeState; private static StarTreeField starTreeField; + private static IndexOutput dataOut; + private static IndexOutput metaOut; + @BeforeClass public static void setup() throws IOException { @@ -138,7 +146,21 @@ public static void setup() throws IOException { fieldProducerMap.put(fields.get(i), docValuesProducer); } FieldInfos fieldInfos = new FieldInfos(fieldsInfo); - state = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + + String dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + String metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); @@ -157,9 +179,13 @@ public static void setup() throws IOException { ); when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new BaseStarTreeBuilder(starTreeField, state, mapperService) { + builder = new BaseStarTreeBuilder(metaOut, dataOut, starTreeField, writeState, mapperService) { @Override - public void build(List starTreeValuesSubs) throws IOException {} + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException {} @Override public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException {} @@ -169,6 +195,11 @@ public StarTreeDocument getStarTreeDocument(int docId) throws IOException { return null; } + @Override + public StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException { + return null; + } + @Override public List getStarTreeDocuments() { return List.of(); @@ -224,6 +255,8 @@ public void test_reduceStarTreeDocuments() { @Override public void tearDown() throws Exception { super.tearDown(); + dataOut.close(); + metaOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java index aed08b7727be7..315f8cb94b6e5 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java @@ -12,13 +12,16 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.mapper.MapperService; +import java.io.IOException; + public class OnHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { + @Override public BaseStarTreeBuilder getStarTreeBuilder( StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService - ) { - return new OnHeapStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + ) throws IOException { + return new OnHeapStarTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java index 564ab110fa7a5..455cd84bb44f8 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.FieldInfo; @@ -15,6 +16,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; @@ -45,9 +47,13 @@ public class StarTreesBuilderTests extends OpenSearchTestCase { private StarTreeField starTreeField; private Map fieldProducerMap; private Directory directory; + private IndexOutput dataOut; + private IndexOutput metaOut; public void setUp() throws Exception { super.setUp(); + metaOut = mock(IndexOutput.class); + dataOut = mock(IndexOutput.class); mapperService = mock(MapperService.class); directory = newFSDirectory(createTempDir()); SegmentInfo segmentInfo = new SegmentInfo( @@ -89,7 +95,7 @@ public void test_buildWithNoStarTreeFields() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(new HashSet<>()); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - starTreesBuilder.build(fieldProducerMap); + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, mock(DocValuesConsumer.class)); verifyNoInteractions(docValuesProducer); } @@ -97,7 +103,7 @@ public void test_buildWithNoStarTreeFields() throws IOException { public void test_getStarTreeBuilder() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - StarTreeBuilder starTreeBuilder = starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService); + StarTreeBuilder starTreeBuilder = starTreesBuilder.getSingleTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); assertTrue(starTreeBuilder instanceof OnHeapStarTreeBuilder); } @@ -106,7 +112,7 @@ public void test_getStarTreeBuilder_illegalArgument() { StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(1, new HashSet<>(), StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP); StarTreeField starTreeField = new StarTreeField("star_tree", new ArrayList<>(), new ArrayList<>(), starTreeFieldConfiguration); StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - assertThrows(IllegalArgumentException.class, () -> starTreesBuilder.getSingleTreeBuilder(starTreeField, segmentWriteState, mapperService)); + assertThrows(IllegalArgumentException.class, () -> starTreesBuilder.getSingleTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService)); } public void test_closeWithNoStarTreeFields() throws IOException { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java new file mode 100644 index 0000000000000..36e37e452b3f1 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java @@ -0,0 +1,210 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.meta; + +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.MetricEntry; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeWriter; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.index.mapper.CompositeMappedFieldType; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.UUID; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; +import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; + +public class StarTreeMetaTests extends OpenSearchTestCase { + + private IndexOutput metaOut; + private IndexInput metaIn; + private StarTreeField starTreeField; + private SegmentWriteState writeState; + private Directory directory; + private FieldInfo[] fieldsInfo; + private List dimensionsOrder; + private List fields = List.of(); + private List metrics; + private List metricAggregatorInfos = new ArrayList<>(); + private int segmentDocumentCount; + private long dataFilePointer; + private long dataFileLength; + + @Before + public void setup() throws IOException { + fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); + directory = newFSDirectory(createTempDir()); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 6, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + } + + public void test_starTreeMetadata() throws IOException { + dimensionsOrder = List.of( + new NumericDimension("field1"), + new NumericDimension("field3"), + new NumericDimension("field5"), + new NumericDimension("field8") + ); + metrics = List.of( + new Metric("field2", List.of(MetricStat.SUM)), + new Metric("field4", List.of(MetricStat.SUM)), + new Metric("field6", List.of(MetricStat.COUNT)) + ); + int maxLeafDocs = randomNonNegativeInt(); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( + maxLeafDocs, + new HashSet<>(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration); + + for (Metric metric : metrics) { + for (MetricStat metricType : metric.getMetrics()) { + MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo( + metricType, + metric.getField(), + starTreeField.getName(), + IndexNumericFieldData.NumericType.DOUBLE + ); + metricAggregatorInfos.add(metricAggregatorInfo); + } + } + + dataFileLength = randomNonNegativeLong(); + dataFilePointer = randomNonNegativeLong(); + segmentDocumentCount = randomNonNegativeInt(); + metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); + StarTreeWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + writeState, + metricAggregatorInfos, + segmentDocumentCount, + dataFilePointer, + dataFileLength + ); + metaOut.close(); + metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE); + assertEquals(COMPOSITE_FIELD_MARKER, metaIn.readLong()); + assertEquals(VERSION, metaIn.readVInt()); + + String compositeFieldName = metaIn.readString(); + CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName( + metaIn.readString() + ); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType); + assertEquals(starTreeField.getName(), starTreeMetadata.getCompositeFieldName()); + assertEquals(STAR_TREE, starTreeMetadata.getCompositeFieldType()); + + assertNotNull(starTreeMetadata); + + for (int i = 0; i < dimensionsOrder.size(); i++) { + assertEquals(dimensionsOrder.get(i).getField(), starTreeMetadata.getDimensionFields().get(i)); + } + + for (int i = 0; i < metricAggregatorInfos.size(); i++) { + MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i); + assertEquals(metricAggregatorInfos.get(i).getField(), metricEntry.getMetricFieldName()); + assertEquals(metricAggregatorInfos.get(i).getMetricStat(), metricEntry.getMetricStat()); + } + assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0); + assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0); + assertEquals( + starTreeFieldConfiguration.getSkipStarNodeCreationInDims().size(), + starTreeMetadata.getSkipStarNodeCreationInDims().size() + ); + for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDims)); + } + assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode()); + assertEquals(dataFileLength, starTreeMetadata.getDataLength()); + assertEquals(dataFilePointer, starTreeMetadata.getDataStartFilePointer()); + + metaIn.close(); + + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + metaOut.close(); + metaIn.close(); + directory.close(); + } + +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java new file mode 100644 index 0000000000000..5d5f59b62a3b7 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java @@ -0,0 +1,207 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.codec.composite.datacube.startree.fileformats.writer.StarTreeWriter; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Queue; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class FixedLengthStarTreeNodeTests extends OpenSearchTestCase { + + private IndexOutput dataOut; + private IndexInput dataIn; + private Directory directory; + + @Before + public void setup() throws IOException { + directory = newFSDirectory(createTempDir()); + } + + public void test_StarTreeNode() throws IOException { + + dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); + Map levelOrderStarTreeNodeMap = new LinkedHashMap<>(); + InMemoryTreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); + long starTreeDataLength = StarTreeWriter.writeStarTree(dataOut, root, 7, "star-tree"); + + // asserting on the actual length of the star tree data file + assertEquals(starTreeDataLength, 247); + dataOut.close(); + + dataIn = directory.openInput("star-tree-data", IOContext.READONCE); + + StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + StarTree starTree = new StarTree(dataIn, starTreeMetadata); + + StarTreeNode starTreeNode = starTree.getRoot(); + Queue queue = new ArrayDeque<>(); + queue.add(starTreeNode); + + while ((starTreeNode = queue.poll()) != null) { + + // verify the star node + assertStarTreeNode(starTreeNode, levelOrderStarTreeNodeMap.get(starTreeNode.getDimensionValue())); + + Iterator childrenIterator = starTreeNode.getChildrenIterator(); + + if (starTreeNode.getChildDimensionId() != -1) { + while (childrenIterator.hasNext()) { + StarTreeNode child = childrenIterator.next(); + assertStarTreeNode( + starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false), + levelOrderStarTreeNodeMap.get(child.getDimensionValue()) + ); + queue.add(child); + } + } + } + + dataIn.close(); + + } + + private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException { + assertEquals(starTreeNode.getDimensionId(), treeNode.dimensionId); + assertEquals(starTreeNode.getDimensionValue(), treeNode.dimensionValue); + assertEquals(starTreeNode.getStartDocId(), treeNode.startDocId); + assertEquals(starTreeNode.getEndDocId(), treeNode.endDocId); + assertEquals(starTreeNode.getChildDimensionId(), treeNode.childDimensionId); + assertEquals(starTreeNode.getAggregatedDocId(), treeNode.aggregatedDocId); + + if (starTreeNode.getChildDimensionId() != -1) { + assertFalse(starTreeNode.isLeaf()); + if (treeNode.children != null) { + assertEquals(starTreeNode.getNumChildren(), treeNode.children.values().size()); + } + } else { + assertTrue(starTreeNode.isLeaf()); + } + + } + + private InMemoryTreeNode generateSampleTree(Map levelOrderStarTreeNode) { + // Create the root node + InMemoryTreeNode root = new InMemoryTreeNode(); + root.dimensionId = 0; + root.startDocId = 0; + root.endDocId = 100; + root.childDimensionId = 1; + root.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + root.children = new HashMap<>(); + + levelOrderStarTreeNode.put(root.dimensionValue, root); + + // Create child nodes for dimension 1 + InMemoryTreeNode dim1Node1 = new InMemoryTreeNode(); + dim1Node1.dimensionId = 1; + dim1Node1.dimensionValue = 1; + dim1Node1.startDocId = 0; + dim1Node1.endDocId = 50; + dim1Node1.childDimensionId = 2; + dim1Node1.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + dim1Node1.children = new HashMap<>(); + + InMemoryTreeNode dim1Node2 = new InMemoryTreeNode(); + dim1Node2.dimensionId = 1; + dim1Node2.dimensionValue = 2; + dim1Node2.startDocId = 50; + dim1Node2.endDocId = 100; + dim1Node2.childDimensionId = 2; + dim1Node2.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + dim1Node2.children = new HashMap<>(); + + root.children.put(1L, dim1Node1); + root.children.put(2L, dim1Node2); + + levelOrderStarTreeNode.put(dim1Node1.dimensionValue, dim1Node1); + levelOrderStarTreeNode.put(dim1Node2.dimensionValue, dim1Node2); + + // Create child nodes for dimension 2 + InMemoryTreeNode dim2Node1 = new InMemoryTreeNode(); + dim2Node1.dimensionId = 2; + dim2Node1.dimensionValue = 3; + dim2Node1.startDocId = 0; + dim2Node1.endDocId = 25; + dim2Node1.childDimensionId = -1; + dim2Node1.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + dim2Node1.children = null; + + InMemoryTreeNode dim2Node2 = new InMemoryTreeNode(); + dim2Node2.dimensionId = 2; + dim2Node2.dimensionValue = 4; + dim2Node2.startDocId = 25; + dim2Node2.endDocId = 50; + dim2Node2.childDimensionId = -1; + dim2Node2.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + dim2Node2.children = null; + + InMemoryTreeNode dim2Node3 = new InMemoryTreeNode(); + dim2Node3.dimensionId = 2; + dim2Node3.dimensionValue = 5; + dim2Node3.startDocId = 50; + dim2Node3.endDocId = 75; + dim2Node3.childDimensionId = -1; + dim2Node3.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + dim2Node3.children = null; + + InMemoryTreeNode dim2Node4 = new InMemoryTreeNode(); + dim2Node4.dimensionId = 2; + dim2Node4.dimensionValue = 6; + dim2Node4.startDocId = 75; + dim2Node4.endDocId = 100; + dim2Node4.childDimensionId = -1; + dim2Node4.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + dim2Node4.children = null; + + dim1Node1.children.put(3L, dim2Node1); + dim1Node1.children.put(4L, dim2Node2); + dim1Node2.children.put(5L, dim2Node3); + dim1Node2.children.put(6L, dim2Node4); + + levelOrderStarTreeNode.put(dim2Node1.dimensionValue, dim2Node1); + levelOrderStarTreeNode.put(dim2Node2.dimensionValue, dim2Node2); + levelOrderStarTreeNode.put(dim2Node3.dimensionValue, dim2Node3); + levelOrderStarTreeNode.put(dim2Node4.dimensionValue, dim2Node4); + + return root; + } + + public void tearDown() throws Exception { + super.tearDown(); + dataIn.close(); + dataOut.close(); + directory.close(); + } + +} diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index 6afc7c23d9e66..e07d7ef3c5a7b 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -815,6 +815,14 @@ public static long randomNonNegativeLong() { return randomLong == Long.MIN_VALUE ? 0 : Math.abs(randomLong); } + /** + * @return a int between 0 and Integer.MAX_VALUE (inclusive) chosen uniformly at random. + */ + public static int randomNonNegativeInt() { + int randomInt = randomInt(); + return randomInt == Integer.MIN_VALUE ? 0 : Math.abs(randomInt); + } + public static float randomFloat() { return random().nextFloat(); }