From 0eabaa0ef228e22437707f7f9923371ab2784bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 4 Sep 2024 09:48:40 +0100 Subject: [PATCH 01/19] storage: Add DataFields for INDELs and AltCoordinates #TASK-6765 --- .../storage/IndexFieldConfiguration.java | 1 + .../storage/core/io/bit/BitBuffer.java | 11 +- .../storage/core/io/bit/BitInputStream.java | 9 ++ .../io/bit/ExposedByteArrayOutputStream.java | 9 ++ .../storage/hadoop/app/SampleIndexMain.java | 25 +++- .../SampleIndexOnlyVariantQueryExecutor.java | 135 +++++++++++++----- .../hadoop/variant/index/core/DataField.java | 134 +++++++++++++++++ .../hadoop/variant/index/core/DataSchema.java | 131 +++++++++++++++++ .../variant/index/core/DynamicDataField.java | 16 +++ .../index/core/FixedSizeIndexSchema.java | 12 +- .../variant/index/core/IntegerDataField.java | 64 +++++++++ .../variant/index/core/VarcharDataField.java | 82 +++++++++++ ...endelianErrorSampleIndexEntryIterator.java | 21 ++- .../index/sample/FileDataIndexSchema.java | 118 +++++++++++++++ .../sample/HBaseToSampleIndexConverter.java | 49 +++++-- .../index/sample/SampleIndexDBAdaptor.java | 3 + .../index/sample/SampleIndexDBLoader.java | 9 +- .../index/sample/SampleIndexDriver.java | 23 +-- .../index/sample/SampleIndexEntry.java | 34 +++++ .../sample/SampleIndexEntryIterator.java | 14 +- .../sample/SampleIndexEntryPutBuilder.java | 41 ++++-- .../index/sample/SampleIndexSchema.java | 12 ++ .../sample/SampleIndexVariantBiConverter.java | 31 +++- .../index/sample/SampleVariantIndexEntry.java | 63 +++++--- ... => SampleVariantIndexEntryConverter.java} | 69 ++++++++- .../hadoop/variant/VariantHbaseTestUtils.java | 29 ++-- .../variant/index/core/DataSchemaTest.java | 105 ++++++++++++++ .../SampleIndexEntryPutBuilderTest.java | 22 +-- ...SampleVariantIndexEntryConverterTest.java} | 28 ++-- .../sample/SampleVariantIndexEntryTest.java | 2 +- 30 files changed, 1155 insertions(+), 147 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{VariantFileIndexConverter.java => SampleVariantIndexEntryConverter.java} (61%) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java rename 
opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{VariantFileIndexEntryConverterTest.java => SampleVariantIndexEntryConverterTest.java} (64%) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java index f0a836b560c..12578efab7a 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java @@ -175,6 +175,7 @@ public void validate() { public enum Source { VARIANT, META, + STUDY, FILE, SAMPLE, ANNOTATION diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitBuffer.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitBuffer.java index c6e8971043f..981dfca611a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitBuffer.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitBuffer.java @@ -2,6 +2,8 @@ import org.apache.commons.lang3.StringUtils; +import java.io.ByteArrayOutputStream; + public class BitBuffer { private final byte[] buffer; @@ -35,6 +37,12 @@ public BitBuffer(byte[] buffer) { this.bitLength = buffer.length * Byte.SIZE; } + public BitBuffer(ByteArrayOutputStream stream) { + this.buffer = stream.toByteArray(); + this.bitOffset = 0; + this.bitLength = buffer.length * Byte.SIZE; + } + protected BitBuffer(byte[] buffer, int bitsOffset, int bitsLength) { this.buffer = buffer; this.bitOffset = bitsOffset; @@ -135,7 +143,7 @@ public void setByte(byte value, int bitOffset) { setBytePartial(value, bitOffset, Byte.SIZE); } - public void setBitBuffer(BitBuffer value, int bitOffset) { + public int setBitBuffer(BitBuffer value, int bitOffset) { if (value.getBitLength() + bitOffset > this.bitLength) { throw new IndexOutOfBoundsException("Bit offset request: " + bitOffset + ", bit length request: " + value.getBitLength() @@ -151,6 +159,7 @@ public void setBitBuffer(BitBuffer value, int bitOffset) { if (bits != 0) { setBytePartial(value.getBytePartial(bytes * Byte.SIZE, bits), bytes * Byte.SIZE + bitOffset, bits); } + return bitOffset + value.getBitLength(); } public void setBytePartial(byte value, int bitOffset, int length) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitInputStream.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitInputStream.java index 89e28fd755c..0178e6d414a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitInputStream.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/BitInputStream.java @@ -64,6 +64,15 @@ public int readIntPartial(int length) { return r; } + /** + * Read 32 bits (one int). + * + * @return read value + */ + public int readInt() { + return readIntPartial(Integer.SIZE); + } + /** * Read up to 8 bits (one byte). 
* diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/ExposedByteArrayOutputStream.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/ExposedByteArrayOutputStream.java index d94694a74b0..d8f806fee25 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/ExposedByteArrayOutputStream.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/io/bit/ExposedByteArrayOutputStream.java @@ -1,9 +1,14 @@ package org.opencb.opencga.storage.core.io.bit; import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; public class ExposedByteArrayOutputStream extends ByteArrayOutputStream { + public ExposedByteArrayOutputStream() { + super(); + } + public ExposedByteArrayOutputStream(int size) { super(size); } @@ -12,6 +17,10 @@ public byte[] getBuffer() { return buf; } + public ByteBuffer toByteByffer() { + return ByteBuffer.wrap(buf, 0, length()); + } + public int length() { return this.count; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java index 259824c7163..6a2af48a917 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java @@ -24,6 +24,7 @@ import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; import org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator; +import java.nio.ByteBuffer; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.TimeUnit; @@ -204,6 +205,7 @@ private void indexStats(SampleIndexDBAdaptor dbAdaptor, ObjectMap argsMap) throw addLength(gt, counts, countsByGt, "variants", new BitInputStream( gtEntry.getVariants(), gtEntry.getVariantsOffset(), gtEntry.getVariantsLength())); addLength(gt, counts, countsByGt, "fileIndex", gtEntry.getFileIndexStream()); + addLength(gt, counts, countsByGt, "fileDataIndex", gtEntry.getFileDataIndexBuffer()); addLength(gt, counts, countsByGt, "populationFrequencyIndex", gtEntry.getPopulationFrequencyIndexStream()); addLength(gt, counts, countsByGt, "ctBtTfIndex", gtEntry.getCtBtTfIndexStream()); addLength(gt, counts, countsByGt, "biotypeIndex", gtEntry.getBiotypeIndexStream()); @@ -230,13 +232,24 @@ private void indexStats(SampleIndexDBAdaptor dbAdaptor, ObjectMap argsMap) throw private void addLength(String gt, Map counts, Map> countsByGt, String key, BitInputStream stream) { if (stream != null) { - counts.merge(key + "_bytes", stream.getByteLength(), Integer::sum); - counts.merge(key + "_bytes_max", stream.getByteLength(), Math::max); - counts.merge(key + "_count", 1, Integer::sum); - Map gtCounts = countsByGt.computeIfAbsent(gt, k -> new TreeMap<>()); - gtCounts.merge(gt + "_" + key + "_bytes", stream.getByteLength(), Integer::sum); - gtCounts.merge(gt + "_" + key + "_count", 1, Integer::sum); + addLength(gt, counts, countsByGt, key, stream.getByteLength()); } } + private void addLength(String gt, Map counts, Map> countsByGt, + String key, ByteBuffer bb) { + if (bb != null) { + addLength(gt, 
counts, countsByGt, key, bb.limit()); + } + } + + private void addLength(String gt, Map counts, Map> countsByGt, + String key, int byteLength) { + counts.merge(key + "_bytes", byteLength, Integer::sum); + counts.merge(key + "_bytes_max", byteLength, Math::max); + counts.merge(key + "_count", 1, Integer::sum); + Map gtCounts = countsByGt.computeIfAbsent(gt, k -> new TreeMap<>()); + gtCounts.merge(gt + "_" + key + "_bytes", byteLength, Integer::sum); + gtCounts.merge(gt + "_" + key + "_count", 1, Integer::sum); + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 371d57ac4da..dd7a46d044e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -6,7 +6,9 @@ import org.apache.commons.lang3.time.StopWatch; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.AlternateCoordinate; import org.opencb.biodata.models.variant.avro.FileEntry; +import org.opencb.biodata.models.variant.avro.OriginalCall; import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.biodata.tools.commons.Converter; import org.opencb.commons.datastore.core.ObjectMap; @@ -42,6 +44,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.*; import java.util.concurrent.*; import java.util.function.BiConsumer; @@ -276,17 +279,18 @@ enum FamilyRole { } private final boolean includeStudy; - private String studyName; - private List familyRoleOrder; + private final boolean includeFiles; + private final boolean includeAll; + private final String studyName; + private final List familyRoleOrder; private String sampleName; private String motherName; private String fatherName; - private LinkedHashMap samplesPosition; - private List sampleFiles; - private IndexField filterField; - private IndexField qualField; + private final LinkedHashMap samplesPosition; + private final List sampleFiles; + private final IndexField filterField; + private final IndexField qualField; private final SampleIndexSchema schema; - private final boolean includeAll; SampleVariantIndexEntryToVariantConverter(ParsedVariantQuery parseQuery, SampleIndexQuery sampleIndexQuery, @@ -298,6 +302,8 @@ enum FamilyRole { includeStudy = !projection.getStudyIds().isEmpty(); if (includeStudy) { int studyId = projection.getStudyIds().get(0); // only one study + // force includeFiles if "includeAll" + includeFiles = includeAll || !projection.getStudy(studyId).getFiles().isEmpty(); VariantQueryProjection.StudyVariantQueryProjection projectionStudy = projection.getStudy(studyId); studyName = projectionStudy.getStudyMetadata().getName(); @@ -343,7 +349,7 @@ enum FamilyRole { this.fatherName = null; } - if (includeAll) { + if (includeFiles) { if (sampleMetadata == null) { sampleMetadata = metadataManager.getSampleMetadata(studyId, sampleId); } @@ -363,11 +369,27 @@ enum FamilyRole { sampleFiles = 
Collections.singletonList(fileName); } } + } else { + sampleFiles = null; + } + + if (includeAll) { filterField = schema.getFileIndex() .getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER); qualField = schema.getFileIndex() .getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL); + } else { + filterField = null; + qualField = null; } + } else { + samplesPosition = null; + sampleFiles = null; + includeFiles = false; + studyName = null; + filterField = null; + qualField = null; + familyRoleOrder = null; } } @@ -400,29 +422,60 @@ public Variant convert(SampleVariantIndexEntry entry) { throw new IllegalStateException("Unexpected value: " + role); } } - if (includeAll) { - HashMap fileAttributes = new HashMap<>(); - for (BitBuffer fileIndexBitBuffer : entry.getFilesIndex()) { - String filter = filterField.readAndDecode(fileIndexBitBuffer); - if (filter == null) { - filter = "NA"; + List> allAlternateCoordinates = new ArrayList<>(); + HashMap fileAttributes = new HashMap<>(); + Iterator fileDataIterator = entry.getFileData().iterator(); + for (BitBuffer fileIndexBitBuffer : entry.getFilesIndex()) { + ByteBuffer fileDataBitBuffer = fileDataIterator.next(); + + if (includeFiles) { + if (includeAll) { + String filter = filterField.readAndDecode(fileIndexBitBuffer); + if (filter == null) { + filter = "NA"; + } + fileAttributes.put(StudyEntry.FILTER, filter); + String qual = qualField.readAndDecode(fileIndexBitBuffer); + if (qual == null) { + qual = "NA"; + } + fileAttributes.put(StudyEntry.QUAL, qual); } - fileAttributes.put(StudyEntry.FILTER, filter); - String qual = qualField.readAndDecode(fileIndexBitBuffer); - if (qual == null) { - qual = "NA"; + OriginalCall call = null; + if (schema.getFileData().isIncludeOriginalCall()) { + call = schema.getFileData().readOriginalCall(fileDataBitBuffer); + } + if (schema.getFileData().isIncludeSecondaryAlternates()) { + allAlternateCoordinates.add(schema.getFileData().readSecondaryAlternates(fileDataBitBuffer)); } - fileAttributes.put(StudyEntry.QUAL, qual); - Integer idx = schema.getFileIndex().getFilePositionIndex().readAndDecode(fileIndexBitBuffer); String fileName = sampleFiles.get(idx); studyEntry.setFiles(new ArrayList<>()); - studyEntry.getFiles().add(new FileEntry(fileName, null, fileAttributes)); + studyEntry.getFiles().add(new FileEntry(fileName, call, fileAttributes)); if (sampleEntry != null) { sampleEntry.setFileIndex(0); } } } + + if (allAlternateCoordinates.size() == 1) { + studyEntry.setSecondaryAlternates(allAlternateCoordinates.get(0)); + } else if (allAlternateCoordinates.size() > 1) { + List alternateCoordinates = allAlternateCoordinates.get(0); + boolean allSame = true; + for (int i = 1; i < allAlternateCoordinates.size(); i++) { + List thisAltCoord = allAlternateCoordinates.get(i); + if (!thisAltCoord.equals(alternateCoordinates)) { + allSame = false; + break; + } + } + if (allSame) { + studyEntry.setSecondaryAlternates(alternateCoordinates); + } else { + logger.warn("Multiple conflicting alternates from different files!"); + } + } studyEntry.setSortedSamplesPosition(samplesPosition); v.setStudies(Collections.singletonList(studyEntry)); } @@ -483,17 +536,23 @@ public List apply(List variants) { List indels = new ArrayList<>(); for (Variant variant : variants) { boolean secAlt = false; - for (SampleEntry sample : variant.getStudies().get(0).getSamples()) { - if (GenotypeClass.SEC.test(sample.getData().get(0))) { - secAlt = true; - break; + StudyEntry studyEntry = variant.getStudies().get(0); + if 
(studyEntry.getSecondaryAlternates() == null || studyEntry.getSecondaryAlternates().isEmpty()) { + for (SampleEntry sample : studyEntry.getSamples()) { + if (GenotypeClass.SEC.test(sample.getData().get(0))) { + secAlt = true; + break; + } } } if (secAlt) { multiAllelic.add(variant); } else { if (variant.getLengthReference() == 0 || variant.getLengthAlternate() == 0) { - indels.add(variant); + if (studyEntry.getFiles().isEmpty() || studyEntry.getFiles().get(0).getCall() == null) { + // Missing call. + indels.add(variant); + } } } } @@ -512,19 +571,21 @@ public List apply(List variants) { } } - StopWatch stopWatch = StopWatch.createStarted(); - for (Future future : futures) { - try { - // Should end in few seconds - future.get(90, TimeUnit.SECONDS); - } catch (InterruptedException | ExecutionException | TimeoutException e) { - throw new VariantQueryException("Error fetching extra data", e); + if (!futures.isEmpty()) { + StopWatch stopWatch = StopWatch.createStarted(); + for (Future future : futures) { + try { + // Should end in few seconds + future.get(90, TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + throw new VariantQueryException("Error fetching extra data", e); + } } + logger.info("Fetch {} ({} multi-allelic and {} indels) partial variants in {} in {} threads", + multiAllelic.size() + indels.size(), multiAllelic.size(), indels.size(), + TimeUtils.durationToString(stopWatch), + futures.size()); } - logger.info("Fetch {} partial variants in {} in {} threads", - multiAllelic.size() + indels.size(), - TimeUtils.durationToString(stopWatch), - futures.size()); return variants; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java new file mode 100644 index 00000000000..836270ebb86 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java @@ -0,0 +1,134 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.function.Function; + +/** + * Field of the DataSchema. + * Similar to {@link IndexField}, but for the DataSchema. + * This field does not allow filters. + *
+ * This class is used to read and write the data of the DataSchema. + * The ByteBuffer contains a set of entries, each entry contains a set of fields. + * @param + */ +public abstract class DataField { + + private final IndexFieldConfiguration configuration; + private int fieldPosition; + + public DataField(IndexFieldConfiguration configuration) { + this(configuration, -1); + } + + public DataField(IndexFieldConfiguration configuration, int fieldPosition) { + this.configuration = configuration; + this.fieldPosition = fieldPosition; + } + + void setFieldPosition(int fieldPosition) { + this.fieldPosition = fieldPosition; + } + + public String getId() { + return configuration.getId(); + } + + public IndexFieldConfiguration.Source getSource() { + return configuration.getSource(); + } + + public String getKey() { + return configuration.getKey(); + } + + public IndexFieldConfiguration getConfiguration() { + return configuration; + } + + public IndexFieldConfiguration.Type getType() { + return configuration.getType(); + } + + public int getFieldPosition() { + return fieldPosition; + } + + public void move(ByteBuffer bb) { + read(bb); + } + + public abstract ByteBuffer read(ByteBuffer bb); + + public T readAndDecode(ByteBuffer bb) { + return decode(read(bb)); + } + + public ByteBuffer write(T value) { + ByteBuffer buffer = ByteBuffer.allocate(getByteLength(value)); + write(value, buffer); + return buffer; + } + + public abstract int getByteLength(T value); + + public abstract void write(T value, ByteBuffer buffer); + + public abstract void write(T value, ByteArrayOutputStream stream); + + public abstract ByteBuffer encode(T value); + + public abstract T decode(ByteBuffer code); + + public DataField from(Function converter, Function deconverter) { + return new DataField(configuration, fieldPosition) { + + @Override + public void move(ByteBuffer bb) { + DataField.this.move(bb); + } + + @Override + public ByteBuffer read(ByteBuffer bb) { + return DataField.this.read(bb); + } + + @Override + public int getByteLength(R value) { + return DataField.this.getByteLength(converter.apply(value)); + } + + @Override + public void write(R value, ByteBuffer buffer) { + DataField.this.write(converter.apply(value), buffer); + } + + @Override + public void write(R value, ByteArrayOutputStream stream) { + DataField.this.write(converter.apply(value), stream); + } + + @Override + public ByteBuffer encode(R value) { + return DataField.this.encode(converter.apply(value)); + } + + @Override + public R decode(ByteBuffer code) { + return deconverter.apply(DataField.this.decode(code)); + } + }; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("IndexField{"); + sb.append("configuration=").append(configuration); + sb.append(", valuePosition=").append(fieldPosition); + sb.append('}'); + return sb.toString(); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java new file mode 100644 index 00000000000..b0132cf1adc --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java @@ -0,0 +1,131 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import 
org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +/** + * Data schema. + * This class contains the schema of the data stored in the index. The schema is defined by a set of fields. + *
+ * The generated data is stored in a ByteBuffer, and this class is used to read and write the data. + * The ByteBuffer contains a set of entries, each entry contains a set of fields. + *
+ * The fields of each entry are stored in the same order as they are added to the schema. + *
+ * - ByteBuffer + * - Entry 1 + * - Entry length + * - Field 1 + * - ... + * - Field n + * - ... + * - Entry n + */ +public abstract class DataSchema { + + private List> fields; + protected final DataField entryLengthField; + +// private boolean sparse = false; + + public DataSchema() { + fields = new ArrayList<>(); + entryLengthField = new IntegerDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "ENTRY_LENGTH", null)); + fields.add(entryLengthField); + } + + protected void addField(DataField field) { + field.setFieldPosition(fields.size()); + fields.add(field); + } + +// public boolean isSparse() { +// return sparse; +// } + + public DataField getField(IndexFieldConfiguration.Source source, String key) { + return fields.stream().filter(i -> i.getSource() == source && i.getKey().equals(key)).findFirst().orElse(null); + } + + public List> getFields() { + return fields; + } + + public void writeEntry(ByteBuffer buffer, ByteBuffer entryBuffer) { + int entryLength = entryBuffer.limit(); + entryLengthField.write(entryLength, buffer); + buffer.put(entryBuffer.array(), buffer.arrayOffset(), entryLength); + } + + public void writeEntry(ByteArrayOutputStream stream, ByteBuffer entryBuffer) { + int entryLength = entryBuffer.limit(); + entryLengthField.write(entryLength, stream); + stream.write(entryBuffer.array(), entryBuffer.arrayOffset(), entryLength); + } + + public ByteBuffer readEntry(ByteBuffer buffer, int entryPosition) { + try { + buffer.rewind(); + for (int i = 0; i < entryPosition; i++) { + if (!buffer.hasRemaining()) { + return ByteBuffer.allocate(0); + } + int entryLength = entryLengthField.readAndDecode(buffer); + buffer.position(buffer.position() + entryLength); + } + return readNextEntry(buffer); + } catch (Exception e) { + throw e; + } + } + + public ByteBuffer readNextEntry(ByteBuffer buffer) { + try { + if (!buffer.hasRemaining()) { + return ByteBuffer.allocate(0); + } + int elementSize = entryLengthField.readAndDecode(buffer); + ByteBuffer elementBuffer = ByteBuffer.allocate(elementSize); + buffer.get(elementBuffer.array(), elementBuffer.arrayOffset(), elementSize); + elementBuffer.rewind(); + return elementBuffer; + } catch (Exception e) { + throw e; + } + } + + public ByteBuffer readField(ByteBuffer buffer, int fieldPosition) { + buffer.rewind(); + for (DataField field : fields) { + if (field == entryLengthField) { + // Skip entry length field + continue; + } else if (field.getFieldPosition() == fieldPosition) { + return field.read(buffer); + } else { + field.move(buffer); + } + } + throw new IllegalArgumentException("Unknown field position " + fieldPosition); + } + + public T readField(ByteBuffer buffer, DataField field) { + buffer.rewind(); + for (DataField thisField : fields) { + if (thisField == entryLengthField) { + // Skip entry length field + continue; + } else if (thisField == field) { + return field.readAndDecode(buffer); + } else { + thisField.move(buffer); + } + } + throw new IllegalArgumentException("Unknown field " + field); + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java new file mode 100644 index 00000000000..dfa7c336050 --- /dev/null +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java
@@ -0,0 +1,16 @@
+package org.opencb.opencga.storage.hadoop.variant.index.core;
+
+import org.opencb.opencga.core.config.storage.IndexFieldConfiguration;
+
+/**
+ * Data field with dynamic length.
+ * @param <T>
+ */
+public abstract class DynamicDataField<T> extends DataField<T> {
+
+    protected static final byte FIELD_SEPARATOR = (byte) 0;
+
+    public DynamicDataField(IndexFieldConfiguration configuration, int valuePosition) {
+        super(configuration, valuePosition);
+    }
+}
diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java
index 1b2592caec9..0866a687797 100644
--- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java
+++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java
@@ -1,6 +1,7 @@
 package org.opencb.opencga.storage.hadoop.variant.index.core;
 
 import org.opencb.opencga.storage.core.io.bit.BitBuffer;
+import org.opencb.opencga.storage.core.io.bit.BitInputStream;
 
 import java.util.List;
 
@@ -34,10 +35,19 @@ public int getBitsLength() {
      * @param i element position
      * @return BitBuffer containing all fields from the index.
      */
-    public BitBuffer read(BitBuffer buffer, int i) {
+    public BitBuffer readEntry(BitBuffer buffer, int i) {
         return buffer.getBitBuffer(i * indexSizeBits, indexSizeBits);
     }
 
+    /**
+     * Read the next index element from the bit input stream.
+     * @param stream Bit input stream to read from
+     * @return BitBuffer containing all fields from the index.
+ */ + public BitBuffer readEntry(BitInputStream stream) { + return stream.readBitBuffer(getBitsLength()); + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("FixedSizeIndexSchema{"); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java new file mode 100644 index 00000000000..7d43f6eba4c --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java @@ -0,0 +1,64 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.apache.hadoop.hbase.util.Bytes; +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; + +public class IntegerDataField extends DataField { + + public IntegerDataField(IndexFieldConfiguration configuration) { + super(configuration); + } + + @Override + public void move(ByteBuffer bb) { + bb.position(bb.position() + Integer.BYTES); + } + + @Override + public ByteBuffer read(ByteBuffer bb) { + ByteBuffer read = bb.slice(); + read.limit(Integer.BYTES); + move(bb); + return read; + } + + @Override + public Integer readAndDecode(ByteBuffer bb) { + return bb.getInt(); + } + + @Override + public int getByteLength(Integer value) { + return Integer.BYTES; + } + + @Override + public void write(Integer value, ByteBuffer buffer) { + buffer.putInt(value); + } + + @Override + public void write(Integer value, ByteArrayOutputStream stream) { + try { + stream.write(Bytes.toBytes(value)); + } catch (IOException e) { + // This should never happen + throw new UncheckedIOException(e); + } + } + + @Override + public ByteBuffer encode(Integer value) { + return ByteBuffer.allocate(4).putInt(value); + } + + @Override + public Integer decode(ByteBuffer code) { + return code.getInt(); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java new file mode 100644 index 00000000000..c213d37e1d8 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java @@ -0,0 +1,82 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; + +/** + * Varchar data field. + * Read until the FIELD_SEPARATOR. 
+ */ +public class VarcharDataField extends DynamicDataField { + + public VarcharDataField(IndexFieldConfiguration configuration) { + super(configuration, -1); + } + + @Override + public void move(ByteBuffer bb) { + while (bb.hasRemaining() && bb.get() != FIELD_SEPARATOR) { + continue; + } + } + + @Override + public ByteBuffer read(ByteBuffer bb) { + bb.mark(); + int length = 0; + while (bb.hasRemaining() && bb.get() != FIELD_SEPARATOR) { + length++; + } + bb.reset(); + ByteBuffer read = (ByteBuffer) bb.slice().limit(length); + // move buffer + bb.position(bb.position() + length + 1); // move one extra to skip separator + return read; + } + + @Override + public int getByteLength(String value) { + int length = value == null ? 0 : value.length(); + // +1 for the FIELD_SEPARATOR + return length + 1; + } + + @Override + public void write(String value, ByteBuffer buffer) { + if (value != null) { + buffer.put(value.getBytes(StandardCharsets.UTF_8)); + } + buffer.put(FIELD_SEPARATOR); + } + + @Override + public void write(String value, ByteArrayOutputStream stream) { + try { + if (value != null) { + stream.write(value.getBytes(StandardCharsets.UTF_8)); + } + stream.write(FIELD_SEPARATOR); + } catch (IOException e) { + // This should never happen + throw new UncheckedIOException(e); + } + } + + @Override + public ByteBuffer encode(String value) { + if (value == null) { + return ByteBuffer.allocate(0); + } + return ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8)); + } + + @Override + public String decode(ByteBuffer code) { + return new String(code.array(), code.arrayOffset(), code.limit(), StandardCharsets.UTF_8); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java index 72d869b7ef1..4c3f6799d80 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java @@ -6,6 +6,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; import org.opencb.opencga.storage.hadoop.variant.index.sample.*; +import java.nio.ByteBuffer; import java.util.*; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexVariantBiConverter.split; @@ -52,11 +53,22 @@ public boolean hasFileIndex() { return it != null && it.hasFileIndex(); } + @Override + public boolean hasFileDataIndex() { + SampleIndexEntryIterator it = getGtIterator(); + return it != null && it.hasFileDataIndex(); + } + @Override public BitBuffer nextFileIndexEntry() { return getGtIterator().nextFileIndexEntry(); } + @Override + public ByteBuffer getFileDataEntry() { + return getGtIterator().getFileDataEntry(); + } + @Override public boolean isMultiFileIndex() { return getGtIterator().isMultiFileIndex(); @@ -132,10 +144,17 @@ public Variant nextVariant() { public SampleVariantIndexEntry nextSampleVariantIndexEntry() { AnnotationIndexEntry annotationIndexEntry = nextAnnotationIndexEntry(); List filesIndex = new ArrayList<>(); + List filesDataIndex = new 
ArrayList<>(); if (hasFileIndex()) { filesIndex.add(nextFileIndexEntry()); + if (hasFileDataIndex()) { + filesDataIndex.add(getFileDataEntry()); + } while (isMultiFileIndex()) { filesIndex.add(nextMultiFileIndexEntry()); + if (hasFileDataIndex()) { + filesDataIndex.add(getFileDataEntry()); + } } } String genotype = nextGenotype(); @@ -145,7 +164,7 @@ public SampleVariantIndexEntry nextSampleVariantIndexEntry() { parentsCode = nextParentsIndexEntry(); } Variant variant = next(); - return new SampleVariantIndexEntry(variant, filesIndex, genotype, annotationIndexEntry, parentsCode, meCode); + return new SampleVariantIndexEntry(variant, filesIndex, filesDataIndex, genotype, annotationIndexEntry, parentsCode, meCode); } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java new file mode 100644 index 00000000000..91b6540c79b --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java @@ -0,0 +1,118 @@ +package org.opencb.opencga.storage.hadoop.variant.index.sample; + +import org.opencb.biodata.models.variant.VariantBuilder; +import org.opencb.biodata.models.variant.avro.AlternateCoordinate; +import org.opencb.biodata.models.variant.avro.OriginalCall; +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; +import org.opencb.opencga.storage.hadoop.variant.index.core.DataField; +import org.opencb.opencga.storage.hadoop.variant.index.core.DataSchema; +import org.opencb.opencga.storage.hadoop.variant.index.core.VarcharDataField; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class FileDataIndexSchema extends DataSchema { + + private final DataField originalCallField; + private final DataField> secondaryAlternatesField; + private boolean includeOriginalCall = true; + private boolean includeSecondaryAlternates = true; + + public FileDataIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileIndexConfiguration) { + if (includeOriginalCall) { + originalCallField = new VarcharDataField( + new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "ORIGINAL_CALL", null)) + .from( + (OriginalCall oc) -> oc == null ? 
null : oc.getVariantId() + "+" + oc.getAlleleIndex(), + (String s) -> { + if (s == null || s.isEmpty()) { + return null; + } else { + String[] split = s.split("\\+", 2); + return new OriginalCall(split[0], Integer.parseInt(split[1])); + } + }); + addField(originalCallField); + } else { + originalCallField = null; + } + if (includeSecondaryAlternates) { + secondaryAlternatesField = new VarcharDataField( + new IndexFieldConfiguration(IndexFieldConfiguration.Source.STUDY, "SECONDARY_ALTERNATES", null)) + .from((List secondaryAlternates) -> { + if (secondaryAlternates == null || secondaryAlternates.isEmpty()) { + return ""; + } + boolean needsSeparator = false; + StringBuilder sb = new StringBuilder(); + for (AlternateCoordinate alternate : secondaryAlternates) { + if (needsSeparator) { + sb.append(','); + } + sb.append(alternate.getChromosome()); + sb.append("+"); + sb.append(alternate.getStart()); + sb.append("+"); + sb.append(alternate.getEnd()); + sb.append("+"); + sb.append(alternate.getReference()); + sb.append("+"); + sb.append(alternate.getAlternate()); + needsSeparator = true; + } + + return sb.toString(); + }, (String s) -> { + if (s == null || s.isEmpty()) { + return Collections.emptyList(); + } + String[] split = s.split(","); + List alternates = new ArrayList<>(split.length); + for (String alt : split) { + String[] altSplit = alt.split("\\+", 5); + String alternate = altSplit.length == 5 ? altSplit[4] : ""; + alternates.add(new AlternateCoordinate( + altSplit[0], + Integer.parseInt(altSplit[1]), + Integer.parseInt(altSplit[2]), + altSplit[3], + alternate, + VariantBuilder.inferType(altSplit[3], alternate) + )); + } + return alternates; + }); + addField(secondaryAlternatesField); + } else { + secondaryAlternatesField = null; + } + } + + public boolean isIncludeOriginalCall() { + return includeOriginalCall; + } + + public DataField getOriginalCallField() { + return originalCallField; + } + + public boolean isIncludeSecondaryAlternates() { + return includeSecondaryAlternates; + } + + public DataField> getSecondaryAlternatesField() { + return secondaryAlternatesField; + } + + public OriginalCall readOriginalCall(ByteBuffer fileDataBitBuffer) { + return readField(fileDataBitBuffer, originalCallField); + } + + public List readSecondaryAlternates(ByteBuffer fileDataBitBuffer) { + return readField(fileDataBitBuffer, secondaryAlternatesField); + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java index f6fea5173eb..f6d9fd64638 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java @@ -11,6 +11,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.IndexUtils; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexEntry.SampleIndexGtEntry; +import java.nio.ByteBuffer; import java.util.*; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema.*; @@ -26,12 +27,14 @@ public class HBaseToSampleIndexConverter implements Converter> 
convertToMapSampleVariantIn Map> mapVariantFileIndex = new HashMap<>(); SampleVariantIndexEntry.SampleVariantIndexEntryComparator comparator = new SampleVariantIndexEntry.SampleVariantIndexEntryComparator(schema); + Map fileIndexMap = new HashMap<>(); + Map fileDataMap = new HashMap<>(); for (Cell cell : result.rawCells()) { if (columnStartsWith(cell, FILE_PREFIX_BYTES)) { String gt = SampleIndexSchema.getGt(cell, FILE_PREFIX_BYTES); - TreeSet values = new TreeSet<>(comparator); - mapVariantFileIndex.put(gt, values); BitInputStream bis = new BitInputStream( cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); - for (Variant variant : map.get(gt)) { - BitBuffer fileIndex; - do { - fileIndex = bis.readBitBuffer(this.fileIndex.getBitsLength()); - values.add(new SampleVariantIndexEntry(variant, fileIndex)); - } while (this.fileIndex.isMultiFile(fileIndex)); - } + fileIndexMap.put(gt, bis); + } else if (columnStartsWith(cell, FILE_DATA_PREFIX_BYTES)) { + String gt = SampleIndexSchema.getGt(cell, FILE_DATA_PREFIX_BYTES); + // Slice the buffer. + // The wrap buffer contains the whole array, where the position is the offset. + // The position might be set to 0 by `.reset()` method, which would allow reading data before offset. + ByteBuffer byteBuffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()).slice(); + fileDataMap.put(gt, byteBuffer); + } + } + for (Map.Entry entry : fileIndexMap.entrySet()) { + String gt = entry.getKey(); + TreeSet values = new TreeSet<>(comparator); + mapVariantFileIndex.put(gt, values); + + BitInputStream fileIndexStream = entry.getValue(); + ByteBuffer fileDataBuffer = fileDataMap.get(gt); + + for (Variant variant : map.get(gt)) { + BitBuffer fileIndexEntry; + do { + fileIndexEntry = fileIndexSchema.readEntry(fileIndexStream); + ByteBuffer fileDataEntry = fileDataSchema.readNextEntry(fileDataBuffer); + values.add(new SampleVariantIndexEntry(variant, fileIndexEntry, fileDataEntry)); + } while (this.fileIndexSchema.isMultiFile(fileIndexEntry)); } } return mapVariantFileIndex; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 06a3ca79b9a..431d277bcd9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -621,6 +621,9 @@ private Scan parse(SingleSampleIndexQuery query, LocusQuery locusQuery, boolean if (includeAll || !query.emptyFileIndex()) { scan.addColumn(family, SampleIndexSchema.toFileIndexColumn(gt)); } + if (includeAll) { + scan.addColumn(family, SampleIndexSchema.toFileDataColumn(gt)); + } if (includeAll || query.isIncludeParentColumns() || query.hasFatherFilter() || query.hasMotherFilter() || query.getMendelianErrorType() != null) { scan.addColumn(family, SampleIndexSchema.toParentsGTColumn(gt)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java index f2700831e94..584d1952295 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java @@ -9,7 +9,6 @@ import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.YesNoAuto; -import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.variant.VariantStorageEngine.SplitData; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; @@ -43,7 +42,7 @@ public class SampleIndexDBLoader extends AbstractHBaseDataWriter { @@ -204,8 +203,8 @@ protected List convert(List variants) { if (validVariant(variant) && validGenotype(gt)) { genotypes.add(gt); Chunk chunk = buffer.computeIfAbsent(indexChunk, Chunk::new); - BitBuffer fileIndexValue = variantFileIndexConverter.createFileIndexValue(sampleIdx, fileIdxMap[sampleIdx], variant); - SampleVariantIndexEntry indexEntry = new SampleVariantIndexEntry(variant, fileIndexValue); + SampleVariantIndexEntry indexEntry = sampleVariantIndexEntryConverter + .createSampleVariantIndexEntry(sampleIdx, fileIdxMap[sampleIdx], variant); chunk.addVariant(sampleIdx, gt, indexEntry); } sampleIdx++; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java index 7b51d99fb85..ce25d55819b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java @@ -341,7 +341,7 @@ public static class SampleIndexerMapper extends VariantTableSampleIndexOrderMapp private static final String HAS_GENOTYPE = "SampleIndexerMapper.hasGenotype"; public static final int SAMPLES_TO_COUNT = 2; private Set samplesToCount; - private VariantFileIndexConverter fileIndexConverter; + private SampleVariantIndexEntryConverter sampleVariantIndexEntryConverter; private Map fixedAttributesPositions; private Map sampleDataKeyPositions; private final Map samples = new HashMap<>(); @@ -361,7 +361,7 @@ protected void setup(Context context) throws IOException, InterruptedException { hasGenotype = context.getConfiguration().getBoolean(HAS_GENOTYPE, true); schema = VariantMapReduceUtil.getSampleIndexSchema(context.getConfiguration()); studyId = context.getConfiguration().getInt(STUDY_ID, -1); - fileIndexConverter = new VariantFileIndexConverter(schema); + sampleVariantIndexEntryConverter = new SampleVariantIndexEntryConverter(schema); progressLogger = new ProgressLogger("Processing variants").setBatchSize(10000); int[] sampleIds = context.getConfiguration().getInts(SAMPLES); @@ -421,7 +421,7 @@ protected void 
map(ImmutableBytesWritable key, Result result, Context context) t progressLogger.increment(1, () -> "up to variant " + variant); // Get fileIndex for each file - Map fileIndexMap = new HashMap<>(); + Map sampleIndexentryMap = new HashMap<>(); variantRow.forEachFile(fileColumn -> { if ((partialScan && !this.files.contains(fileColumn.getFileId())) || fileColumn.getStudyId() != studyId) { @@ -432,7 +432,10 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t // Map fileAttributes = HBaseToStudyEntryConverter.convertFileAttributes(fileColumn.raw(), // fixedAttributes, includeAttributes); - BitBuffer fileIndexValue = fileIndexConverter.createFileIndexValue(variant.getType(), 0, + SampleVariantIndexEntry indexEntry = sampleVariantIndexEntryConverter.createSampleVariantIndexEntry(0, + variant, + fileColumn.getCall(), + fileColumn.getSecondaryAlternates(), (k) -> { if (k.equals(StudyEntry.QUAL)) { return fileColumn.getQualString(); @@ -449,7 +452,7 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t }, (k) -> null); - fileIndexMap.put(fileColumn.getFileId(), fileIndexValue); + sampleIndexentryMap.put(fileColumn.getFileId(), indexEntry); }); variantRow.forEachSample(sampleColumn -> { @@ -497,17 +500,17 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t // Add fileIndex value for this genotype boolean fileFound = false; for (Integer fileId : files) { - BitBuffer fileIndex = fileIndexMap.get(fileId); - if (fileIndex != null) { + SampleVariantIndexEntry indexEntry = sampleIndexentryMap.get(fileId); + if (indexEntry != null) { fileFound = true; // Copy bit buffer - BitBuffer sampleFileIndex = new BitBuffer(fileIndex); - fileIndexConverter.addSampleDataIndexValues(sampleFileIndex, sampleDataKeyPositions, + BitBuffer sampleFileIndex = new BitBuffer(indexEntry.getFilesIndex().get(0)); + sampleVariantIndexEntryConverter.addSampleDataIndexValues(sampleFileIndex, sampleDataKeyPositions, sampleColumn::getSampleData); if (filePosition >= 0) { schema.getFileIndex().getFilePositionIndex().write(filePosition, sampleFileIndex); } - builder.add(gt, new SampleVariantIndexEntry(variant, sampleFileIndex)); + builder.add(gt, new SampleVariantIndexEntry(variant, sampleFileIndex, indexEntry.getFileData().get(0))); countSampleGt(context, sampleId, gt); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntry.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntry.java index f13ed8b8a33..fe7bfc6b63b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntry.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntry.java @@ -5,6 +5,7 @@ import org.opencb.opencga.storage.core.io.bit.BitInputStream; import org.opencb.opencga.storage.hadoop.variant.index.IndexUtils; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -166,6 +167,10 @@ public class SampleIndexGtEntry { private int fileIndexOffset; private int fileIndexLength; + private byte[] fileData; + private int fileDataOffset; + private int fileDataLength; + private byte[] annotationIndex; private int 
annotationIndexOffset; private int annotationIndexLength; @@ -289,6 +294,33 @@ public SampleIndexGtEntry setFileIndex(byte[] fileIndex, int offset, int length) return this; } + public byte[] getFileData() { + return fileData; + } + + public ByteBuffer getFileDataIndexBuffer() { + // Slice the buffer. + // The wrap buffer contains the whole array, where the position is the offset. + // The position might be set to 0 by `.reset()` method, which would allow reading data before offset. + return fileData == null ? null : ByteBuffer.wrap(fileData, fileDataOffset, fileDataLength) + .slice(); + } + + public int getFileDataOffset() { + return fileDataOffset; + } + + public int getFileDataLength() { + return fileDataLength; + } + + public SampleIndexGtEntry setFileDataIndex(byte[] fileDataIndex, int offset, int length) { + this.fileData = fileDataIndex; + this.fileDataOffset = offset; + this.fileDataLength = length; + return this; + } + public byte[] getAnnotationIndex() { return annotationIndex; } @@ -480,6 +512,8 @@ public String toString() { : Bytes.toStringBinary(variants, variantsOffset, variantsLength)); sb.append(", fileIndex=").append(fileIndex == null ? "null" : Bytes.toStringBinary(fileIndex, fileIndexOffset, fileIndexLength)); + sb.append(", fileData=").append(fileData == null ? "null" + : Bytes.toStringBinary(fileData, fileDataOffset, fileDataLength)); sb.append(", annotationIndex=").append(annotationIndex == null ? "null" : Bytes.toStringBinary(annotationIndex, annotationIndexOffset, annotationIndexLength)); sb.append(", annotationIndexLength=").append(annotationIndexLength); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java index db96af41abd..ae4033a78cc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java @@ -4,6 +4,7 @@ import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -45,10 +46,17 @@ default SampleVariantIndexEntry nextSampleVariantIndexEntry() { annotationIndexEntry = new AnnotationIndexEntry(annotationIndexEntry); } List filesIndex = new ArrayList<>(); + List filesData = new ArrayList<>(); if (hasFileIndex()) { filesIndex.add(nextFileIndexEntry()); + if (hasFileDataIndex()) { + filesData.add(getFileDataEntry()); + } while (isMultiFileIndex()) { filesIndex.add(nextMultiFileIndexEntry()); + if (hasFileDataIndex()) { + filesData.add(getFileDataEntry()); + } } } Byte parentsCode = null; @@ -57,7 +65,7 @@ default SampleVariantIndexEntry nextSampleVariantIndexEntry() { } String genotype = nextGenotype(); Variant variant = next(); - return new SampleVariantIndexEntry(variant, filesIndex, genotype, annotationIndexEntry, parentsCode, null); + return new SampleVariantIndexEntry(variant, filesIndex, filesData, genotype, annotationIndexEntry, parentsCode, null); } /** @@ -81,6 +89,8 @@ default 
SampleVariantIndexEntry nextSampleVariantIndexEntry() { boolean hasFileIndex(); + boolean hasFileDataIndex(); + boolean isMultiFileIndex(); /** @@ -88,6 +98,8 @@ default SampleVariantIndexEntry nextSampleVariantIndexEntry() { */ BitBuffer nextFileIndexEntry(); + ByteBuffer getFileDataEntry(); + BitBuffer nextMultiFileIndexEntry(); boolean hasParentsIndex(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java index e4aeefc29f0..e62c546120f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java @@ -5,8 +5,10 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.io.bit.BitOutputStream; +import org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; +import java.nio.ByteBuffer; import java.util.*; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema.INTRA_CHROMOSOME_VARIANT_COMPARATOR; @@ -22,7 +24,8 @@ public class SampleIndexEntryPutBuilder { private final SampleIndexVariantBiConverter variantConverter; private static final byte[] COLUMN_FAMILY = GenomeHelper.COLUMN_FAMILY_BYTES; private final SampleIndexSchema schema; - private final FileIndexSchema fileIndex; + private final FileIndexSchema fileIndexSchema; + private final FileDataIndexSchema fileDataIndexSchema; private final SampleVariantIndexEntry.SampleVariantIndexEntryComparator comparator; private final boolean orderedInput; private final boolean multiFileSample; @@ -54,7 +57,8 @@ public SampleIndexEntryPutBuilder(int sampleId, String chromosome, int position, gts = new HashMap<>(); variantConverter = new SampleIndexVariantBiConverter(schema); this.schema = schema; - fileIndex = this.schema.getFileIndex(); + fileIndexSchema = this.schema.getFileIndex(); + fileDataIndexSchema = this.schema.getFileData(); comparator = new SampleVariantIndexEntry.SampleVariantIndexEntryComparator(schema); } @@ -147,7 +151,11 @@ public String getGt() { public void build(Put put) { Collection gtEntries = getEntries(); - BitBuffer fileIndexBuffer = new BitBuffer(fileIndex.getBitsLength() * gtEntries.size()); + BitBuffer fileIndexBuffer = new BitBuffer(fileIndexSchema.getBitsLength() * gtEntries.size()); + ByteBuffer fileDataIndexBuffer = ByteBuffer.allocate(gtEntries.stream() + .mapToInt(SampleVariantIndexEntry::getFileDataIndexBytes) + .map(i -> i + 4) + .sum()); int offset = 0; SampleVariantIndexEntry prev = null; @@ -158,10 +166,12 @@ public void build(Put put) { variants.add(variant); } else { // Mark previous variant as MultiFile - fileIndex.setMultiFile(fileIndexBuffer, offset - fileIndex.getBitsLength()); + fileIndexSchema.setMultiFile(fileIndexBuffer, offset - fileIndexSchema.getBitsLength()); + } + offset = fileIndexBuffer.setBitBuffer(gtEntry.getFileIndex(), offset); + if (!gtEntry.getFileData().isEmpty()) { + 
fileDataIndexSchema.writeEntry(fileDataIndexBuffer, gtEntry.getFileData().get(0)); } - fileIndexBuffer.setBitBuffer(gtEntry.getFileIndex(), offset); - offset += fileIndex.getBitsLength(); prev = gtEntry; } @@ -170,6 +180,11 @@ public void build(Put put) { put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeColumn(gt), variantsBytes); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeCountColumn(gt), Bytes.toBytes(variants.size())); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toFileIndexColumn(gt), fileIndexBuffer.getBuffer()); + int position = fileDataIndexBuffer.position(); + fileDataIndexBuffer.rewind(); + fileDataIndexBuffer.limit(position); + put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), put.getTimestamp(), + fileDataIndexBuffer); } } @@ -290,6 +305,7 @@ private class SampleIndexGtEntryBuilderWithPartialBuilds extends SampleIndexGtEn private final ArrayList variants = new ArrayList<>(0); // This is the real issue. This might produce the "too many objects" problem. Need to run "partial builds" from time to time. private final BitOutputStream fileIndexBuffer = new BitOutputStream(); + private final ExposedByteArrayOutputStream fileDataBuffer = new ExposedByteArrayOutputStream(); SampleIndexGtEntryBuilderWithPartialBuilds(String gt) { this(gt, 10, 100); @@ -357,7 +373,7 @@ public int containsVariants(SampleIndexGtEntryBuilder other) { private void partialBuild(boolean flush) { int entriesToProcess = flush ? entries.size() : entries.size() - lowerThreshold; - BitBuffer fileIndexBuffer = new BitBuffer(fileIndex.getBitsLength() * entries.size()); + BitBuffer fileIndexBuffer = new BitBuffer(fileIndexSchema.getBitsLength() * entries.size()); int offset = 0; variants.ensureCapacity(variants.size() + entries.size()); @@ -383,15 +399,16 @@ private void partialBuild(boolean flush) { variants.add(variant); } else { // Mark previous variant as MultiFile - fileIndex.setMultiFile(fileIndexBuffer, offset - fileIndex.getBitsLength()); + fileIndexSchema.setMultiFile(fileIndexBuffer, offset - fileIndexSchema.getBitsLength()); } - fileIndexBuffer.setBitBuffer(gtEntry.getFileIndex(), offset); - offset += fileIndex.getBitsLength(); + fileIndexBuffer.setBitBuffer(gtEntry.getFilesIndex().get(0), offset); + offset += fileIndexSchema.getBitsLength(); prev = gtEntry; + fileDataIndexSchema.writeEntry(fileDataBuffer, gtEntry.getFileData().get(0)); } // Do not write the whole buffer, but only the corresponding to the processed entries. 
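// Minimal stand-alone sketch of the append pattern used here (illustrative locals only; the real code
// goes through fileDataIndexSchema.writeEntry / DataSchema.writeEntry): each variable-length file-data
// entry is length-prefixed and appended to a growing stream, so only the entries processed so far are
// flushed into the final column value. Assumes java.io.ByteArrayOutputStream is imported; the
// single-byte length prefix is a simplification of the schema's entry-length field.
ByteArrayOutputStream fileDataColumnSketch = new ByteArrayOutputStream();
for (byte[] entry : new byte[][]{{1, 2, 3}, {4, 5}}) {
    fileDataColumnSketch.write(entry.length);            // toy single-byte length prefix
    fileDataColumnSketch.write(entry, 0, entry.length);  // entry payload
}
// fileDataColumnSketch.toByteArray() -> 03 01 02 03 02 04 05 (7 bytes), later stored as one HBase cell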
- this.fileIndexBuffer.write(fileIndexBuffer.getBitBuffer(0, fileIndex.getBitsLength() * processedEntries)); + this.fileIndexBuffer.write(fileIndexBuffer.getBitBuffer(0, fileIndexSchema.getBitsLength() * processedEntries)); } @Override @@ -405,6 +422,8 @@ public void build(Put put) { put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeColumn(gt), variantsBuffer); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeCountColumn(gt), Bytes.toBytes(variantsCount)); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toFileIndexColumn(gt), fileIndexBuffer.toByteArray()); + put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), + put.getTimestamp(), fileDataBuffer.toByteByffer()); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index 273ad40ae0d..bd57ae196fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -89,6 +89,8 @@ public final class SampleIndexSchema { static final byte[] PARENTS_PREFIX_BYTES = Bytes.toBytes(PARENTS_PREFIX); static final String FILE_PREFIX = META_PREFIX + "F_"; static final byte[] FILE_PREFIX_BYTES = Bytes.toBytes(FILE_PREFIX); + static final String FILE_DATA_PREFIX = META_PREFIX + "FD_"; + static final byte[] FILE_DATA_PREFIX_BYTES = Bytes.toBytes(FILE_DATA_PREFIX); static final String GENOTYPE_COUNT_PREFIX = META_PREFIX + "C_"; static final byte[] GENOTYPE_COUNT_PREFIX_BYTES = Bytes.toBytes(GENOTYPE_COUNT_PREFIX); static final String GENOTYPE_DISCREPANCY_COUNT = META_PREFIX + "DC"; @@ -129,6 +131,7 @@ public final class SampleIndexSchema { private final int version; private final SampleIndexConfiguration configuration; private final FileIndexSchema fileIndex; + private final FileDataIndexSchema fileData; private final PopulationFrequencyIndexSchema popFreqIndex; private final ConsequenceTypeIndexSchema ctIndex; private final BiotypeIndexSchema biotypeIndex; @@ -141,6 +144,7 @@ public SampleIndexSchema(SampleIndexConfiguration configuration, int version) { this.version = version; this.configuration = configuration; fileIndex = new FileIndexSchema(configuration.getFileIndexConfiguration()); + fileData = new FileDataIndexSchema(configuration.getFileIndexConfiguration()); // annotationSummaryIndexSchema = new AnnotationSummaryIndexSchema(); ctIndex = new ConsequenceTypeIndexSchema(configuration.getAnnotationIndexConfiguration().getConsequenceType()); biotypeIndex = new BiotypeIndexSchema(configuration.getAnnotationIndexConfiguration().getBiotype()); @@ -204,6 +208,10 @@ public FileIndexSchema getFileIndex() { return fileIndex; } + public FileDataIndexSchema getFileData() { + return fileData; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("SampleIndexSchema{"); @@ -364,6 +372,10 @@ public static byte[] toFileIndexColumn(String genotype) { return Bytes.toBytes(FILE_PREFIX + genotype); } + public static byte[] toFileDataColumn(String genotype) { + return Bytes.toBytes(FILE_DATA_PREFIX + genotype); + } + public static String getGt(Cell 
cell, byte[] prefix) { return Bytes.toString( cell.getQualifierArray(), diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java index f1b167ae9c1..886e5b97bbf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java @@ -13,6 +13,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.*; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema.isGenotypeColumn; @@ -211,6 +212,7 @@ private abstract static class SampleIndexGtEntryIterator implements SampleIndexE private int nonIntergenicCount; private int clinicalCount; private BitInputStream fileIndex; + private ByteBuffer fileDataIndex; private int fileIndexCount; // Number of fileIndex elements visited private int fileIndexIdx; // Index over file index array. Index of last visited fileIndex @@ -239,6 +241,7 @@ private abstract static class SampleIndexGtEntryIterator implements SampleIndexE this.popFreq = gtEntry.getPopulationFrequencyIndexStream(); this.clinicalIndex = gtEntry.getClinicalIndexStream(); this.fileIndex = gtEntry.getFileIndexStream(); + this.fileDataIndex = gtEntry.getFileDataIndexBuffer(); } @Override @@ -251,6 +254,11 @@ public boolean hasFileIndex() { return gtEntry.getFileIndex() != null; } + @Override + public boolean hasFileDataIndex() { + return gtEntry.getFileData() != null; + } + @Override public boolean isMultiFileIndex() { return isMultiFileIndex(nextFileIndex()); @@ -278,6 +286,11 @@ public BitBuffer nextFileIndexEntry() { return getFileIndex(nextFileIndex()); } + @Override + public ByteBuffer getFileDataEntry() { + return getFileDataIndex(fileIndexIdx); + } + @Override public BitBuffer nextMultiFileIndexEntry() { if (isMultiFileIndex()) { @@ -289,7 +302,11 @@ public BitBuffer nextMultiFileIndexEntry() { } private BitBuffer getFileIndex(int i) { - return schema.getFileIndex().read(fileIndex, i); + return schema.getFileIndex().readEntry(fileIndex, i); + } + + private ByteBuffer getFileDataIndex(int i) { + return schema.getFileData().readEntry(fileDataIndex, i); } @Override @@ -357,7 +374,7 @@ public AnnotationIndexEntry nextAnnotationIndexEntry() { if (clinical) { int nextClinical = nextClinicalIndex(); // TODO: Reuse BitBuffer - annotationIndexEntry.setClinicalIndex(schema.getClinicalIndexSchema().read(clinicalIndex, nextClinical)); + annotationIndexEntry.setClinicalIndex(schema.getClinicalIndexSchema().readEntry(clinicalIndex, nextClinical)); } } @@ -454,11 +471,21 @@ public boolean hasFileIndex() { return false; } + @Override + public boolean hasFileDataIndex() { + return false; + } + @Override public BitBuffer nextFileIndexEntry() { throw new NoSuchElementException("Empty iterator"); } + @Override + public ByteBuffer getFileDataEntry() { + throw new NoSuchElementException("Empty iterator"); + } + @Override public boolean isMultiFileIndex() { return false; diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java index 3d919ce846f..f346cf69180 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java @@ -1,11 +1,13 @@ package org.opencb.opencga.storage.hadoop.variant.index.sample; import org.apache.commons.collections4.CollectionUtils; +import org.apache.hadoop.hbase.util.Bytes; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAvro; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -17,19 +19,20 @@ public class SampleVariantIndexEntry { private final Variant variant; private final String genotype; - @Deprecated - private final BitBuffer fileIndex; private final List filesIndex; + private final List fileData; private final AnnotationIndexEntry annotationIndexEntry; private final Integer meCode; private final Byte parentsCode; - public SampleVariantIndexEntry(Variant variant, BitBuffer fileIndex) { - this(variant, Collections.singletonList(fileIndex), null, null, null, null); + public SampleVariantIndexEntry(Variant variant, BitBuffer fileIndex, ByteBuffer fileData) { + this(variant, Collections.singletonList(fileIndex), + fileData == null ? Collections.emptyList() : Collections.singletonList(fileData), + null, null, null, null); } - public SampleVariantIndexEntry(Variant variant, List filesIndex, String genotype, AnnotationIndexEntry annotationIndexEntry, - Byte parentsCode, Integer meCode) { + public SampleVariantIndexEntry(Variant variant, List filesIndex, List fileData, String genotype, + AnnotationIndexEntry annotationIndexEntry, Byte parentsCode, Integer meCode) { if (CollectionUtils.isEmpty(variant.getImpl().getStudies())) { this.variant = variant; } else { @@ -48,11 +51,7 @@ public SampleVariantIndexEntry(Variant variant, List filesIndex, Stri null, null)); } this.filesIndex = filesIndex; - if (filesIndex == null) { - this.fileIndex = null; - } else { - this.fileIndex = filesIndex.get(0); - } + this.fileData = fileData; this.genotype = genotype; this.annotationIndexEntry = annotationIndexEntry; this.meCode = meCode; @@ -65,13 +64,21 @@ public Variant getVariant() { @Deprecated public BitBuffer getFileIndex() { - return fileIndex; + return filesIndex == null ? null : filesIndex.get(0); } public List getFilesIndex() { return filesIndex; } + public List getFileData() { + return fileData; + } + + public int getFileDataIndexBytes() { + return fileData == null ? 
0 : fileData.stream().mapToInt(ByteBuffer::limit).sum(); + } + public String getGenotype() { return genotype; } @@ -102,7 +109,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(variant, fileIndex); + return Objects.hash(variant, filesIndex, fileData); } public String toString(SampleIndexSchema schema) { @@ -116,6 +123,26 @@ public String toString(SampleIndexSchema schema, String separator) { .append(this.getGenotype()); sb.append(separator).append("file: ") .append(this.getFilesIndex()); + sb.append(separator).append("fileData: "); + if (getFileData() == null) { + sb.append("null"); + } else { + sb.append("["); + for (ByteBuffer fileDatum : getFileData()) { + if (fileDatum == null) { + sb.append("null"); + } else { + sb.append(Bytes.toStringBinary(fileDatum)); + } + sb.append(" , "); + } + sb.append("]"); + } + sb.append(separator).append("me: ") + .append(this.getMeCode()); + sb.append(separator).append("parents: ") + .append(this.parentsCode); + if (annotationIndexEntry != null) { annotationIndexEntry.toString(schema, separator, sb); } @@ -136,18 +163,18 @@ public int compare(SampleVariantIndexEntry o1, SampleVariantIndexEntry o2) { if (compare != 0) { return compare; } - if (schema.getFileIndex().isMultiFile(o1.fileIndex)) { + if (schema.getFileIndex().isMultiFile(o1.getFileIndex())) { return -1; - } else if (schema.getFileIndex().isMultiFile(o2.fileIndex)) { + } else if (schema.getFileIndex().isMultiFile(o2.getFileIndex())) { return 1; } else { - int filePosition1 = schema.getFileIndex().getFilePositionIndex().read(o1.fileIndex); - int filePosition2 = schema.getFileIndex().getFilePositionIndex().read(o2.fileIndex); + int filePosition1 = schema.getFileIndex().getFilePositionIndex().read(o1.getFileIndex()); + int filePosition2 = schema.getFileIndex().getFilePositionIndex().read(o2.getFileIndex()); compare = Integer.compare(filePosition1, filePosition2); if (compare != 0) { return compare; } - return o1.fileIndex.compareTo(o2.fileIndex); + return o1.getFileIndex().compareTo(o2.getFileIndex()); } } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/VariantFileIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java similarity index 61% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/VariantFileIndexConverter.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java index 94c3ce0920e..5990cdb4f86 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/VariantFileIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java @@ -4,23 +4,51 @@ import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.AlternateCoordinate; import org.opencb.biodata.models.variant.avro.FileEntry; +import 
org.opencb.biodata.models.variant.avro.OriginalCall; import org.opencb.biodata.models.variant.avro.VariantType; +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.function.IntFunction; -public class VariantFileIndexConverter { +public class SampleVariantIndexEntryConverter { private final FileIndexSchema fileIndex; + private final FileDataIndexSchema fileDataSchema; - public VariantFileIndexConverter(SampleIndexSchema configuration) { + public SampleVariantIndexEntryConverter(SampleIndexSchema configuration) { fileIndex = configuration.getFileIndex(); + fileDataSchema = configuration.getFileData(); + } + + public SampleVariantIndexEntry createSampleVariantIndexEntry(int sampleIdx, int filePosition, Variant variant) { + // Expecting only one study and only one file + StudyEntry study = variant.getStudies().get(0); + FileEntry file = study.getFiles().get(0); + + BitBuffer fileIndexValue = createFileIndexValue(variant.getType(), filePosition, file.getData(), + study.getSampleDataKeyPositions(), study.getSampleData(sampleIdx)); + ByteBuffer fileDataIndexValue = createFileDataIndexValue(filePosition, file.getCall(), + study.getSecondaryAlternates()); + + return new SampleVariantIndexEntry(variant, fileIndexValue, fileDataIndexValue); + } + + public SampleVariantIndexEntry createSampleVariantIndexEntry( + int filePosition, Variant variant, OriginalCall call, List alts, + Function fileAttributes, Function sampleData) { + BitBuffer fileIndexValue = createFileIndexValue(variant.getType(), filePosition, fileAttributes, sampleData); + ByteBuffer fileDataIndexValue = createFileDataIndexValue(filePosition, call, + alts); + + return new SampleVariantIndexEntry(variant, fileIndexValue, fileDataIndexValue); } /** @@ -31,7 +59,7 @@ public VariantFileIndexConverter(SampleIndexSchema configuration) { * @param variant Full variant. * @return 16 bits of file index. */ - public BitBuffer createFileIndexValue(int sampleIdx, int filePosition, Variant variant) { + protected BitBuffer createFileIndexValue(int sampleIdx, int filePosition, Variant variant) { // Expecting only one study and only one file StudyEntry study = variant.getStudies().get(0); FileEntry file = study.getFiles().get(0); @@ -50,7 +78,7 @@ public BitBuffer createFileIndexValue(int sampleIdx, int filePosition, Variant v * @param sampleData Sample data values * @return BitBuffer of file index. 
*/ - public BitBuffer createFileIndexValue(VariantType type, int filePosition, Map fileAttributes, + protected BitBuffer createFileIndexValue(VariantType type, int filePosition, Map fileAttributes, Map sampleDataKeyPositions, List sampleData) { return createFileIndexValue(type, filePosition, fileAttributes::get, (key) -> { Integer position = sampleDataKeyPositions.get(key); @@ -67,7 +95,7 @@ public BitBuffer createFileIndexValue(VariantType type, int filePosition, Map fileAttributes, + private BitBuffer createFileIndexValue(VariantType type, int filePosition, Function fileAttributes, Function sampleData) { BitBuffer bitBuffer = new BitBuffer(fileIndex.getBitsLength()); @@ -93,6 +121,35 @@ public BitBuffer createFileIndexValue(VariantType type, int filePosition, Functi return bitBuffer; } + /** + * Create the FileIndex value for this specific sample and variant. + * + * @param filePosition In case of having multiple files for the same sample, the cardinal value of the load order of the file. + * @param call Original call + * @param secondaryAlternates Secondary alternates + * @return BitBuffer of file index. + */ + private ByteBuffer createFileDataIndexValue(int filePosition, OriginalCall call, + List secondaryAlternates) { +// if (fileDataIndex.isSparse()) { +// } + int fileDataSize = 0; + if (fileDataSchema.isIncludeOriginalCall()) { + fileDataSize += fileDataSchema.getOriginalCallField().getByteLength(call); + } + if (fileDataSchema.isIncludeSecondaryAlternates()) { + fileDataSize += fileDataSchema.getSecondaryAlternatesField().getByteLength(secondaryAlternates); + } + ByteBuffer bb = ByteBuffer.allocate(fileDataSize); + if (fileDataSchema.isIncludeOriginalCall()) { + fileDataSchema.getOriginalCallField().write(call, bb); + } + if (fileDataSchema.isIncludeSecondaryAlternates()) { + fileDataSchema.getSecondaryAlternatesField().write(secondaryAlternates, bb); + } + return bb; + } + public BitBuffer addSampleDataIndexValues(BitBuffer bitBuffer, Map sampleDataKeyPositions, IntFunction sampleData) { for (IndexField fileDataIndexField : fileIndex.getCustomFieldsSourceSample()) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java index 8da9d0911f0..d52b17d3c97 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java @@ -451,20 +451,24 @@ public static void printSampleIndexTable2(VariantHadoopDBAdaptor dbAdaptor, Path try ( FileOutputStream fos = new FileOutputStream(fileName.toFile()); PrintStream out = new PrintStream(fos) ) { + String studyName = dbAdaptor.getMetadataManager().getStudyName(studyId); + SampleIndexDBAdaptor sampleIndexDBAdaptor = new SampleIndexDBAdaptor(dbAdaptor.getHBaseManager(), dbAdaptor.getTableNameGenerator(), dbAdaptor.getMetadataManager()); SampleIndexSchema schema = sampleIndexDBAdaptor.getSchemaLatest(studyId); for (Integer sampleId : dbAdaptor.getMetadataManager().getIndexedSamples(studyId)) { String sampleName = dbAdaptor.getMetadataManager().getSampleName(studyId, sampleId); - RawSingleSampleIndexVariantDBIterator it = 
sampleIndexDBAdaptor.rawIterator(dbAdaptor.getMetadataManager().getStudyName(studyId), sampleName); - - out.println(""); - out.println(""); - out.println(""); - out.println("SAMPLE: " + sampleName + " , " + sampleId); - while (it.hasNext()) { - SampleVariantIndexEntry entry = it.next(); - out.println("_______________________"); - out.println(entry.toString(schema)); + try (RawSingleSampleIndexVariantDBIterator it = sampleIndexDBAdaptor.rawIterator(studyName, sampleName)) { + out.println(""); + out.println(""); + out.println(""); + out.println("SAMPLE: " + sampleName + " (id=" + sampleId + ")"); + while (it.hasNext()) { + SampleVariantIndexEntry entry = it.next(); + out.println("_______________________"); + out.println(entry.toString(schema)); + } + } catch (Exception e) { + throw new RuntimeException(e); } } } @@ -499,9 +503,12 @@ public static void printSampleIndexTable(VariantHadoopDBAdaptor dbAdaptor, Path } else if (s.startsWith("_")) { StringBuilder sb = new StringBuilder(); for (byte b : value) { + if (sb.length() > 0) { + sb.append(" - "); + } sb.append(IndexUtils.byteToString(b)); - sb.append(" - "); } + sb.append(" : ").append(Bytes.toStringBinary(value)); map.put(s, sb.toString()); } else if (s.startsWith(Bytes.toString(SampleIndexSchema.toMendelianErrorColumn()))) { map.put(s, MendelianErrorSampleIndexConverter.toVariants(value, 0, value.length).toString()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java new file mode 100644 index 00000000000..fb8b2ca06f6 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java @@ -0,0 +1,105 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.nio.ByteBuffer; + +public class DataSchemaTest { + + + private DataSchema dataSchema; + private VarcharDataField key1; + private VarcharDataField key2; + private IntegerDataField key3; + private VarcharDataField key4; + + @Before + public void setUp() throws Exception { + key1 = new VarcharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key1", null)); + key2 = new VarcharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key2", null)); + key3 = new IntegerDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key3", null)); + key4 = new VarcharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key4", null)); + dataSchema = new DataSchema() { + { + addField(key1); + addField(key2); + addField(key3); + addField(key4); + } + }; + } + + @Test + public void readWrite() { + + ByteBuffer bbEntry1 = ByteBuffer.allocate(100); + ByteBuffer bbEntry2 = ByteBuffer.allocate(100); + ByteBuffer bb = ByteBuffer.allocate(100); + + key1.write("key1_value", bbEntry1); + key2.write("key2_value", bbEntry1); + key3.write(1234255, bbEntry1); + key4.write("key4_value", bbEntry1); + bbEntry1.limit(bbEntry1.position()); + + key1.write("key1_value", bbEntry2); + key2.write("key2_value", bbEntry2); + key3.write(32, bbEntry2); + key4.write("key4_value", bbEntry2); + 
bbEntry2.limit(bbEntry2.position()); + + dataSchema.writeEntry(bb, bbEntry1); + dataSchema.writeEntry(bb, bbEntry2); + + bbEntry1.rewind(); + bbEntry2.rewind(); + bb.rewind(); + +// System.out.println("Bytes.toStringBinary(bbEntry) = " + Bytes.toStringBinary(bbEntry)); +// System.out.println("Bytes.toStringBinary(bbEntry) = " + Bytes.toStringBinary(bb)); + + // Read entries sequentially + ByteBuffer readEntry = dataSchema.readNextEntry(bb); + checkEntry(bbEntry1, readEntry, 1234255); + + ByteBuffer readEntry2 = dataSchema.readNextEntry(bb); + checkEntry(bbEntry2, readEntry2, 32); + + // Read entries random + readEntry2 = dataSchema.readEntry(bb, 1); + checkEntry(bbEntry2, readEntry2, 32); + + readEntry = dataSchema.readEntry(bb, 0); + checkEntry(bbEntry1, readEntry, 1234255); + } + + private void checkEntry(ByteBuffer expected, ByteBuffer readEntry, int key3NumberValue) { + Assert.assertEquals(expected, readEntry); +// System.out.println("Bytes.toStringBinary(readEntry) = " + Bytes.toStringBinary(readEntry)); + + // Sequential field read order + Assert.assertEquals("key1_value", key1.readAndDecode(readEntry)); + Assert.assertEquals("key2_value", key2.readAndDecode(readEntry)); + Assert.assertEquals(key3NumberValue, key3.readAndDecode(readEntry).intValue()); + Assert.assertEquals("key4_value", key4.readAndDecode(readEntry)); + + readEntry.rewind(); + + // Wrong order. + Assert.assertEquals("key1_value", key4.readAndDecode(readEntry)); + Assert.assertEquals("key2_value", key1.readAndDecode(readEntry)); + Assert.assertEquals(key3NumberValue, key3.readAndDecode(readEntry).intValue()); + Assert.assertEquals("key4_value", key2.readAndDecode(readEntry)); + + readEntry.rewind(); + + // Random field access order + Assert.assertEquals("key4_value", dataSchema.readField(readEntry, key4)); + Assert.assertEquals("key1_value", dataSchema.readField(readEntry, key1)); + Assert.assertEquals(key3NumberValue, dataSchema.readField(readEntry, key3).intValue()); + Assert.assertEquals("key2_value", dataSchema.readField(readEntry, key2)); + } +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java index 9086c670d22..1f64db0132b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java @@ -40,24 +40,24 @@ public void testContains() { protected SampleVariantIndexEntry newVariantIndexEntry(String s, short i) { byte[] v = new byte[2]; Bytes.putShort(v, 0, i); - return new SampleVariantIndexEntry(new Variant(s), new BitBuffer(v)); + return new SampleVariantIndexEntry(new Variant(s), new BitBuffer(v), null); } @Test public void testBuild() { SampleIndexSchema schema = SampleIndexSchema.defaultSampleIndexSchema(); - VariantFileIndexConverter c = new VariantFileIndexConverter(schema); + SampleVariantIndexEntryConverter c = new SampleVariantIndexEntryConverter(schema); SampleIndexEntryPutBuilder builder = new SampleIndexEntryPutBuilder(1, "1", 10, schema, false, true); - 
assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:100:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 1, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - assertTrue(builder.add("1/1", new SampleVariantIndexEntry(new Variant("1:300:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - assertTrue(builder.add("1/1", new SampleVariantIndexEntry(new Variant("1:400:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - - assertFalse(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); - assertFalse(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList())))); + assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:100:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 1, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("1/1", new SampleVariantIndexEntry(new Variant("1:300:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("1/1", new SampleVariantIndexEntry(new Variant("1:400:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + + assertFalse(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertFalse(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); Put build = builder.build(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/VariantFileIndexEntryConverterTest.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java similarity index 64% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/VariantFileIndexEntryConverterTest.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java index bdda507c67d..d7aeb1890ca 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/VariantFileIndexEntryConverterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java @@ -16,14 +16,14 @@ import static org.junit.Assert.assertEquals; @Category(ShortTests.class) -public class VariantFileIndexEntryConverterTest { +public class SampleVariantIndexEntryConverterTest { - private VariantFileIndexConverter fileIndexConverter; + private SampleVariantIndexEntryConverter converter; private FileIndexSchema fileIndex; @Before public void setUp() throws Exception { - fileIndexConverter = new VariantFileIndexConverter(SampleIndexSchema.defaultSampleIndexSchema()); + converter = new SampleVariantIndexEntryConverter(SampleIndexSchema.defaultSampleIndexSchema()); fileIndex = (SampleIndexSchema.defaultSampleIndexSchema()).getFileIndex(); } @@ -33,51 +33,51 @@ public void testConvert() { fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").build())); fileIndex.getTypeIndex().write(VariantType.INDEL, bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:-").addSample("s1", "0/1", ".").build())); + converter.createFileIndexValue(0, 0, v("1:100:A:-").addSample("s1", "0/1", ".").build())); fileIndex.getTypeIndex().write(VariantType.DELETION, bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100-200:A:").addSample("s1", "0/1", ".").build())); + converter.createFileIndexValue(0, 0, v("1:100-200:A:").addSample("s1", "0/1", ".").build())); fileIndex.getTypeIndex().write(VariantType.INSERTION, bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100-200:A:").addSample("s1", "0/1", ".").build())); + converter.createFileIndexValue(0, 0, v("1:100-200:A:").addSample("s1", "0/1", ".").build())); fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER).write("PASS", bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setFilter("PASS").build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setFilter("PASS").build())); fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER).write(null, bitBuffer); fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).write("2000.0", bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, 
v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(2000.0).build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(2000.0).build())); fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).write("10.0", bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(10.0).build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(10.0).build())); bitBuffer.clear(); fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); fileIndex.getFilePositionIndex().write(3, bitBuffer); fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).write("10.0", bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 3, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(10.0).build())); + converter.createFileIndexValue(0, 3, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(10.0).build())); bitBuffer.clear(); fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); for (Integer dp : IntStream.range(0, 60).toArray()) { fileIndex.getCustomField(IndexFieldConfiguration.Source.SAMPLE, "DP").write(String.valueOf(dp), bitBuffer); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).build())); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).addFileData("DP", 10000).build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).addFileData("DP", 10000).build())); assertEquals(bitBuffer, - fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).addFileData("DP", 0).build())); + converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).addFileData("DP", 0).build())); // assertEquals(bitBuffer, // fileIndexConverter.createFileIndexValue(0, 0, v("1:100:A:C").setSampleDataKeys("GT").addSample("s1", "0/1").addFileData("DP", dp).build())); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java index 33bf8dd93a1..7b5165ab5c6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java @@ -58,6 +58,6 @@ protected SampleVariantIndexEntry newVariantIndexEntry(String s, int i, boolean if (multiFileIndex) { this.fileIndex.setMultiFile(fileIndex, 0); } - return new SampleVariantIndexEntry(new Variant(s), fileIndex); + return new SampleVariantIndexEntry(new Variant(s), fileIndex, null); } } \ No newline at end of file From 5250927c1e9473e40fd2aedb3b413ab195b8db16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 4 Sep 2024 13:37:03 +0100 Subject: [PATCH 02/19] storage: Add varint to store DataSchema entry 
length. #TASK-6765 --- .../hadoop/variant/index/core/DataField.java | 19 +-- .../hadoop/variant/index/core/DataSchema.java | 20 +-- .../variant/index/core/DynamicDataField.java | 16 --- .../variant/index/core/VarIntDataField.java | 118 ++++++++++++++++++ .../variant/index/core/VarcharDataField.java | 6 +- .../index/core/VariableWidthDataField.java | 14 +++ .../index/sample/SampleIndexDBAdaptor.java | 2 +- .../index/core/VarIntDataFieldTest.java | 93 ++++++++++++++ 8 files changed, 234 insertions(+), 54 deletions(-) delete mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java index 836270ebb86..4bc48fa4952 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java @@ -18,19 +18,9 @@ public abstract class DataField { private final IndexFieldConfiguration configuration; - private int fieldPosition; public DataField(IndexFieldConfiguration configuration) { - this(configuration, -1); - } - - public DataField(IndexFieldConfiguration configuration, int fieldPosition) { this.configuration = configuration; - this.fieldPosition = fieldPosition; - } - - void setFieldPosition(int fieldPosition) { - this.fieldPosition = fieldPosition; } public String getId() { @@ -53,10 +43,6 @@ public IndexFieldConfiguration.Type getType() { return configuration.getType(); } - public int getFieldPosition() { - return fieldPosition; - } - public void move(ByteBuffer bb) { read(bb); } @@ -84,7 +70,7 @@ public ByteBuffer write(T value) { public abstract T decode(ByteBuffer code); public DataField from(Function converter, Function deconverter) { - return new DataField(configuration, fieldPosition) { + return new DataField(configuration) { @Override public void move(ByteBuffer bb) { @@ -125,9 +111,8 @@ public R decode(ByteBuffer code) { @Override public String toString() { - final StringBuilder sb = new StringBuilder("IndexField{"); + final StringBuilder sb = new StringBuilder("DataField{"); sb.append("configuration=").append(configuration); - sb.append(", valuePosition=").append(fieldPosition); sb.append('}'); return sb.toString(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java index b0132cf1adc..44904715883 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java @@ -27,19 +27,18 @@ */ public abstract class DataSchema { - private List> fields; + private final List> fields; protected final DataField entryLengthField; // private boolean sparse = false; public DataSchema() { fields = new ArrayList<>(); - entryLengthField = new IntegerDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "ENTRY_LENGTH", null)); + entryLengthField = new VarIntDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "ENTRY_LENGTH", null)); fields.add(entryLengthField); } protected void addField(DataField field) { - field.setFieldPosition(fields.size()); fields.add(field); } @@ -98,21 +97,6 @@ public ByteBuffer readNextEntry(ByteBuffer buffer) { } } - public ByteBuffer readField(ByteBuffer buffer, int fieldPosition) { - buffer.rewind(); - for (DataField field : fields) { - if (field == entryLengthField) { - // Skip entry length field - continue; - } else if (field.getFieldPosition() == fieldPosition) { - return field.read(buffer); - } else { - field.move(buffer); - } - } - throw new IllegalArgumentException("Unknown field position " + fieldPosition); - } - public T readField(ByteBuffer buffer, DataField field) { buffer.rewind(); for (DataField thisField : fields) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java deleted file mode 100644 index dfa7c336050..00000000000 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DynamicDataField.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.opencb.opencga.storage.hadoop.variant.index.core; - -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; - -/** - * Data field with dynamic length. 
- * @param - */ -public abstract class DynamicDataField extends DataField { - - protected static final byte FIELD_SEPARATOR = (byte) 0; - - public DynamicDataField(IndexFieldConfiguration configuration, int valuePosition) { - super(configuration, valuePosition); - } -} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java new file mode 100644 index 00000000000..1329468ef9d --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java @@ -0,0 +1,118 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; + +/** + * Variable width int data field. + * https://protobuf.dev/programming-guides/encoding/#varints + */ +public class VarIntDataField extends VariableWidthDataField { + + private static final int VALUE_MASK = 0b0111_1111; + private static final int CONTINUATION_BIT_MASK = 0b1000_0000; + + public VarIntDataField(IndexFieldConfiguration configuration) { + super(configuration); + } + + @Override + public void move(ByteBuffer bb) { + bb.position(bb.position() + Integer.BYTES); + } + + @Override + public ByteBuffer read(ByteBuffer bb) { + bb.mark(); + int length = 0; + while (bb.hasRemaining()) { + byte b = bb.get(); + length++; + if ((b & CONTINUATION_BIT_MASK) == 0) { + break; + } + } + bb.reset(); + ByteBuffer read = (ByteBuffer) bb.slice().limit(length); + // move buffer + bb.position(bb.position() + length); + return read; + } + + @Override + public Integer readAndDecode(ByteBuffer bb) { + int result = 0; + int shift = 0; + while (bb.hasRemaining()) { + byte b = bb.get(); + result |= (b & VALUE_MASK) << shift; + if ((b & CONTINUATION_BIT_MASK) == 0) { + return result; + } + shift += 7; + } + return result; + } + + @Override + public int getByteLength(Integer value) { + if (value == null) { + return 0; + } else if (value < 0) { + return 5; + } else if (value < (1 << 7)) { + return 1; + } else if (value < (1 << 14)) { + return 2; + } else if (value < (1 << 21)) { + return 3; + } else if (value < (1 << 28)) { + return 4; + } else { + return 5; + } + } + + @Override + public void write(Integer value, ByteBuffer buffer) { + // While "value" without the value_mask is not 0 (i.e. has more than 7 bits) + while ((value & ~VALUE_MASK) != 0) { + buffer.put((byte) ((value & VALUE_MASK) | CONTINUATION_BIT_MASK)); + value >>>= 7; + } + buffer.put((byte) (value & VALUE_MASK)); + } + + @Override + public void write(Integer value, ByteArrayOutputStream stream) { + // While "value" without the value_mask is not 0 (i.e. 
has more than 7 bits) + while ((value & ~VALUE_MASK) != 0) { + stream.write((value & VALUE_MASK) | CONTINUATION_BIT_MASK); + value >>>= 7; + } + stream.write(value & VALUE_MASK); + } + + @Override + public ByteBuffer encode(Integer value) { + ByteBuffer buffer = ByteBuffer.allocate(getByteLength(value)); + write(value, buffer); + buffer.flip(); + return buffer; + } + + @Override + public Integer decode(ByteBuffer code) { + int result = 0; + for (int shift = 0; shift < Integer.SIZE; shift += 7) { + byte b = code.get(); + result |= (b & VALUE_MASK) << shift; + if ((b & CONTINUATION_BIT_MASK) == 0) { + return result; + } + } + return result; + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java index c213d37e1d8..294f34473b2 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java @@ -12,10 +12,12 @@ * Varchar data field. * Read until the FIELD_SEPARATOR. */ -public class VarcharDataField extends DynamicDataField { +public class VarcharDataField extends VariableWidthDataField { + + protected static final byte FIELD_SEPARATOR = (byte) 0; public VarcharDataField(IndexFieldConfiguration configuration) { - super(configuration, -1); + super(configuration); } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java new file mode 100644 index 00000000000..8c940f6ffcb --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java @@ -0,0 +1,14 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +/** + * Data field with variable data length. 
+ * @param + */ +public abstract class VariableWidthDataField extends DataField { + + public VariableWidthDataField(IndexFieldConfiguration configuration) { + super(configuration); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 431d277bcd9..1280bc187d3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -636,7 +636,7 @@ private Scan parse(SingleSampleIndexQuery query, LocusQuery locusQuery, boolean scan.setCaching(hBaseManager.getConf().getInt("hbase.client.scanner.caching", 100)); logger.info("---------"); - logger.info("Sample = \"" + query.getSample() + "\" , schema version = " + query.getSchema().getVersion()); + logger.info("Sample = \"" + query.getSample() + "\" (id=" + sampleId + ") , schema version = " + query.getSchema().getVersion()); logger.info("Table = " + getSampleIndexTableName(query)); printScan(scan); printQuery(locusQuery); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java new file mode 100644 index 00000000000..4afea856064 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java @@ -0,0 +1,93 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.nio.ByteBuffer; + +public class VarIntDataFieldTest { + + private VarIntDataField field; + + @Before + public void setUp() { + field = new VarIntDataField(null); + } + + @Test + public void shouldHandlePositiveValues() { + testVarint(5, 1); + } + + @Test + public void shouldHandlePositive2bValues() { + testVarint(0x0600, 2); + } + + @Test + public void shouldHandlePositive3bValues() { + testVarint(0x060000, 3); + } + + @Test + public void shouldHandlePositive4bValues() { + testVarint(0x06000000, 4); + } + + + @Test + public void shouldHandleAllValues() { + int prevLength = 0; + for (int expected = -2000000; expected < 4000000; expected++) { + int thisLength = field.getByteLength(expected); + if (expected % 1000000 == 0 || prevLength != thisLength) { + System.out.println(expected + " _ " + thisLength); + } + testVarint(expected, thisLength); + prevLength = thisLength; + } + } + + @Test + public void shouldHandleNegativeValues() { + testVarint(-5, 5); + } + + @Test + public void shouldHandleZero() { + testVarint(0, 1); + } + + @Test + public void shouldHandleMaximumInteger() { + testVarint(Integer.MAX_VALUE, 5); + } + + @Test + public void shouldHandleMinimumInteger() { + testVarint(Integer.MIN_VALUE, 5); + } + + private void testVarint(int expected, int expectedLenght) { + int actualLength = field.getByteLength(expected); + 
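// Minimal stand-alone sketch of the varint layout this test exercises: 7 value bits per byte, with the
// high bit as a continuation flag, so small non-negative lengths fit in a single byte. The locals below
// are illustrative only and are not part of VarIntDataField.
int sketchValue = 300;                                  // 300 = 0b1_0010_1100, needs two 7-bit groups
byte sketchLow = (byte) ((sketchValue & 0x7F) | 0x80);  // low 7 bits plus continuation bit -> 0xAC
byte sketchHigh = (byte) (sketchValue >>> 7);           // remaining bits, continuation bit clear -> 0x02
int sketchDecoded = (sketchLow & 0x7F) | ((sketchHigh & 0x7F) << 7);
Assert.assertEquals(sketchValue, sketchDecoded);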
Assert.assertEquals(expectedLenght, actualLength); + ByteBuffer buffer = ByteBuffer.allocate(actualLength); + field.write(expected, buffer); + ByteBuffer buffer2 = field.encode(expected); + + buffer.rewind(); + Assert.assertEquals(buffer, buffer2); + + int actualOutput = field.readAndDecode(buffer); + Assert.assertEquals(expected, actualOutput); + Assert.assertEquals(0, buffer.remaining()); + + buffer.rewind(); + actualOutput = field.decode(field.read(buffer)); + Assert.assertEquals(expected, actualOutput); + Assert.assertEquals(0, buffer.remaining()); + } + + +} \ No newline at end of file From d9a7bf1d70ce6bca238abb047bef9b1fe84eebc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 4 Sep 2024 15:15:02 +0100 Subject: [PATCH 03/19] storage: Skip data entries if equals to defaults. #TASK-6765 --- .../hadoop/variant/index/core/DataField.java | 21 ++++++++++ .../hadoop/variant/index/core/DataSchema.java | 41 +++++++++++++++++-- .../variant/index/core/IntegerDataField.java | 5 +++ .../variant/index/core/VarIntDataField.java | 10 +++++ .../variant/index/core/VarcharDataField.java | 10 +++++ .../variant/index/core/DataSchemaTest.java | 15 +++++++ 6 files changed, 98 insertions(+), 4 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java index 4bc48fa4952..11d4f95a9f2 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java @@ -4,6 +4,7 @@ import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; +import java.util.Objects; import java.util.function.Function; /** @@ -65,11 +66,26 @@ public ByteBuffer write(T value) { public abstract void write(T value, ByteArrayOutputStream stream); + public boolean isDefault(ByteBuffer buffer) { + return isDefault(decode(buffer)); + } + + public boolean isDefault(T value) { + return Objects.equals(value, getDefault()); + } + + public abstract T getDefault(); + + public ByteBuffer getDefaultEncoded() { + return encode(getDefault()); + } + public abstract ByteBuffer encode(T value); public abstract T decode(ByteBuffer code); public DataField from(Function converter, Function deconverter) { + R defaultValue = deconverter.apply(DataField.this.getDefault()); return new DataField(configuration) { @Override @@ -97,6 +113,11 @@ public void write(R value, ByteArrayOutputStream stream) { DataField.this.write(converter.apply(value), stream); } + @Override + public R getDefault() { + return defaultValue; + } + @Override public ByteBuffer encode(R value) { return DataField.this.encode(converter.apply(value)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java index 44904715883..cf83f85ccef 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java @@ -1,6 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -29,17 +30,28 @@ public abstract class DataSchema { private final List> fields; protected final DataField entryLengthField; + private ByteBuffer defaultEntry; // private boolean sparse = false; public DataSchema() { fields = new ArrayList<>(); entryLengthField = new VarIntDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "ENTRY_LENGTH", null)); - fields.add(entryLengthField); + defaultEntry = ByteBuffer.allocate(0); } protected void addField(DataField field) { fields.add(field); + ExposedByteArrayOutputStream defaultEntryStream = new ExposedByteArrayOutputStream(); + for (DataField dataField : fields) { + writeDefaultValue(dataField, defaultEntryStream); + } + defaultEntry = defaultEntryStream.toByteByffer().asReadOnlyBuffer(); + } + + private static void writeDefaultValue(DataField dataField, ByteArrayOutputStream defaultEntry) { + T defaultValue = dataField.getDefault(); + dataField.write(defaultValue, defaultEntry); } // public boolean isSparse() { @@ -55,17 +67,35 @@ public List> getFields() { } public void writeEntry(ByteBuffer buffer, ByteBuffer entryBuffer) { + entryBuffer.rewind(); + if (isDefaultEntry(entryBuffer)) { + // This is the default entry + entryLengthField.write(0, buffer); + return; + } int entryLength = entryBuffer.limit(); entryLengthField.write(entryLength, buffer); buffer.put(entryBuffer.array(), buffer.arrayOffset(), entryLength); } + public void writeEntry(ByteArrayOutputStream stream, ByteBuffer entryBuffer) { + entryBuffer.rewind(); + if (isDefaultEntry(entryBuffer)) { + // This is the default entry + entryLengthField.write(0, stream); + return; + } int entryLength = entryBuffer.limit(); entryLengthField.write(entryLength, stream); stream.write(entryBuffer.array(), entryBuffer.arrayOffset(), entryLength); } + private boolean isDefaultEntry(ByteBuffer entryBuffer) { + return defaultEntry.limit() == entryBuffer.limit() + && defaultEntry.compareTo(entryBuffer) == 0; + } + public ByteBuffer readEntry(ByteBuffer buffer, int entryPosition) { try { buffer.rewind(); @@ -87,9 +117,12 @@ public ByteBuffer readNextEntry(ByteBuffer buffer) { if (!buffer.hasRemaining()) { return ByteBuffer.allocate(0); } - int elementSize = entryLengthField.readAndDecode(buffer); - ByteBuffer elementBuffer = ByteBuffer.allocate(elementSize); - buffer.get(elementBuffer.array(), elementBuffer.arrayOffset(), elementSize); + int entryLength = entryLengthField.readAndDecode(buffer); + if (entryLength == 0) { + return defaultEntry; + } + ByteBuffer elementBuffer = ByteBuffer.allocate(entryLength); + buffer.get(elementBuffer.array(), elementBuffer.arrayOffset(), entryLength); elementBuffer.rewind(); return elementBuffer; } catch (Exception e) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java index 7d43f6eba4c..944c69b6310 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java @@ -52,6 +52,11 @@ public void write(Integer value, ByteArrayOutputStream stream) { } } + @Override + public Integer getDefault() { + return 0; + } + @Override public ByteBuffer encode(Integer value) { return ByteBuffer.allocate(4).putInt(value); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java index 1329468ef9d..0fe7f0c7192 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java @@ -95,6 +95,16 @@ public void write(Integer value, ByteArrayOutputStream stream) { stream.write(value & VALUE_MASK); } + @Override + public boolean isDefault(ByteBuffer buffer) { + return buffer.get(buffer.position() + 1) == 0; + } + + @Override + public Integer getDefault() { + return 0; + } + @Override public ByteBuffer encode(Integer value) { ByteBuffer buffer = ByteBuffer.allocate(getByteLength(value)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java index 294f34473b2..ed3a13b9c9a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java @@ -69,6 +69,16 @@ public void write(String value, ByteArrayOutputStream stream) { } } + @Override + public boolean isDefault(ByteBuffer buffer) { + return buffer.get(buffer.position() + 1) == FIELD_SEPARATOR; + } + + @Override + public String getDefault() { + return ""; + } + @Override public ByteBuffer encode(String value) { if (value == null) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java index fb8b2ca06f6..3ecfaa7952e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java @@ -4,6 +4,7 @@ import org.junit.Before; import org.junit.Test; import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import 
org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; import java.nio.ByteBuffer; @@ -32,6 +33,20 @@ public void setUp() throws Exception { }; } + @Test + public void readWriteDefault() { + ExposedByteArrayOutputStream stream = new ExposedByteArrayOutputStream(); + for (DataField field : dataSchema.getFields()) { + field.write(field.getDefault(), stream); + } + ByteBuffer byteByffer = stream.toByteByffer(); + + ExposedByteArrayOutputStream stream2 = new ExposedByteArrayOutputStream(); + dataSchema.writeEntry(stream2, byteByffer); + Assert.assertEquals(1, stream2.toByteByffer().limit()); + Assert.assertEquals(0, stream2.toByteByffer().get()); + } + @Test public void readWrite() { From 1a281daf0fc357f5b10d744f7608064830efe427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 9 Sep 2024 14:18:13 +0100 Subject: [PATCH 04/19] storage: Replace string with binary file data encoding. #TASK-6765 --- .../SampleIndexOnlyVariantQueryExecutor.java | 15 +- .../hadoop/variant/index/core/DataField.java | 155 +++++------ .../variant/index/core/DataFieldBase.java | 90 +++++++ .../index/core/DataFieldWithContext.java | 43 +++ .../hadoop/variant/index/core/DataSchema.java | 50 +++- .../index/core/VarBinaryDataField.java | 93 +++++++ ...arDataField.java => VarCharDataField.java} | 19 +- .../variant/index/core/VarIntDataField.java | 5 - .../variant/index/core/VarSIntDataField.java | 72 +++++ .../variant/index/sample/AlleleCodec.java | 162 +++++++++--- .../variant/index/sample/AlleleSnvCodec.java | 59 +++++ .../index/sample/FileDataIndexSchema.java | 248 +++++++++++++----- .../sample/SampleIndexVariantBiConverter.java | 28 +- .../SampleVariantIndexEntryConverter.java | 14 +- .../variant/index/core/DataSchemaTest.java | 22 +- .../index/core/VarBinaryDataFieldTest.java | 53 ++++ .../index/core/VarIntDataFieldTest.java | 3 + .../variant/index/sample/AlleleCodecTest.java | 52 ---- .../index/sample/AlleleSnvCodecTest.java | 52 ++++ .../index/sample/FileDataIndexSchemaTest.java | 50 ++++ 20 files changed, 992 insertions(+), 293 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/{VarcharDataField.java => VarCharDataField.java} (79%) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodec.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataFieldTest.java delete mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodecTest.java create mode 100644 
opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodecTest.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index dd7a46d044e..0c9420f2eda 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -426,7 +426,12 @@ public Variant convert(SampleVariantIndexEntry entry) { HashMap fileAttributes = new HashMap<>(); Iterator fileDataIterator = entry.getFileData().iterator(); for (BitBuffer fileIndexBitBuffer : entry.getFilesIndex()) { - ByteBuffer fileDataBitBuffer = fileDataIterator.next(); + ByteBuffer fileDataBitBuffer; + if (fileDataIterator.hasNext()) { + fileDataBitBuffer = fileDataIterator.next(); + } else { + fileDataBitBuffer = null; + } if (includeFiles) { if (includeAll) { @@ -442,11 +447,11 @@ public Variant convert(SampleVariantIndexEntry entry) { fileAttributes.put(StudyEntry.QUAL, qual); } OriginalCall call = null; - if (schema.getFileData().isIncludeOriginalCall()) { - call = schema.getFileData().readOriginalCall(fileDataBitBuffer); + if (fileDataBitBuffer != null && schema.getFileData().isIncludeOriginalCall()) { + call = schema.getFileData().readOriginalCall(fileDataBitBuffer, v); } - if (schema.getFileData().isIncludeSecondaryAlternates()) { - allAlternateCoordinates.add(schema.getFileData().readSecondaryAlternates(fileDataBitBuffer)); + if (fileDataBitBuffer != null && schema.getFileData().isIncludeSecondaryAlternates()) { + allAlternateCoordinates.add(schema.getFileData().readSecondaryAlternates(fileDataBitBuffer, v)); } Integer idx = schema.getFileIndex().getFilePositionIndex().readAndDecode(fileIndexBitBuffer); String fileName = sampleFiles.get(idx); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java index 11d4f95a9f2..6f1fc2ed6d4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java @@ -1,92 +1,93 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; +import org.apache.commons.lang3.tuple.Pair; +import org.opencb.biodata.tools.commons.BiConverter; import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; -import java.util.Objects; import java.util.function.Function; /** - * Field of the 
DataSchema. - * Similar to {@link IndexField}, but for the DataSchema. - * This field does not allow filters. - *
<p>
- * This class is used to read and write the data of the DataSchema. - * The ByteBuffer contains a set of entries, each entry contains a set of fields. - * @param + * Context-less Data field. + * + * @param Value type */ -public abstract class DataField { - - private final IndexFieldConfiguration configuration; +public abstract class DataField extends DataFieldBase { public DataField(IndexFieldConfiguration configuration) { - this.configuration = configuration; - } - - public String getId() { - return configuration.getId(); - } - - public IndexFieldConfiguration.Source getSource() { - return configuration.getSource(); + super(configuration); } - public String getKey() { - return configuration.getKey(); + public T readAndDecode(ByteBuffer bb) { + return decode(read(bb)); } - public IndexFieldConfiguration getConfiguration() { - return configuration; - } + public abstract T decode(ByteBuffer code); - public IndexFieldConfiguration.Type getType() { - return configuration.getType(); - } + public DataField from(Function converter, Function deconverter) { + return from(new BiConverter() { + @Override + public T to(R r) { + return converter.apply(r); + } - public void move(ByteBuffer bb) { - read(bb); + @Override + public R from(T t) { + return deconverter.apply(t); + } + }); } - public abstract ByteBuffer read(ByteBuffer bb); + public DataField from(BiConverter converter) { + R defaultValue = converter.from(DataField.this.getDefault()); + return new DataField(DataField.this.getConfiguration()) { - public T readAndDecode(ByteBuffer bb) { - return decode(read(bb)); - } - - public ByteBuffer write(T value) { - ByteBuffer buffer = ByteBuffer.allocate(getByteLength(value)); - write(value, buffer); - return buffer; - } + @Override + public void move(ByteBuffer bb) { + DataField.this.move(bb); + } - public abstract int getByteLength(T value); + @Override + public ByteBuffer read(ByteBuffer bb) { + return DataField.this.read(bb); + } - public abstract void write(T value, ByteBuffer buffer); + @Override + public int getByteLength(R value) { + return DataField.this.getByteLength(converter.to(value)); + } - public abstract void write(T value, ByteArrayOutputStream stream); + @Override + public void write(R value, ByteBuffer buffer) { + DataField.this.write(converter.to(value), buffer); + } - public boolean isDefault(ByteBuffer buffer) { - return isDefault(decode(buffer)); - } + @Override + public void write(R value, ByteArrayOutputStream stream) { + DataField.this.write(converter.to(value), stream); + } - public boolean isDefault(T value) { - return Objects.equals(value, getDefault()); - } + @Override + public R getDefault() { + return defaultValue; + } - public abstract T getDefault(); + @Override + public ByteBuffer encode(R value) { + return DataField.this.encode(converter.to(value)); + } - public ByteBuffer getDefaultEncoded() { - return encode(getDefault()); + @Override + public R decode(ByteBuffer code) { + return converter.from(DataField.this.decode(code)); + } + }; } - public abstract ByteBuffer encode(T value); - - public abstract T decode(ByteBuffer code); - - public DataField from(Function converter, Function deconverter) { - R defaultValue = deconverter.apply(DataField.this.getDefault()); - return new DataField(configuration) { + public DataFieldWithContext fromWithContext(BiConverter, Pair> converter) { + Pair defaultValue = converter.from(null); + return new DataFieldWithContext(DataField.this.getConfiguration()) { @Override public void move(ByteBuffer bb) { @@ -99,42 +100,42 @@ public 
ByteBuffer read(ByteBuffer bb) { } @Override - public int getByteLength(R value) { - return DataField.this.getByteLength(converter.apply(value)); + public int getByteLength(Pair value) { + return DataField.this.getByteLength(converter.to(value).getValue()); } @Override - public void write(R value, ByteBuffer buffer) { - DataField.this.write(converter.apply(value), buffer); + public void write(Pair value, ByteBuffer buffer) { + DataField.this.write(converter.to(value).getValue(), buffer); } @Override - public void write(R value, ByteArrayOutputStream stream) { - DataField.this.write(converter.apply(value), stream); + public void write(Pair value, ByteArrayOutputStream stream) { + DataField.this.write(converter.to(value).getValue(), stream); } @Override - public R getDefault() { + public Pair getDefault() { return defaultValue; } @Override - public ByteBuffer encode(R value) { - return DataField.this.encode(converter.apply(value)); + public ByteBuffer encode(Pair value) { + return DataField.this.encode(converter.to(value).getValue()); } @Override - public R decode(ByteBuffer code) { - return deconverter.apply(DataField.this.decode(code)); + public R decode(C c, ByteBuffer code) { + return converter.from(Pair.of(c, DataField.this.decode(code))).getValue(); + } + + @Override + public R readAndDecode(C c, ByteBuffer code) { + T decode = DataField.this.readAndDecode(code); + return converter.from(Pair.of(c, decode)).getValue(); } }; } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("DataField{"); - sb.append("configuration=").append(configuration); - sb.append('}'); - return sb.toString(); - } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java new file mode 100644 index 00000000000..59ad0ad59f3 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java @@ -0,0 +1,90 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.Objects; + +/** + * Field of the DataSchema. + * Similar to {@link IndexField}, but for the DataSchema. + * This field does not allow filters. + *
<p>
+ * This class is used to read and write the data of the DataSchema. + * The ByteBuffer contains a set of entries, each entry contains a set of fields. + * @param + */ +public abstract class DataFieldBase { + + private final IndexFieldConfiguration configuration; + + public DataFieldBase(IndexFieldConfiguration configuration) { + this.configuration = configuration; + } + + public String getId() { + return configuration.getId(); + } + + public IndexFieldConfiguration.Source getSource() { + return configuration.getSource(); + } + + public String getKey() { + return configuration.getKey(); + } + + public IndexFieldConfiguration getConfiguration() { + return configuration; + } + + public IndexFieldConfiguration.Type getType() { + return configuration.getType(); + } + + public void move(ByteBuffer bb) { + read(bb); + } + + /** + * Read the next value from the ByteBuffer. + * The ByteBuffer position will be moved to the next field. + * + * @param bb ByteBuffer + * @return ByteBuffer with the read value + */ + public abstract ByteBuffer read(ByteBuffer bb); + + public ByteBuffer write(T value) { + ByteBuffer buffer = ByteBuffer.allocate(getByteLength(value)); + write(value, buffer); + return buffer; + } + + public abstract int getByteLength(T value); + + public abstract void write(T value, ByteBuffer buffer); + + public abstract void write(T value, ByteArrayOutputStream stream); + + public boolean isDefault(T value) { + return Objects.equals(value, getDefault()); + } + + public abstract T getDefault(); + + public ByteBuffer getDefaultEncoded() { + return encode(getDefault()); + } + + public abstract ByteBuffer encode(T value); + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("DataField{"); + sb.append("configuration=").append(configuration); + sb.append('}'); + return sb.toString(); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java new file mode 100644 index 00000000000..5a8908f1ce8 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java @@ -0,0 +1,43 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.apache.commons.lang3.tuple.Pair; +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; + +/** + * Data field that, for encode and decode, requires a context C. 
+ * + * @param Context used to encode and decode the value + * @param Value type + */ +public abstract class DataFieldWithContext extends DataFieldBase> { + public DataFieldWithContext(IndexFieldConfiguration configuration) { + super(configuration); + } + + public ByteBuffer write(C context, T value) { + return write(Pair.of(context, value)); + } + + public int getByteLength(C context, T value) { + return getByteLength(Pair.of(context, value)); + } + + public void write(C context, T value, ByteBuffer buffer) { + write(Pair.of(context, value), buffer); + } + + public void write(C context, T value, ByteArrayOutputStream stream) { + write(Pair.of(context, value), stream); + } + + public ByteBuffer encode(C context, T value) { + return encode(Pair.of(context, value)); + } + + public abstract T decode(C c, ByteBuffer code); + + public abstract T readAndDecode(C c, ByteBuffer code); +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java index cf83f85ccef..eb4bf0aee83 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java @@ -28,7 +28,7 @@ */ public abstract class DataSchema { - private final List> fields; + private final List> fields; protected final DataField entryLengthField; private ByteBuffer defaultEntry; @@ -40,29 +40,25 @@ public DataSchema() { defaultEntry = ByteBuffer.allocate(0); } - protected void addField(DataField field) { + protected void addField(DataFieldBase field) { fields.add(field); ExposedByteArrayOutputStream defaultEntryStream = new ExposedByteArrayOutputStream(); - for (DataField dataField : fields) { + for (DataFieldBase dataField : fields) { writeDefaultValue(dataField, defaultEntryStream); } defaultEntry = defaultEntryStream.toByteByffer().asReadOnlyBuffer(); } - private static void writeDefaultValue(DataField dataField, ByteArrayOutputStream defaultEntry) { + private static void writeDefaultValue(DataFieldBase dataField, ByteArrayOutputStream defaultEntry) { T defaultValue = dataField.getDefault(); dataField.write(defaultValue, defaultEntry); } -// public boolean isSparse() { -// return sparse; -// } - - public DataField getField(IndexFieldConfiguration.Source source, String key) { + public DataFieldBase getField(IndexFieldConfiguration.Source source, String key) { return fields.stream().filter(i -> i.getSource() == source && i.getKey().equals(key)).findFirst().orElse(null); } - public List> getFields() { + public List> getFields() { return fields; } @@ -130,9 +126,9 @@ public ByteBuffer readNextEntry(ByteBuffer buffer) { } } - public T readField(ByteBuffer buffer, DataField field) { + public T readFieldAndDecode(ByteBuffer buffer, DataField field) { buffer.rewind(); - for (DataField thisField : fields) { + for (DataFieldBase thisField : fields) { if (thisField == entryLengthField) { // Skip entry length field continue; @@ -145,4 +141,34 @@ public T readField(ByteBuffer buffer, DataField field) { throw new IllegalArgumentException("Unknown field " + field); } + public T readFieldAndDecode(ByteBuffer buffer, DataFieldWithContext field, C context) { + 
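// Fields of an entry are written back to back with no per-field offset table, so decoding one
// field means walking the schema in declaration order: the synthetic entry-length field is
// never part of the entry payload and is skipped, earlier fields are passed over with move(),
// and the requested field is decoded with the caller-supplied context (e.g. the normalized
// Variant) once it is reached.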
buffer.rewind(); + for (DataFieldBase thisField : fields) { + if (thisField == entryLengthField) { + // Skip entry length field + continue; + } else if (thisField == field) { + return field.readAndDecode(context, buffer); + } else { + thisField.move(buffer); + } + } + throw new IllegalArgumentException("Unknown field " + field); + } + + public ByteBuffer readField(ByteBuffer buffer, DataFieldBase field) { + buffer.rewind(); + for (DataFieldBase thisField : fields) { + if (thisField == entryLengthField) { + // Skip entry length field + continue; + } else if (thisField == field) { + return field.read(buffer); + } else { + thisField.move(buffer); + } + } + throw new IllegalArgumentException("Unknown field " + field); + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java new file mode 100644 index 00000000000..d1729817039 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java @@ -0,0 +1,93 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; + +public class VarBinaryDataField extends VariableWidthDataField { + + private final VarIntDataField lengthField; + + public VarBinaryDataField(IndexFieldConfiguration configuration) { + super(configuration); + lengthField = new VarIntDataField(configuration); + } + + @Override + public ByteBuffer readAndDecode(ByteBuffer bb) { + Integer length = lengthField.readAndDecode(bb); + try { + return (ByteBuffer) bb.slice().limit(length); + } catch (Exception e) { + System.out.println("length = " + length); + System.out.println("bb.limit() = " + bb.limit()); + throw e; + } + } + + @Override + public ByteBuffer read(ByteBuffer bb) { + bb.mark(); + int start = bb.position(); + Integer length = lengthField.readAndDecode(bb); + if (length == 0) { + return ByteBuffer.wrap(new byte[0]); + } + int end = bb.position(); + int totalLength = length + (end - start); + + bb.rewind(); + ByteBuffer code = (ByteBuffer) bb.slice().limit(totalLength); + // move buffer + bb.position(bb.position() + totalLength); + return code; + } + + @Override + public int getByteLength(ByteBuffer value) { + return lengthField.getByteLength(value.limit()) + value.limit(); + } + + @Override + public void write(ByteBuffer value, ByteBuffer buffer) { + value.rewind(); + lengthField.write(value.limit(), buffer); + buffer.put(value); + value.rewind(); + } + + @Override + public void write(ByteBuffer value, ByteArrayOutputStream stream) { + value.rewind(); + lengthField.write(value.limit(), stream); + for (int i = 0; i < value.limit(); i++) { + stream.write(value.get(i)); + } + value.rewind(); + } + + @Override + public ByteBuffer getDefault() { + return ByteBuffer.wrap(new byte[]{0}); + } + + @Override + public ByteBuffer encode(ByteBuffer value) { + value.rewind(); + ByteBuffer code = ByteBuffer.allocate(getByteLength(value)); + write(value, code); + code.rewind(); + return code; + } + + @Override + public ByteBuffer decode(ByteBuffer code) { + code.rewind(); + Integer length = lengthField.readAndDecode(code); + if (length == 0) { + return 
ByteBuffer.wrap(new byte[0]); + } + return (ByteBuffer) code.slice().limit(length); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java similarity index 79% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java index ed3a13b9c9a..911f18404df 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarcharDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java @@ -12,11 +12,11 @@ * Varchar data field. * Read until the FIELD_SEPARATOR. */ -public class VarcharDataField extends VariableWidthDataField { +public class VarCharDataField extends VariableWidthDataField { protected static final byte FIELD_SEPARATOR = (byte) 0; - public VarcharDataField(IndexFieldConfiguration configuration) { + public VarCharDataField(IndexFieldConfiguration configuration) { super(configuration); } @@ -37,7 +37,10 @@ public ByteBuffer read(ByteBuffer bb) { bb.reset(); ByteBuffer read = (ByteBuffer) bb.slice().limit(length); // move buffer - bb.position(bb.position() + length + 1); // move one extra to skip separator + bb.position(bb.position() + length); + if (bb.hasRemaining()) { + bb.get(); // skip separator + } return read; } @@ -69,7 +72,6 @@ public void write(String value, ByteArrayOutputStream stream) { } } - @Override public boolean isDefault(ByteBuffer buffer) { return buffer.get(buffer.position() + 1) == FIELD_SEPARATOR; } @@ -89,6 +91,13 @@ public ByteBuffer encode(String value) { @Override public String decode(ByteBuffer code) { - return new String(code.array(), code.arrayOffset(), code.limit(), StandardCharsets.UTF_8); + if (code.isReadOnly()) { + int limit = code.limit(); + byte[] bytes = new byte[limit]; + code.get(bytes); + return new String(bytes, StandardCharsets.UTF_8); + } else { + return new String(code.array(), code.arrayOffset(), code.limit(), StandardCharsets.UTF_8); + } } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java index 0fe7f0c7192..585ba900d69 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java @@ -95,11 +95,6 @@ public void write(Integer value, ByteArrayOutputStream stream) { stream.write(value & VALUE_MASK); } - @Override - public boolean isDefault(ByteBuffer buffer) { - return buffer.get(buffer.position() + 1) == 0; - } - @Override public Integer getDefault() { return 0; diff 
--git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java new file mode 100644 index 00000000000..93755a6ba8b --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java @@ -0,0 +1,72 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; + +/** + * Variable width signed int data field. + * https://protobuf.dev/programming-guides/encoding/#signed-ints + */ +public class VarSIntDataField extends VarIntDataField { + + public VarSIntDataField(IndexFieldConfiguration configuration) { + super(configuration); + } + + @Override + public Integer readAndDecode(ByteBuffer bb) { + return decodeSign(super.readAndDecode(bb)); + } + + @Override + public int getByteLength(Integer value) { + return super.getByteLength(encodeSign(value)); + } + + @Override + public void write(Integer value, ByteBuffer buffer) { + super.write(encodeSign(value), buffer); + } + + @Override + public void write(Integer value, ByteArrayOutputStream stream) { + super.write(encodeSign(value), stream); + } + + @Override + public ByteBuffer encode(Integer value) { + return super.encode(encodeSign(value)); + } + + @Override + public Integer decode(ByteBuffer code) { + return decodeSign(super.decode(code)); + } + + public static Integer encodeSign(Integer value) { + if (value == null) { + return null; + } + if (value < 0) { + value = ((-value) << 1) + 1; + } else { + value = (value << 1); + } + return value; + } + + public static Integer decodeSign(Integer value) { + if (value == null) { + return null; + } + if ((value & 1) == 1) { + value = value >> 1; + value = -value; + } else { + value = value >> 1; + } + return value; + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodec.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodec.java index 587c8fbfac1..88dac3d065c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodec.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodec.java @@ -1,59 +1,137 @@ package org.opencb.opencga.storage.hadoop.variant.index.sample; -public class AlleleCodec { +import org.opencb.biodata.tools.commons.BiConverter; - private static final String[] ALLELE_CODES = {"A", "C", "G", "T"}; +import java.util.*; - public static boolean valid(String ref, String alt) { - return validAllele(ref) - && validAllele(alt) - && !ref.equals(alt); - } +/** + * Variant allele codec. Encode and decode any length allele into a shorter string. + *
<p>
+ * If the allele is not a pure ACGT allele, it will be left as it is. It might contain the special characters [N*,]. + * Symbolic alleles will be left as they are. + * + */ +public class AlleleCodec implements BiConverter { + + private static final Map ALLELES_ENCODE; + private static final String[] ALLELES_DECODE; + private static final Set BASES = new HashSet<>(Arrays.asList('A', 'C', 'G', 'T')); + // Chars that might be present in the alleles, and should be left as they are + private static final Set SKIP_CHARS = new HashSet<>(Arrays.asList('N', '*', ',')); + // Chars that might be present in uncommon alleles. If present, the allele can't be encoded + private static final Set INVALID_CHARS = new HashSet<>(Arrays.asList( + '<', '>', // symbolic + '[', ']', // breakend + '.', // single breakend + '|', '-' // Special chars used at VariantPhoenixKeyFactory#buildSymbolicAlternate + )); + + private static final Set RESERVED_CHARS; + + + static { + ALLELES_ENCODE = new HashMap<>(); + RESERVED_CHARS = new HashSet<>(); + RESERVED_CHARS.addAll(BASES); + RESERVED_CHARS.addAll(SKIP_CHARS); + RESERVED_CHARS.addAll(INVALID_CHARS); - public static boolean validAllele(String allele) { - if (allele.length() == 1) { - switch (allele.charAt(0)) { - case 'A': - case 'C': - case 'G': - case 'T': - return true; - default: - return false; + char code = ' '; + // Generate all possible combinations of 3 bases + for (Character base1 : BASES) { + for (Character base2 : BASES) { + for (Character base3 : BASES) { + code++; + while (RESERVED_CHARS.contains(code)) { + code++; + } + ALLELES_ENCODE.put(base1 + "" + base2 + base3, code); + } } } - return false; - } - public static byte encode(String ref, String alt) { - byte refCode = encode(ref.charAt(0)); - byte altCode = encode(alt.charAt(0)); - return join(refCode, altCode); + // Generate all possible combinations of 2 bases + for (Character base1 : BASES) { + for (Character base2 : BASES) { + code++; + while (RESERVED_CHARS.contains(code)) { + code++; + } + ALLELES_ENCODE.put(base1 + "" + base2, code); + } + } + ALLELES_DECODE = new String[code + 1]; + for (Map.Entry entry : ALLELES_ENCODE.entrySet()) { + if (ALLELES_DECODE[entry.getValue()] != null) { + throw new IllegalStateException("Repeated code " + ((int) entry.getValue())); + } + ALLELES_DECODE[entry.getValue()] = entry.getKey(); + } } - public static String[] decode(byte refAltCode) { - return new String[]{ - ALLELE_CODES[(refAltCode & 0b1100_0000) >>> 6], - ALLELE_CODES[(refAltCode & 0b0011_0000) >>> 4], - }; + @Override + public String to(String value) { + return encode(value); } - public static byte encode(char allele) { - switch (allele) { - case 'A': - return 0b00; // 0 - case 'C': - return 0b01; // 1 - case 'G': - return 0b10; // 2 - case 'T': - return 0b11; // 3 - default: - throw new IllegalArgumentException("Can not codify allele " + allele); + public String encode(String allele) { + StringBuilder sb = new StringBuilder(allele.length()); + int i = 0; + while (i < allele.length()) { + char charAt = allele.charAt(i); + if (INVALID_CHARS.contains(charAt)) { + // Allele contains non accepted characters. 
Skip encode + return allele; + } + if (i + 2 < allele.length()) { + String sub = allele.substring(i, i + 3); + Character c = ALLELES_ENCODE.get(sub); + if (c != null) { + sb.append(c); + i += 3; + continue; + } + } + if (i + 1 < allele.length()) { + String sub = allele.substring(i, i + 2); + Character c = ALLELES_ENCODE.get(sub); + if (c != null) { + sb.append(c); + i += 2; + continue; + } + } + sb.append(charAt); + i++; } + return sb.toString(); } - public static byte join(byte refCode, byte altCode) { - return (byte) (refCode << 6 | altCode << 4); + @Override + public String from(String value) { + return decode(value); } + + public String decode(String allele) { + StringBuilder sb = new StringBuilder(allele.length() * 2); + for (int i = 0; i < allele.length(); i++) { + char charAt = allele.charAt(i); + if (INVALID_CHARS.contains(charAt)) { + // Allele contains non accepted characters. Skip decode + return allele; + } + if (charAt >= ALLELES_DECODE.length) { + sb.append(charAt); + } else { + String s = ALLELES_DECODE[charAt]; + if (s != null) { + sb.append(s); + } else { + sb.append(charAt); + } + } + } + return sb.toString(); + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodec.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodec.java new file mode 100644 index 00000000000..5f722e38baf --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodec.java @@ -0,0 +1,59 @@ +package org.opencb.opencga.storage.hadoop.variant.index.sample; + +public class AlleleSnvCodec { + + private static final String[] ALLELE_CODES = {"A", "C", "G", "T"}; + + public static boolean valid(String ref, String alt) { + return validAllele(ref) + && validAllele(alt) + && !ref.equals(alt); + } + + public static boolean validAllele(String allele) { + if (allele.length() == 1) { + switch (allele.charAt(0)) { + case 'A': + case 'C': + case 'G': + case 'T': + return true; + default: + return false; + } + } + return false; + } + + public static byte encode(String ref, String alt) { + byte refCode = encode(ref.charAt(0)); + byte altCode = encode(alt.charAt(0)); + return join(refCode, altCode); + } + + public static String[] decode(byte refAltCode) { + return new String[]{ + ALLELE_CODES[(refAltCode & 0b1100_0000) >>> 6], + ALLELE_CODES[(refAltCode & 0b0011_0000) >>> 4], + }; + } + + public static byte encode(char allele) { + switch (allele) { + case 'A': + return 0b00; // 0 + case 'C': + return 0b01; // 1 + case 'G': + return 0b10; // 2 + case 'T': + return 0b11; // 3 + default: + throw new IllegalArgumentException("Can not codify allele " + allele); + } + } + + public static byte join(byte refCode, byte altCode) { + return (byte) (refCode << 6 | altCode << 4); + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java index 91b6540c79b..8df0628647c 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java @@ -1,13 +1,16 @@ package org.opencb.opencga.storage.hadoop.variant.index.sample; +import org.apache.commons.lang3.tuple.Pair; +import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.avro.AlternateCoordinate; import org.opencb.biodata.models.variant.avro.OriginalCall; +import org.opencb.biodata.tools.commons.BiConverter; import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; -import org.opencb.opencga.storage.hadoop.variant.index.core.DataField; -import org.opencb.opencga.storage.hadoop.variant.index.core.DataSchema; -import org.opencb.opencga.storage.hadoop.variant.index.core.VarcharDataField; +import org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; +import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory; +import org.opencb.opencga.storage.hadoop.variant.index.core.*; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -16,75 +19,24 @@ public class FileDataIndexSchema extends DataSchema { - private final DataField originalCallField; - private final DataField> secondaryAlternatesField; + private final DataFieldWithContext originalCallField; + private final DataFieldWithContext> secondaryAlternatesField; private boolean includeOriginalCall = true; private boolean includeSecondaryAlternates = true; public FileDataIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileIndexConfiguration) { if (includeOriginalCall) { - originalCallField = new VarcharDataField( + originalCallField = new VarBinaryDataField( new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "ORIGINAL_CALL", null)) - .from( - (OriginalCall oc) -> oc == null ? 
null : oc.getVariantId() + "+" + oc.getAlleleIndex(), - (String s) -> { - if (s == null || s.isEmpty()) { - return null; - } else { - String[] split = s.split("\\+", 2); - return new OriginalCall(split[0], Integer.parseInt(split[1])); - } - }); + .fromWithContext(new VariantOriginalCallToBytesConverter()); addField(originalCallField); } else { originalCallField = null; } if (includeSecondaryAlternates) { - secondaryAlternatesField = new VarcharDataField( + secondaryAlternatesField = new VarBinaryDataField( new IndexFieldConfiguration(IndexFieldConfiguration.Source.STUDY, "SECONDARY_ALTERNATES", null)) - .from((List secondaryAlternates) -> { - if (secondaryAlternates == null || secondaryAlternates.isEmpty()) { - return ""; - } - boolean needsSeparator = false; - StringBuilder sb = new StringBuilder(); - for (AlternateCoordinate alternate : secondaryAlternates) { - if (needsSeparator) { - sb.append(','); - } - sb.append(alternate.getChromosome()); - sb.append("+"); - sb.append(alternate.getStart()); - sb.append("+"); - sb.append(alternate.getEnd()); - sb.append("+"); - sb.append(alternate.getReference()); - sb.append("+"); - sb.append(alternate.getAlternate()); - needsSeparator = true; - } - - return sb.toString(); - }, (String s) -> { - if (s == null || s.isEmpty()) { - return Collections.emptyList(); - } - String[] split = s.split(","); - List alternates = new ArrayList<>(split.length); - for (String alt : split) { - String[] altSplit = alt.split("\\+", 5); - String alternate = altSplit.length == 5 ? altSplit[4] : ""; - alternates.add(new AlternateCoordinate( - altSplit[0], - Integer.parseInt(altSplit[1]), - Integer.parseInt(altSplit[2]), - altSplit[3], - alternate, - VariantBuilder.inferType(altSplit[3], alternate) - )); - } - return alternates; - }); + .fromWithContext(new AlternateCoordinateToBytesConverter()); addField(secondaryAlternatesField); } else { secondaryAlternatesField = null; @@ -95,7 +47,7 @@ public boolean isIncludeOriginalCall() { return includeOriginalCall; } - public DataField getOriginalCallField() { + public DataFieldWithContext getOriginalCallField() { return originalCallField; } @@ -103,16 +55,180 @@ public boolean isIncludeSecondaryAlternates() { return includeSecondaryAlternates; } - public DataField> getSecondaryAlternatesField() { + public DataFieldWithContext> getSecondaryAlternatesField() { return secondaryAlternatesField; } - public OriginalCall readOriginalCall(ByteBuffer fileDataBitBuffer) { - return readField(fileDataBitBuffer, originalCallField); + public void writeOriginalCall(Variant variant, OriginalCall call, ByteBuffer bb) { + getOriginalCallField().write(variant, call, bb); } - public List readSecondaryAlternates(ByteBuffer fileDataBitBuffer) { - return readField(fileDataBitBuffer, secondaryAlternatesField); + public OriginalCall readOriginalCall(ByteBuffer fileDataByteBuffer, Variant variant) { + return readFieldAndDecode(fileDataByteBuffer, originalCallField, variant); + } + + public List readSecondaryAlternates(ByteBuffer fileDataBitBuffer, Variant variant) { + return readFieldAndDecode(fileDataBitBuffer, secondaryAlternatesField, variant); + } + + protected static class VariantOriginalCallToBytesConverter + implements BiConverter, Pair> { + + private final VarIntDataField varint = new VarIntDataField(null); + private final VarSIntDataField varsint = new VarSIntDataField(null); + private final VarCharDataField varchar = new VarCharDataField(null); + private final DataField alleleField = new VarCharDataField(null).from(new AlleleCodec()); + + 
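// ---------------------------------------------------------------------------------------------
// Editor's illustration (hypothetical helper, not part of this patch): the layout written by
// to() below keeps only deltas against the normalized variant -- the allele index as an
// unsigned varint, the start offset as a zigzag-style signed varint, the reference and the
// alternate (plus any symbolic/SV extras) compressed through AlleleCodec, and the original
// chromosome only when normalization changed it; from() reverses the same steps using the
// normalized Variant as context. The signed mapping used by VarSIntDataField is sketched here:
private static int zigZagSketch(int n) {
    // non-negative values map to even codes (2 * n), negative values to odd codes (2 * |n| + 1)
    return n < 0 ? ((-n) << 1) + 1 : (n << 1);
}
// ---------------------------------------------------------------------------------------------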
@Override + public Pair to(Pair pair) { + if (pair == null) { + return Pair.of(null, ByteBuffer.allocate(0)); + } else if (pair.getValue() == null) { + return Pair.of(pair.getKey(), ByteBuffer.allocate(0)); + } else { + Variant variant = pair.getKey(); + OriginalCall call = pair.getValue(); + + Variant originalVariant = new Variant(call.getVariantId()); + + // Read the chromosome from the original variantId + // Can't read from the "originalVariant", as the chromosome may be normalized in the Variant constructor + String chromosome = call.getVariantId().substring(0, call.getVariantId().indexOf(":")); + boolean normalizedChromosome = !variant.getChromosome().equals(chromosome); + + String reference = originalVariant.getReference(); + + int alternatesIdx = call.getVariantId().indexOf(","); + String alternate = originalVariant.getAlternate(); + if (alternatesIdx > 0) { + alternate += call.getVariantId().substring(alternatesIdx); + } + String alternateAndExtras = VariantPhoenixKeyFactory + .buildSymbolicAlternate(reference, alternate, originalVariant.getEnd(), originalVariant.getSv()); + + int relativeStart = originalVariant.getStart() - variant.getStart(); + + ByteBuffer buffer = ByteBuffer.allocate( + varint.getByteLength(call.getAlleleIndex()) + + varsint.getByteLength(relativeStart) + + alleleField.getByteLength(reference) + + alleleField.getByteLength(alternateAndExtras) + + (normalizedChromosome ? varchar.getByteLength(chromosome) : 0 + )); + + varint.write(call.getAlleleIndex(), buffer); + varsint.write(relativeStart, buffer); + alleleField.write(reference, buffer); + alleleField.write(alternateAndExtras, buffer); + + if (normalizedChromosome) { + varchar.write(chromosome, buffer); + } + + buffer.limit(buffer.position() - 1); + buffer.rewind(); + return Pair.of(variant, buffer); + } + } + + @Override + public Pair from(Pair pair) { + if (pair == null) { + return Pair.of(null, null); + } + ByteBuffer byteBuffer = pair.getValue(); + Variant variant = pair.getKey(); + if (byteBuffer == null || variant == null || byteBuffer.limit() == 0) { + return Pair.of(variant, null); + } else { + byteBuffer.rewind(); + int alleleIndex = varint.readAndDecode(byteBuffer); + int start = varsint.readAndDecode(byteBuffer) + variant.getStart(); + String reference = alleleField.readAndDecode(byteBuffer); + String alternate = alleleField.readAndDecode(byteBuffer); + + String variantId = VariantPhoenixKeyFactory.buildVariant(variant.getChromosome(), start, reference, alternate, null, null) + .toString(); + + if (byteBuffer.hasRemaining()) { + // Replace chromosome with the original chromosome + // Can't set the chromosome directly, as the chromosome may be normalized in the Variant constructor + String originalChromosome = varchar.readAndDecode(byteBuffer); + variantId = variantId.substring(variantId.indexOf(":") + 1); + variantId = originalChromosome + ":" + variantId; + } + return Pair.of(variant, new OriginalCall(variantId, alleleIndex)); + } + } + } + + + private static class AlternateCoordinateToBytesConverter + implements BiConverter>, Pair> { + + private final VarSIntDataField varsint = new VarSIntDataField(null); + private final VarCharDataField varchar = new VarCharDataField(null); + private final DataField alleleField = new VarCharDataField(null).from(new AlleleCodec()); + + + @Override + public Pair to(Pair> pair) { + if (pair == null) { + return Pair.of(null, ByteBuffer.allocate(0)); + } + if (pair.getValue() == null || pair.getValue().isEmpty() || pair.getKey() == null) { + return 
Pair.of(pair.getKey(), ByteBuffer.allocate(0)); + } + Variant variant = pair.getKey(); + + ExposedByteArrayOutputStream stream = new ExposedByteArrayOutputStream(); + for (AlternateCoordinate alternate : pair.getValue()) { + if (!alternate.getChromosome().equals(variant.getChromosome())) { + varchar.write(alternate.getChromosome(), stream); + } else { + varchar.write("", stream); + } + + varsint.write(alternate.getStart() - variant.getStart(), stream); + varsint.write(alternate.getEnd() - variant.getStart(), stream); + alleleField.write(alternate.getAlternate(), stream); + alleleField.write(alternate.getReference(), stream); + } + + return Pair.of(variant, stream.toByteByffer()); + } + + @Override + public Pair> from(Pair pair) { + if (pair == null) { + return Pair.of(null, Collections.emptyList()); + } + if (pair.getValue() == null || pair.getKey() == null) { + return Pair.of(pair.getKey(), Collections.emptyList()); + } + List alternates = new ArrayList<>(2); + Variant variant = pair.getKey(); + ByteBuffer byteBuffer = pair.getValue(); + while (byteBuffer.hasRemaining()) { + String chr = varchar.readAndDecode(byteBuffer); + if (chr == null || chr.isEmpty()) { + chr = variant.getChromosome(); + } + int start = varsint.readAndDecode(byteBuffer) + variant.getStart(); + int end = varsint.readAndDecode(byteBuffer) + variant.getStart(); + String alternate = alleleField.readAndDecode(byteBuffer); + String reference = alleleField.readAndDecode(byteBuffer); + alternates.add(new AlternateCoordinate( + chr, + start, + end, + reference, + alternate, + VariantBuilder.inferType(reference, alternate))); + + } + return Pair.of(variant, alternates); + } } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java index 886e5b97bbf..d354900fa06 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java @@ -42,7 +42,7 @@ public int expectedSize(Variant variant, boolean interVariantSeparator) { } protected int expectedSize(String reference, String alternate, boolean interVariantSeparator) { - if (AlleleCodec.valid(reference, alternate)) { + if (AlleleSnvCodec.valid(reference, alternate)) { return INT24_LENGTH; // interVariantSeparator not needed when coding alleles } else { return INT24_LENGTH + reference.length() + SEPARATOR_LENGTH + alternate.length() @@ -100,9 +100,9 @@ public int toBytes(Variant variant, byte[] bytes, int offset, boolean interVaria } protected int toBytes(int relativeStart, String reference, String alternate, byte[] bytes, int offset, boolean interVariantSeparator) { - if (AlleleCodec.valid(reference, alternate)) { + if (AlleleSnvCodec.valid(reference, alternate)) { int length = append24bitInteger(relativeStart, bytes, offset); - bytes[offset] |= AlleleCodec.encode(reference, alternate); + bytes[offset] |= AlleleSnvCodec.encode(reference, alternate); return length; } else { int length = 0; @@ -118,15 +118,17 @@ protected int toBytes(int relativeStart, String reference, String alternate, byt } 
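// Editor's note (illustrative, not part of this patch): when the ref/alt pair is a plain SNV,
// AlleleSnvCodec packs it into the high nibble of the 3-byte relative-start integer: each base
// maps to 2 bits (A=00, C=01, G=10, T=11) and the pair is stored as (ref << 6 | alt << 4), so
// A>T becomes 0b0011_0000. Any valid SNV therefore leaves a non-zero high nibble, which is what
// hasEncodedAlleles() checks, while the start offset keeps the remaining 20 bits (0x0F_FF_FF).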
public Variant toVariant(String chromosome, int batchStart, byte[] bytes) { - return toVariant(chromosome, batchStart, bytes, 0); + return toVariant(chromosome, batchStart, bytes, 0, bytes.length); } - public Variant toVariant(String chromosome, int batchStart, byte[] bytes, int offset) { + public Variant toVariant(String chromosome, int batchStart, byte[] bytes, int offset, int length) { if (hasEncodedAlleles(bytes, offset)) { return toVariantEncodedAlleles(chromosome, batchStart, bytes, offset); } else { - int referenceLength = readNextSeparator(bytes, offset + INT24_LENGTH); - int alternateLength = readNextSeparator(bytes, offset + INT24_LENGTH + referenceLength + SEPARATOR_LENGTH); + int currentOffset = INT24_LENGTH; + int referenceLength = readNextSeparator(bytes, offset + currentOffset, length - currentOffset); + currentOffset += referenceLength + SEPARATOR_LENGTH; + int alternateLength = readNextSeparator(bytes, offset + currentOffset, length - currentOffset); return toVariant(chromosome, batchStart, bytes, offset, referenceLength, alternateLength); } } @@ -680,12 +682,12 @@ public String toString() { } } - private static boolean hasEncodedAlleles(byte[] bytes, int offset) { + public static boolean hasEncodedAlleles(byte[] bytes, int offset) { return (bytes[offset] & 0xF0) != 0; } private Variant toVariantEncodedAlleles(String chromosome, int batchStart, byte[] bytes, int offset) { - String[] refAlt = AlleleCodec.decode(bytes[offset]); + String[] refAlt = AlleleSnvCodec.decode(bytes[offset]); int start = batchStart + (read24bitInteger(bytes, offset) & 0x0F_FF_FF); return VariantPhoenixKeyFactory.buildVariant(chromosome, start, refAlt[0], refAlt[1], null, null); @@ -702,12 +704,16 @@ private Variant toVariant(String chromosome, int batchStart, byte[] bytes, int o } private int readNextSeparator(byte[] bytes, int offset) { - for (int i = offset; i < bytes.length; i++) { + return readNextSeparator(bytes, offset, bytes.length - offset); + } + + private int readNextSeparator(byte[] bytes, int offset, int length) { + for (int i = offset; i < (offset + length); i++) { if (bytes[i] == 0) { return i - offset; } } - return bytes.length - offset; + return length - offset; } protected int getRelativeStart(Variant variant) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java index 5990cdb4f86..59021d89ee8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java @@ -35,7 +35,7 @@ public SampleVariantIndexEntry createSampleVariantIndexEntry(int sampleIdx, int BitBuffer fileIndexValue = createFileIndexValue(variant.getType(), filePosition, file.getData(), study.getSampleDataKeyPositions(), study.getSampleData(sampleIdx)); - ByteBuffer fileDataIndexValue = createFileDataIndexValue(filePosition, file.getCall(), + ByteBuffer fileDataIndexValue = createFileDataIndexValue(variant, filePosition, file.getCall(), study.getSecondaryAlternates()); return new SampleVariantIndexEntry(variant, 
fileIndexValue, fileDataIndexValue); @@ -45,7 +45,7 @@ public SampleVariantIndexEntry createSampleVariantIndexEntry( int filePosition, Variant variant, OriginalCall call, List alts, Function fileAttributes, Function sampleData) { BitBuffer fileIndexValue = createFileIndexValue(variant.getType(), filePosition, fileAttributes, sampleData); - ByteBuffer fileDataIndexValue = createFileDataIndexValue(filePosition, call, + ByteBuffer fileDataIndexValue = createFileDataIndexValue(variant, filePosition, call, alts); return new SampleVariantIndexEntry(variant, fileIndexValue, fileDataIndexValue); @@ -129,23 +129,23 @@ private BitBuffer createFileIndexValue(VariantType type, int filePosition, Funct * @param secondaryAlternates Secondary alternates * @return BitBuffer of file index. */ - private ByteBuffer createFileDataIndexValue(int filePosition, OriginalCall call, + private ByteBuffer createFileDataIndexValue(Variant variant, int filePosition, OriginalCall call, List secondaryAlternates) { // if (fileDataIndex.isSparse()) { // } int fileDataSize = 0; if (fileDataSchema.isIncludeOriginalCall()) { - fileDataSize += fileDataSchema.getOriginalCallField().getByteLength(call); + fileDataSize += fileDataSchema.getOriginalCallField().getByteLength(variant, call); } if (fileDataSchema.isIncludeSecondaryAlternates()) { - fileDataSize += fileDataSchema.getSecondaryAlternatesField().getByteLength(secondaryAlternates); + fileDataSize += fileDataSchema.getSecondaryAlternatesField().getByteLength(variant, secondaryAlternates); } ByteBuffer bb = ByteBuffer.allocate(fileDataSize); if (fileDataSchema.isIncludeOriginalCall()) { - fileDataSchema.getOriginalCallField().write(call, bb); + fileDataSchema.writeOriginalCall(variant, call, bb); } if (fileDataSchema.isIncludeSecondaryAlternates()) { - fileDataSchema.getSecondaryAlternatesField().write(secondaryAlternates, bb); + fileDataSchema.getSecondaryAlternatesField().write(variant, secondaryAlternates, bb); } return bb; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java index 3ecfaa7952e..33844f6ff01 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java @@ -12,17 +12,17 @@ public class DataSchemaTest { private DataSchema dataSchema; - private VarcharDataField key1; - private VarcharDataField key2; + private VarCharDataField key1; + private VarCharDataField key2; private IntegerDataField key3; - private VarcharDataField key4; + private VarCharDataField key4; @Before public void setUp() throws Exception { - key1 = new VarcharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key1", null)); - key2 = new VarcharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key2", null)); + key1 = new VarCharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key1", null)); + key2 = new VarCharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key2", null)); key3 = new IntegerDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, 
"key3", null)); - key4 = new VarcharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key4", null)); + key4 = new VarCharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key4", null)); dataSchema = new DataSchema() { { addField(key1); @@ -36,7 +36,7 @@ public void setUp() throws Exception { @Test public void readWriteDefault() { ExposedByteArrayOutputStream stream = new ExposedByteArrayOutputStream(); - for (DataField field : dataSchema.getFields()) { + for (DataFieldBase field : dataSchema.getFields()) { field.write(field.getDefault(), stream); } ByteBuffer byteByffer = stream.toByteByffer(); @@ -112,9 +112,9 @@ private void checkEntry(ByteBuffer expected, ByteBuffer readEntry, int key3Numbe readEntry.rewind(); // Random field access order - Assert.assertEquals("key4_value", dataSchema.readField(readEntry, key4)); - Assert.assertEquals("key1_value", dataSchema.readField(readEntry, key1)); - Assert.assertEquals(key3NumberValue, dataSchema.readField(readEntry, key3).intValue()); - Assert.assertEquals("key2_value", dataSchema.readField(readEntry, key2)); + Assert.assertEquals("key4_value", dataSchema.readFieldAndDecode(readEntry, key4)); + Assert.assertEquals("key1_value", dataSchema.readFieldAndDecode(readEntry, key1)); + Assert.assertEquals(key3NumberValue, dataSchema.readFieldAndDecode(readEntry, key3).intValue()); + Assert.assertEquals("key2_value", dataSchema.readFieldAndDecode(readEntry, key2)); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataFieldTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataFieldTest.java new file mode 100644 index 00000000000..3110fc65f49 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataFieldTest.java @@ -0,0 +1,53 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import java.nio.ByteBuffer; + +@Category(ShortTests.class) +public class VarBinaryDataFieldTest { + + private VarBinaryDataField field; + + @Before + public void setUp() { + field = new VarBinaryDataField(null); + } + + @Test + public void testEncode() { + testEncode(ByteBuffer.wrap(new byte[]{1, 2, 3, 4, 5})); + testEncode(ByteBuffer.wrap(new byte[]{})); + testEncode(ByteBuffer.wrap(new byte[]{0})); + testEncode(ByteBuffer.wrap(new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0})); + } + + private void testEncode(ByteBuffer value) { + ByteBuffer encode = field.encode(value); + ByteBuffer decode = field.decode(encode); + + Assert.assertEquals(value, decode); + + int byteLength = field.getByteLength(value); + Assert.assertEquals(byteLength, encode.limit()); + + ByteBuffer buffer = ByteBuffer.allocate(byteLength); + field.write(value, buffer); + buffer.rewind(); + + ByteBuffer actualValue = field.readAndDecode(buffer); + buffer.rewind(); + Assert.assertEquals(value, actualValue); + + ByteBuffer readUndecoded = field.read(buffer); + buffer.rewind(); + actualValue = field.decode(readUndecoded); + Assert.assertEquals(value, actualValue); + 
} + + +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java index 4afea856064..bb865f4a0a1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataFieldTest.java @@ -3,9 +3,12 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; import java.nio.ByteBuffer; +@Category(ShortTests.class) public class VarIntDataFieldTest { private VarIntDataField field; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodecTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodecTest.java deleted file mode 100644 index 573eeeae2bc..00000000000 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleCodecTest.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.opencb.opencga.storage.hadoop.variant.index.sample; - -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import static org.junit.Assert.*; - -@Category(ShortTests.class) -public class AlleleCodecTest { - - @Test - public void codec() { - - String[] alleles = {"A", "C", "G", "T"}; - - for (String ref : alleles) { - for (String alt : alleles) { - if (ref.equals(alt)) { - continue; - } - assertTrue(AlleleCodec.valid(ref, alt)); - byte code = AlleleCodec.encode(ref, alt); -// System.out.println(ref + ", " + alt + " " + IndexUtils.byteToString(code)); - assertArrayEquals(new String[]{ref, alt}, AlleleCodec.decode(code)); - } - } - } - - @Test - public void validAlleles() { - assertTrue(AlleleCodec.validAllele("A")); - assertTrue(AlleleCodec.validAllele("C")); - assertTrue(AlleleCodec.validAllele("G")); - assertTrue(AlleleCodec.validAllele("T")); - - assertFalse(AlleleCodec.validAllele("")); - assertFalse(AlleleCodec.validAllele("N")); - assertFalse(AlleleCodec.validAllele("Z")); - assertFalse(AlleleCodec.validAllele("~")); - assertFalse(AlleleCodec.validAllele("0")); - - assertTrue(AlleleCodec.valid("C", "A")); - - assertFalse(AlleleCodec.valid("A", "A")); - assertFalse(AlleleCodec.valid("A", "N")); - assertFalse(AlleleCodec.valid("", "C")); - assertFalse(AlleleCodec.valid("A", "")); - - } - -} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodecTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodecTest.java new file mode 100644 index 00000000000..9131e86c5cf --- /dev/null +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AlleleSnvCodecTest.java @@ -0,0 +1,52 @@ +package org.opencb.opencga.storage.hadoop.variant.index.sample; + +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import static org.junit.Assert.*; + +@Category(ShortTests.class) +public class AlleleSnvCodecTest { + + @Test + public void codec() { + + String[] alleles = {"A", "C", "G", "T"}; + + for (String ref : alleles) { + for (String alt : alleles) { + if (ref.equals(alt)) { + continue; + } + assertTrue(AlleleSnvCodec.valid(ref, alt)); + byte code = AlleleSnvCodec.encode(ref, alt); +// System.out.println(ref + ", " + alt + " " + IndexUtils.byteToString(code)); + assertArrayEquals(new String[]{ref, alt}, AlleleSnvCodec.decode(code)); + } + } + } + + @Test + public void validAlleles() { + assertTrue(AlleleSnvCodec.validAllele("A")); + assertTrue(AlleleSnvCodec.validAllele("C")); + assertTrue(AlleleSnvCodec.validAllele("G")); + assertTrue(AlleleSnvCodec.validAllele("T")); + + assertFalse(AlleleSnvCodec.validAllele("")); + assertFalse(AlleleSnvCodec.validAllele("N")); + assertFalse(AlleleSnvCodec.validAllele("Z")); + assertFalse(AlleleSnvCodec.validAllele("~")); + assertFalse(AlleleSnvCodec.validAllele("0")); + + assertTrue(AlleleSnvCodec.valid("C", "A")); + + assertFalse(AlleleSnvCodec.valid("A", "A")); + assertFalse(AlleleSnvCodec.valid("A", "N")); + assertFalse(AlleleSnvCodec.valid("", "C")); + assertFalse(AlleleSnvCodec.valid("A", "")); + + } + +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java new file mode 100644 index 00000000000..4d93133a1d3 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java @@ -0,0 +1,50 @@ +package org.opencb.opencga.storage.hadoop.variant.index.sample; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.OriginalCall; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import java.nio.ByteBuffer; + +import static org.junit.Assert.*; + +@Category(ShortTests.class) +public class FileDataIndexSchemaTest { + + @Before + public void setUp() throws Exception { + } + + @Test + public void testOriginalCallConverter() { + testOriginalCallEncoding(new Variant("1:12345678:T:-"), new OriginalCall("1:12345678:TC:C", 0)); + testOriginalCallEncoding(new Variant("1:12000078:T:-"), new OriginalCall("1:12000078:TC:C", 0)); + testOriginalCallEncoding(new Variant("1:12345678:T:-"), new OriginalCall("1:12345678:TC:C,T", 2)); + testOriginalCallEncoding(new Variant("1:12345670:-:T"), new OriginalCall("1:12345678:TTTTTTTTTTTTTTTTTTC:C,TC", 2)); + testOriginalCallEncoding(new Variant("1:12345679:-:T"), new OriginalCall("1:12345670:CTTTTTTTTTTTTTTTTTT:C,CT", 2)); + testOriginalCallEncoding(new 
Variant("1:12345679:CCCTCCTCTGAGTCTTCCTCCCCTTCCCGTG:-"), new OriginalCall("1:12345670:ACCCTCCTCTGAGTCTTCCTCCCCTTCCCGTG:A", 2)); + testOriginalCallEncoding(new Variant("1:12345601-12345625:C:"), new OriginalCall("1:12345600-12345625:A:", 2)); + testOriginalCallEncoding(new Variant("1:12345679:CCCTCCTCTGAGTCTTCCTCCCCTTCCCGTG:-"), new OriginalCall("1:12345670:ACCCTCCTCTGAGTCTTCCTCCCCTTCCCGTG:A]chr1:1234]", 2)); + testOriginalCallEncoding(new Variant("1:12345679:CCCTCCTCTGAGTCTTCCTCCCCTTCCCGTG:-"), new OriginalCall("chr1:12345670:ACCCTCCTCTGAGTCTTCCTCCCCTTCCCGTG:A", 2)); + } + + private static void testOriginalCallEncoding(Variant variant, OriginalCall expected) { + FileDataIndexSchema.VariantOriginalCallToBytesConverter cpair = new FileDataIndexSchema.VariantOriginalCallToBytesConverter(); + + Pair pair = cpair.to(Pair.of(variant, expected)); + System.out.println("Bytes2 length : " + pair.getValue().limit()); + System.out.println("bytes2 = " + pair.getValue() + " - " + Bytes.toStringBinary(pair.getValue())); + OriginalCall actualFromPair = cpair.from(pair).getValue(); + System.out.println(actualFromPair); + System.out.println("----"); + assertEquals(expected, actualFromPair); + + } + + +} \ No newline at end of file From add80380300b931458dd4b754ca4c1e9066565b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 9 Sep 2024 16:28:19 +0100 Subject: [PATCH 05/19] storage: Add sampleIndexConfiguration.fileDataConfiguration. #TASK-6765 --- .../storage/SampleIndexConfiguration.java | 204 ++++++------------ .../index/sample/FileDataIndexSchema.java | 18 +- .../sample/HBaseToSampleIndexConverter.java | 7 +- .../index/sample/SampleIndexDBAdaptor.java | 1 + .../sample/SampleIndexEntryPutBuilder.java | 20 +- .../index/sample/SampleIndexSchema.java | 3 +- .../SampleVariantIndexEntryConverter.java | 5 +- .../variant/index/sample/SampleIndexTest.java | 16 +- 8 files changed, 109 insertions(+), 165 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java index 21178f7fdf4..cb89309369c 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java @@ -15,147 +15,12 @@ public class SampleIndexConfiguration { public static final int DEFAULT_FILE_POSITION_SIZE_BITS = 3; private static final double[] QUAL_THRESHOLDS = new double[]{10, 20, 30}; - private static final double[] DP_THRESHOLDS = new double[]{5, 10, 15, 20, 30, 40, 50}; private static final double[] DP_THRESHOLDS_NULLABLE = new double[]{5, 10, 15, 20, 30, 50}; + private final FileIndexConfiguration fileIndexConfiguration = new FileIndexConfiguration(); + private final FileDataConfiguration fileDataConfiguration = new FileDataConfiguration(); private final AnnotationIndexConfiguration annotationIndexConfiguration = new AnnotationIndexConfiguration(); - public static SampleIndexConfiguration backwardCompatibleConfiguration() { - double[] backwardCompatibleThresholds = new double[]{0.001, 0.005, 0.01}; - SampleIndexConfiguration sampleIndexConfiguration = new SampleIndexConfiguration() - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.FILE, - StudyEntry.FILTER, - IndexFieldConfiguration.Type.CATEGORICAL, - VCFConstants.PASSES_FILTERS_v4)) - .addFileIndexField(new 
IndexFieldConfiguration( - IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL, QUAL_THRESHOLDS).setNullable(false)) - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY, DP_THRESHOLDS).setNullable(false)); - sampleIndexConfiguration.getAnnotationIndexConfiguration().getPopulationFrequency() - .addPopulation(new Population(ParamConstants.POP_FREQ_1000G_CB_V4, "ALL")) - .addPopulation(new Population(ParamConstants.POP_FREQ_GNOMAD_GENOMES, "ALL")) - .setThresholds(backwardCompatibleThresholds); - - sampleIndexConfiguration.getFileIndexConfiguration().setFilePositionBits(4); - - // Ensure backward compatibility with these two params: - sampleIndexConfiguration.addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.SAMPLE, "padding", IndexFieldConfiguration.Type.CATEGORICAL, - "add_two_extra_bits", "to_allow_backward", "compatibility")); - sampleIndexConfiguration.getFileIndexConfiguration().setFixedFieldsFirst(false); - - IndexFieldConfiguration biotypeConfiguration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.ANNOTATION, - "biotype", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) - .setValues( - NONSENSE_MEDIATED_DECAY, - LINCRNA, - MIRNA, - RETAINED_INTRON, - SNRNA, - SNORNA, - "other_non_pseudo_gene", -// "other", - PROTEIN_CODING - ).setValuesMapping(new HashMap<>()); - biotypeConfiguration.getValuesMapping().put(LINCRNA, Arrays.asList( - "lncRNA", - NON_CODING, - LINCRNA, - "macro_lncRNA", - ANTISENSE, - SENSE_INTRONIC, - SENSE_OVERLAPPING, - THREEPRIME_OVERLAPPING_NCRNA, - "bidirectional_promoter_lncRNA")); - biotypeConfiguration.getValuesMapping().put("other_non_pseudo_gene", Arrays.asList( - PROCESSED_TRANSCRIPT, - NON_STOP_DECAY, - MISC_RNA, - RRNA, - MT_RRNA, - MT_TRNA, - IG_C_GENE, - IG_D_GENE, - IG_J_GENE, - IG_V_GENE, - TR_C_GENE, - TR_D_GENE, - TR_J_GENE, - TR_V_GENE, - NMD_TRANSCRIPT_VARIANT, - TRANSCRIBED_UNPROCESSED_PSEUDGENE, - AMBIGUOUS_ORF, - KNOWN_NCRNA, - RETROTRANSPOSED, - LRG_GENE - )); - biotypeConfiguration.setNullable(false); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setBiotype(biotypeConfiguration); - IndexFieldConfiguration consequenceType = new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, - "consequenceType", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) - .setValues( - SPLICE_DONOR_VARIANT, - TRANSCRIPT_ABLATION, - TRANSCRIPT_AMPLIFICATION, - INITIATOR_CODON_VARIANT, - SPLICE_REGION_VARIANT, - INCOMPLETE_TERMINAL_CODON_VARIANT, - "utr", - "mirna_tfbs", - MISSENSE_VARIANT, - FRAMESHIFT_VARIANT, - INFRAME_DELETION, - INFRAME_INSERTION, - START_LOST, - STOP_GAINED, - STOP_LOST, - SPLICE_ACCEPTOR_VARIANT - ).setValuesMapping(new HashMap<>()); - consequenceType.getValuesMapping().put("mirna_tfbs", Arrays.asList( - TF_BINDING_SITE_VARIANT, - MATURE_MIRNA_VARIANT)); - consequenceType.getValuesMapping().put("utr", Arrays.asList( - THREE_PRIME_UTR_VARIANT, - FIVE_PRIME_UTR_VARIANT)); - consequenceType.setNullable(false); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setConsequenceType(consequenceType); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setTranscriptFlagIndexConfiguration( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, - "transcriptFlag", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, - "do_not_use" - ).setNullable(false)); - sampleIndexConfiguration.getAnnotationIndexConfiguration().setTranscriptCombination(false); - - 
sampleIndexConfiguration.getAnnotationIndexConfiguration().setClinicalSource( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, "clinicalSource", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, "cosmic") - .setNullable(false)); - sampleIndexConfiguration.getAnnotationIndexConfiguration().setClinicalSignificance( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, "clinicalSignificance", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, - ClinicalSignificance.likely_benign.toString(), - ClinicalSignificance.uncertain_significance.toString(), - ClinicalSignificance.likely_pathogenic.toString(), - ClinicalSignificance.pathogenic.toString(), - "unused_target_drug", - "unused_pgx", - "unused_bit8" - ).setNullable(false)); - - return sampleIndexConfiguration; - } - public static SampleIndexConfiguration defaultConfiguration() { return defaultConfiguration(false); } @@ -178,6 +43,9 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) sampleIndexConfiguration.getFileIndexConfiguration() .setFilePositionBits(DEFAULT_FILE_POSITION_SIZE_BITS); + sampleIndexConfiguration.getFileDataConfiguration() + .setIncludeOriginalCall(true) + .setIncludeSecondaryAlternates(true); IndexFieldConfiguration biotypeConfiguration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.ANNOTATION, "biotype", @@ -312,10 +180,6 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) return sampleIndexConfiguration; } - public void validate() { - validate(null); - } - public void validate(String cellbaseVersion) { addMissingValues(defaultConfiguration("v4".equalsIgnoreCase(cellbaseVersion))); @@ -336,6 +200,13 @@ public void addMissingValues(SampleIndexConfiguration defaultConfiguration) { if (fileIndexConfiguration.getCustomFields().isEmpty()) { fileIndexConfiguration.getCustomFields().addAll(defaultConfiguration.fileIndexConfiguration.customFields); } + if (fileDataConfiguration.includeOriginalCall == null) { + fileDataConfiguration.includeOriginalCall = defaultConfiguration.fileDataConfiguration.includeOriginalCall; + } + if (fileDataConfiguration.includeSecondaryAlternates == null) { + fileDataConfiguration.includeSecondaryAlternates = defaultConfiguration.fileDataConfiguration.includeSecondaryAlternates; + } + if (annotationIndexConfiguration.getPopulationFrequency() == null) { annotationIndexConfiguration.setPopulationFrequency(defaultConfiguration.annotationIndexConfiguration.populationFrequency); } @@ -368,6 +239,53 @@ public void addMissingValues(SampleIndexConfiguration defaultConfiguration) { } } + public static class FileDataConfiguration { + private Boolean includeOriginalCall; + private Boolean includeSecondaryAlternates; + + public FileDataConfiguration() { + // By default, left as null. + // The defaultConfiguration will set it to true when constructed. 
+ this.includeOriginalCall = null; + this.includeSecondaryAlternates = null; + } + + public Boolean getIncludeOriginalCall() { + return includeOriginalCall; + } + + public FileDataConfiguration setIncludeOriginalCall(Boolean includeOriginalCall) { + this.includeOriginalCall = includeOriginalCall; + return this; + } + + public boolean isIncludeOriginalCall() { + return includeOriginalCall != null && includeOriginalCall; + } + + public Boolean getIncludeSecondaryAlternates() { + return includeSecondaryAlternates; + } + + public FileDataConfiguration setIncludeSecondaryAlternates(Boolean includeSecondaryAlternates) { + this.includeSecondaryAlternates = includeSecondaryAlternates; + return this; + } + + public boolean isIncludeSecondaryAlternates() { + return includeSecondaryAlternates != null && includeSecondaryAlternates; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("FileDataConfiguration{"); + sb.append("includeOriginalCall=").append(includeOriginalCall); + sb.append(", includeSecondaryAlternates=").append(includeSecondaryAlternates); + sb.append('}'); + return sb.toString(); + } + } + public static class FileIndexConfiguration { private final List customFields = new ArrayList<>(); @@ -687,6 +605,9 @@ public FileIndexConfiguration getFileIndexConfiguration() { return fileIndexConfiguration; } + public FileDataConfiguration getFileDataConfiguration() { + return fileDataConfiguration; + } public SampleIndexConfiguration addFileIndexField(IndexFieldConfiguration fileIndex) { if (fileIndexConfiguration.getCustomFields().contains(fileIndex)) { @@ -719,6 +640,7 @@ public int hashCode() { public String toString() { final StringBuilder sb = new StringBuilder("SampleIndexConfiguration{"); sb.append("fileIndexConfiguration=").append(fileIndexConfiguration); + sb.append("fileDataConfiguration=").append(fileDataConfiguration); sb.append(", annotationIndexConfiguration=").append(annotationIndexConfiguration); sb.append('}'); return sb.toString(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java index 8df0628647c..371657ae3bf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java @@ -21,11 +21,11 @@ public class FileDataIndexSchema extends DataSchema { private final DataFieldWithContext originalCallField; private final DataFieldWithContext> secondaryAlternatesField; - private boolean includeOriginalCall = true; - private boolean includeSecondaryAlternates = true; + private final SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration; - public FileDataIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileIndexConfiguration) { - if (includeOriginalCall) { + public FileDataIndexSchema(SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration) { + this.fileDataConfiguration = fileDataConfiguration; + if (fileDataConfiguration.isIncludeOriginalCall()) { originalCallField = new VarBinaryDataField( new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, 
"ORIGINAL_CALL", null)) .fromWithContext(new VariantOriginalCallToBytesConverter()); @@ -33,7 +33,7 @@ public FileDataIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileI } else { originalCallField = null; } - if (includeSecondaryAlternates) { + if (fileDataConfiguration.isIncludeOriginalCall()) { secondaryAlternatesField = new VarBinaryDataField( new IndexFieldConfiguration(IndexFieldConfiguration.Source.STUDY, "SECONDARY_ALTERNATES", null)) .fromWithContext(new AlternateCoordinateToBytesConverter()); @@ -44,7 +44,7 @@ public FileDataIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileI } public boolean isIncludeOriginalCall() { - return includeOriginalCall; + return fileDataConfiguration.isIncludeOriginalCall(); } public DataFieldWithContext getOriginalCallField() { @@ -52,17 +52,13 @@ public DataFieldWithContext getOriginalCallField() { } public boolean isIncludeSecondaryAlternates() { - return includeSecondaryAlternates; + return fileDataConfiguration.isIncludeSecondaryAlternates(); } public DataFieldWithContext> getSecondaryAlternatesField() { return secondaryAlternatesField; } - public void writeOriginalCall(Variant variant, OriginalCall call, ByteBuffer bb) { - getOriginalCallField().write(variant, call, bb); - } - public OriginalCall readOriginalCall(ByteBuffer fileDataByteBuffer, Variant variant) { return readFieldAndDecode(fileDataByteBuffer, originalCallField, variant); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java index f6d9fd64638..033105e49a9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java @@ -165,7 +165,12 @@ public Map> convertToMapSampleVariantIn BitBuffer fileIndexEntry; do { fileIndexEntry = fileIndexSchema.readEntry(fileIndexStream); - ByteBuffer fileDataEntry = fileDataSchema.readNextEntry(fileDataBuffer); + ByteBuffer fileDataEntry; + if (fileDataBuffer == null) { + fileDataEntry = null; + } else { + fileDataEntry = fileDataSchema.readNextEntry(fileDataBuffer); + } values.add(new SampleVariantIndexEntry(variant, fileIndexEntry, fileDataEntry)); } while (this.fileIndexSchema.isMultiFile(fileIndexEntry)); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 1280bc187d3..8218ca08a6c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -636,6 +636,7 @@ private Scan parse(SingleSampleIndexQuery query, LocusQuery locusQuery, boolean 
scan.setCaching(hBaseManager.getConf().getInt("hbase.client.scanner.caching", 100)); logger.info("---------"); + logger.info("Study = \"" + query.getStudy() + "\" (id=" + studyId + ")"); logger.info("Sample = \"" + query.getSample() + "\" (id=" + sampleId + ") , schema version = " + query.getSchema().getVersion()); logger.info("Table = " + getSampleIndexTableName(query)); printScan(scan); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java index e62c546120f..20e04c2c401 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java @@ -181,10 +181,12 @@ public void build(Put put) { put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeCountColumn(gt), Bytes.toBytes(variants.size())); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toFileIndexColumn(gt), fileIndexBuffer.getBuffer()); int position = fileDataIndexBuffer.position(); - fileDataIndexBuffer.rewind(); - fileDataIndexBuffer.limit(position); - put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), put.getTimestamp(), - fileDataIndexBuffer); + if (position > 0) { + fileDataIndexBuffer.rewind(); + fileDataIndexBuffer.limit(position); + put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), put.getTimestamp(), + fileDataIndexBuffer); + } } } @@ -404,7 +406,9 @@ private void partialBuild(boolean flush) { fileIndexBuffer.setBitBuffer(gtEntry.getFilesIndex().get(0), offset); offset += fileIndexSchema.getBitsLength(); prev = gtEntry; - fileDataIndexSchema.writeEntry(fileDataBuffer, gtEntry.getFileData().get(0)); + if (!gtEntry.getFileData().isEmpty()) { + fileDataIndexSchema.writeEntry(fileDataBuffer, gtEntry.getFileData().get(0)); + } } // Do not write the whole buffer, but only the corresponding to the processed entries. 
@@ -422,8 +426,10 @@ public void build(Put put) { put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeColumn(gt), variantsBuffer); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeCountColumn(gt), Bytes.toBytes(variantsCount)); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toFileIndexColumn(gt), fileIndexBuffer.toByteArray()); - put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), - put.getTimestamp(), fileDataBuffer.toByteByffer()); + if (fileDataBuffer.size() > 0) { + put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), + put.getTimestamp(), fileDataBuffer.toByteByffer()); + } } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index bd57ae196fb..2d4d700cf76 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -144,7 +144,7 @@ public SampleIndexSchema(SampleIndexConfiguration configuration, int version) { this.version = version; this.configuration = configuration; fileIndex = new FileIndexSchema(configuration.getFileIndexConfiguration()); - fileData = new FileDataIndexSchema(configuration.getFileIndexConfiguration()); + fileData = new FileDataIndexSchema(configuration.getFileDataConfiguration()); // annotationSummaryIndexSchema = new AnnotationSummaryIndexSchema(); ctIndex = new ConsequenceTypeIndexSchema(configuration.getAnnotationIndexConfiguration().getConsequenceType()); biotypeIndex = new BiotypeIndexSchema(configuration.getAnnotationIndexConfiguration().getBiotype()); @@ -218,6 +218,7 @@ public String toString() { sb.append("version=").append(version); sb.append(", configuration=").append(configuration); sb.append(", fileIndex=").append(fileIndex); + sb.append(", fileData=").append(fileData); sb.append(", popFreqIndex=").append(popFreqIndex); sb.append(", ctIndex=").append(ctIndex); sb.append(", biotypeIndex=").append(biotypeIndex); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java index 59021d89ee8..8916ad975fe 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java @@ -140,9 +140,12 @@ private ByteBuffer createFileDataIndexValue(Variant variant, int filePosition, O if (fileDataSchema.isIncludeSecondaryAlternates()) { fileDataSize += fileDataSchema.getSecondaryAlternatesField().getByteLength(variant, secondaryAlternates); } + if (fileDataSize == 0) { + return null; + } ByteBuffer bb = ByteBuffer.allocate(fileDataSize); if 
(fileDataSchema.isIncludeOriginalCall()) { - fileDataSchema.writeOriginalCall(variant, call, bb); + fileDataSchema.getOriginalCallField().write(variant, call, bb); } if (fileDataSchema.isIncludeSecondaryAlternates()) { fileDataSchema.getSecondaryAlternatesField().write(variant, secondaryAlternates, bb); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index 55d9d98a0f8..c7660e2475a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -158,8 +158,12 @@ public void load() throws Exception { .append(VariantStorageOptions.STATS_CALCULATE.key(), false) .append(VariantStorageOptions.LOAD_SPLIT_DATA.key(), VariantStorageEngine.SplitData.MULTI); - versioned = metadataManager.addSampleIndexConfiguration(STUDY_NAME_2, SampleIndexConfiguration.defaultConfiguration() - .addFileIndexField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.SAMPLE, "DS", new double[]{0, 1, 2})), true); + SampleIndexConfiguration configuration = SampleIndexConfiguration.defaultConfiguration() + .addFileIndexField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.SAMPLE, "DS", new double[]{0, 1, 2})); + configuration.getFileDataConfiguration() + .setIncludeOriginalCall(null) + .setIncludeSecondaryAlternates(null); + versioned = metadataManager.addSampleIndexConfiguration(STUDY_NAME_2, configuration, true); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.STAGING, versioned.getStatus()); @@ -227,7 +231,7 @@ public void load() throws Exception { // Study 1 - extra sample index configuration, not staging, only one sample in that configuration - SampleIndexConfiguration configuration = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest().getConfiguration(); + configuration = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest().getConfiguration(); // Don't modify the configuration. 
versioned = engine.getMetadataManager().addSampleIndexConfiguration(STUDY_NAME, configuration, true); assertEquals(2, versioned.getVersion()); @@ -250,11 +254,17 @@ public void load() throws Exception { versioned = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest(false); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE, versioned.getStatus()); + // No fileData fields should be null + assertNotNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeOriginalCall()); + assertNotNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeSecondaryAlternates()); // Study 2 - Latest should be active versioned = metadataManager.getStudyMetadata(STUDY_NAME_2).getSampleIndexConfiguration(versioned.getVersion()); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE, versioned.getStatus()); + // Both fileData fields should be null + assertNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeOriginalCall()); + assertNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeSecondaryAlternates()); // Study 3 - Latest should be active versioned = metadataManager.getStudyMetadata(STUDY_NAME_3).getSampleIndexConfiguration(versioned.getVersion()); From 3bff019ded4690d736893170d01027c7e50696f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 9 Sep 2024 22:54:35 +0100 Subject: [PATCH 06/19] storage: Fix junit tests. #TASK-6765 --- .../sample/SampleIndexVariantBiConverter.java | 18 ++++++------------ .../mr/VariantLocalConflictResolverTest.java | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java index d354900fa06..b3a41a9b3b8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java @@ -118,17 +118,15 @@ protected int toBytes(int relativeStart, String reference, String alternate, byt } public Variant toVariant(String chromosome, int batchStart, byte[] bytes) { - return toVariant(chromosome, batchStart, bytes, 0, bytes.length); + return toVariant(chromosome, batchStart, bytes, 0); } - public Variant toVariant(String chromosome, int batchStart, byte[] bytes, int offset, int length) { + public Variant toVariant(String chromosome, int batchStart, byte[] bytes, int offset) { if (hasEncodedAlleles(bytes, offset)) { return toVariantEncodedAlleles(chromosome, batchStart, bytes, offset); } else { - int currentOffset = INT24_LENGTH; - int referenceLength = readNextSeparator(bytes, offset + currentOffset, length - currentOffset); - currentOffset += referenceLength + SEPARATOR_LENGTH; - int alternateLength = readNextSeparator(bytes, offset + currentOffset, length - currentOffset); + int referenceLength = readNextSeparator(bytes, offset + INT24_LENGTH); + int alternateLength = readNextSeparator(bytes, offset + 
INT24_LENGTH + referenceLength + SEPARATOR_LENGTH); return toVariant(chromosome, batchStart, bytes, offset, referenceLength, alternateLength); } } @@ -704,16 +702,12 @@ private Variant toVariant(String chromosome, int batchStart, byte[] bytes, int o } private int readNextSeparator(byte[] bytes, int offset) { - return readNextSeparator(bytes, offset, bytes.length - offset); - } - - private int readNextSeparator(byte[] bytes, int offset, int length) { - for (int i = offset; i < (offset + length); i++) { + for (int i = offset; i < bytes.length; i++) { if (bytes[i] == 0) { return i - offset; } } - return length - offset; + return bytes.length - offset; } protected int getRelativeStart(Variant variant) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/archive/mr/VariantLocalConflictResolverTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/archive/mr/VariantLocalConflictResolverTest.java index f716adb0110..a450c22c929 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/archive/mr/VariantLocalConflictResolverTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/archive/mr/VariantLocalConflictResolverTest.java @@ -169,7 +169,7 @@ public void resolveConflictIndelCase1() throws Exception { se.setSampleDataKeys(Arrays.asList(GENOTYPE_KEY, GENOTYPE_FILTER_KEY)); se.setSamplesPosition(asMap("S1", 0)); se.addSampleData("S1", Arrays.asList("1/2", "LowGQXHetDel")); - se.getSecondaryAlternates().add(new AlternateCoordinate(null, null, 328, "CTT", "CTTTC", INDEL)); + se.getSecondaryAlternates().add(new AlternateCoordinate(null, 328, 330, "CTT", "CTTTC", INDEL)); addAttribute(v1, FILTER, "LowGQXHetDel"); From 42fcd2b96951db20d52a24542581af76b1916c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 10 Sep 2024 00:52:33 +0100 Subject: [PATCH 07/19] storage: Add VariantQueryParam "source" #TASK-6765 --- .../manager/VariantCatalogQueryUtils.java | 54 +-- .../options/ClinicalCommandOptions.java | 1 + .../options/VariantCommandOptions.java | 45 ++- .../AnalysisClinicalCommandExecutor.java | 1 + .../AnalysisVariantCommandExecutor.java | 1 + .../AnalysisClinicalCommandOptions.java | 5 +- .../AnalysisVariantCommandOptions.java | 317 +++++++++-------- .../src/main/R/R/Clinical-methods.R | 5 +- opencga-client/src/main/R/R/Variant-methods.R | 9 +- .../rest/clients/ClinicalAnalysisClient.java | 8 +- .../client/rest/clients/VariantClient.java | 16 +- .../src/main/javascript/ClinicalAnalysis.js | 7 +- opencga-client/src/main/javascript/Variant.js | 15 +- .../rest_clients/clinical_analysis_client.py | 10 +- .../pyopencga/rest_clients/variant_client.py | 18 +- .../variant/AnnotationVariantQueryParams.java | 29 ++ .../variant/BasicVariantQueryParams.java | 4 + .../SampleVariantStatsAnalysisParams.java | 22 +- .../models/variant/VariantQueryParams.java | 275 ++++++++++++++ .../rest/analysis/ClinicalWebService.java | 121 +++---- .../rest/analysis/VariantWebService.java | 334 +++++++++--------- .../VariantOperationWebService.java | 9 +- .../options/StorageVariantCommandOptions.java | 104 +++--- .../core/variant/VariantStorageEngine.java | 2 +- .../core/variant/adaptors/VariantQuery.java | 15 + .../variant/adaptors/VariantQueryParam.java | 279 +-------------- 
.../variant/query/ParsedVariantQuery.java | 11 + .../variant/query/VariantQueryParser.java | 1 + .../variant/query/VariantQuerySource.java | 39 ++ .../core/variant/query/VariantQueryUtils.java | 3 +- .../query/executors/VariantQueryExecutor.java | 23 +- .../SampleIndexOnlyVariantQueryExecutor.java | 39 +- .../variant/index/sample/SampleIndexTest.java | 44 ++- 33 files changed, 1042 insertions(+), 824 deletions(-) create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQuerySource.java diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java index b26a7f6beff..e73cb6e71c4 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java @@ -56,6 +56,7 @@ import org.opencb.opencga.core.models.sample.SamplePermissions; import org.opencb.opencga.core.models.study.Study; import org.opencb.opencga.core.models.user.UserFilter; +import org.opencb.opencga.core.models.variant.VariantQueryParams; import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -89,70 +90,45 @@ */ public class VariantCatalogQueryUtils extends CatalogUtils { - public static final String SAMPLE_ANNOTATION_DESC = - "Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith"; public static final QueryParam SAMPLE_ANNOTATION - = QueryParam.create("sampleAnnotation", SAMPLE_ANNOTATION_DESC, QueryParam.Type.TEXT_ARRAY); - public static final String PROJECT_DESC = ParamConstants.PROJECT_DESCRIPTION; - public static final QueryParam PROJECT = QueryParam.create(ParamConstants.PROJECT_PARAM, PROJECT_DESC, QueryParam.Type.TEXT_ARRAY); + = QueryParam.create("sampleAnnotation", VariantQueryParams.SAMPLE_ANNOTATION_DESC, QueryParam.Type.TEXT_ARRAY); + public static final QueryParam PROJECT = QueryParam.create(ParamConstants.PROJECT_PARAM, VariantQueryParams.PROJECT_DESC, QueryParam.Type.TEXT_ARRAY); - public static final String FAMILY_DESC = "Filter variants where any of the samples from the given family contains the variant " - + "(HET or HOM_ALT)"; public static final QueryParam FAMILY = - QueryParam.create("family", FAMILY_DESC, QueryParam.Type.TEXT); - public static final String FAMILY_MEMBERS_DESC = "Sub set of the members of a given family"; + QueryParam.create("family", VariantQueryParams.FAMILY_DESC, QueryParam.Type.TEXT); public static final QueryParam FAMILY_MEMBERS = - QueryParam.create("familyMembers", FAMILY_MEMBERS_DESC, QueryParam.Type.TEXT); - public static final String FAMILY_DISORDER_DESC = "Specify the disorder to use for the family segregation"; + QueryParam.create("familyMembers", VariantQueryParams.FAMILY_MEMBERS_DESC, QueryParam.Type.TEXT); public static final QueryParam FAMILY_DISORDER = - QueryParam.create("familyDisorder", FAMILY_DISORDER_DESC, QueryParam.Type.TEXT); - public static final String FAMILY_PROBAND_DESC = "Specify the proband child to use for the family segregation"; + QueryParam.create("familyDisorder", VariantQueryParams.FAMILY_DISORDER_DESC, QueryParam.Type.TEXT); public static final QueryParam 
FAMILY_PROBAND = - QueryParam.create("familyProband", FAMILY_PROBAND_DESC, QueryParam.Type.TEXT); - public static final String FAMILY_SEGREGATION_DESCR = "Filter by segregation mode from a given family. Accepted values: " - + "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, " - + "deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]"; + QueryParam.create("familyProband", VariantQueryParams.FAMILY_PROBAND_DESC, QueryParam.Type.TEXT); public static final QueryParam FAMILY_SEGREGATION = - QueryParam.create("familySegregation", FAMILY_SEGREGATION_DESCR, QueryParam.Type.TEXT); + QueryParam.create("familySegregation", VariantQueryParams.FAMILY_SEGREGATION_DESCR, QueryParam.Type.TEXT); - public static final String SAVED_FILTER_DESCR = "Use a saved filter at User level"; public static final QueryParam SAVED_FILTER = - QueryParam.create("savedFilter", SAVED_FILTER_DESCR, QueryParam.Type.TEXT); + QueryParam.create("savedFilter", VariantQueryParams.SAVED_FILTER_DESCR, QueryParam.Type.TEXT); @Deprecated public static final QueryParam FAMILY_PHENOTYPE = FAMILY_DISORDER; @Deprecated public static final QueryParam MODE_OF_INHERITANCE = FAMILY_SEGREGATION; - public static final String PANEL_DESC = "Filter by genes from the given disease panel"; public static final QueryParam PANEL = - QueryParam.create("panel", PANEL_DESC, QueryParam.Type.TEXT); - public static final String PANEL_MOI_DESC = "Filter genes from specific panels that match certain mode of inheritance. " + - "Accepted values : " - + "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, " - + "deNovo, mendelianError, compoundHeterozygous ]"; + QueryParam.create("panel", VariantQueryParams.PANEL_DESC, QueryParam.Type.TEXT); public static final QueryParam PANEL_MODE_OF_INHERITANCE = - QueryParam.create("panelModeOfInheritance", PANEL_MOI_DESC + QueryParam.create("panelModeOfInheritance", VariantQueryParams.PANEL_MOI_DESC , QueryParam.Type.TEXT); - public static final String PANEL_CONFIDENCE_DESC = "Filter genes from specific panels that match certain confidence. " + - "Accepted values : [ high, medium, low, rejected ]"; public static final QueryParam PANEL_CONFIDENCE = - QueryParam.create("panelConfidence", PANEL_CONFIDENCE_DESC, QueryParam.Type.TEXT); + QueryParam.create("panelConfidence", VariantQueryParams.PANEL_CONFIDENCE_DESC, QueryParam.Type.TEXT); - public static final String PANEL_INTERSECTION_DESC = "Intersect panel genes and regions with given " - + "genes and regions from que input query. This will prevent returning variants from regions out of the panel."; public static final QueryParam PANEL_INTERSECTION = - QueryParam.create("panelIntersection", PANEL_INTERSECTION_DESC, Type.BOOLEAN); + QueryParam.create("panelIntersection", VariantQueryParams.PANEL_INTERSECTION_DESC, Type.BOOLEAN); - public static final String PANEL_ROLE_IN_CANCER_DESC = "Filter genes from specific panels that match certain role in cancer. " + - "Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]"; public static final QueryParam PANEL_ROLE_IN_CANCER = - QueryParam.create("panelRoleInCancer", PANEL_ROLE_IN_CANCER_DESC, QueryParam.Type.TEXT); + QueryParam.create("panelRoleInCancer", VariantQueryParams.PANEL_ROLE_IN_CANCER_DESC, QueryParam.Type.TEXT); - public static final String PANEL_FEATURE_TYPE_DESC = "Filter elements from specific panels by type. 
" + - "Accepted values : [ gene, region, str, variant ]"; public static final QueryParam PANEL_FEATURE_TYPE = - QueryParam.create("panelFeatureType", PANEL_FEATURE_TYPE_DESC, QueryParam.Type.TEXT); + QueryParam.create("panelFeatureType", VariantQueryParams.PANEL_FEATURE_TYPE_DESC, QueryParam.Type.TEXT); public static final List VARIANT_CATALOG_QUERY_PARAMS = Arrays.asList( SAMPLE_ANNOTATION, diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/ClinicalCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/ClinicalCommandOptions.java index 853d280736d..95b192b213a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/ClinicalCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/ClinicalCommandOptions.java @@ -24,6 +24,7 @@ import static org.opencb.opencga.analysis.clinical.InterpretationAnalysis.*; import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.*; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; +import static org.opencb.opencga.core.models.variant.VariantQueryParams.*; @Parameters(commandNames = {"clinical"}, commandDescription = "Clinical analysis commands") public class ClinicalCommandOptions { diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java index aa25fe630e1..effde8ca3e3 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java @@ -52,7 +52,6 @@ import org.opencb.opencga.core.models.variant.AnnotationVariantQueryParams; import org.opencb.opencga.core.models.variant.SampleVariantFilterParams; import org.opencb.opencga.core.tools.variant.IndividualQcAnalysisExecutor; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.oskar.analysis.variant.gwas.GwasConfiguration; @@ -60,7 +59,6 @@ import java.util.List; import java.util.Map; -import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.*; import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.CohortVariantStatsCommandOptions.COHORT_VARIANT_STATS_RUN_COMMAND; import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.FamilyIndexCommandOptions.FAMILY_INDEX_COMMAND; import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.FamilyIndexCommandOptions.FAMILY_INDEX_COMMAND_DESCRIPTION; @@ -71,6 +69,7 @@ import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.VariantSecondaryIndexCommandOptions.SECONDARY_INDEX_COMMAND; import static org.opencb.opencga.app.cli.internal.options.VariantCommandOptions.VariantSecondaryIndexDeleteCommandOptions.SECONDARY_INDEX_DELETE_COMMAND; import static org.opencb.opencga.core.api.ParamConstants.*; +import static org.opencb.opencga.core.models.variant.VariantQueryParams.*; import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.AggregateCommandOptions.AGGREGATE_COMMAND; import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.AggregateCommandOptions.AGGREGATE_COMMAND_DESCRIPTION; import static 
org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.AggregateFamilyCommandOptions.AGGREGATE_FAMILY_COMMAND; @@ -86,8 +85,6 @@ import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.GenericAnnotationSaveCommandOptions.ANNOTATION_SAVE_COMMAND_DESCRIPTION; import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.VariantDeleteCommandOptions.VARIANT_DELETE_COMMAND; import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.VariantDeleteCommandOptions.VARIANT_DELETE_COMMAND_DESCRIPTION; -import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.ANNOT_CLINICAL_DESCR; -import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.ANNOT_CLINICAL_SIGNIFICANCE_DESCR; /** * Created by pfurio on 23/11/16. @@ -266,7 +263,7 @@ public class VariantSecondaryIndexCommandOptions extends GeneralCliOptions.Study @Parameter(names = {"-p", "--project"}, description = "Project to index.", arity = 1) public String project; - @Parameter(names = {"-r", "--region"}, description = VariantQueryParam.REGION_DESCR) + @Parameter(names = {"-r", "--region"}, description = REGION_DESCR) public String region; @Parameter(names = {"--sample"}, description = "Samples to index." @@ -876,25 +873,25 @@ public SampleVariantFilterParams setMaxVariants(int maxVariants) { return super.setMaxVariants(maxVariants); } - @Parameter(names = {"--id"}, description = VariantQueryParam.ID_DESCR) + @Parameter(names = {"--id"}, description = ID_DESCR) @Override public AnnotationVariantQueryParams setId(String id) { return super.setId(id); } - @Parameter(names = {"--region"}, description = VariantQueryParam.REGION_DESCR) + @Parameter(names = {"--region"}, description = REGION_DESCR) @Override public AnnotationVariantQueryParams setRegion(String region) { return super.setRegion(region); } - @Parameter(names = {"--gene"}, description = VariantQueryParam.GENE_DESCR) + @Parameter(names = {"--gene"}, description = GENE_DESCR) @Override public AnnotationVariantQueryParams setGene(String gene) { return super.setGene(gene); } - @Parameter(names = {"--type"}, description = VariantQueryParam.TYPE_DESCR) + @Parameter(names = {"--type"}, description = TYPE_DESCR) @Override public AnnotationVariantQueryParams setType(String type) { return super.setType(type); @@ -924,79 +921,79 @@ public AnnotationVariantQueryParams setPanelRoleInCancer(String panelRoleInCance return super.setPanelRoleInCancer(panelRoleInCancer); } - @Parameter(names = {"--cohort-stats-ref"}, description = VariantQueryParam.STATS_REF_DESCR) + @Parameter(names = {"--cohort-stats-ref"}, description = STATS_REF_DESCR) @Override public AnnotationVariantQueryParams setCohortStatsRef(String cohortStatsRef) { return super.setCohortStatsRef(cohortStatsRef); } - @Parameter(names = {"--cohort-stats-alt"}, description = VariantQueryParam.STATS_ALT_DESCR) + @Parameter(names = {"--cohort-stats-alt"}, description = STATS_ALT_DESCR) @Override public AnnotationVariantQueryParams setCohortStatsAlt(String cohortStatsAlt) { return super.setCohortStatsAlt(cohortStatsAlt); } - @Parameter(names = {"--cohort-stats-maf"}, description = VariantQueryParam.STATS_MAF_DESCR) + @Parameter(names = {"--cohort-stats-maf"}, description = STATS_MAF_DESCR) @Override public AnnotationVariantQueryParams setCohortStatsMaf(String cohortStatsMaf) { return super.setCohortStatsMaf(cohortStatsMaf); } - @Parameter(names = {"--ct", "--consequence-type"}, 
description = VariantQueryParam.ANNOT_CONSEQUENCE_TYPE_DESCR) + @Parameter(names = {"--ct", "--consequence-type"}, description = ANNOT_CONSEQUENCE_TYPE_DESCR) @Override public AnnotationVariantQueryParams setCt(String ct) { return super.setCt(ct); } - @Parameter(names = {"--xref"}, description = VariantQueryParam.ANNOT_XREF_DESCR) + @Parameter(names = {"--xref"}, description = ANNOT_XREF_DESCR) @Override public AnnotationVariantQueryParams setXref(String xref) { return super.setXref(xref); } - @Parameter(names = {"--biotype"}, description = VariantQueryParam.ANNOT_BIOTYPE_DESCR) + @Parameter(names = {"--biotype"}, description = ANNOT_BIOTYPE_DESCR) @Override public AnnotationVariantQueryParams setBiotype(String biotype) { return super.setBiotype(biotype); } - @Parameter(names = {"--protein-substitution"}, description = VariantQueryParam.ANNOT_PROTEIN_SUBSTITUTION_DESCR) + @Parameter(names = {"--protein-substitution"}, description = ANNOT_PROTEIN_SUBSTITUTION_DESCR) @Override public AnnotationVariantQueryParams setProteinSubstitution(String proteinSubstitution) { return super.setProteinSubstitution(proteinSubstitution); } - @Parameter(names = {"--conservation"}, description = VariantQueryParam.ANNOT_CONSERVATION_DESCR) + @Parameter(names = {"--conservation"}, description = ANNOT_CONSERVATION_DESCR) @Override public AnnotationVariantQueryParams setConservation(String conservation) { return super.setConservation(conservation); } - @Parameter(names = {"--population-frequency-maf"}, description = VariantQueryParam.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR) + @Parameter(names = {"--population-frequency-maf"}, description = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR) @Override public AnnotationVariantQueryParams setPopulationFrequencyMaf(String populationFrequencyMaf) { return super.setPopulationFrequencyMaf(populationFrequencyMaf); } - @Parameter(names = {"--population-frequency-alt"}, description = VariantQueryParam.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR) + @Parameter(names = {"--population-frequency-alt"}, description = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR) @Override public AnnotationVariantQueryParams setPopulationFrequencyAlt(String populationFrequencyAlt) { return super.setPopulationFrequencyAlt(populationFrequencyAlt); } - @Parameter(names = {"--population-frequency-ref"}, description = VariantQueryParam.ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR) + @Parameter(names = {"--population-frequency-ref"}, description = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR) @Override public AnnotationVariantQueryParams setPopulationFrequencyRef(String populationFrequencyRef) { return super.setPopulationFrequencyRef(populationFrequencyRef); } - @Parameter(names = {"--transcript-flag"}, description = VariantQueryParam.ANNOT_TRANSCRIPT_FLAG_DESCR) + @Parameter(names = {"--transcript-flag"}, description = ANNOT_TRANSCRIPT_FLAG_DESCR) @Override public AnnotationVariantQueryParams setTranscriptFlag(String transcriptFlag) { return super.setTranscriptFlag(transcriptFlag); } - @Parameter(names = {"--functional-score"}, description = VariantQueryParam.ANNOT_FUNCTIONAL_SCORE_DESCR) + @Parameter(names = {"--functional-score"}, description = ANNOT_FUNCTIONAL_SCORE_DESCR) @Override public AnnotationVariantQueryParams setFunctionalScore(String functionalScore) { return super.setFunctionalScore(functionalScore); @@ -1266,10 +1263,10 @@ public class KnockoutCommandOptions { + "By default filters by loss of function + missense_variant consequence types.") public String consequenceType; - @Parameter(names = 
{"--filter"}, description = VariantQueryParam.FILTER_DESCR) + @Parameter(names = {"--filter"}, description = FILTER_DESCR) public String filter; - @Parameter(names = {"--qual"}, description = VariantQueryParam.QUAL_DESCR) + @Parameter(names = {"--qual"}, description = QUAL_DESCR) public String qual; @Parameter(names = {"--skip-genes-file"}, description = "Do not generate the results file by gene") diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java index 84eeffe1be6..1331b4c00ad 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisClinicalCommandExecutor.java @@ -1221,6 +1221,7 @@ private RestResponse queryVariant() throws Exception { queryParams.putIfNotEmpty("panelRoleInCancer", commandOptions.panelRoleInCancer); queryParams.putIfNotEmpty("panelFeatureType", commandOptions.panelFeatureType); queryParams.putIfNotNull("panelIntersection", commandOptions.panelIntersection); + queryParams.putIfNotEmpty("source", commandOptions.source); queryParams.putIfNotEmpty("trait", commandOptions.trait); if (queryParams.get("study") == null && OpencgaMain.isShellMode()) { queryParams.putIfNotEmpty("study", sessionManager.getSession().getCurrentStudy()); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java index 3427a69163c..5b2e21cee46 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java @@ -1343,6 +1343,7 @@ private RestResponse query() throws Exception { queryParams.putIfNotEmpty("panelRoleInCancer", commandOptions.panelRoleInCancer); queryParams.putIfNotEmpty("panelFeatureType", commandOptions.panelFeatureType); queryParams.putIfNotNull("panelIntersection", commandOptions.panelIntersection); + queryParams.putIfNotEmpty("source", commandOptions.source); queryParams.putIfNotEmpty("trait", commandOptions.trait); if (queryParams.get("study") == null && OpencgaMain.isShellMode()) { queryParams.putIfNotEmpty("study", sessionManager.getSession().getCurrentStudy()); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java index 7a80b5b62dc..90870dfc8d6 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java @@ -1808,7 +1808,7 @@ public class QueryVariantCommandOptions { @Parameter(names = {"--file-data"}, description = "Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from 'file' filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. 
file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP", required = false, arity = 1) public String fileData; - @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) + @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) public String sample; @Parameter(names = {"--sample-data"}, description = "Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}[,;]* . If no sample is specified, will use all samples from 'sample' or 'genotype' filter. e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . Many FORMAT fields can be combined. e.g. HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10", required = false, arity = 1) @@ -1937,6 +1937,9 @@ public class QueryVariantCommandOptions { @Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean panelIntersection = false; + @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. 
Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.", required = false, arity = 1) + public String source; + @Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1) public String trait; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java index 1c83e822d82..7d97404fd6c 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java @@ -484,238 +484,238 @@ public class RunExportCommandOptions { @Parameter(names = {"--job-dry-run"}, description = "Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.", required = false, arity = 1) public Boolean jobDryRun; - @Parameter(names = {"--id"}, description = "The body web service id parameter", required = false, arity = 1) + @Parameter(names = {"--id"}, description = "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T", required = false, arity = 1) public String id; - @Parameter(names = {"--region"}, description = "The body web service region parameter", required = false, arity = 1) + @Parameter(names = {"--region"}, description = "Reference allele", required = false, arity = 1) public String region; - @Parameter(names = {"--gene"}, description = "The body web service gene parameter", required = false, arity = 1) + @Parameter(names = {"--gene"}, description = "List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). This is an alias to 'xref' parameter", required = false, arity = 1) public String gene; - @Parameter(names = {"--type"}, description = "The body web service type parameter", required = false, arity = 1) + @Parameter(names = {"--type"}, description = "List of types, accepted values are SNV, MNV, INDEL, SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. SNV,INDEL", required = false, arity = 1) public String type; - @Parameter(names = {"--panel"}, description = "The body web service panel parameter", required = false, arity = 1) + @Parameter(names = {"--panel"}, description = "Filter by genes from the given disease panel", required = false, arity = 1) public String panel; - @Parameter(names = {"--panel-mode-of-inheritance"}, description = "The body web service panelModeOfInheritance parameter", required = false, arity = 1) + @Parameter(names = {"--panel-mode-of-inheritance"}, description = "Filter genes from specific panels that match certain mode of inheritance. 
Accepted values : [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous ]", required = false, arity = 1) public String panelModeOfInheritance; - @Parameter(names = {"--panel-confidence"}, description = "The body web service panelConfidence parameter", required = false, arity = 1) + @Parameter(names = {"--panel-confidence"}, description = "Filter genes from specific panels that match certain confidence. Accepted values : [ high, medium, low, rejected ]", required = false, arity = 1) public String panelConfidence; - @Parameter(names = {"--panel-role-in-cancer"}, description = "The body web service panelRoleInCancer parameter", required = false, arity = 1) + @Parameter(names = {"--panel-role-in-cancer"}, description = "Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]", required = false, arity = 1) public String panelRoleInCancer; - @Parameter(names = {"--panel-intersection"}, description = "The body web service panelIntersection parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean panelIntersection = false; - @Parameter(names = {"--panel-feature-type"}, description = "The body web service panelFeatureType parameter", required = false, arity = 1) + @Parameter(names = {"--panel-feature-type"}, description = "Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]", required = false, arity = 1) public String panelFeatureType; - @Parameter(names = {"--cohort-stats-ref"}, description = "The body web service cohortStatsRef parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-ref"}, description = "Reference Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String cohortStatsRef; - @Parameter(names = {"--cohort-stats-alt"}, description = "The body web service cohortStatsAlt parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-alt"}, description = "Alternate Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String cohortStatsAlt; - @Parameter(names = {"--cohort-stats-maf"}, description = "The body web service cohortStatsMaf parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-maf"}, description = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String cohortStatsMaf; - @Parameter(names = {"--ct"}, description = "The body web service ct parameter", required = false, arity = 1) + @Parameter(names = {"--ct"}, description = "List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. Accepts aliases 'loss_of_function' and 'protein_altering'", required = false, arity = 1) public String ct; - @Parameter(names = {"--xref"}, description = "The body web service xref parameter", required = false, arity = 1) + @Parameter(names = {"--xref"}, description = "List of any external reference, these can be genes, proteins or variants. 
Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...", required = false, arity = 1) public String xref; - @Parameter(names = {"--biotype"}, description = "The body web service biotype parameter", required = false, arity = 1) + @Parameter(names = {"--biotype"}, description = "List of biotypes, e.g. protein_coding", required = false, arity = 1) public String biotype; - @Parameter(names = {"--protein-substitution"}, description = "The body web service proteinSubstitution parameter", required = false, arity = 1) + @Parameter(names = {"--protein-substitution"}, description = "Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number} or the description {protein_score}[~=|=]{description} e.g. polyphen>0.1,sift=tolerant", required = false, arity = 1) public String proteinSubstitution; - @Parameter(names = {"--conservation"}, description = "The body web service conservation parameter", required = false, arity = 1) + @Parameter(names = {"--conservation"}, description = "Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1", required = false, arity = 1) public String conservation; - @Parameter(names = {"--population-frequency-maf"}, description = "The body web service populationFrequencyMaf parameter", required = false, arity = 1) + @Parameter(names = {"--population-frequency-maf"}, description = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String populationFrequencyMaf; - @Parameter(names = {"--population-frequency-alt"}, description = "The body web service populationFrequencyAlt parameter", required = false, arity = 1) + @Parameter(names = {"--population-frequency-alt"}, description = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String populationFrequencyAlt; - @Parameter(names = {"--population-frequency-ref"}, description = "The body web service populationFrequencyRef parameter", required = false, arity = 1) + @Parameter(names = {"--population-frequency-ref"}, description = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String populationFrequencyRef; - @Parameter(names = {"--transcript-flag"}, description = "The body web service transcriptFlag parameter", required = false, arity = 1) + @Parameter(names = {"--transcript-flag"}, description = "List of transcript flags. e.g. canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500", required = false, arity = 1) public String transcriptFlag; - @Parameter(names = {"--functional-score"}, description = "The body web service functionalScore parameter", required = false, arity = 1) + @Parameter(names = {"--functional-score"}, description = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. 
cadd_scaled>5.2 , cadd_raw<=0.3", required = false, arity = 1) public String functionalScore; - @Parameter(names = {"--clinical"}, description = "The body web service clinical parameter", required = false, arity = 1) + @Parameter(names = {"--clinical"}, description = "Clinical source: clinvar, cosmic", required = false, arity = 1) public String clinical; - @Parameter(names = {"--clinical-significance"}, description = "The body web service clinicalSignificance parameter", required = false, arity = 1) + @Parameter(names = {"--clinical-significance"}, description = "Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic", required = false, arity = 1) public String clinicalSignificance; - @Parameter(names = {"--clinical-confirmed-status"}, description = "The body web service clinicalConfirmedStatus parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--clinical-confirmed-status"}, description = "Clinical confirmed status", required = false, help = true, arity = 0) public boolean clinicalConfirmedStatus = false; - @Parameter(names = {"--body_project"}, description = "The body web service project parameter", required = false, arity = 1) + @Parameter(names = {"--body_project"}, description = "Project [organization@]project where project can be either the ID or the alias", required = false, arity = 1) public String bodyProject; - @Parameter(names = {"--body_study"}, description = "The body web service study parameter", required = false, arity = 1) + @Parameter(names = {"--body_study"}, description = "Filter variants from the given studies, these can be either the numeric ID or the alias with the format organization@project:study", required = false, arity = 1) public String bodyStudy; - @Parameter(names = {"--saved-filter"}, description = "The body web service savedFilter parameter", required = false, arity = 1) + @Parameter(names = {"--saved-filter"}, description = "Use a saved filter at User level", required = false, arity = 1) public String savedFilter; - @Parameter(names = {"--chromosome"}, description = "The body web service chromosome parameter", required = false, arity = 1) + @Parameter(names = {"--chromosome"}, description = "List of chromosomes, this is an alias of 'region' parameter with just the chromosome names", required = false, arity = 1) public String chromosome; - @Parameter(names = {"--reference"}, description = "The body web service reference parameter", required = false, arity = 1) + @Parameter(names = {"--reference"}, description = "Reference allele", required = false, arity = 1) public String reference; - @Parameter(names = {"--alternate"}, description = "The body web service alternate parameter", required = false, arity = 1) + @Parameter(names = {"--alternate"}, description = "Main alternate allele", required = false, arity = 1) public String alternate; @Parameter(names = {"--release"}, description = "The body web service release parameter", required = false, arity = 1) public String release; - @Parameter(names = {"--include-study"}, description = "The body web service includeStudy parameter", required = false, arity = 1) + @Parameter(names = {"--include-study"}, description = "List of studies to include in the result. 
Accepts 'all' and 'none'.", required = false, arity = 1) public String includeStudy; - @Parameter(names = {"--include-sample"}, description = "The body web service includeSample parameter", required = false, arity = 1) + @Parameter(names = {"--include-sample"}, description = "List of samples to be included in the result. Accepts 'all' and 'none'. If undefined, automatically includes samples used for filtering. If none, no sample is included.", required = false, arity = 1) public String includeSample; - @Parameter(names = {"--include-file"}, description = "The body web service includeFile parameter", required = false, arity = 1) + @Parameter(names = {"--include-file"}, description = "List of files to be returned. Accepts 'all' and 'none'. If undefined, automatically includes files used for filtering. If none, no file is included.", required = false, arity = 1) public String includeFile; - @Parameter(names = {"--include-sample-data"}, description = "The body web service includeSampleData parameter", required = false, arity = 1) + @Parameter(names = {"--include-sample-data"}, description = "List of Sample Data keys (i.e. FORMAT column from VCF file) from Sample Data to include in the output. e.g: DP,AD. Accepts 'all' and 'none'.", required = false, arity = 1) public String includeSampleData; - @Parameter(names = {"--include-sample-id"}, description = "The body web service includeSampleId parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--include-sample-id"}, description = "Include sampleId on each result", required = false, help = true, arity = 0) public boolean includeSampleId = false; - @Parameter(names = {"--include-genotype"}, description = "The body web service includeGenotype parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--include-genotype"}, description = "Include genotypes, apart of other formats defined with includeFormat", required = false, help = true, arity = 0) public boolean includeGenotype = false; - @Parameter(names = {"--file"}, description = "The body web service file parameter", required = false, arity = 1) + @Parameter(names = {"--file"}, description = "Filter variants from the files specified. This will set includeFile parameter when not provided", required = false, arity = 1) public String file; - @Parameter(names = {"--qual"}, description = "The body web service qual parameter", required = false, arity = 1) + @Parameter(names = {"--qual"}, description = "Specify the QUAL for any of the files. If 'file' filter is provided, will match the file and the qual. e.g.: >123.4", required = false, arity = 1) public String qual; - @Parameter(names = {"--filter"}, description = "The body web service filter parameter", required = false, arity = 1) + @Parameter(names = {"--filter"}, description = "Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: PASS,LowGQX", required = false, arity = 1) public String filter; - @Parameter(names = {"--file-data"}, description = "The body web service fileData parameter", required = false, arity = 1) + @Parameter(names = {"--file-data"}, description = "Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from 'file' filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. 
file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP", required = false, arity = 1) public String fileData; - @Parameter(names = {"--genotype"}, description = "The body web service genotype parameter", required = false, arity = 1) + @Parameter(names = {"--genotype"}, description = "Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)* e.g. HG0097:0/0;HG0098:0/1,1/1. Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided", required = false, arity = 1) public String genotype; - @Parameter(names = {"--sample"}, description = "The body web service sample parameter", required = false, arity = 1) + @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) public String sample; - @Parameter(names = {"--sample-limit"}, description = "The body web service sampleLimit parameter", required = false, arity = 1) + @Parameter(names = {"--sample-limit"}, description = "Limit the number of samples to be included in the result", required = false, arity = 1) public Integer sampleLimit; - @Parameter(names = {"--sample-skip"}, description = "The body web service sampleSkip parameter", required = false, arity = 1) + @Parameter(names = {"--sample-skip"}, description = "Skip some samples from the result. Useful for sample pagination.", required = false, arity = 1) public Integer sampleSkip; - @Parameter(names = {"--sample-data"}, description = "The body web service sampleData parameter", required = false, arity = 1) + @Parameter(names = {"--sample-data"}, description = "Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}[,;]* . If no sample is specified, will use all samples from 'sample' or 'genotype' filter. e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . Many FORMAT fields can be combined. e.g. 
HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10", required = false, arity = 1) public String sampleData; - @Parameter(names = {"--sample-annotation"}, description = "The body web service sampleAnnotation parameter", required = false, arity = 1) + @Parameter(names = {"--sample-annotation"}, description = "Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith", required = false, arity = 1) public String sampleAnnotation; - @Parameter(names = {"--family"}, description = "The body web service family parameter", required = false, arity = 1) + @Parameter(names = {"--family"}, description = "Filter variants where any of the samples from the given family contains the variant (HET or HOM_ALT)", required = false, arity = 1) public String family; - @Parameter(names = {"--family-members"}, description = "The body web service familyMembers parameter", required = false, arity = 1) + @Parameter(names = {"--family-members"}, description = "Sub set of the members of a given family", required = false, arity = 1) public String familyMembers; - @Parameter(names = {"--family-disorder"}, description = "The body web service familyDisorder parameter", required = false, arity = 1) + @Parameter(names = {"--family-disorder"}, description = "Specify the disorder to use for the family segregation", required = false, arity = 1) public String familyDisorder; - @Parameter(names = {"--family-proband"}, description = "The body web service familyProband parameter", required = false, arity = 1) + @Parameter(names = {"--family-proband"}, description = "Specify the proband child to use for the family segregation", required = false, arity = 1) public String familyProband; - @Parameter(names = {"--family-segregation"}, description = "The body web service familySegregation parameter", required = false, arity = 1) + @Parameter(names = {"--family-segregation"}, description = "Filter by segregation mode from a given family. Accepted values: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]", required = false, arity = 1) public String familySegregation; - @Parameter(names = {"--cohort"}, description = "The body web service cohort parameter", required = false, arity = 1) + @Parameter(names = {"--cohort"}, description = "Select variants with calculated stats for the selected cohorts", required = false, arity = 1) public String cohort; - @Parameter(names = {"--cohort-stats-pass"}, description = "The body web service cohortStatsPass parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-pass"}, description = "Filter PASS frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL>0.8", required = false, arity = 1) public String cohortStatsPass; - @Parameter(names = {"--cohort-stats-mgf"}, description = "The body web service cohortStatsMgf parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-mgf"}, description = "Minor Genotype Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. 
ALL<=0.4", required = false, arity = 1) public String cohortStatsMgf; - @Parameter(names = {"--missing-alleles"}, description = "The body web service missingAlleles parameter", required = false, arity = 1) + @Parameter(names = {"--missing-alleles"}, description = "Number of missing alleles: [{study:}]{cohort}[<|>|<=|>=]{number}", required = false, arity = 1) public String missingAlleles; - @Parameter(names = {"--missing-genotypes"}, description = "The body web service missingGenotypes parameter", required = false, arity = 1) + @Parameter(names = {"--missing-genotypes"}, description = "Number of missing genotypes: [{study:}]{cohort}[<|>|<=|>=]{number}", required = false, arity = 1) public String missingGenotypes; - @Parameter(names = {"--annotation-exists"}, description = "The body web service annotationExists parameter", required = false, arity = 1) + @Parameter(names = {"--annotation-exists"}, description = "Return only annotated variants", required = false, arity = 1) public Boolean annotationExists; - @Parameter(names = {"--score"}, description = "The body web service score parameter", required = false, arity = 1) + @Parameter(names = {"--score"}, description = "Filter by variant score: [{study:}]{score}[<|>|<=|>=]{number}", required = false, arity = 1) public String score; - @Parameter(names = {"--polyphen"}, description = "The body web service polyphen parameter", required = false, arity = 1) + @Parameter(names = {"--polyphen"}, description = "Polyphen, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. <=0.9 , =benign", required = false, arity = 1) public String polyphen; - @Parameter(names = {"--sift"}, description = "The body web service sift parameter", required = false, arity = 1) + @Parameter(names = {"--sift"}, description = "Sift, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. >0.1 , ~=tolerant", required = false, arity = 1) public String sift; @Parameter(names = {"--gene-role-in-cancer"}, description = "The body web service geneRoleInCancer parameter", required = false, arity = 1) public String geneRoleInCancer; - @Parameter(names = {"--gene-trait-id"}, description = "The body web service geneTraitId parameter", required = false, arity = 1) + @Parameter(names = {"--gene-trait-id"}, description = "List of gene trait association id. e.g. 'umls:C0007222' , 'OMIM:269600'", required = false, arity = 1) public String geneTraitId; - @Parameter(names = {"--gene-trait-name"}, description = "The body web service geneTraitName parameter", required = false, arity = 1) + @Parameter(names = {"--gene-trait-name"}, description = "List of gene trait association names. e.g. 
Cardiovascular Diseases", required = false, arity = 1) public String geneTraitName; - @Parameter(names = {"--trait"}, description = "The body web service trait parameter", required = false, arity = 1) + @Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1) public String trait; - @Parameter(names = {"--cosmic"}, description = "The body web service cosmic parameter", required = false, arity = 1) + @Parameter(names = {"--cosmic"}, description = "List of COSMIC mutation IDs.", required = false, arity = 1) public String cosmic; - @Parameter(names = {"--clinvar"}, description = "The body web service clinvar parameter", required = false, arity = 1) + @Parameter(names = {"--clinvar"}, description = "Clinical source: clinvar, cosmic", required = false, arity = 1) public String clinvar; - @Parameter(names = {"--hpo"}, description = "The body web service hpo parameter", required = false, arity = 1) + @Parameter(names = {"--hpo"}, description = "List of HPO terms. e.g. 'HP:0000545,HP:0002812'", required = false, arity = 1) public String hpo; - @Parameter(names = {"--go"}, description = "The body web service go parameter", required = false, arity = 1) + @Parameter(names = {"--go"}, description = "List of GO (Gene Ontology) terms. e.g. 'GO:0002020'", required = false, arity = 1) public String go; - @Parameter(names = {"--expression"}, description = "The body web service expression parameter", required = false, arity = 1) + @Parameter(names = {"--expression"}, description = "List of tissues of interest. e.g. 'lung'", required = false, arity = 1) public String expression; - @Parameter(names = {"--protein-keyword"}, description = "The body web service proteinKeyword parameter", required = false, arity = 1) + @Parameter(names = {"--protein-keyword"}, description = "List of Uniprot protein variant annotation keywords", required = false, arity = 1) public String proteinKeyword; - @Parameter(names = {"--drug"}, description = "The body web service drug parameter", required = false, arity = 1) + @Parameter(names = {"--drug"}, description = "List of drug names", required = false, arity = 1) public String drug; - @Parameter(names = {"--custom-annotation"}, description = "The body web service customAnnotation parameter", required = false, arity = 1) + @Parameter(names = {"--custom-annotation"}, description = "Custom annotation: {key}[<|>|<=|>=]{number} or {key}[~=|=]{text}", required = false, arity = 1) public String customAnnotation; - @Parameter(names = {"--unknown-genotype"}, description = "The body web service unknownGenotype parameter", required = false, arity = 1) + @Parameter(names = {"--unknown-genotype"}, description = "Returned genotype for unknown genotypes. Common values: [0/0, 0|0, ./.]", required = false, arity = 1) public String unknownGenotype; - @Parameter(names = {"--sample-metadata"}, description = "The body web service sampleMetadata parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--sample-metadata"}, description = "Return the samples metadata group by study. 
Sample names will appear in the same order as their corresponding genotypes.", required = false, help = true, arity = 0) public boolean sampleMetadata = false; - @Parameter(names = {"--sort"}, description = "The body web service sort parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--sort"}, description = "Sort the results by chromosome, start, end and alternate allele", required = false, help = true, arity = 0) public boolean sort = false; @Parameter(names = {"--outdir"}, description = "The body web service outdir parameter", required = false, arity = 1) @@ -1510,7 +1510,7 @@ public class MetadataCommandOptions { @Parameter(names = {"--file"}, description = "Filter variants from the files specified. This will set includeFile parameter when not provided", required = false, arity = 1) public String file; - @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) + @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) public String sample; @Parameter(names = {"--include-study"}, description = "List of studies to include in the result. 
Accepts 'all' and 'none'.", required = false, arity = 1) @@ -1791,7 +1791,7 @@ public class QueryCommandOptions { @Parameter(names = {"--file-data"}, description = "Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from 'file' filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP", required = false, arity = 1) public String fileData; - @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) + @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) public String sample; @Parameter(names = {"--genotype"}, description = "Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)* e.g. HG0097:0/0;HG0098:0/1,1/1. Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... 
Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided", required = false, arity = 1) @@ -1956,6 +1956,9 @@ public class QueryCommandOptions { @Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean panelIntersection = false; + @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.", required = false, arity = 1) + public String source; + @Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1) public String trait; @@ -2088,7 +2091,7 @@ public class AggregationStatsSampleCommandOptions { @Parameter(names = {"--filter"}, description = "Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: PASS,LowGQX", required = false, arity = 1) public String filter; - @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) + @Parameter(names = {"--sample"}, description = "Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... 
Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. ", required = false, arity = 1) public String sample; @Parameter(names = {"--genotype"}, description = "Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)* e.g. HG0097:0/0;HG0098:0/1,1/1. Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided", required = false, arity = 1) @@ -2227,82 +2230,82 @@ public class RunSampleQcCommandOptions { @Parameter(names = {"--vs-description"}, description = "Variant stats description.", required = false, arity = 1) public String vsDescription; - @Parameter(names = {"--vs-query-id"}, description = "The body web service id parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-id"}, description = "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T", required = false, arity = 1) public String vsQueryId; - @Parameter(names = {"--vs-query-region"}, description = "The body web service region parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-region"}, description = "Reference allele", required = false, arity = 1) public String vsQueryRegion; - @Parameter(names = {"--vs-query-gene"}, description = "The body web service gene parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-gene"}, description = "List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). This is an alias to 'xref' parameter", required = false, arity = 1) public String vsQueryGene; - @Parameter(names = {"--vs-query-type"}, description = "The body web service type parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-type"}, description = "List of types, accepted values are SNV, MNV, INDEL, SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. SNV,INDEL", required = false, arity = 1) public String vsQueryType; - @Parameter(names = {"--vs-query-panel"}, description = "The body web service panel parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-panel"}, description = "Filter by genes from the given disease panel", required = false, arity = 1) public String vsQueryPanel; - @Parameter(names = {"--vs-query-panel-mode-of-inheritance"}, description = "The body web service panelModeOfInheritance parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-panel-mode-of-inheritance"}, description = "Filter genes from specific panels that match certain mode of inheritance. 
Accepted values : [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous ]", required = false, arity = 1) public String vsQueryPanelModeOfInheritance; - @Parameter(names = {"--vs-query-panel-confidence"}, description = "The body web service panelConfidence parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-panel-confidence"}, description = "Filter genes from specific panels that match certain confidence. Accepted values : [ high, medium, low, rejected ]", required = false, arity = 1) public String vsQueryPanelConfidence; - @Parameter(names = {"--vs-query-panel-role-in-cancer"}, description = "The body web service panelRoleInCancer parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-panel-role-in-cancer"}, description = "Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]", required = false, arity = 1) public String vsQueryPanelRoleInCancer; - @Parameter(names = {"--vs-query-panel-intersection"}, description = "The body web service panelIntersection parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--vs-query-panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean vsQueryPanelIntersection = false; - @Parameter(names = {"--vs-query-panel-feature-type"}, description = "The body web service panelFeatureType parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-panel-feature-type"}, description = "Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]", required = false, arity = 1) public String vsQueryPanelFeatureType; - @Parameter(names = {"--vs-query-cohort-stats-ref"}, description = "The body web service cohortStatsRef parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-cohort-stats-ref"}, description = "Reference Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String vsQueryCohortStatsRef; - @Parameter(names = {"--vs-query-cohort-stats-alt"}, description = "The body web service cohortStatsAlt parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-cohort-stats-alt"}, description = "Alternate Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String vsQueryCohortStatsAlt; - @Parameter(names = {"--vs-query-cohort-stats-maf"}, description = "The body web service cohortStatsMaf parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-cohort-stats-maf"}, description = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String vsQueryCohortStatsMaf; - @Parameter(names = {"--vs-query-ct"}, description = "The body web service ct parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-ct"}, description = "List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. 
Accepts aliases 'loss_of_function' and 'protein_altering'", required = false, arity = 1) public String vsQueryCt; - @Parameter(names = {"--vs-query-xref"}, description = "The body web service xref parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-xref"}, description = "List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...", required = false, arity = 1) public String vsQueryXref; - @Parameter(names = {"--vs-query-biotype"}, description = "The body web service biotype parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-biotype"}, description = "List of biotypes, e.g. protein_coding", required = false, arity = 1) public String vsQueryBiotype; - @Parameter(names = {"--vs-query-protein-substitution"}, description = "The body web service proteinSubstitution parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-protein-substitution"}, description = "Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number} or the description {protein_score}[~=|=]{description} e.g. polyphen>0.1,sift=tolerant", required = false, arity = 1) public String vsQueryProteinSubstitution; - @Parameter(names = {"--vs-query-conservation"}, description = "The body web service conservation parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-conservation"}, description = "Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1", required = false, arity = 1) public String vsQueryConservation; - @Parameter(names = {"--vs-query-population-frequency-maf"}, description = "The body web service populationFrequencyMaf parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-population-frequency-maf"}, description = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String vsQueryPopulationFrequencyMaf; - @Parameter(names = {"--vs-query-population-frequency-alt"}, description = "The body web service populationFrequencyAlt parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-population-frequency-alt"}, description = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String vsQueryPopulationFrequencyAlt; - @Parameter(names = {"--vs-query-population-frequency-ref"}, description = "The body web service populationFrequencyRef parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-population-frequency-ref"}, description = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String vsQueryPopulationFrequencyRef; - @Parameter(names = {"--vs-query-transcript-flag"}, description = "The body web service transcriptFlag parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-transcript-flag"}, description = "List of transcript flags. e.g. 
canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500", required = false, arity = 1) public String vsQueryTranscriptFlag; - @Parameter(names = {"--vs-query-functional-score"}, description = "The body web service functionalScore parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-functional-score"}, description = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. cadd_scaled>5.2 , cadd_raw<=0.3", required = false, arity = 1) public String vsQueryFunctionalScore; - @Parameter(names = {"--vs-query-clinical"}, description = "The body web service clinical parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-clinical"}, description = "Clinical source: clinvar, cosmic", required = false, arity = 1) public String vsQueryClinical; - @Parameter(names = {"--vs-query-clinical-significance"}, description = "The body web service clinicalSignificance parameter", required = false, arity = 1) + @Parameter(names = {"--vs-query-clinical-significance"}, description = "Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic", required = false, arity = 1) public String vsQueryClinicalSignificance; - @Parameter(names = {"--vs-query-clinical-confirmed-status"}, description = "The body web service clinicalConfirmedStatus parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--vs-query-clinical-confirmed-status"}, description = "Clinical confirmed status", required = false, help = true, arity = 0) public boolean vsQueryClinicalConfirmedStatus = false; @Parameter(names = {"--ms-id"}, description = "Signature ID.", required = false, arity = 1) @@ -2420,82 +2423,82 @@ public class RunSampleCommandOptions { @Parameter(names = {"--job-dry-run"}, description = "Flag indicating that the job will be executed in dry-run mode. In this mode, OpenCGA will validate that all parameters and prerequisites are correctly set for successful execution, but the job will not actually run.", required = false, arity = 1) public Boolean jobDryRun; - @Parameter(names = {"--id"}, description = "The body web service id parameter", required = false, arity = 1) + @Parameter(names = {"--id"}, description = "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T", required = false, arity = 1) public String id; - @Parameter(names = {"--region"}, description = "The body web service region parameter", required = false, arity = 1) + @Parameter(names = {"--region"}, description = "Reference allele", required = false, arity = 1) public String region; - @Parameter(names = {"--gene"}, description = "The body web service gene parameter", required = false, arity = 1) + @Parameter(names = {"--gene"}, description = "List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). This is an alias to 'xref' parameter", required = false, arity = 1) public String gene; - @Parameter(names = {"--type"}, description = "The body web service type parameter", required = false, arity = 1) + @Parameter(names = {"--type"}, description = "List of types, accepted values are SNV, MNV, INDEL, SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. 
SNV,INDEL", required = false, arity = 1) public String type; - @Parameter(names = {"--panel"}, description = "The body web service panel parameter", required = false, arity = 1) + @Parameter(names = {"--panel"}, description = "Filter by genes from the given disease panel", required = false, arity = 1) public String panel; - @Parameter(names = {"--panel-mode-of-inheritance"}, description = "The body web service panelModeOfInheritance parameter", required = false, arity = 1) + @Parameter(names = {"--panel-mode-of-inheritance"}, description = "Filter genes from specific panels that match certain mode of inheritance. Accepted values : [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous ]", required = false, arity = 1) public String panelModeOfInheritance; - @Parameter(names = {"--panel-confidence"}, description = "The body web service panelConfidence parameter", required = false, arity = 1) + @Parameter(names = {"--panel-confidence"}, description = "Filter genes from specific panels that match certain confidence. Accepted values : [ high, medium, low, rejected ]", required = false, arity = 1) public String panelConfidence; - @Parameter(names = {"--panel-role-in-cancer"}, description = "The body web service panelRoleInCancer parameter", required = false, arity = 1) + @Parameter(names = {"--panel-role-in-cancer"}, description = "Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]", required = false, arity = 1) public String panelRoleInCancer; - @Parameter(names = {"--panel-intersection"}, description = "The body web service panelIntersection parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean panelIntersection = false; - @Parameter(names = {"--panel-feature-type"}, description = "The body web service panelFeatureType parameter", required = false, arity = 1) + @Parameter(names = {"--panel-feature-type"}, description = "Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]", required = false, arity = 1) public String panelFeatureType; - @Parameter(names = {"--cohort-stats-ref"}, description = "The body web service cohortStatsRef parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-ref"}, description = "Reference Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String cohortStatsRef; - @Parameter(names = {"--cohort-stats-alt"}, description = "The body web service cohortStatsAlt parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-alt"}, description = "Alternate Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String cohortStatsAlt; - @Parameter(names = {"--cohort-stats-maf"}, description = "The body web service cohortStatsMaf parameter", required = false, arity = 1) + @Parameter(names = {"--cohort-stats-maf"}, description = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. 
ALL<=0.4", required = false, arity = 1) public String cohortStatsMaf; - @Parameter(names = {"--ct"}, description = "The body web service ct parameter", required = false, arity = 1) + @Parameter(names = {"--ct"}, description = "List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. Accepts aliases 'loss_of_function' and 'protein_altering'", required = false, arity = 1) public String ct; - @Parameter(names = {"--xref"}, description = "The body web service xref parameter", required = false, arity = 1) + @Parameter(names = {"--xref"}, description = "List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...", required = false, arity = 1) public String xref; - @Parameter(names = {"--biotype"}, description = "The body web service biotype parameter", required = false, arity = 1) + @Parameter(names = {"--biotype"}, description = "List of biotypes, e.g. protein_coding", required = false, arity = 1) public String biotype; - @Parameter(names = {"--protein-substitution"}, description = "The body web service proteinSubstitution parameter", required = false, arity = 1) + @Parameter(names = {"--protein-substitution"}, description = "Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number} or the description {protein_score}[~=|=]{description} e.g. polyphen>0.1,sift=tolerant", required = false, arity = 1) public String proteinSubstitution; - @Parameter(names = {"--conservation"}, description = "The body web service conservation parameter", required = false, arity = 1) + @Parameter(names = {"--conservation"}, description = "Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1", required = false, arity = 1) public String conservation; - @Parameter(names = {"--population-frequency-maf"}, description = "The body web service populationFrequencyMaf parameter", required = false, arity = 1) + @Parameter(names = {"--population-frequency-maf"}, description = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String populationFrequencyMaf; - @Parameter(names = {"--population-frequency-alt"}, description = "The body web service populationFrequencyAlt parameter", required = false, arity = 1) + @Parameter(names = {"--population-frequency-alt"}, description = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String populationFrequencyAlt; - @Parameter(names = {"--population-frequency-ref"}, description = "The body web service populationFrequencyRef parameter", required = false, arity = 1) + @Parameter(names = {"--population-frequency-ref"}, description = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String populationFrequencyRef; - @Parameter(names = {"--transcript-flag"}, description = "The body web service transcriptFlag parameter", required = false, arity = 1) + @Parameter(names = {"--transcript-flag"}, description = "List of transcript flags. e.g. 
canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500", required = false, arity = 1) public String transcriptFlag; - @Parameter(names = {"--functional-score"}, description = "The body web service functionalScore parameter", required = false, arity = 1) + @Parameter(names = {"--functional-score"}, description = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. cadd_scaled>5.2 , cadd_raw<=0.3", required = false, arity = 1) public String functionalScore; - @Parameter(names = {"--clinical"}, description = "The body web service clinical parameter", required = false, arity = 1) + @Parameter(names = {"--clinical"}, description = "Clinical source: clinvar, cosmic", required = false, arity = 1) public String clinical; - @Parameter(names = {"--clinical-significance"}, description = "The body web service clinicalSignificance parameter", required = false, arity = 1) + @Parameter(names = {"--clinical-significance"}, description = "Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic", required = false, arity = 1) public String clinicalSignificance; - @Parameter(names = {"--clinical-confirmed-status"}, description = "The body web service clinicalConfirmedStatus parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--clinical-confirmed-status"}, description = "Clinical confirmed status", required = false, help = true, arity = 0) public boolean clinicalConfirmedStatus = false; @Parameter(names = {"--genotypes"}, description = "The body web service genotypes parameter", required = false, arity = 1) @@ -2607,82 +2610,82 @@ public class RunSampleStatsCommandOptions { @Parameter(names = {"--individual"}, description = "The body web service individual parameter", required = false, arity = 1) public String individual; - @Parameter(names = {"--variant-query-id"}, description = "The body web service id parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-id"}, description = "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T", required = false, arity = 1) public String variantQueryId; - @Parameter(names = {"--variant-query-region"}, description = "The body web service region parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-region"}, description = "Reference allele", required = false, arity = 1) public String variantQueryRegion; - @Parameter(names = {"--variant-query-gene"}, description = "The body web service gene parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-gene"}, description = "List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). This is an alias to 'xref' parameter", required = false, arity = 1) public String variantQueryGene; - @Parameter(names = {"--variant-query-type"}, description = "The body web service type parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-type"}, description = "List of types, accepted values are SNV, MNV, INDEL, SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. 
SNV,INDEL", required = false, arity = 1) public String variantQueryType; - @Parameter(names = {"--variant-query-panel"}, description = "The body web service panel parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-panel"}, description = "Filter by genes from the given disease panel", required = false, arity = 1) public String variantQueryPanel; - @Parameter(names = {"--variant-query-panel-mode-of-inheritance"}, description = "The body web service panelModeOfInheritance parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-panel-mode-of-inheritance"}, description = "Filter genes from specific panels that match certain mode of inheritance. Accepted values : [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, mendelianError, compoundHeterozygous ]", required = false, arity = 1) public String variantQueryPanelModeOfInheritance; - @Parameter(names = {"--variant-query-panel-confidence"}, description = "The body web service panelConfidence parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-panel-confidence"}, description = "Filter genes from specific panels that match certain confidence. Accepted values : [ high, medium, low, rejected ]", required = false, arity = 1) public String variantQueryPanelConfidence; - @Parameter(names = {"--variant-query-panel-role-in-cancer"}, description = "The body web service panelRoleInCancer parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-panel-role-in-cancer"}, description = "Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]", required = false, arity = 1) public String variantQueryPanelRoleInCancer; - @Parameter(names = {"--variant-query-panel-intersection"}, description = "The body web service panelIntersection parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--variant-query-panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean variantQueryPanelIntersection = false; - @Parameter(names = {"--variant-query-panel-feature-type"}, description = "The body web service panelFeatureType parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-panel-feature-type"}, description = "Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]", required = false, arity = 1) public String variantQueryPanelFeatureType; - @Parameter(names = {"--variant-query-cohort-stats-ref"}, description = "The body web service cohortStatsRef parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-cohort-stats-ref"}, description = "Reference Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String variantQueryCohortStatsRef; - @Parameter(names = {"--variant-query-cohort-stats-alt"}, description = "The body web service cohortStatsAlt parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-cohort-stats-alt"}, description = "Alternate Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. 
ALL<=0.4", required = false, arity = 1) public String variantQueryCohortStatsAlt; - @Parameter(names = {"--variant-query-cohort-stats-maf"}, description = "The body web service cohortStatsMaf parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-cohort-stats-maf"}, description = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4", required = false, arity = 1) public String variantQueryCohortStatsMaf; - @Parameter(names = {"--variant-query-ct"}, description = "The body web service ct parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-ct"}, description = "List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. Accepts aliases 'loss_of_function' and 'protein_altering'", required = false, arity = 1) public String variantQueryCt; - @Parameter(names = {"--variant-query-xref"}, description = "The body web service xref parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-xref"}, description = "List of any external reference, these can be genes, proteins or variants. Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...", required = false, arity = 1) public String variantQueryXref; - @Parameter(names = {"--variant-query-biotype"}, description = "The body web service biotype parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-biotype"}, description = "List of biotypes, e.g. protein_coding", required = false, arity = 1) public String variantQueryBiotype; - @Parameter(names = {"--variant-query-protein-substitution"}, description = "The body web service proteinSubstitution parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-protein-substitution"}, description = "Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number} or the description {protein_score}[~=|=]{description} e.g. polyphen>0.1,sift=tolerant", required = false, arity = 1) public String variantQueryProteinSubstitution; - @Parameter(names = {"--variant-query-conservation"}, description = "The body web service conservation parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-conservation"}, description = "Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1", required = false, arity = 1) public String variantQueryConservation; - @Parameter(names = {"--variant-query-population-frequency-maf"}, description = "The body web service populationFrequencyMaf parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-population-frequency-maf"}, description = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String variantQueryPopulationFrequencyMaf; - @Parameter(names = {"--variant-query-population-frequency-alt"}, description = "The body web service populationFrequencyAlt parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-population-frequency-alt"}, description = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 
1000G:ALL<0.01", required = false, arity = 1) public String variantQueryPopulationFrequencyAlt; - @Parameter(names = {"--variant-query-population-frequency-ref"}, description = "The body web service populationFrequencyRef parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-population-frequency-ref"}, description = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. 1000G:ALL<0.01", required = false, arity = 1) public String variantQueryPopulationFrequencyRef; - @Parameter(names = {"--variant-query-transcript-flag"}, description = "The body web service transcriptFlag parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-transcript-flag"}, description = "List of transcript flags. e.g. canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500", required = false, arity = 1) public String variantQueryTranscriptFlag; - @Parameter(names = {"--variant-query-functional-score"}, description = "The body web service functionalScore parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-functional-score"}, description = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. cadd_scaled>5.2 , cadd_raw<=0.3", required = false, arity = 1) public String variantQueryFunctionalScore; - @Parameter(names = {"--variant-query-clinical"}, description = "The body web service clinical parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-clinical"}, description = "Clinical source: clinvar, cosmic", required = false, arity = 1) public String variantQueryClinical; - @Parameter(names = {"--variant-query-clinical-significance"}, description = "The body web service clinicalSignificance parameter", required = false, arity = 1) + @Parameter(names = {"--variant-query-clinical-significance"}, description = "Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic", required = false, arity = 1) public String variantQueryClinicalSignificance; - @Parameter(names = {"--variant-query-clinical-confirmed-status"}, description = "The body web service clinicalConfirmedStatus parameter", required = false, help = true, arity = 0) + @Parameter(names = {"--variant-query-clinical-confirmed-status"}, description = "Clinical confirmed status", required = false, help = true, arity = 0) public boolean variantQueryClinicalConfirmedStatus = false; @Parameter(names = {"--variant-query-sample-data"}, description = "The body web service sampleData parameter", required = false, arity = 1) diff --git a/opencga-client/src/main/R/R/Clinical-methods.R b/opencga-client/src/main/R/R/Clinical-methods.R index 6fb3c4dfffa..7860e7ee0f5 100644 --- a/opencga-client/src/main/R/R/Clinical-methods.R +++ b/opencga-client/src/main/R/R/Clinical-methods.R @@ -43,6 +43,8 @@ #' | summaryRgaVariant | /{apiVersion}/analysis/clinical/rga/variant/summary | limit, skip, count, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study | #' | search | /{apiVersion}/analysis/clinical/search | include, exclude, limit, skip, count, flattenAnnotations, study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, dueDate, qualityControlSummary, release, status, internalStatus, annotation, 
deleted | #' | queryVariant | /{apiVersion}/analysis/clinical/variant/query | include, exclude, limit, skip, count, approximateCount, approximateCountSamplingSize, savedFilter, includeInterpretation, id, region, type, study, file, filter, qual, fileData, sample, sampleData, sampleAnnotation, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, trait | +#' | search | /{apiVersion}/analysis/clinical/search | include, exclude, limit, skip, count, flattenAnnotations, study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, dueDate, qualityControlSummary, release, snapshot, status, internalStatus, annotation, deleted | +#' | queryVariant | /{apiVersion}/analysis/clinical/variant/query | include, exclude, limit, skip, count, approximateCount, approximateCountSamplingSize, savedFilter, includeInterpretation, id, region, type, study, file, filter, qual, fileData, sample, sampleData, sampleAnnotation, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, source, trait | #' | acl | /{apiVersion}/analysis/clinical/{clinicalAnalyses}/acl | clinicalAnalyses[*], study, member, silent | #' | delete | /{apiVersion}/analysis/clinical/{clinicalAnalyses}/delete | study, force, clinicalAnalyses[*] | #' | update | /{apiVersion}/analysis/clinical/{clinicalAnalyses}/update | include, exclude, clinicalAnalyses[*], study, commentsAction, flagsAction, analystsAction, filesAction, panelsAction, annotationSetsAction, includeResult, body[*] | @@ -581,7 +583,7 @@ setMethod("clinicalClient", "OpencgaR", function(OpencgaR, annotationSet, clinic #' @param filter Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: PASS,LowGQX. #' @param qual Specify the QUAL for any of the files. If 'file' filter is provided, will match the file and the qual. e.g.: >123.4. #' @param fileData Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from "file" filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. - #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. 
This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . + #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . #' @param sampleData Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}[,;]* . If no sample is specified, will use all samples from "sample" or "genotype" filter. e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . Many FORMAT fields can be combined. e.g. HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10. #' @param sampleAnnotation Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith. #' @param cohort Select variants with calculated stats for the selected cohorts. @@ -624,6 +626,7 @@ setMethod("clinicalClient", "OpencgaR", function(OpencgaR, annotationSet, clinic #' @param panelRoleInCancer Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]. #' @param panelFeatureType Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. #' @param panelIntersection Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel. + #' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. 
When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. #' @param trait List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... queryVariant=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="clinical/variant", subcategoryId=NULL, action="query", params=params, httpMethod="GET", diff --git a/opencga-client/src/main/R/R/Variant-methods.R b/opencga-client/src/main/R/R/Variant-methods.R index 5413a7604b5..0686020512d 100644 --- a/opencga-client/src/main/R/R/Variant-methods.R +++ b/opencga-client/src/main/R/R/Variant-methods.R @@ -46,7 +46,7 @@ #' | queryMutationalSignature | /{apiVersion}/analysis/variant/mutationalSignature/query | study, sample, type, ct, biotype, fileData, filter, qual, region, gene, panel, panelModeOfInheritance, panelConfidence, panelFeatureType, panelRoleInCancer, panelIntersection, msId, msDescription | #' | runMutationalSignature | /{apiVersion}/analysis/variant/mutationalSignature/run | study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | #' | runPlink | /{apiVersion}/analysis/variant/plink/run | study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | -#' | query | /{apiVersion}/analysis/variant/query | include, exclude, limit, skip, count, sort, summary, approximateCount, approximateCountSamplingSize, savedFilter, id, region, type, reference, alternate, project, study, file, filter, qual, fileData, sample, genotype, sampleData, sampleAnnotation, sampleMetadata, unknownGenotype, sampleLimit, sampleSkip, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, includeStudy, includeFile, includeSample, includeSampleData, includeGenotype, includeSampleId, annotationExists, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, trait | +#' | query | /{apiVersion}/analysis/variant/query | include, exclude, limit, skip, count, sort, summary, approximateCount, approximateCountSamplingSize, savedFilter, id, region, type, reference, alternate, project, study, file, filter, qual, fileData, sample, genotype, sampleData, sampleAnnotation, sampleMetadata, unknownGenotype, sampleLimit, sampleSkip, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, includeStudy, includeFile, includeSample, includeSampleData, includeGenotype, includeSampleId, annotationExists, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, 
source, trait | #' | runRelatedness | /{apiVersion}/analysis/variant/relatedness/run | study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | #' | runRvtests | /{apiVersion}/analysis/variant/rvtests/run | study, jobId, jobDescription, jobDependsOn, jobTags, jobScheduledStartTime, jobPriority, jobDryRun, body[*] | #' | aggregationStatsSample | /{apiVersion}/analysis/variant/sample/aggregationStats | savedFilter, region, type, project, study, file, filter, sample, genotype, sampleAnnotation, family, familyDisorder, familySegregation, familyMembers, familyProband, ct, biotype, populationFrequencyAlt, clinical, clinicalSignificance, clinicalConfirmedStatus, field | @@ -398,7 +398,7 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N #' @param project Project [organization@]project where project can be either the ID or the alias. #' @param study Filter variants from the given studies, these can be either the numeric ID or the alias with the format organization@project:study. #' @param file Filter variants from the files specified. This will set includeFile parameter when not provided. - #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . + #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . 
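As a purely illustrative aside (not part of this patch), the sample genotype filter documented in the @param line above could be exercised from the R client roughly as follows. The connection object 'con', the study name and the sample IDs are hypothetical placeholders; only the call shape visible in this file (variantClient with endpointName and params) is assumed.

library(opencgaR)  # R client package built from opencga-client/src/main/R (package name assumed)

# Hedged sketch: an authenticated OpencgaR connection 'con' is assumed to exist.
# Form 2 of the 'sample' filter: per-sample genotype lists, ';' = AND, ',' = OR.
res <- variantClient(OpencgaR = con, endpointName = "query",
                     params = list(study = "myOrg@myProject:myStudy",        # hypothetical study
                                   sample = "HG0097:0/1,1/1;HG0098:HOM_ALT",  # genotype aliases allowed
                                   limit = 10))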
#' @param includeStudy List of studies to include in the result. Accepts 'all' and 'none'. #' @param includeFile List of files to be returned. Accepts 'all' and 'none'. If undefined, automatically includes files used for filtering. If none, no file is included. #' @param includeSample List of samples to be included in the result. Accepts 'all' and 'none'. If undefined, automatically includes samples used for filtering. If none, no sample is included. @@ -483,7 +483,7 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N #' @param filter Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: PASS,LowGQX. #' @param qual Specify the QUAL for any of the files. If 'file' filter is provided, will match the file and the qual. e.g.: >123.4. #' @param fileData Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from "file" filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. - #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . + #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . 
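In the same hypothetical spirit, the segregation form of that filter (form 3 above) could be combined with the new 'source' parameter added elsewhere in this patch; whether a deployment has the secondary sample index built, and the study name used, are assumptions of this sketch, not statements about the patch.

# Hedged sketch: 'con' as above; HG0097 must have parents defined and indexed.
# Accepted 'source' values, per the new description added in this patch:
# 'variant_index' (default), 'secondary_annotation_index', 'secondary_sample_index'.
denovo <- variantClient(OpencgaR = con, endpointName = "query",
                        params = list(study = "myOrg@myProject:myStudy",
                                      sample = "HG0097:deNovo",
                                      source = "secondary_sample_index"))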
#' @param genotype Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)* e.g. HG0097:0/0;HG0098:0/1,1/1. Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided. #' @param sampleData Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}[,;]* . If no sample is specified, will use all samples from "sample" or "genotype" filter. e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . Many FORMAT fields can be combined. e.g. HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10. #' @param sampleAnnotation Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith. @@ -538,6 +538,7 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N #' @param panelRoleInCancer Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]. #' @param panelFeatureType Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. #' @param panelIntersection Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel. + #' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. #' @param trait List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... query=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="variant", subcategoryId=NULL, action="query", params=params, httpMethod="GET", as.queryParam=NULL, ...), @@ -580,7 +581,7 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N #' @param study Filter variants from the given studies, these can be either the numeric ID or the alias with the format organization@project:study. #' @param file Filter variants from the files specified. This will set includeFile parameter when not provided. #' @param filter Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: PASS,LowGQX. - #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... 
Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . + #' @param sample Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive. e.g. HG0097:DeNovo Sample must have parents defined and indexed. . #' @param genotype Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)* e.g. HG0097:0/0;HG0098:0/1,1/1. Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided. #' @param sampleAnnotation Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith. #' @param family Filter variants where any of the samples from the given family contains the variant (HET or HOM_ALT). diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java index 869c987e791..501c2c52407 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java @@ -758,8 +758,8 @@ public RestResponse search(ObjectMap params) throws ClientExce * is specified, will use all files from "file" filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can * be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. * sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This - * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. 
HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) + * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' * operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. * 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, * regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, @@ -822,6 +822,10 @@ public RestResponse search(ObjectMap params) throws ClientExce * panelFeatureType: Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. * panelIntersection: Intersect panel genes and regions with given genes and regions from que input query. This will prevent * returning variants from regions out of the panel. + * source: Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), + * 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved + * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing + * or be partial. * trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @return a RestResponse object. * @throws ClientException ClientException if there is any server error. diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java index 6a68ae8ea82..b78d73dda9b 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java @@ -579,8 +579,8 @@ public RestResponse runMendelianError(MendelianErrorAnalysisParams data, Ob * organization@project:study. * file: Filter variants from the files specified. This will set includeFile parameter when not provided. * sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This - * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) + * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' * operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. * 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, * regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, @@ -720,8 +720,8 @@ public RestResponse runPlink(PlinkWrapperParams data, ObjectMap params) thr * is specified, will use all files from "file" filter. e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can * be combined. e.g. 
file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. * sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This - * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) + * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' * operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. * 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, * regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, @@ -805,6 +805,10 @@ public RestResponse runPlink(PlinkWrapperParams data, ObjectMap params) thr * panelFeatureType: Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. * panelIntersection: Intersect panel genes and regions with given genes and regions from que input query. This will prevent * returning variants from regions out of the panel. + * source: Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), + * 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved + * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing + * or be partial. * trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @return a RestResponse object. * @throws ClientException ClientException if there is any server error. @@ -873,8 +877,8 @@ public RestResponse runRvtests(RvtestsWrapperParams data, ObjectMap params) * filter: Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: * PASS,LowGQX. * sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This - * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) + * filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' * operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. * 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary allele will match, * regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... 
Genotype aliases accepted: HOM_REF, diff --git a/opencga-client/src/main/javascript/ClinicalAnalysis.js b/opencga-client/src/main/javascript/ClinicalAnalysis.js index f2e4031455c..d1b7228f7ed 100644 --- a/opencga-client/src/main/javascript/ClinicalAnalysis.js +++ b/opencga-client/src/main/javascript/ClinicalAnalysis.js @@ -646,8 +646,8 @@ export default class ClinicalAnalysis extends OpenCGAParentClass { * [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from "file" filter. e.g. AN>200 or * file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. * @param {String} [params.sample] - Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not - * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. + * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. * HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice * versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with * genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. @@ -719,6 +719,9 @@ export default class ClinicalAnalysis extends OpenCGAParentClass { * variant ]. * @param {Boolean} [params.panelIntersection] - Intersect panel genes and regions with given genes and regions from que input query. * This will prevent returning variants from regions out of the panel. + * @param {String} [params.source] - Select the variant data source from where to fetch the data. Accepted values are 'variant_index' + * (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved + * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. * @param {String} [params.trait] - List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @returns {Promise} Promise object in the form of RestResponse instance. */ diff --git a/opencga-client/src/main/javascript/Variant.js b/opencga-client/src/main/javascript/Variant.js index 7d8b01966da..ae291e7a9e5 100644 --- a/opencga-client/src/main/javascript/Variant.js +++ b/opencga-client/src/main/javascript/Variant.js @@ -470,8 +470,8 @@ export default class Variant extends OpenCGAParentClass { * format organization@project:study. * @param {String} [params.file] - Filter variants from the files specified. This will set includeFile parameter when not provided. * @param {String} [params.sample] - Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not - * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. 
+ * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. * HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice * versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with * genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. @@ -603,8 +603,8 @@ export default class Variant extends OpenCGAParentClass { * [{file}:]{key}{op}{value}[,;]* . If no file is specified, will use all files from "file" filter. e.g. AN>200 or * file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. * @param {String} [params.sample] - Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not - * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. + * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. * HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice * versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with * genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. @@ -696,6 +696,9 @@ export default class Variant extends OpenCGAParentClass { * variant ]. * @param {Boolean} [params.panelIntersection] - Intersect panel genes and regions with given genes and regions from que input query. * This will prevent returning variants from regions out of the panel. + * @param {String} [params.source] - Select the variant data source from where to fetch the data. Accepted values are 'variant_index' + * (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved + * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. * @param {String} [params.trait] - List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @returns {Promise} Promise object in the form of RestResponse instance. */ @@ -755,8 +758,8 @@ export default class Variant extends OpenCGAParentClass { * @param {String} [params.filter] - Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the * filter. e.g.: PASS,LowGQX. * @param {String} [params.sample] - Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not - * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND (;) and OR (,) - * operators. e.g. HG0097,HG0098 . 
2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. + * provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that contain the main variant. Accepts AND ';' and OR ',' + * operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. * HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice * versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with * genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py index 6ccab3a988a..976de7948d5 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py @@ -806,9 +806,9 @@ def query_variant(self, **options): :param str sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that - contain the main variant. Accepts AND (;) and OR (,) operators. + contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: - {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. + {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary @@ -918,6 +918,12 @@ def query_variant(self, **options): :param bool panel_intersection: Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel. + :param str source: Select the variant data source from where to fetch + the data. Accepted values are 'variant_index' (default), + 'secondary_annotation_index' and 'secondary_sample_index'. When + selecting a secondary_index, the data will be retrieved exclusively + from that secondary index, and the 'include/exclude' parameters + will be ignored. Some data might be missing or be partial. :param str trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py index 3993f48ba22..24aa650aa41 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py @@ -610,9 +610,9 @@ def metadata(self, **options): :param str sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that - contain the main variant. Accepts AND (;) and OR (,) operators. + contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: - {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. 
+ {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary @@ -802,9 +802,9 @@ def query(self, **options): :param str sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that - contain the main variant. Accepts AND (;) and OR (,) operators. + contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: - {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. + {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. When filtering by multi-allelic genotypes, any secondary @@ -949,6 +949,12 @@ def query(self, **options): :param bool panel_intersection: Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel. + :param str source: Select the variant data source from where to fetch + the data. Accepted values are 'variant_index' (default), + 'secondary_annotation_index' and 'secondary_sample_index'. When + selecting a secondary_index, the data will be retrieved exclusively + from that secondary index, and the 'include/exclude' parameters + will be ignored. Some data might be missing or be partial. :param str trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... """ @@ -1032,9 +1038,9 @@ def aggregation_stats_sample(self, **options): :param str sample: Filter variants by sample genotype. This will automatically set 'includeSample' parameter when not provided. This filter accepts multiple 3 forms: 1) List of samples: Samples that - contain the main variant. Accepts AND (;) and OR (,) operators. + contain the main variant. Accepts AND ';' and OR ',' operators. e.g. HG0097,HG0098 . 2) List of samples with genotypes: - {sample}:{gt1},{gt2}. Accepts AND (;) and OR (,) operators. e.g. + {sample}:{gt1},{gt2}. Accepts AND ';' and OR ',' operators. e.g. HG0097:0/0;HG0098:0/1,1/1 . Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. 
When filtering by multi-allelic genotypes, any secondary diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/AnnotationVariantQueryParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/AnnotationVariantQueryParams.java index 3713cce05b1..cdcb0bcd878 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/AnnotationVariantQueryParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/AnnotationVariantQueryParams.java @@ -1,37 +1,66 @@ package org.opencb.opencga.core.models.variant; import com.fasterxml.jackson.annotation.JsonInclude; +import org.opencb.commons.annotations.DataField; import org.opencb.commons.datastore.core.Query; import org.opencb.opencga.core.tools.ToolParams; +import static org.opencb.opencga.core.models.variant.VariantQueryParams.*; + public class AnnotationVariantQueryParams extends ToolParams { + @DataField(description = ID_DESCR) private String id; + @DataField(description = REFERENCE_DESCR) private String region; + @DataField(description = GENE_DESCR) private String gene; + @DataField(description = TYPE_DESCR) private String type; + @DataField(description = PANEL_DESC) private String panel; + @DataField(description = PANEL_MOI_DESC) private String panelModeOfInheritance; + @DataField(description = PANEL_CONFIDENCE_DESC) private String panelConfidence; + @DataField(description = PANEL_ROLE_IN_CANCER_DESC) private String panelRoleInCancer; @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @DataField(description = PANEL_INTERSECTION_DESC) private boolean panelIntersection; + @DataField(description = PANEL_FEATURE_TYPE_DESC) private String panelFeatureType; + @DataField(description = STATS_REF_DESCR) private String cohortStatsRef; + @DataField(description = STATS_ALT_DESCR) private String cohortStatsAlt; + @DataField(description = STATS_MAF_DESCR) private String cohortStatsMaf; + @DataField(description = ANNOT_CONSEQUENCE_TYPE_DESCR) private String ct; + @DataField(description = ANNOT_XREF_DESCR) private String xref; + @DataField(description = ANNOT_BIOTYPE_DESCR) private String biotype; + @DataField(description = ANNOT_PROTEIN_SUBSTITUTION_DESCR) private String proteinSubstitution; + @DataField(description = ANNOT_CONSERVATION_DESCR) private String conservation; + @DataField(description = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR) private String populationFrequencyMaf; + @DataField(description = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR) private String populationFrequencyAlt; + @DataField(description = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR) private String populationFrequencyRef; + @DataField(description = ANNOT_TRANSCRIPT_FLAG_DESCR) private String transcriptFlag; + @DataField(description = ANNOT_FUNCTIONAL_SCORE_DESCR) private String functionalScore; + @DataField(description = ANNOT_CLINICAL_DESCR) private String clinical; + @DataField(description = ANNOT_CLINICAL_SIGNIFICANCE_DESCR) private String clinicalSignificance; @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @DataField(description = ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR) private boolean clinicalConfirmedStatus; public AnnotationVariantQueryParams() { diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/BasicVariantQueryParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/BasicVariantQueryParams.java index 72b621af649..5314c4bc7c8 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/BasicVariantQueryParams.java +++ 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/BasicVariantQueryParams.java @@ -16,12 +16,16 @@ package org.opencb.opencga.core.models.variant; +import org.opencb.commons.annotations.DataField; + /** * Basic set of VariantQueryParams, containing only the most used ones. */ public class BasicVariantQueryParams extends AnnotationVariantQueryParams { + @DataField(description = VariantQueryParams.PROJECT_DESC) private String project; + @DataField(description = VariantQueryParams.STUDY_DESCR) private String study; public String getProject() { diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/SampleVariantStatsAnalysisParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/SampleVariantStatsAnalysisParams.java index aa32124e051..0807a4ae731 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/SampleVariantStatsAnalysisParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/SampleVariantStatsAnalysisParams.java @@ -27,7 +27,7 @@ public class SampleVariantStatsAnalysisParams extends ToolParams { + "Use sample=all to compute sample stats of all samples in the variant storage."; private List sample; private List individual; - private VariantQueryParams variantQuery = new VariantQueryParams(); + private VariantQueryParamsForSampleVariantStats variantQuery = new VariantQueryParamsForSampleVariantStats(); private String outdir; private boolean index; private boolean indexOverwrite; @@ -35,14 +35,14 @@ public class SampleVariantStatsAnalysisParams extends ToolParams { private String indexDescription; private Integer batchSize; - public static class VariantQueryParams extends AnnotationVariantQueryParams { + public static class VariantQueryParamsForSampleVariantStats extends AnnotationVariantQueryParams { public String sampleData; public String fileData; - public VariantQueryParams() { + public VariantQueryParamsForSampleVariantStats() { } - public VariantQueryParams(Query query) { + public VariantQueryParamsForSampleVariantStats(Query query) { super(query); } @@ -50,7 +50,7 @@ public String getSampleData() { return sampleData; } - public VariantQueryParams setSampleData(String sampleData) { + public VariantQueryParamsForSampleVariantStats setSampleData(String sampleData) { this.sampleData = sampleData; return this; } @@ -59,7 +59,7 @@ public String getFileData() { return fileData; } - public VariantQueryParams setFileData(String fileData) { + public VariantQueryParamsForSampleVariantStats setFileData(String fileData) { this.fileData = fileData; return this; } @@ -72,19 +72,19 @@ public SampleVariantStatsAnalysisParams(List sample, List indivi boolean indexOverwrite, String indexId, String indexDescription, Integer batchSize, AnnotationVariantQueryParams variantQuery) { this(sample, individual, outdir, index, indexOverwrite, indexId, indexDescription, batchSize, - variantQuery == null ? null : new VariantQueryParams(variantQuery.toQuery())); + variantQuery == null ? null : new VariantQueryParamsForSampleVariantStats(variantQuery.toQuery())); } public SampleVariantStatsAnalysisParams(List sample, List individual, String outdir, boolean index, boolean indexOverwrite, String indexId, String indexDescription, Integer batchSize, Query variantQuery) { this(sample, individual, outdir, index, indexOverwrite, indexId, indexDescription, batchSize, - variantQuery == null ? null : new VariantQueryParams(variantQuery)); + variantQuery == null ? 
null : new VariantQueryParamsForSampleVariantStats(variantQuery)); } public SampleVariantStatsAnalysisParams(List sample, List individual, String outdir, boolean index, boolean indexOverwrite, String indexId, String indexDescription, Integer batchSize, - VariantQueryParams variantQuery) { + VariantQueryParamsForSampleVariantStats variantQuery) { this.sample = sample; this.individual = individual; this.variantQuery = variantQuery; @@ -114,11 +114,11 @@ public SampleVariantStatsAnalysisParams setIndividual(List individual) { return this; } - public VariantQueryParams getVariantQuery() { + public VariantQueryParamsForSampleVariantStats getVariantQuery() { return variantQuery; } - public SampleVariantStatsAnalysisParams setVariantQuery(VariantQueryParams variantQuery) { + public SampleVariantStatsAnalysisParams setVariantQuery(VariantQueryParamsForSampleVariantStats variantQuery) { this.variantQuery = variantQuery; return this; } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java index b3ffc62bbe4..3a018796c21 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java @@ -17,7 +17,9 @@ package org.opencb.opencga.core.models.variant; import com.fasterxml.jackson.annotation.JsonInclude; +import org.opencb.commons.annotations.DataField; import org.opencb.commons.datastore.core.Query; +import org.opencb.opencga.core.api.ParamConstants; /** * When using native values (like boolean or int), set add @@ -25,68 +27,332 @@ */ public class VariantQueryParams extends BasicVariantQueryParams { + public static final String SAMPLE_ANNOTATION_DESC = + "Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith"; + public static final String PROJECT_DESC = ParamConstants.PROJECT_DESCRIPTION; + public static final String FAMILY_DESC = "Filter variants where any of the samples from the given family contains the variant " + + "(HET or HOM_ALT)"; + public static final String FAMILY_MEMBERS_DESC = "Sub set of the members of a given family"; + public static final String FAMILY_DISORDER_DESC = "Specify the disorder to use for the family segregation"; + public static final String FAMILY_PROBAND_DESC = "Specify the proband child to use for the family segregation"; + public static final String FAMILY_SEGREGATION_DESCR = "Filter by segregation mode from a given family. Accepted values: " + + "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, " + + "deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]"; + public static final String SAVED_FILTER_DESCR = "Use a saved filter at User level"; + public static final String PANEL_DESC = "Filter by genes from the given disease panel"; + public static final String PANEL_MOI_DESC = "Filter genes from specific panels that match certain mode of inheritance. " + + "Accepted values : " + + "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, " + + "deNovo, mendelianError, compoundHeterozygous ]"; + public static final String PANEL_CONFIDENCE_DESC = "Filter genes from specific panels that match certain confidence. 
" + + "Accepted values : [ high, medium, low, rejected ]"; + public static final String PANEL_INTERSECTION_DESC = "Intersect panel genes and regions with given " + + "genes and regions from que input query. This will prevent returning variants from regions out of the panel."; + public static final String PANEL_ROLE_IN_CANCER_DESC = "Filter genes from specific panels that match certain role in cancer. " + + "Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]"; + public static final String PANEL_FEATURE_TYPE_DESC = "Filter elements from specific panels by type. " + + "Accepted values : [ gene, region, str, variant ]"; + private static final String ACCEPTS_AND_OR = "Accepts AND ';' and OR ',' operators."; + private static final String ACCEPTS_ALL_NONE = "Accepts '" + ParamConstants.ALL + "' and '" + ParamConstants.NONE + "'."; + + public static final String ID_DESCR + = "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T"; + public static final String REGION_DESCR + = "List of regions, these can be just a single chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000"; + @Deprecated + public static final String CHROMOSOME_DESCR + = "List of chromosomes, this is an alias of 'region' parameter with just the chromosome names"; + public static final String REFERENCE_DESCR + = "Reference allele"; + public static final String ALTERNATE_DESCR + = "Main alternate allele"; + public static final String TYPE_DESCR + = "List of types, accepted values are SNV, MNV, INDEL, SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN," + + " INSERTION, DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. SNV,INDEL"; + public static final String STUDY_DESCR + = "Filter variants from the given studies, these can be either the numeric ID or the alias with the format " + + "organization@project:study"; + public static final String GENOTYPE_DESCR + = "Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)*" + + " e.g. HG0097:0/0;HG0098:0/1,1/1. " + + "Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. " + + "When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position" + + " e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... " + + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS " + + " e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. " + + "This will automatically set 'includeSample' parameter when not provided"; + public static final String SAMPLE_DATA_DESCR + = "Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}[,;]* . " + + "If no sample is specified, will use all samples from \"sample\" or \"genotype\" filter. " + + "e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . " + + "Many FORMAT fields can be combined. e.g. HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10"; + public static final String INCLUDE_SAMPLE_ID_DESCR + = "Include sampleId on each result"; + public static final String SAMPLE_METADATA_DESCR + = "Return the samples metadata group by study. Sample names will appear in the same order as their corresponding genotypes."; + public static final String INCLUDE_GENOTYPE_DESCR + = "Include genotypes, apart of other formats defined with includeFormat"; + public static final String SAMPLE_LIMIT_DESCR + = "Limit the number of samples to be included in the result"; + public static final String SAMPLE_SKIP_DESCR + = "Skip some samples from the result. 
Useful for sample pagination."; + public static final String FILE_DESCR + = "Filter variants from the files specified. This will set includeFile parameter when not provided"; + public static final String FILE_DATA_DESCR + = "Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . " + + "If no file is specified, will use all files from \"file\" filter. " + + "e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . " + + "Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP"; + public static final String FILTER_DESCR + = "Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. " + + "e.g.: PASS,LowGQX"; + public static final String QUAL_DESCR + = "Specify the QUAL for any of the files. If 'file' filter is provided, will match the file and the qual. " + + "e.g.: >123.4"; + public static final String COHORT_DESCR + = "Select variants with calculated stats for the selected cohorts"; + public static final String STATS_REF_DESCR + = "Reference Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; + public static final String STATS_ALT_DESCR + = "Alternate Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; + public static final String STATS_MAF_DESCR + = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; + public static final String STATS_MGF_DESCR + = "Minor Genotype Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; + public static final String STATS_PASS_FREQ_DESCR + = "Filter PASS frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL>0.8"; + public static final String MISSING_ALLELES_DESCR + = "Number of missing alleles: [{study:}]{cohort}[<|>|<=|>=]{number}"; + public static final String MISSING_GENOTYPES_DESCR + = "Number of missing genotypes: [{study:}]{cohort}[<|>|<=|>=]{number}"; + public static final String SCORE_DESCR + = "Filter by variant score: [{study:}]{score}[<|>|<=|>=]{number}"; + public static final String ANNOT_EXISTS_DESCR + = "Return only annotated variants"; + public static final String ANNOT_XREF_DESCR + = "List of any external reference, these can be genes, proteins or variants. " + + "Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ..."; + public static final String GENE_DESCR + = "List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). This is an alias to 'xref' parameter"; + public static final String ANNOT_BIOTYPE_DESCR + = "List of biotypes, e.g. protein_coding"; + public static final String ANNOT_CONSEQUENCE_TYPE_DESCR + = "List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. " + + "Accepts aliases 'loss_of_function' and 'protein_altering'"; + @Deprecated + public static final String ANNOT_POLYPHEN_DESCR + = "Polyphen, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. <=0.9 , =benign"; + @Deprecated + public static final String ANNOT_SIFT_DESCR + = "Sift, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. >0.1 , ~=tolerant"; + public static final String ANNOT_PROTEIN_SUBSTITUTION_DESCR + = "Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number}" + + " or the description {protein_score}[~=|=]{description} e.g. 
polyphen>0.1,sift=tolerant"; + public static final String ANNOT_CONSERVATION_DESCR + = "Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. phastCons>0.5,phylop<0.1,gerp>0.1"; + public static final String ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR + = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. " + + ParamConstants.POP_FREQ_1000G + ":ALL<0.01"; + public static final String ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR + = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. " + + ParamConstants.POP_FREQ_1000G + ":ALL<0.01"; + public static final String ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR + = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. " + + ParamConstants.POP_FREQ_1000G + ":ALL<0.01"; + public static final String ANNOT_TRANSCRIPT_FLAG_DESCR + = "List of transcript flags. e.g. canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500"; + public static final String ANNOT_GENE_TRAIT_ID_DESCR + = "List of gene trait association id. e.g. \"umls:C0007222\" , \"OMIM:269600\""; + @Deprecated + public static final String ANNOT_GENE_TRAIT_NAME_DESCR + = "List of gene trait association names. e.g. Cardiovascular Diseases"; + public static final String ANNOT_TRAIT_DESCR + = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,..."; + public static final String ANNOT_CLINICAL_DESCR + = "Clinical source: clinvar, cosmic"; + public static final String ANNOT_CLINICAL_SIGNIFICANCE_DESCR + = "Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic"; + public static final String ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR + = "Clinical confirmed status"; + @Deprecated + public static final String ANNOT_CLINVAR_DESCR + = "List of ClinVar accessions"; + @Deprecated + public static final String ANNOT_COSMIC_DESCR + = "List of COSMIC mutation IDs."; + @Deprecated + public static final String ANNOT_HPO_DESCR + = "List of HPO terms. e.g. \"HP:0000545,HP:0002812\""; + public static final String ANNOT_GO_DESCR + = "List of GO (Gene Ontology) terms. e.g. \"GO:0002020\""; + public static final String ANNOT_EXPRESSION_DESCR + = "List of tissues of interest. e.g. \"lung\""; + public static final String ANNOT_GENE_ROLE_IN_CANCER_DESCR + = ""; + public static final String ANNOT_PROTEIN_KEYWORD_DESCR + = "List of Uniprot protein variant annotation keywords"; + public static final String ANNOT_DRUG_DESCR + = "List of drug names"; + public static final String ANNOT_FUNCTIONAL_SCORE_DESCR + = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. cadd_scaled>5.2 , cadd_raw<=0.3"; + public static final String CUSTOM_ANNOTATION_DESCR + = "Custom annotation: {key}[<|>|<=|>=]{number} or {key}[~=|=]{text}"; + public static final String UNKNOWN_GENOTYPE_DESCR + = "Returned genotype for unknown genotypes. Common values: [0/0, 0|0, ./.]"; + public static final String RELEASE_DESCR + = ""; + public static final String SOURCE_DESCR = "Select the variant data source from where to fetch the data." + + " Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. " + + "When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, " + + "and the 'include/exclude' parameters will be ignored. " + + "If the given query can not be fully resolved using the secondary index, an exception will be raised. 
" + + "As the returned variants will only contain data from the secondary_index, some data might be missing or be partial."; + public static final String INCLUDE_FILE_DESCR + = "List of files to be returned. " + + ACCEPTS_ALL_NONE + " If undefined, automatically includes files used for filtering. If none, no file is included."; + public static final String INCLUDE_SAMPLE_DATA_DESCR + = "List of Sample Data keys (i.e. FORMAT column from VCF file) from Sample Data to include in the output. e.g: DP,AD. " + + ACCEPTS_ALL_NONE; + public static final String INCLUDE_SAMPLE_DESCR + = "List of samples to be included in the result. " + + ACCEPTS_ALL_NONE + " If undefined, automatically includes samples used for filtering. If none, no sample is included."; + public static final String INCLUDE_STUDY_DESCR + = "List of studies to include in the result. " + + ACCEPTS_ALL_NONE; + public static final String SAMPLE_DESCR + = "Filter variants by sample genotype. " + + "This will automatically set 'includeSample' parameter when not provided. " + + "This filter accepts multiple 3 forms: " + + "1) List of samples: Samples that contain the main variant. " + ACCEPTS_AND_OR + " " + + " e.g. HG0097,HG0098 . " + + "2) List of samples with genotypes: {sample}:{gt1},{gt2}. " + ACCEPTS_AND_OR + " " + + " e.g. HG0097:0/0;HG0098:0/1,1/1 . " + + "Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. " + + "When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position" + + " e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... " + + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS " + + " e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . " + + "3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted." + + "Accepted segregation modes: " + + "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, " + + "deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive." + + " e.g. HG0097:DeNovo " + + "Sample must have parents defined and indexed. 
"; + + @DataField(description = SAVED_FILTER_DESCR) private String savedFilter; + @DataField(description = CHROMOSOME_DESCR) private String chromosome; + @DataField(description = REFERENCE_DESCR) private String reference; + @DataField(description = ALTERNATE_DESCR) private String alternate; + @DataField(description = RELEASE_DESCR) private String release; + @DataField(description = INCLUDE_STUDY_DESCR) private String includeStudy; + @DataField(description = INCLUDE_SAMPLE_DESCR) private String includeSample; + @DataField(description = INCLUDE_FILE_DESCR) private String includeFile; + @DataField(description = INCLUDE_SAMPLE_DATA_DESCR) private String includeSampleData; @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @DataField(description = INCLUDE_SAMPLE_ID_DESCR) private boolean includeSampleId; @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @DataField(description = INCLUDE_GENOTYPE_DESCR) private boolean includeGenotype; + @DataField(description = FILE_DESCR) private String file; + @DataField(description = QUAL_DESCR) private String qual; + @DataField(description = FILTER_DESCR) private String filter; + @DataField(description = FILE_DATA_DESCR) private String fileData; + @DataField(description = GENOTYPE_DESCR) private String genotype; + @DataField(description = SAMPLE_DESCR) private String sample; + @DataField(description = SAMPLE_LIMIT_DESCR) private Integer sampleLimit; + @DataField(description = SAMPLE_SKIP_DESCR) private Integer sampleSkip; + @DataField(description = SAMPLE_DATA_DESCR) private String sampleData; + @DataField(description = SAMPLE_ANNOTATION_DESC) private String sampleAnnotation; + @DataField(description = FAMILY_DESC) private String family; + @DataField(description = FAMILY_MEMBERS_DESC) private String familyMembers; + @DataField(description = FAMILY_DISORDER_DESC) private String familyDisorder; + @DataField(description = FAMILY_PROBAND_DESC) private String familyProband; + @DataField(description = FAMILY_SEGREGATION_DESCR) private String familySegregation; + @DataField(description = COHORT_DESCR) private String cohort; + @DataField(description = STATS_PASS_FREQ_DESCR) private String cohortStatsPass; + @DataField(description = STATS_MGF_DESCR) private String cohortStatsMgf; + @DataField(description = MISSING_ALLELES_DESCR) private String missingAlleles; + @DataField(description = MISSING_GENOTYPES_DESCR) private String missingGenotypes; + @DataField(description = ANNOT_EXISTS_DESCR) private Boolean annotationExists; + @DataField(description = SCORE_DESCR) private String score; + @DataField(description = ANNOT_POLYPHEN_DESCR) @Deprecated private String polyphen; + @DataField(description = ANNOT_SIFT_DESCR) @Deprecated private String sift; + @DataField(description = ANNOT_GENE_ROLE_IN_CANCER_DESCR) private String geneRoleInCancer; + @DataField(description = ANNOT_GENE_TRAIT_ID_DESCR) private String geneTraitId; + @DataField(description = ANNOT_GENE_TRAIT_NAME_DESCR) private String geneTraitName; + @DataField(description = ANNOT_TRAIT_DESCR) private String trait; + @DataField(description = ANNOT_COSMIC_DESCR) private String cosmic; + @DataField(description = ANNOT_CLINICAL_DESCR) private String clinvar; + @DataField(description = ANNOT_HPO_DESCR) private String hpo; + @DataField(description = ANNOT_GO_DESCR) private String go; + @DataField(description = ANNOT_EXPRESSION_DESCR) private String expression; + @DataField(description = ANNOT_PROTEIN_KEYWORD_DESCR) private String proteinKeyword; + @DataField(description = ANNOT_DRUG_DESCR) private String drug; + 
@DataField(description = CUSTOM_ANNOTATION_DESCR) private String customAnnotation; + @DataField(since = "3.2.1", description = SOURCE_DESCR) + private String source; + @DataField(description = UNKNOWN_GENOTYPE_DESCR) private String unknownGenotype; @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @DataField(description = SAMPLE_METADATA_DESCR) private boolean sampleMetadata = false; @JsonInclude(JsonInclude.Include.NON_DEFAULT) + @DataField(description = "Sort the results by chromosome, start, end and alternate allele") private boolean sort = false; @@ -534,6 +800,15 @@ public VariantQueryParams setUnknownGenotype(String unknownGenotype) { return this; } + public String getSource() { + return source; + } + + public VariantQueryParams setSource(String source) { + this.source = source; + return this; + } + public boolean isSampleMetadata() { return sampleMetadata; } diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/ClinicalWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/ClinicalWebService.java index f893c1f5986..7d3d441aa32 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/ClinicalWebService.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/ClinicalWebService.java @@ -32,7 +32,6 @@ import org.opencb.opencga.analysis.clinical.zetta.ZettaInterpretationAnalysis; import org.opencb.opencga.analysis.rga.RgaManager; import org.opencb.opencga.analysis.rga.RgaQueryParams; -import org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils; import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; import org.opencb.opencga.catalog.db.api.ClinicalAnalysisDBAdaptor; import org.opencb.opencga.catalog.db.api.InterpretationDBAdaptor; @@ -49,6 +48,7 @@ import org.opencb.opencga.core.models.job.Job; import org.opencb.opencga.core.models.sample.Sample; import org.opencb.opencga.core.models.study.configuration.ClinicalAnalysisStudyConfiguration; +import org.opencb.opencga.core.models.variant.VariantQueryParams; import org.opencb.opencga.core.tools.annotations.*; import javax.servlet.http.HttpServletRequest; @@ -61,11 +61,10 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicReference; -import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.SAVED_FILTER_DESCR; +import static org.opencb.opencga.core.models.variant.VariantQueryParams.SAVED_FILTER_DESCR; import static org.opencb.opencga.core.api.ParamConstants.INCLUDE_INTERPRETATION; import static org.opencb.opencga.core.api.ParamConstants.JOB_DEPENDS_ON; import static org.opencb.opencga.server.rest.analysis.VariantWebService.getVariantQuery; -import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; @Path("/{apiVersion}/analysis/clinical") @Produces(MediaType.APPLICATION_JSON) @@ -1205,67 +1204,69 @@ public Response rgaIndexRun( // Interpretation ID to include fields related to @ApiImplicitParam(name = ParamConstants.INCLUDE_INTERPRETATION, value = ParamConstants.INCLUDE_INTERPRETATION_DESCRIPTION, dataType = "string", paramType = "query"), // Variant filters - @ApiImplicitParam(name = "id", value = ID_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "type", value = TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "id", value = VariantQueryParams.ID_DESCR, dataType = "string", 
paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "type", value = VariantQueryParams.TYPE_DESCR, dataType = "string", paramType = "query"), // Study filters - @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "file", value = FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "filter", value = FILTER_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "qual", value = QUAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "fileData", value = FILE_DATA_DESCR, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "sample", value = SAMPLE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleData", value = SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleAnnotation", value = VariantCatalogQueryUtils.SAMPLE_ANNOTATION_DESC, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "cohort", value = COHORT_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsRef", value = STATS_REF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsAlt", value = STATS_ALT_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsMaf", value = STATS_MAF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsMgf", value = STATS_MGF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsPass", value = STATS_PASS_FREQ_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "missingAlleles", value = MISSING_ALLELES_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "missingGenotypes", value = MISSING_GENOTYPES_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "score", value = SCORE_DESCR, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "family", value = VariantCatalogQueryUtils.FAMILY_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyDisorder", value = VariantCatalogQueryUtils.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familySegregation", value = VariantCatalogQueryUtils.FAMILY_SEGREGATION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyMembers", value = VariantCatalogQueryUtils.FAMILY_MEMBERS_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyProband", value = VariantCatalogQueryUtils.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "file", value = VariantQueryParams.FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "filter", value = VariantQueryParams.FILTER_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "qual", value = VariantQueryParams.QUAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "fileData", value = VariantQueryParams.FILE_DATA_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "sample", value = 
VariantQueryParams.SAMPLE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleData", value = VariantQueryParams.SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleAnnotation", value = VariantQueryParams.SAMPLE_ANNOTATION_DESC, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "cohort", value = VariantQueryParams.COHORT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsRef", value = VariantQueryParams.STATS_REF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsAlt", value = VariantQueryParams.STATS_ALT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsMaf", value = VariantQueryParams.STATS_MAF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsMgf", value = VariantQueryParams.STATS_MGF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsPass", value = VariantQueryParams.STATS_PASS_FREQ_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "missingAlleles", value = VariantQueryParams.MISSING_ALLELES_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "missingGenotypes", value = VariantQueryParams.MISSING_GENOTYPES_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "score", value = VariantQueryParams.SCORE_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "family", value = VariantQueryParams.FAMILY_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyDisorder", value = VariantQueryParams.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familySegregation", value = VariantQueryParams.FAMILY_SEGREGATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyMembers", value = VariantQueryParams.FAMILY_MEMBERS_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyProband", value = VariantQueryParams.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), // Annotation filters - @ApiImplicitParam(name = "gene", value = GENE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "ct", value = ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "xref", value = ANNOT_XREF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "biotype", value = ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "proteinSubstitution", value = ANNOT_PROTEIN_SUBSTITUTION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "conservation", value = ANNOT_CONSERVATION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyAlt", value = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyRef", value = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyMaf", value = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "transcriptFlag", value = ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "geneTraitId", value = 
ANNOT_GENE_TRAIT_ID_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "go", value = ANNOT_GO_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "expression", value = ANNOT_EXPRESSION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "proteinKeyword", value = ANNOT_PROTEIN_KEYWORD_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "drug", value = ANNOT_DRUG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "functionalScore", value = ANNOT_FUNCTIONAL_SCORE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinical", value = ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalSignificance", value = ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalConfirmedStatus", value = ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), - @ApiImplicitParam(name = "customAnnotation", value = CUSTOM_ANNOTATION_DESCR, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "panel", value = VariantCatalogQueryUtils.PANEL_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelModeOfInheritance", value = VariantCatalogQueryUtils.PANEL_MOI_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelConfidence", value = VariantCatalogQueryUtils.PANEL_CONFIDENCE_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelRoleInCancer", value = VariantCatalogQueryUtils.PANEL_ROLE_IN_CANCER_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelFeatureType", value = VariantCatalogQueryUtils.PANEL_FEATURE_TYPE_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelIntersection", value = VariantCatalogQueryUtils.PANEL_INTERSECTION_DESC, dataType = "boolean", paramType = "query"), - - @ApiImplicitParam(name = "trait", value = ANNOT_TRAIT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "gene", value = VariantQueryParams.GENE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "ct", value = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "xref", value = VariantQueryParams.ANNOT_XREF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "biotype", value = VariantQueryParams.ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "proteinSubstitution", value = VariantQueryParams.ANNOT_PROTEIN_SUBSTITUTION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "conservation", value = VariantQueryParams.ANNOT_CONSERVATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyAlt", value = VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyRef", value = VariantQueryParams.ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyMaf", value = VariantQueryParams.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "transcriptFlag", value = VariantQueryParams.ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", 
paramType = "query"), + @ApiImplicitParam(name = "geneTraitId", value = VariantQueryParams.ANNOT_GENE_TRAIT_ID_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "go", value = VariantQueryParams.ANNOT_GO_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "expression", value = VariantQueryParams.ANNOT_EXPRESSION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "proteinKeyword", value = VariantQueryParams.ANNOT_PROTEIN_KEYWORD_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "drug", value = VariantQueryParams.ANNOT_DRUG_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "functionalScore", value = VariantQueryParams.ANNOT_FUNCTIONAL_SCORE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinical", value = VariantQueryParams.ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalSignificance", value = VariantQueryParams.ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalConfirmedStatus", value = VariantQueryParams.ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "customAnnotation", value = VariantQueryParams.CUSTOM_ANNOTATION_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "panel", value = VariantQueryParams.PANEL_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelModeOfInheritance", value = VariantQueryParams.PANEL_MOI_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelConfidence", value = VariantQueryParams.PANEL_CONFIDENCE_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelRoleInCancer", value = VariantQueryParams.PANEL_ROLE_IN_CANCER_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelFeatureType", value = VariantQueryParams.PANEL_FEATURE_TYPE_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelIntersection", value = VariantQueryParams.PANEL_INTERSECTION_DESC, dataType = "boolean", paramType = "query"), + + @ApiImplicitParam(name = "source", value = VariantQueryParams.SOURCE_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "trait", value = VariantQueryParams.ANNOT_TRAIT_DESCR, dataType = "string", paramType = "query"), }) public Response variantQuery() { // Get all query options diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java index be6ec91c91f..d56b6d02ed9 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/analysis/VariantWebService.java @@ -94,7 +94,7 @@ import java.nio.file.Paths; import java.util.*; -import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.SAVED_FILTER_DESCR; +import static org.opencb.opencga.core.models.variant.VariantQueryParams.SAVED_FILTER_DESCR; import static org.opencb.opencga.core.api.ParamConstants.JOB_DEPENDS_ON; import static org.opencb.opencga.core.common.JacksonUtils.getUpdateObjectMapper; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; @@ -230,84 +230,86 @@ public Response variantFileDelete( 
@ApiImplicitParam(name = "savedFilter", value = SAVED_FILTER_DESCR, dataType = "string", paramType = "query"), // Variant filters - @ApiImplicitParam(name = "id", value = ID_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "type", value = TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "reference", value = REFERENCE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "alternate", value = ALTERNATE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "id", value = VariantQueryParams.ID_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "type", value = VariantQueryParams.TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "reference", value = VariantQueryParams.REFERENCE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "alternate", value = VariantQueryParams.ALTERNATE_DESCR, dataType = "string", paramType = "query"), // Study filters - @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantCatalogQueryUtils.PROJECT_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "file", value = FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "filter", value = FILTER_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "qual", value = QUAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "fileData", value = FILE_DATA_DESCR, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "sample", value = SAMPLE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "genotype", value = GENOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleData", value = SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleAnnotation", value = VariantCatalogQueryUtils.SAMPLE_ANNOTATION_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleMetadata", value = SAMPLE_METADATA_DESCR, dataType = "boolean", paramType = "query"), - @ApiImplicitParam(name = "unknownGenotype", value = UNKNOWN_GENOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleLimit", value = SAMPLE_LIMIT_DESCR, dataType = "integer", paramType = "query"), - @ApiImplicitParam(name = "sampleSkip", value = SAMPLE_SKIP_DESCR, dataType = "integer", paramType = "query"), - - @ApiImplicitParam(name = "cohort", value = COHORT_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsRef", value = STATS_REF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsAlt", value = STATS_ALT_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsMaf", value = STATS_MAF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsMgf", value = STATS_MGF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsPass", value = STATS_PASS_FREQ_DESCR, dataType = "string", paramType = "query"), - 
@ApiImplicitParam(name = "missingAlleles", value = MISSING_ALLELES_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "missingGenotypes", value = MISSING_GENOTYPES_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "score", value = SCORE_DESCR, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "family", value = VariantCatalogQueryUtils.FAMILY_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyDisorder", value = VariantCatalogQueryUtils.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familySegregation", value = VariantCatalogQueryUtils.FAMILY_SEGREGATION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyMembers", value = VariantCatalogQueryUtils.FAMILY_MEMBERS_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyProband", value = VariantCatalogQueryUtils.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "includeStudy", value = INCLUDE_STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeFile", value = INCLUDE_FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeSample", value = INCLUDE_SAMPLE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeSampleData", value = INCLUDE_SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeGenotype", value = INCLUDE_GENOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeSampleId", value = INCLUDE_SAMPLE_ID_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantQueryParams.PROJECT_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "file", value = VariantQueryParams.FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "filter", value = VariantQueryParams.FILTER_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "qual", value = VariantQueryParams.QUAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "fileData", value = VariantQueryParams.FILE_DATA_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "sample", value = VariantQueryParams.SAMPLE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "genotype", value = VariantQueryParams.GENOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleData", value = VariantQueryParams.SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleAnnotation", value = VariantQueryParams.SAMPLE_ANNOTATION_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleMetadata", value = VariantQueryParams.SAMPLE_METADATA_DESCR, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "unknownGenotype", value = VariantQueryParams.UNKNOWN_GENOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleLimit", value = VariantQueryParams.SAMPLE_LIMIT_DESCR, dataType = "integer", paramType = "query"), + @ApiImplicitParam(name = "sampleSkip", value = VariantQueryParams.SAMPLE_SKIP_DESCR, dataType = 
"integer", paramType = "query"), + + @ApiImplicitParam(name = "cohort", value = VariantQueryParams.COHORT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsRef", value = VariantQueryParams.STATS_REF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsAlt", value = VariantQueryParams.STATS_ALT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsMaf", value = VariantQueryParams.STATS_MAF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsMgf", value = VariantQueryParams.STATS_MGF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsPass", value = VariantQueryParams.STATS_PASS_FREQ_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "missingAlleles", value = VariantQueryParams.MISSING_ALLELES_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "missingGenotypes", value = VariantQueryParams.MISSING_GENOTYPES_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "score", value = VariantQueryParams.SCORE_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "family", value = VariantQueryParams.FAMILY_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyDisorder", value = VariantQueryParams.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familySegregation", value = VariantQueryParams.FAMILY_SEGREGATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyMembers", value = VariantQueryParams.FAMILY_MEMBERS_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyProband", value = VariantQueryParams.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "includeStudy", value = VariantQueryParams.INCLUDE_STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeFile", value = VariantQueryParams.INCLUDE_FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeSample", value = VariantQueryParams.INCLUDE_SAMPLE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeSampleData", value = VariantQueryParams.INCLUDE_SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeGenotype", value = VariantQueryParams.INCLUDE_GENOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeSampleId", value = VariantQueryParams.INCLUDE_SAMPLE_ID_DESCR, dataType = "string", paramType = "query"), // Annotation filters - @ApiImplicitParam(name = "annotationExists", value = ANNOT_EXISTS_DESCR, dataType = "java.lang.Boolean", paramType = "query"), - @ApiImplicitParam(name = "gene", value = GENE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "ct", value = ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "xref", value = ANNOT_XREF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "biotype", value = ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "proteinSubstitution", value = ANNOT_PROTEIN_SUBSTITUTION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "conservation", value = ANNOT_CONSERVATION_DESCR, dataType = "string", paramType = 
"query"), - @ApiImplicitParam(name = "populationFrequencyAlt", value = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyRef", value = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyMaf", value = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "transcriptFlag", value = ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "geneTraitId", value = ANNOT_GENE_TRAIT_ID_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "go", value = ANNOT_GO_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "expression", value = ANNOT_EXPRESSION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "proteinKeyword", value = ANNOT_PROTEIN_KEYWORD_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "drug", value = ANNOT_DRUG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "functionalScore", value = ANNOT_FUNCTIONAL_SCORE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinical", value = ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalSignificance", value = ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalConfirmedStatus", value = ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), - @ApiImplicitParam(name = "customAnnotation", value = CUSTOM_ANNOTATION_DESCR, dataType = "string", paramType = "query"), - - @ApiImplicitParam(name = "panel", value = VariantCatalogQueryUtils.PANEL_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelModeOfInheritance", value = VariantCatalogQueryUtils.PANEL_MOI_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelConfidence", value = VariantCatalogQueryUtils.PANEL_CONFIDENCE_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelRoleInCancer", value = VariantCatalogQueryUtils.PANEL_ROLE_IN_CANCER_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelFeatureType", value = VariantCatalogQueryUtils.PANEL_FEATURE_TYPE_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelIntersection", value = VariantCatalogQueryUtils.PANEL_INTERSECTION_DESC, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "annotationExists", value = VariantQueryParams.ANNOT_EXISTS_DESCR, dataType = "java.lang.Boolean", paramType = "query"), + @ApiImplicitParam(name = "gene", value = VariantQueryParams.GENE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "ct", value = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "xref", value = VariantQueryParams.ANNOT_XREF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "biotype", value = VariantQueryParams.ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "proteinSubstitution", value = VariantQueryParams.ANNOT_PROTEIN_SUBSTITUTION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "conservation", value = 
VariantQueryParams.ANNOT_CONSERVATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyAlt", value = VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyRef", value = VariantQueryParams.ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyMaf", value = VariantQueryParams.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "transcriptFlag", value = VariantQueryParams.ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "geneTraitId", value = VariantQueryParams.ANNOT_GENE_TRAIT_ID_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "go", value = VariantQueryParams.ANNOT_GO_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "expression", value = VariantQueryParams.ANNOT_EXPRESSION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "proteinKeyword", value = VariantQueryParams.ANNOT_PROTEIN_KEYWORD_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "drug", value = VariantQueryParams.ANNOT_DRUG_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "functionalScore", value = VariantQueryParams.ANNOT_FUNCTIONAL_SCORE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinical", value = VariantQueryParams.ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalSignificance", value = VariantQueryParams.ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalConfirmedStatus", value = VariantQueryParams.ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "customAnnotation", value = VariantQueryParams.CUSTOM_ANNOTATION_DESCR, dataType = "string", paramType = "query"), + + @ApiImplicitParam(name = "panel", value = VariantQueryParams.PANEL_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelModeOfInheritance", value = VariantQueryParams.PANEL_MOI_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelConfidence", value = VariantQueryParams.PANEL_CONFIDENCE_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelRoleInCancer", value = VariantQueryParams.PANEL_ROLE_IN_CANCER_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelFeatureType", value = VariantQueryParams.PANEL_FEATURE_TYPE_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelIntersection", value = VariantQueryParams.PANEL_INTERSECTION_DESC, dataType = "boolean", paramType = "query"), + + @ApiImplicitParam(name = "source", value = VariantQueryParams.SOURCE_DESCR, dataType = "string", paramType = "query"), // WARN: Only available in Solr - @ApiImplicitParam(name = "trait", value = ANNOT_TRAIT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "trait", value = VariantQueryParams.ANNOT_TRAIT_DESCR, dataType = "string", paramType = "query"), // // DEPRECATED PARAMS // @ApiImplicitParam(name = "chromosome", value = DEPRECATED + "Use 'region' instead", dataType = "string", paramType = "query"), @@ -415,8 +417,8 @@ public Response export( 
@Path("/annotation/query") @ApiOperation(value = "Query variant annotations from any saved versions", response = VariantAnnotation.class) @ApiImplicitParams({ - @ApiImplicitParam(name = "id", value = ID_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "id", value = VariantQueryParams.ID_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), @ApiImplicitParam(name = QueryOptions.INCLUDE, value = ParamConstants.INCLUDE_DESCRIPTION, example = "name,attributes", dataType = "string", paramType = "query"), @ApiImplicitParam(name = QueryOptions.EXCLUDE, value = ParamConstants.EXCLUDE_DESCRIPTION, example = "id,status", dataType = "string", paramType = "query"), @ApiImplicitParam(name = QueryOptions.LIMIT, value = ParamConstants.LIMIT_DESCRIPTION, dataType = "integer", paramType = "query"), @@ -437,7 +439,7 @@ public Response getAnnotation(@ApiParam(value = "Annotation identifier") @Defaul @Path("/annotation/metadata") @ApiOperation(value = "Read variant annotations metadata from any saved versions") public Response getAnnotationMetadata(@ApiParam(value = "Annotation identifier") @QueryParam("annotationId") String annotationId, - @ApiParam(value = VariantCatalogQueryUtils.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project) { + @ApiParam(value = VariantQueryParams.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project) { return run(() -> variantManager.getAnnotationMetadata(annotationId, project, token)); } @@ -557,7 +559,7 @@ public Response familyGenotypes( public Response samples( @ApiParam(value = "Study where all the samples belong to") @QueryParam(ParamConstants.STUDY_PARAM) String studyStr, @ApiParam(value = "List of samples to check. 
By default, all samples") @QueryParam("sample") String samples, - @ApiParam(value = VariantCatalogQueryUtils.SAMPLE_ANNOTATION_DESC) @QueryParam("sampleAnnotation") String sampleAnnotation, + @ApiParam(value = VariantQueryParams.SAMPLE_ANNOTATION_DESC) @QueryParam("sampleAnnotation") String sampleAnnotation, @ApiParam(value = "Genotypes that the sample must have to be selected") @QueryParam("genotype") @DefaultValue("0/1,1/1") String genotypesStr, @ApiParam(value = "Samples must be present in ALL variants or in ANY variant.") @QueryParam("all") @DefaultValue("false") boolean all ) { @@ -649,32 +651,32 @@ public Response sampleQuery( @ApiImplicitParam(name = "savedFilter", value = SAVED_FILTER_DESCR, dataType = "string", paramType = "query"), // Variant filters - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "type", value = TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "type", value = VariantQueryParams.TYPE_DESCR, dataType = "string", paramType = "query"), // Study filters - @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantCatalogQueryUtils.PROJECT_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "file", value = FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "filter", value = FILTER_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantQueryParams.PROJECT_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "file", value = VariantQueryParams.FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "filter", value = VariantQueryParams.FILTER_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sample", value = SAMPLE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "genotype", value = GENOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleAnnotation", value = VariantCatalogQueryUtils.SAMPLE_ANNOTATION_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sample", value = VariantQueryParams.SAMPLE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "genotype", value = VariantQueryParams.GENOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleAnnotation", value = VariantQueryParams.SAMPLE_ANNOTATION_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "family", value = VariantCatalogQueryUtils.FAMILY_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyDisorder", value = VariantCatalogQueryUtils.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familySegregation", value = VariantCatalogQueryUtils.FAMILY_SEGREGATION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyMembers", value = VariantCatalogQueryUtils.FAMILY_MEMBERS_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "familyProband", 
value = VariantCatalogQueryUtils.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "family", value = VariantQueryParams.FAMILY_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyDisorder", value = VariantQueryParams.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familySegregation", value = VariantQueryParams.FAMILY_SEGREGATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyMembers", value = VariantQueryParams.FAMILY_MEMBERS_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "familyProband", value = VariantQueryParams.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), // Annotation filters - @ApiImplicitParam(name = "ct", value = ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "biotype", value = ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyAlt", value = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinical", value = ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalSignificance", value = ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalConfirmedStatus", value = ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query") + @ApiImplicitParam(name = "ct", value = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "biotype", value = VariantQueryParams.ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyAlt", value = VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinical", value = VariantQueryParams.ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalSignificance", value = VariantQueryParams.ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalConfirmedStatus", value = VariantQueryParams.ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query") }) public Response sampleAggregationStats(@ApiParam(value = "List of facet fields separated by semicolons, e.g.: studies;type." 
@@ -714,24 +716,24 @@ public Response sampleStatsRun( @Path("/sample/stats/query") @ApiImplicitParams({ // Variant filters - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "type", value = TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "type", value = VariantQueryParams.TYPE_DESCR, dataType = "string", paramType = "query"), // Study filters - @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "file", value = FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "filter", value = FILTER_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "file", value = VariantQueryParams.FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "filter", value = VariantQueryParams.FILTER_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sampleData", value = SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sampleData", value = VariantQueryParams.SAMPLE_DATA_DESCR, dataType = "string", paramType = "query"), // Annotation filters - @ApiImplicitParam(name = "ct", value = ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "biotype", value = ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "transcriptFlag", value = ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyAlt", value = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinical", value = ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalSignificance", value = ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalConfirmedStatus", value = ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "ct", value = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "biotype", value = VariantQueryParams.ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "transcriptFlag", value = VariantQueryParams.ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyAlt", value = VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinical", value = VariantQueryParams.ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalSignificance", value = VariantQueryParams.ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalConfirmedStatus", value = VariantQueryParams.ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), }) @ApiOperation(value = "Obtain sample variant stats from a sample.", response = 
SampleVariantStats.class) public Response sampleStatsQuery(@ApiParam(value = ParamConstants.STUDY_DESCRIPTION) @QueryParam(ParamConstants.STUDY_PARAM) String studyStr, @@ -827,12 +829,12 @@ public Response cohortStatsDelete(@ApiParam(value = ParamConstants.STUDY_PARAM) @ApiImplicitParam(name = "savedFilter", value = SAVED_FILTER_DESCR, dataType = "string", paramType = "query"), // Variant filters - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "type", value = TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "type", value = VariantQueryParams.TYPE_DESCR, dataType = "string", paramType = "query"), // Study filters - @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantCatalogQueryUtils.PROJECT_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantQueryParams.PROJECT_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), // @ApiImplicitParam(name = "file", value = FILE_DESCR, dataType = "string", paramType = "query"), // @ApiImplicitParam(name = "filter", value = FILTER_DESCR, dataType = "string", paramType = "query"), @@ -842,15 +844,15 @@ public Response cohortStatsDelete(@ApiParam(value = ParamConstants.STUDY_PARAM) // @ApiImplicitParam(name = "samplesMetadata", value = SAMPLE_METADATA_DESCR, dataType = "boolean", paramType = "query"), // @ApiImplicitParam(name = "unknownGenotype", value = UNKNOWN_GENOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohort", value = COHORT_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsRef", value = STATS_REF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsAlt", value = STATS_ALT_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsMaf", value = STATS_MAF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsMgf", value = STATS_MGF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "cohortStatsPass", value = STATS_PASS_FREQ_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "missingAlleles", value = MISSING_ALLELES_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "missingGenotypes", value = MISSING_GENOTYPES_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "score", value = SCORE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohort", value = VariantQueryParams.COHORT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsRef", value = VariantQueryParams.STATS_REF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsAlt", value = VariantQueryParams.STATS_ALT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsMaf", value = VariantQueryParams.STATS_MAF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsMgf", value = 
VariantQueryParams.STATS_MGF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "cohortStatsPass", value = VariantQueryParams.STATS_PASS_FREQ_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "missingAlleles", value = VariantQueryParams.MISSING_ALLELES_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "missingGenotypes", value = VariantQueryParams.MISSING_GENOTYPES_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "score", value = VariantQueryParams.SCORE_DESCR, dataType = "string", paramType = "query"), // @ApiImplicitParam(name = "family", value = VariantCatalogQueryUtils.FAMILY_DESC, dataType = "string", paramType = "query"), // @ApiImplicitParam(name = "familyDisorder", value = VariantCatalogQueryUtils.FAMILY_DISORDER_DESC, dataType = "string", paramType = "query"), @@ -859,30 +861,30 @@ public Response cohortStatsDelete(@ApiParam(value = ParamConstants.STUDY_PARAM) // @ApiImplicitParam(name = "familyProband", value = VariantCatalogQueryUtils.FAMILY_PROBAND_DESC, dataType = "string", paramType = "query"), // Annotation filters - @ApiImplicitParam(name = "annotationExists", value = ANNOT_EXISTS_DESCR, dataType = "java.lang.Boolean", paramType = "query"), - @ApiImplicitParam(name = "gene", value = GENE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "ct", value = ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "xref", value = ANNOT_XREF_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "biotype", value = ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "proteinSubstitution", value = ANNOT_PROTEIN_SUBSTITUTION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "conservation", value = ANNOT_CONSERVATION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyAlt", value = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyRef", value = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "populationFrequencyMaf", value = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "transcriptFlag", value = ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "geneTraitId", value = ANNOT_GENE_TRAIT_ID_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "go", value = ANNOT_GO_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "expression", value = ANNOT_EXPRESSION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "proteinKeyword", value = ANNOT_PROTEIN_KEYWORD_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "drug", value = ANNOT_DRUG_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "functionalScore", value = ANNOT_FUNCTIONAL_SCORE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinical", value = ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalSignificance", value = ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "clinicalConfirmedStatus", value = 
ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), - @ApiImplicitParam(name = "customAnnotation", value = CUSTOM_ANNOTATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "annotationExists", value = VariantQueryParams.ANNOT_EXISTS_DESCR, dataType = "java.lang.Boolean", paramType = "query"), + @ApiImplicitParam(name = "gene", value = VariantQueryParams.GENE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "ct", value = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "xref", value = VariantQueryParams.ANNOT_XREF_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "biotype", value = VariantQueryParams.ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "proteinSubstitution", value = VariantQueryParams.ANNOT_PROTEIN_SUBSTITUTION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "conservation", value = VariantQueryParams.ANNOT_CONSERVATION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyAlt", value = VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyRef", value = VariantQueryParams.ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "populationFrequencyMaf", value = VariantQueryParams.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "transcriptFlag", value = VariantQueryParams.ANNOT_TRANSCRIPT_FLAG_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "geneTraitId", value = VariantQueryParams.ANNOT_GENE_TRAIT_ID_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "go", value = VariantQueryParams.ANNOT_GO_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "expression", value = VariantQueryParams.ANNOT_EXPRESSION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "proteinKeyword", value = VariantQueryParams.ANNOT_PROTEIN_KEYWORD_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "drug", value = VariantQueryParams.ANNOT_DRUG_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "functionalScore", value = VariantQueryParams.ANNOT_FUNCTIONAL_SCORE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinical", value = VariantQueryParams.ANNOT_CLINICAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalSignificance", value = VariantQueryParams.ANNOT_CLINICAL_SIGNIFICANCE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "clinicalConfirmedStatus", value = VariantQueryParams.ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "customAnnotation", value = VariantQueryParams.CUSTOM_ANNOTATION_DESCR, dataType = "string", paramType = "query"), // WARN: Only available in Solr - @ApiImplicitParam(name = "trait", value = ANNOT_TRAIT_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "trait", value = VariantQueryParams.ANNOT_TRAIT_DESCR, dataType = "string", paramType = "query"), }) public Response getAggregationStats(@ApiParam(value = "List of facet fields separated by 
semicolons, e.g.: studies;type. For nested faceted fields use >>, e.g.: chromosome>>type;percentile(gerp)") @QueryParam(ParamConstants.FIELD_PARAM) String field) { return run(() -> { @@ -904,13 +906,13 @@ public Response cohortStatsDelete(@ApiParam(value = ParamConstants.STUDY_PARAM) @Path("/metadata") @ApiOperation(value = "", response = VariantMetadata.class) @ApiImplicitParams({ - @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantCatalogQueryUtils.PROJECT_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "file", value = FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "sample", value = SAMPLE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeStudy", value = INCLUDE_STUDY_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeFile", value = INCLUDE_FILE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "includeSample", value = INCLUDE_SAMPLE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.PROJECT_PARAM, value = VariantQueryParams.PROJECT_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = ParamConstants.STUDY_PARAM, value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "file", value = VariantQueryParams.FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "sample", value = VariantQueryParams.SAMPLE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeStudy", value = VariantQueryParams.INCLUDE_STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeFile", value = VariantQueryParams.INCLUDE_FILE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "includeSample", value = VariantQueryParams.INCLUDE_SAMPLE_DESCR, dataType = "string", paramType = "query"), @ApiImplicitParam(name = QueryOptions.INCLUDE, value = ParamConstants.INCLUDE_DESCRIPTION, example = "name,attributes", dataType = "string", paramType = "query"), @ApiImplicitParam(name = QueryOptions.EXCLUDE, value = ParamConstants.EXCLUDE_DESCRIPTION, example = "id,status", dataType = "string", paramType = "query"), }) @@ -972,22 +974,22 @@ public Response mutationalSignatureRun( @Path("/mutationalSignature/query") @ApiOperation(value = MutationalSignatureAnalysis.DESCRIPTION + " Use context index.", response = Signature.class) @ApiImplicitParams({ - @ApiImplicitParam(name = "study", value = STUDY_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "study", value = VariantQueryParams.STUDY_DESCR, dataType = "string", paramType = "query"), @ApiImplicitParam(name = "sample", value = "Sample name", dataType = "string", paramType = "query"), @ApiImplicitParam(name = "type", value = "Variant type. 
Valid values: SNV, SV", dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "ct", value = ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "biotype", value = ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "fileData", value = FILE_DATA_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "filter", value = FILTER_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "qual", value = QUAL_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "region", value = REGION_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "gene", value = GENE_DESCR, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panel", value = VariantCatalogQueryUtils.PANEL_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelModeOfInheritance", value = VariantCatalogQueryUtils.PANEL_MOI_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelConfidence", value = VariantCatalogQueryUtils.PANEL_CONFIDENCE_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelFeatureType", value = VariantCatalogQueryUtils.PANEL_FEATURE_TYPE_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelRoleInCancer", value = VariantCatalogQueryUtils.PANEL_ROLE_IN_CANCER_DESC, dataType = "string", paramType = "query"), - @ApiImplicitParam(name = "panelIntersection", value = VariantCatalogQueryUtils.PANEL_INTERSECTION_DESC, dataType = "boolean", paramType = "query"), + @ApiImplicitParam(name = "ct", value = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "biotype", value = VariantQueryParams.ANNOT_BIOTYPE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "fileData", value = VariantQueryParams.FILE_DATA_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "filter", value = VariantQueryParams.FILTER_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "qual", value = VariantQueryParams.QUAL_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "region", value = VariantQueryParams.REGION_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "gene", value = VariantQueryParams.GENE_DESCR, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panel", value = VariantQueryParams.PANEL_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelModeOfInheritance", value = VariantQueryParams.PANEL_MOI_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelConfidence", value = VariantQueryParams.PANEL_CONFIDENCE_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelFeatureType", value = VariantQueryParams.PANEL_FEATURE_TYPE_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelRoleInCancer", value = VariantQueryParams.PANEL_ROLE_IN_CANCER_DESC, dataType = "string", paramType = "query"), + @ApiImplicitParam(name = "panelIntersection", value = VariantQueryParams.PANEL_INTERSECTION_DESC, dataType = "boolean", paramType = "query"), }) public Response mutationalSignatureQuery( @ApiParam(value = FieldConstants.MUTATIONAL_SIGNATURE_ID_DESCRIPTION) @QueryParam("msId") String msId, diff --git 
a/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java
index a2143f45090..a5dd2d39ead 100644
--- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java
+++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java
@@ -21,7 +21,6 @@
 import org.opencb.commons.datastore.core.DataResult;
 import org.opencb.commons.datastore.core.ObjectMap;
 import org.opencb.opencga.analysis.variant.julie.JulieTool;
-import org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils;
 import org.opencb.opencga.analysis.variant.operations.*;
 import org.opencb.opencga.core.api.ParamConstants;
 import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
@@ -241,7 +240,7 @@ public Response secondaryIndex(
 @ApiParam(value = ParamConstants.JOB_SCHEDULED_START_TIME_DESCRIPTION) @QueryParam(ParamConstants.JOB_SCHEDULED_START_TIME) String scheduledStartTime,
 @ApiParam(value = ParamConstants.JOB_PRIORITY_DESCRIPTION) @QueryParam(ParamConstants.SUBMIT_JOB_PRIORITY_PARAM) String jobPriority,
 @ApiParam(value = ParamConstants.JOB_DRY_RUN_DESCRIPTION) @QueryParam(ParamConstants.JOB_DRY_RUN) Boolean dryRun,
- @ApiParam(value = VariantCatalogQueryUtils.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
+ @ApiParam(value = VariantQueryParams.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
 @ApiParam(value = ParamConstants.STUDY_DESCRIPTION) @QueryParam(ParamConstants.STUDY_PARAM) String study,
 @ApiParam(value = VariantSecondaryAnnotationIndexParams.DESCRIPTION) VariantSecondaryAnnotationIndexParams params) {
 return variantSecondaryAnnotationIndex(jobName, jobDescription, dependsOn, jobTags, scheduledStartTime, jobPriority, dryRun, project, study, params);
@@ -258,7 +257,7 @@ public Response variantSecondaryAnnotationIndex(
 @ApiParam(value = ParamConstants.JOB_SCHEDULED_START_TIME_DESCRIPTION) @QueryParam(ParamConstants.JOB_SCHEDULED_START_TIME) String scheduledStartTime,
 @ApiParam(value = ParamConstants.JOB_PRIORITY_DESCRIPTION) @QueryParam(ParamConstants.SUBMIT_JOB_PRIORITY_PARAM) String jobPriority,
 @ApiParam(value = ParamConstants.JOB_DRY_RUN_DESCRIPTION) @QueryParam(ParamConstants.JOB_DRY_RUN) Boolean dryRun,
- @ApiParam(value = VariantCatalogQueryUtils.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
+ @ApiParam(value = VariantQueryParams.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
 @ApiParam(value = ParamConstants.STUDY_DESCRIPTION) @QueryParam(ParamConstants.STUDY_PARAM) String study,
 @ApiParam(value = VariantSecondaryAnnotationIndexParams.DESCRIPTION) VariantSecondaryAnnotationIndexParams params) {
 return submitOperation(VariantSecondaryAnnotationIndexOperationTool.ID, project, study, params, jobName, jobDescription, dependsOn, jobTags, scheduledStartTime, jobPriority, dryRun);
@@ -311,7 +310,7 @@ public Response annotationDelete(
 @ApiParam(value = ParamConstants.JOB_SCHEDULED_START_TIME_DESCRIPTION) @QueryParam(ParamConstants.JOB_SCHEDULED_START_TIME) String scheduledStartTime,
 @ApiParam(value = ParamConstants.JOB_PRIORITY_DESCRIPTION) @QueryParam(ParamConstants.SUBMIT_JOB_PRIORITY_PARAM) String jobPriority,
 @ApiParam(value = ParamConstants.JOB_DRY_RUN_DESCRIPTION) @QueryParam(ParamConstants.JOB_DRY_RUN) Boolean dryRun,
- @ApiParam(value = VariantCatalogQueryUtils.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
+ @ApiParam(value = VariantQueryParams.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
 @ApiParam(value = "Annotation identifier") @QueryParam("annotationId") String annotationId
 ) {
 Map params = new HashMap<>();
@@ -332,7 +331,7 @@ public Response annotationSave(
 @ApiParam(value = ParamConstants.JOB_SCHEDULED_START_TIME_DESCRIPTION) @QueryParam(ParamConstants.JOB_SCHEDULED_START_TIME) String scheduledStartTime,
 @ApiParam(value = ParamConstants.JOB_PRIORITY_DESCRIPTION) @QueryParam(ParamConstants.SUBMIT_JOB_PRIORITY_PARAM) String jobPriority,
 @ApiParam(value = ParamConstants.JOB_DRY_RUN_DESCRIPTION) @QueryParam(ParamConstants.JOB_DRY_RUN) Boolean dryRun,
- @ApiParam(value = VariantCatalogQueryUtils.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
+ @ApiParam(value = VariantQueryParams.PROJECT_DESC) @QueryParam(ParamConstants.PROJECT_PARAM) String project,
 @ApiParam(value = VariantAnnotationSaveParams.DESCRIPTION) VariantAnnotationSaveParams params) {
 return submitOperationToProject(VariantAnnotationSaveOperationTool.ID, project, params, jobName, jobDescription, dependsOn, jobTags, scheduledStartTime, jobPriority, dryRun);
diff --git a/opencga-storage/opencga-storage-app/src/main/java/org/opencb/opencga/storage/app/cli/client/options/StorageVariantCommandOptions.java b/opencga-storage/opencga-storage-app/src/main/java/org/opencb/opencga/storage/app/cli/client/options/StorageVariantCommandOptions.java
index 398b7f775cd..b4f12c8b5fb 100644
--- a/opencga-storage/opencga-storage-app/src/main/java/org/opencb/opencga/storage/app/cli/client/options/StorageVariantCommandOptions.java
+++ b/opencga-storage/opencga-storage-app/src/main/java/org/opencb/opencga/storage/app/cli/client/options/StorageVariantCommandOptions.java
@@ -20,6 +20,7 @@
 import com.beust.jcommander.converters.CommaParameterSplitter;
 import org.opencb.biodata.models.variant.metadata.Aggregation;
 import org.opencb.opencga.core.models.operations.variant.VariantScoreIndexParams;
+import org.opencb.opencga.core.models.variant.VariantQueryParams;
 import org.opencb.opencga.storage.app.cli.GeneralCliOptions;
 import org.opencb.opencga.storage.core.variant.annotation.VariantAnnotationManager;
 import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotatorFactory;
@@ -31,7 +32,6 @@
 import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.GenericAnnotationCommandOptions.ANNOTATION_ID_DESCRIPTION;
 import static org.opencb.opencga.storage.app.cli.client.options.StorageVariantCommandOptions.GenericAnnotationCommandOptions.ANNOTATION_ID_PARAM;
-import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*;
 /**
 * Created by imedina on 22/01/17.
@@ -249,31 +249,31 @@ public class VariantDeleteCommandOptions extends GenericVariantDeleteOptions { */ public static class BasicVariantQueryOptions { - @Parameter(names = {"--id"}, description = ID_DESCR, variableArity = true) + @Parameter(names = {"--id"}, description = VariantQueryParams.ID_DESCR, variableArity = true) public List id; - @Parameter(names = {"-r", "--region"}, description = REGION_DESCR) + @Parameter(names = {"-r", "--region"}, description = VariantQueryParams.REGION_DESCR) public String region; @Parameter(names = {"--region-file"}, description = "GFF File with regions") public String regionFile; - @Parameter(names = {"-g", "--gene"}, description = GENE_DESCR) + @Parameter(names = {"-g", "--gene"}, description = VariantQueryParams.GENE_DESCR) public String gene; - @Parameter(names = {"-t", "--type"}, description = TYPE_DESCR) + @Parameter(names = {"-t", "--type"}, description = VariantQueryParams.TYPE_DESCR) public String type; - @Parameter(names = {"--ct", "--consequence-type"}, description = ANNOT_CONSEQUENCE_TYPE_DESCR) + @Parameter(names = {"--ct", "--consequence-type"}, description = VariantQueryParams.ANNOT_CONSEQUENCE_TYPE_DESCR) public String consequenceType; - @Parameter(names = {"-c", "--conservation"}, description = ANNOT_CONSERVATION_DESCR) + @Parameter(names = {"-c", "--conservation"}, description = VariantQueryParams.ANNOT_CONSERVATION_DESCR) public String conservation; - @Parameter(names = {"--ps", "--protein-substitution"}, description = ANNOT_PROTEIN_SUBSTITUTION_DESCR) + @Parameter(names = {"--ps", "--protein-substitution"}, description = VariantQueryParams.ANNOT_PROTEIN_SUBSTITUTION_DESCR) public String proteinSubstitution; - @Parameter(names = {"--fs", "--functional-score"}, description = ANNOT_FUNCTIONAL_SCORE_DESCR) + @Parameter(names = {"--fs", "--functional-score"}, description = VariantQueryParams.ANNOT_FUNCTIONAL_SCORE_DESCR) public String functionalScore; @Deprecated @@ -288,16 +288,16 @@ void setDeprecatedPopulationFreqAlternate(String populationFreqAlt) { this.populationFreqAlt = populationFreqAlt; } - @Parameter(names = {"--pf", "--population-frequency-alt"}, description = ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR) + @Parameter(names = {"--pf", "--population-frequency-alt"}, description = VariantQueryParams.ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR) public String populationFreqAlt; - @Parameter(names = {"--cohort-stats-ref"}, description = STATS_REF_DESCR) + @Parameter(names = {"--cohort-stats-ref"}, description = VariantQueryParams.STATS_REF_DESCR) public String rf; - @Parameter(names = {"--cohort-stats-alt"}, description = STATS_ALT_DESCR) + @Parameter(names = {"--cohort-stats-alt"}, description = VariantQueryParams.STATS_ALT_DESCR) public String af; - @Parameter(names = {"--maf", "--cohort-stats-maf"}, description = STATS_MAF_DESCR) + @Parameter(names = {"--maf", "--cohort-stats-maf"}, description = VariantQueryParams.STATS_MAF_DESCR) public String maf; } @@ -310,13 +310,13 @@ public static class GenericVariantQueryOptions extends BasicVariantQueryOptions // @Parameter(names = {"-s", "--study"}, description = "A comma separated list of studies to be used as filter") // public String study; - @Parameter(names = {"--gt", "--genotype"}, description = GENOTYPE_DESCR) + @Parameter(names = {"--gt", "--genotype"}, description = VariantQueryParams.GENOTYPE_DESCR) public String sampleGenotype; - @Parameter(names = {"--sample"}, description = SAMPLE_DESCR) + @Parameter(names = {"--sample"}, description = VariantQueryParams.SAMPLE_DESCR) public 
String samples; - @Parameter(names = {"--sample-data"}, description = SAMPLE_DATA_DESCR) + @Parameter(names = {"--sample-data"}, description = VariantQueryParams.SAMPLE_DATA_DESCR) public String sampleData; @Parameter(names = {"--format"}, hidden = true) @@ -324,10 +324,10 @@ public void setFormat(String format) { sampleData = format; } - @Parameter(names = {"-f", "--file"}, description = FILE_DESCR) + @Parameter(names = {"-f", "--file"}, description = VariantQueryParams.FILE_DESCR) public String file; - @Parameter(names = {"--file-data"}, description = FILE_DATA_DESCR) + @Parameter(names = {"--file-data"}, description = VariantQueryParams.FILE_DATA_DESCR) public String fileData; @Parameter(names = {"--info"}, hidden = true) @@ -335,16 +335,16 @@ public void setInfo(String info) { fileData = info; } - @Parameter(names = {"--filter"}, description = FILTER_DESCR) + @Parameter(names = {"--filter"}, description = VariantQueryParams.FILTER_DESCR) public String filter; - @Parameter(names = {"--qual"}, description = QUAL_DESCR) + @Parameter(names = {"--qual"}, description = VariantQueryParams.QUAL_DESCR) public String qual; - @Parameter(names = {"--score"}, description = SCORE_DESCR) + @Parameter(names = {"--score"}, description = VariantQueryParams.SCORE_DESCR) public String score; - @Parameter(names = {"--biotype"}, description = ANNOT_BIOTYPE_DESCR) + @Parameter(names = {"--biotype"}, description = VariantQueryParams.ANNOT_BIOTYPE_DESCR) public String geneBiotype; @Parameter(names = {"--population-maf"}, hidden = true, description = DEPRECATED + "use --pmaf or --population-frequency-maf") @@ -352,32 +352,32 @@ void setDeprecatedPopulationFreqMaf(String populationFreqMaf) { this.populationFreqMaf = populationFreqMaf; } - @Parameter(names = {"--pmaf", "--population-frequency-maf"}, description = ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR) + @Parameter(names = {"--pmaf", "--population-frequency-maf"}, description = VariantQueryParams.ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR) public String populationFreqMaf; - @Parameter(names = {"--population-frequency-ref"}, description = ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR) + @Parameter(names = {"--population-frequency-ref"}, description = VariantQueryParams.ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR) public String populationFreqRef; - @Parameter(names = {"--transcript-flag"}, description = ANNOT_TRANSCRIPT_FLAG_DESCR) + @Parameter(names = {"--transcript-flag"}, description = VariantQueryParams.ANNOT_TRANSCRIPT_FLAG_DESCR) public String flags; - @Parameter(names = {"--gene-trait-id"}, description = ANNOT_GENE_TRAIT_ID_DESCR) + @Parameter(names = {"--gene-trait-id"}, description = VariantQueryParams.ANNOT_GENE_TRAIT_ID_DESCR) public String geneTraitId; @Deprecated @Parameter(names = {"--gene-trait-name"}, hidden = true, description = DEPRECATED + "use --trait") public String geneTraitName; - @Parameter(names = {"--go", "--gene-ontology"}, description = ANNOT_GO_DESCR) + @Parameter(names = {"--go", "--gene-ontology"}, description = VariantQueryParams.ANNOT_GO_DESCR) public String go; - @Parameter(names = {"--expression"}, description = ANNOT_EXPRESSION_DESCR) + @Parameter(names = {"--expression"}, description = VariantQueryParams.ANNOT_EXPRESSION_DESCR) public String expression; - @Parameter(names = {"--protein-keywords"}, description = ANNOT_PROTEIN_KEYWORD_DESCR) + @Parameter(names = {"--protein-keywords"}, description = VariantQueryParams.ANNOT_PROTEIN_KEYWORD_DESCR) public String proteinKeywords; - @Parameter(names = {"--drug"}, description = 
ANNOT_DRUG_DESCR) + @Parameter(names = {"--drug"}, description = VariantQueryParams.ANNOT_DRUG_DESCR) public String drugs; @Deprecated @@ -392,23 +392,23 @@ void setClinvar(String clinvar) { setXref(clinvar); } - @Parameter(names = {"--trait"}, description = ANNOT_TRAIT_DESCR) + @Parameter(names = {"--trait"}, description = VariantQueryParams.ANNOT_TRAIT_DESCR) void setTrait(String trait) { this.trait = this.trait == null ? trait : this.trait + ',' + trait; } public String trait; - @Parameter(names = {"--mgf", "--cohort-stats-mgf"}, description = STATS_MGF_DESCR) + @Parameter(names = {"--mgf", "--cohort-stats-mgf"}, description = VariantQueryParams.STATS_MGF_DESCR) public String mgf; - @Parameter(names = {"--cohort-stats-pass"}, description = STATS_PASS_FREQ_DESCR) + @Parameter(names = {"--cohort-stats-pass"}, description = VariantQueryParams.STATS_PASS_FREQ_DESCR) public String cohortStatsPass; - @Parameter(names = {"--stats-missing-allele"}, description = MISSING_ALLELES_DESCR) + @Parameter(names = {"--stats-missing-allele"}, description = VariantQueryParams.MISSING_ALLELES_DESCR) public String missingAlleleCount; - @Parameter(names = {"--stats-missing-genotype"}, description = MISSING_GENOTYPES_DESCR) + @Parameter(names = {"--stats-missing-genotype"}, description = VariantQueryParams.MISSING_GENOTYPES_DESCR) public String missingGenotypeCount; // @Parameter(names = {"--dominant"}, description = "[PENDING] Take a family in the form of: FATHER,MOTHER,CHILD and specifies if is" + @@ -427,7 +427,7 @@ void setTrait(String trait) { // public String compoundHeterozygous; - @Parameter(names = {"--include-study"}, description = INCLUDE_STUDY_DESCR) + @Parameter(names = {"--include-study"}, description = VariantQueryParams.INCLUDE_STUDY_DESCR) public String includeStudy; @Deprecated @@ -436,7 +436,7 @@ void setOutputStudy(String outputStudy) { includeStudy = outputStudy; } - @Parameter(names = {"--include-file"}, description = INCLUDE_FILE_DESCR) + @Parameter(names = {"--include-file"}, description = VariantQueryParams.INCLUDE_FILE_DESCR) public String includeFile; @Deprecated @@ -445,7 +445,7 @@ void setOutputFile(String outputFile) { includeFile = outputFile; } - @Parameter(names = {"--include-sample"}, description = INCLUDE_SAMPLE_DESCR) + @Parameter(names = {"--include-sample"}, description = VariantQueryParams.INCLUDE_SAMPLE_DESCR) public String includeSample; @Deprecated @@ -454,7 +454,7 @@ void setOutputSample(String outputSample) { includeSample = outputSample; } - @Parameter(names = {"--include-sample-data"}, description = INCLUDE_SAMPLE_DATA_DESCR) + @Parameter(names = {"--include-sample-data"}, description = VariantQueryParams.INCLUDE_SAMPLE_DATA_DESCR) public String includeSampleData; @Parameter(names = {"--include-format"}, hidden = true) @@ -462,10 +462,10 @@ public void setIncludeFormat(String includeFormat) { includeSampleData = includeFormat; } - @Parameter(names = {"--include-genotype"}, description = INCLUDE_GENOTYPE_DESCR) + @Parameter(names = {"--include-genotype"}, description = VariantQueryParams.INCLUDE_GENOTYPE_DESCR) public boolean includeGenotype; - @Parameter(names = {"--include-sample-id"}, description = INCLUDE_SAMPLE_ID_DESCR) + @Parameter(names = {"--include-sample-id"}, description = VariantQueryParams.INCLUDE_SAMPLE_ID_DESCR) public boolean includeSampleId; @Parameter(names = {"--annotations", "--output-vcf-info"}, description = "Set variant annotation to return in the INFO column. 
" + @@ -478,7 +478,7 @@ void setOutputUnknownGenotype(String outputUnknownGenotype) { this.unknownGenotype = outputUnknownGenotype; } - @Parameter(names = {"--unknown-genotype"}, description = UNKNOWN_GENOTYPE_DESCR) + @Parameter(names = {"--unknown-genotype"}, description = VariantQueryParams.UNKNOWN_GENOTYPE_DESCR) public String unknownGenotype = "./."; @Deprecated @@ -493,29 +493,29 @@ void setAnnotXref(String annotXref) { setXref(annotXref); } - @Parameter(names = {"--xref"}, description = ANNOT_XREF_DESCR) + @Parameter(names = {"--xref"}, description = VariantQueryParams.ANNOT_XREF_DESCR) void setXref(String xref) { this.xref = this.xref == null ? xref : this.xref + ',' + xref; } public String xref; - @Parameter(names = {"--clinical"}, description = ANNOT_CLINICAL_DESCR) + @Parameter(names = {"--clinical"}, description = VariantQueryParams.ANNOT_CLINICAL_DESCR) public String clinical; - @Parameter(names = {"--clinical-significance"}, description = ANNOT_CLINICAL_SIGNIFICANCE_DESCR) + @Parameter(names = {"--clinical-significance"}, description = VariantQueryParams.ANNOT_CLINICAL_SIGNIFICANCE_DESCR) public String clinicalSignificance; - @Parameter(names = {"--clinical-confirmed-status"}, description = ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR) + @Parameter(names = {"--clinical-confirmed-status"}, description = VariantQueryParams.ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR) public boolean clinicalConfirmedStatus; - @Parameter(names = {"--sample-metadata"}, description = SAMPLE_METADATA_DESCR) + @Parameter(names = {"--sample-metadata"}, description = VariantQueryParams.SAMPLE_METADATA_DESCR) public boolean samplesMetadata; - @Parameter(names = {"--sample-limit"}, description = SAMPLE_LIMIT_DESCR) + @Parameter(names = {"--sample-limit"}, description = VariantQueryParams.SAMPLE_LIMIT_DESCR) public int sampleLimit; - @Parameter(names = {"--sample-skip"}, description = SAMPLE_SKIP_DESCR) + @Parameter(names = {"--sample-skip"}, description = VariantQueryParams.SAMPLE_SKIP_DESCR) public int sampleSkip; @Parameter(names = {"--summary"}, description = "Fast fetch of main variant parameters") @@ -534,7 +534,7 @@ public class VariantQueryCommandOptions extends GenericVariantQueryOptions { @ParametersDelegate public GeneralCliOptions.QueryCommandOptions commonQueryOptions = queryCommandOptions; - @Parameter(names = {"-s", "--study"}, description = STUDY_DESCR) + @Parameter(names = {"-s", "--study"}, description = VariantQueryParams.STUDY_DESCR) public String study; @Parameter(names = {"--of", "--output-format"}, description = "Output format: vcf, vcf.gz, json or json.gz", arity = 1) @@ -665,10 +665,10 @@ public static class GenericAnnotationQueryCommandOptions { @Parameter(names = ANNOTATION_ID_PARAM, description = ANNOTATION_ID_DESCRIPTION) public String annotationId = VariantAnnotationManager.CURRENT; - @Parameter(names = {"--id"}, description = ID_DESCR, variableArity = true) + @Parameter(names = {"--id"}, description = VariantQueryParams.ID_DESCR, variableArity = true) public List id; - @Parameter(names = {"-r", "--region"}, description = REGION_DESCR) + @Parameter(names = {"-r", "--region"}, description = VariantQueryParams.REGION_DESCR) public String region; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index dd24ee1334d..e9081225b82 100644 --- 
a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1348,7 +1348,7 @@ public VariantQueryExecutor getVariantQueryExecutor(Query query, QueryOptions op public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQuery) { try { for (VariantQueryExecutor executor : getVariantQueryExecutors()) { - if (executor.canUseThisExecutor(variantQuery.getQuery(), variantQuery.getInputOptions())) { + if (executor.canUseThisExecutor(variantQuery)) { logger.info("Using VariantQueryExecutor : " + executor.getClass().getName()); logger.info(" Query : " + VariantQueryUtils.printQuery(variantQuery.getInputQuery())); logger.info(" Options : " + variantQuery.getInputOptions().toJson()); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java index 09b1867854e..72dcc35fea5 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java @@ -6,6 +6,7 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.storage.core.variant.query.Values; +import org.opencb.opencga.storage.core.variant.query.VariantQuerySource; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import java.util.Arrays; @@ -649,6 +650,20 @@ public Integer release() { return getInt(VariantQueryParam.RELEASE.key()); } + public VariantQuery source(String value) { + put(VariantQueryParam.SOURCE.key(), value); + return this; + } + + public VariantQuery source(VariantQuerySource value) { + put(VariantQueryParam.SOURCE.key(), value); + return this; + } + + public String source() { + return getString(VariantQueryParam.SOURCE.key()); + } + @Override public VariantQuery append(String key, Object value) { return (VariantQuery) super.append(key, value); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java index 1973d7f383d..6abbbf35435 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java @@ -17,12 +17,11 @@ package org.opencb.opencga.storage.core.variant.adaptors; import org.opencb.commons.datastore.core.QueryParam; -import org.opencb.opencga.core.api.ParamConstants; import java.util.*; import static org.opencb.commons.datastore.core.QueryParam.Type.*; -import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.*; +import static org.opencb.opencga.core.models.variant.VariantQueryParams.*; /** * Created on 30/03/17. 
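As a quick orientation for the new fluent source() accessors added to VariantQuery above, the sketch below shows how a caller might build a query pinned to the secondary sample index. This is an illustrative fragment only, not part of the diff itself: the study and sample names are hypothetical, and it simply chains the fluent setters that VariantQuery already exposes.

    import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
    import org.opencb.opencga.storage.core.variant.query.VariantQuerySource;

    public class VariantQuerySourceExample {
        // Build a query that must be answered from the secondary sample index only.
        public static VariantQuery secondarySampleIndexQuery() {
            return new VariantQuery()
                    .study("study_1")       // hypothetical study id
                    .sample("NA12877")      // hypothetical sample name
                    .includeGenotype(true)
                    .source(VariantQuerySource.SECONDARY_SAMPLE_INDEX);
        }
    }

The string form query.source("secondary_sample_index") should behave the same, since the VariantQuerySource.get() parser introduced in this series ignores case, underscores and hyphens.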
@@ -37,339 +36,97 @@ public final class VariantQueryParam implements QueryParam { private static final List VALUES = new ArrayList<>(); private static final Map VALUES_MAP = new HashMap<>(); - private static final String ACCEPTS_ALL_NONE = "Accepts '" + ALL + "' and '" + NONE + "'."; - private static final String ACCEPTS_AND_OR = "Accepts AND (" + AND + ") and OR (" + OR + ") operators."; - public static final String ID_DESCR - = "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T"; public static final VariantQueryParam ID = new VariantQueryParam("id", TEXT_ARRAY, ID_DESCR); - - public static final String REGION_DESCR - = "List of regions, these can be just a single chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000"; public static final VariantQueryParam REGION = new VariantQueryParam("region", TEXT_ARRAY, REGION_DESCR); - - @Deprecated - public static final String CHROMOSOME_DESCR - = "List of chromosomes, this is an alias of 'region' parameter with just the chromosome names"; - - public static final String REFERENCE_DESCR - = "Reference allele"; public static final VariantQueryParam REFERENCE = new VariantQueryParam("reference", TEXT_ARRAY, REFERENCE_DESCR); - - public static final String ALTERNATE_DESCR - = "Main alternate allele"; public static final VariantQueryParam ALTERNATE = new VariantQueryParam("alternate", TEXT_ARRAY, ALTERNATE_DESCR); - - public static final String TYPE_DESCR - = "List of types, accepted values are SNV, MNV, INDEL, SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN," - + " INSERTION, DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. SNV,INDEL"; public static final VariantQueryParam TYPE = new VariantQueryParam("type", TEXT_ARRAY, TYPE_DESCR); - - - public static final String STUDY_DESCR - = "Filter variants from the given studies, these can be either the numeric ID or the alias with the format " - + "organization@project:study"; public static final VariantQueryParam STUDY = new VariantQueryParam("study", TEXT_ARRAY, STUDY_DESCR); - - public static final String INCLUDE_STUDY_DESCR - = "List of studies to include in the result. " - + ACCEPTS_ALL_NONE; public static final VariantQueryParam INCLUDE_STUDY = new VariantQueryParam("includeStudy", TEXT_ARRAY, INCLUDE_STUDY_DESCR); - - public static final String SAMPLE_DESCR - = "Filter variants by sample genotype. " - + "This will automatically set 'includeSample' parameter when not provided. " - + "This filter accepts multiple 3 forms: " - + "1) List of samples: Samples that contain the main variant. " + ACCEPTS_AND_OR + " " - + " e.g. HG0097,HG0098 . " - + "2) List of samples with genotypes: {sample}:{gt1},{gt2}. " + ACCEPTS_AND_OR + " " - + " e.g. HG0097:0/0;HG0098:0/1,1/1 . " - + "Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. " - + "When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position" - + " e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... " - + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS " - + " e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . " - + "3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted." - + "Accepted segregation modes: " - + "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, " - + "deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]. Value is case insensitive." - + " e.g. 
HG0097:DeNovo " - + "Sample must have parents defined and indexed. "; public static final VariantQueryParam SAMPLE = new VariantQueryParam("sample", TEXT_ARRAY, SAMPLE_DESCR); - - public static final String GENOTYPE_DESCR - = "Samples with a specific genotype: {samp_1}:{gt_1}(,{gt_n})*(;{samp_n}:{gt_1}(,{gt_n})*)*" - + " e.g. HG0097:0/0;HG0098:0/1,1/1. " - + "Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. " - + "When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position" - + " e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... " - + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS " - + " e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. " - + "This will automatically set 'includeSample' parameter when not provided"; public static final VariantQueryParam GENOTYPE = new VariantQueryParam("genotype", TEXT_ARRAY, GENOTYPE_DESCR); - - public static final String SAMPLE_DATA_DESCR - = "Filter by any SampleData field from samples. [{sample}:]{key}{op}{value}[,;]* . " - + "If no sample is specified, will use all samples from \"sample\" or \"genotype\" filter. " - + "e.g. DP>200 or HG0097:DP>200,HG0098:DP<10 . " - + "Many FORMAT fields can be combined. e.g. HG0097:DP>200;GT=1/1,0/1,HG0098:DP<10"; public static final VariantQueryParam SAMPLE_DATA = new VariantQueryParam("sampleData", TEXT_ARRAY, SAMPLE_DATA_DESCR); - - public static final String INCLUDE_SAMPLE_DESCR - = "List of samples to be included in the result. " - + ACCEPTS_ALL_NONE + " If undefined, automatically includes samples used for filtering. If none, no sample is included."; public static final VariantQueryParam INCLUDE_SAMPLE = new VariantQueryParam("includeSample", TEXT_ARRAY, INCLUDE_SAMPLE_DESCR); - - public static final String INCLUDE_SAMPLE_ID_DESCR - = "Include sampleId on each result"; public static final VariantQueryParam INCLUDE_SAMPLE_ID = new VariantQueryParam("includeSampleId", BOOLEAN, INCLUDE_SAMPLE_ID_DESCR); - - public static final String SAMPLE_METADATA_DESCR - = "Return the samples metadata group by study. Sample names will appear in the same order as their corresponding genotypes."; public static final VariantQueryParam SAMPLE_METADATA = new VariantQueryParam("sampleMetadata", BOOLEAN, SAMPLE_METADATA_DESCR); - - public static final String INCLUDE_SAMPLE_DATA_DESCR - = "List of Sample Data keys (i.e. FORMAT column from VCF file) from Sample Data to include in the output. e.g: DP,AD. " - + ACCEPTS_ALL_NONE; public static final VariantQueryParam INCLUDE_SAMPLE_DATA = new VariantQueryParam("includeSampleData", TEXT_ARRAY, INCLUDE_SAMPLE_DATA_DESCR); - - public static final String INCLUDE_GENOTYPE_DESCR - = "Include genotypes, apart of other formats defined with includeFormat"; public static final VariantQueryParam INCLUDE_GENOTYPE = new VariantQueryParam("includeGenotype", BOOLEAN, INCLUDE_GENOTYPE_DESCR); - - public static final String SAMPLE_LIMIT_DESCR - = "Limit the number of samples to be included in the result"; public static final VariantQueryParam SAMPLE_LIMIT = new VariantQueryParam("sampleLimit", INTEGER, SAMPLE_LIMIT_DESCR); - - public static final String SAMPLE_SKIP_DESCR - = "Skip some samples from the result. Useful for sample pagination."; public static final VariantQueryParam SAMPLE_SKIP = new VariantQueryParam("sampleSkip", INTEGER, SAMPLE_SKIP_DESCR); - - public static final String FILE_DESCR - = "Filter variants from the files specified. 
This will set includeFile parameter when not provided"; public static final VariantQueryParam FILE = new VariantQueryParam("file", TEXT_ARRAY, FILE_DESCR); - - public static final String FILE_DATA_DESCR - = "Filter by file data (i.e. FILTER, QUAL and INFO columns from VCF file). [{file}:]{key}{op}{value}[,;]* . " - + "If no file is specified, will use all files from \"file\" filter. " - + "e.g. AN>200 or file_1.vcf:AN>200;file_2.vcf:AN<10 . " - + "Many fields can be combined. e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP"; public static final VariantQueryParam FILE_DATA = new VariantQueryParam("fileData", TEXT_ARRAY, FILE_DATA_DESCR); - - public static final String FILTER_DESCR - = "Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. " - + "e.g.: PASS,LowGQX"; public static final VariantQueryParam FILTER = new VariantQueryParam("filter", TEXT_ARRAY, FILTER_DESCR); - - public static final String QUAL_DESCR - = "Specify the QUAL for any of the files. If 'file' filter is provided, will match the file and the qual. " - + "e.g.: >123.4"; public static final VariantQueryParam QUAL = new VariantQueryParam("qual", DECIMAL_ARRAY, QUAL_DESCR); - - public static final String INCLUDE_FILE_DESCR - = "List of files to be returned. " - + ACCEPTS_ALL_NONE + " If undefined, automatically includes files used for filtering. If none, no file is included."; public static final VariantQueryParam INCLUDE_FILE = new VariantQueryParam("includeFile", TEXT_ARRAY, INCLUDE_FILE_DESCR); - - public static final String COHORT_DESCR - = "Select variants with calculated stats for the selected cohorts"; public static final VariantQueryParam COHORT = new VariantQueryParam("cohort", TEXT_ARRAY, COHORT_DESCR); - - public static final String STATS_REF_DESCR - = "Reference Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; public static final VariantQueryParam STATS_REF = new VariantQueryParam("cohortStatsRef", TEXT_ARRAY, STATS_REF_DESCR); - - public static final String STATS_ALT_DESCR - = "Alternate Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; public static final VariantQueryParam STATS_ALT = new VariantQueryParam("cohortStatsAlt", TEXT_ARRAY, STATS_ALT_DESCR); - - public static final String STATS_MAF_DESCR - = "Minor Allele Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; public static final VariantQueryParam STATS_MAF = new VariantQueryParam("cohortStatsMaf", TEXT_ARRAY, STATS_MAF_DESCR); - - public static final String STATS_MGF_DESCR - = "Minor Genotype Frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. ALL<=0.4"; public static final VariantQueryParam STATS_MGF = new VariantQueryParam("cohortStatsMgf", TEXT_ARRAY, STATS_MGF_DESCR); - - public static final String STATS_PASS_FREQ_DESCR - = "Filter PASS frequency: [{study:}]{cohort}[<|>|<=|>=]{number}. e.g. 
ALL>0.8"; public static final VariantQueryParam STATS_PASS_FREQ = new VariantQueryParam("cohortStatsPass", TEXT_ARRAY, STATS_PASS_FREQ_DESCR); - - public static final String MISSING_ALLELES_DESCR - = "Number of missing alleles: [{study:}]{cohort}[<|>|<=|>=]{number}"; public static final VariantQueryParam MISSING_ALLELES = new VariantQueryParam("missingAlleles", TEXT_ARRAY, MISSING_ALLELES_DESCR); - - public static final String MISSING_GENOTYPES_DESCR - = "Number of missing genotypes: [{study:}]{cohort}[<|>|<=|>=]{number}"; public static final VariantQueryParam MISSING_GENOTYPES = new VariantQueryParam("missingGenotypes", TEXT_ARRAY, MISSING_GENOTYPES_DESCR); - - public static final String SCORE_DESCR - = "Filter by variant score: [{study:}]{score}[<|>|<=|>=]{number}"; public static final VariantQueryParam SCORE = new VariantQueryParam("score", TEXT_ARRAY, MISSING_GENOTYPES_DESCR); - - public static final String ANNOT_EXISTS_DESCR - = "Return only annotated variants"; public static final VariantQueryParam ANNOTATION_EXISTS = new VariantQueryParam("annotationExists", BOOLEAN, ANNOT_EXISTS_DESCR); - - public static final String ANNOT_XREF_DESCR - = "List of any external reference, these can be genes, proteins or variants. " - + "Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ..."; public static final VariantQueryParam ANNOT_XREF = new VariantQueryParam("xref", TEXT_ARRAY, ANNOT_XREF_DESCR); - - public static final String GENE_DESCR - = "List of genes, most gene IDs are accepted (HGNC, Ensembl gene, ...). This is an alias to 'xref' parameter"; public static final VariantQueryParam GENE = new VariantQueryParam("gene", TEXT_ARRAY, GENE_DESCR); - - public static final String ANNOT_BIOTYPE_DESCR - = "List of biotypes, e.g. protein_coding"; public static final VariantQueryParam ANNOT_BIOTYPE = new VariantQueryParam("biotype", TEXT_ARRAY, ANNOT_BIOTYPE_DESCR); - - public static final String ANNOT_CONSEQUENCE_TYPE_DESCR - = "List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. " - + "Accepts aliases 'loss_of_function' and 'protein_altering'"; public static final VariantQueryParam ANNOT_CONSEQUENCE_TYPE = new VariantQueryParam("ct", TEXT_ARRAY, ANNOT_CONSEQUENCE_TYPE_DESCR); - - @Deprecated - public static final String ANNOT_POLYPHEN_DESCR - = "Polyphen, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. <=0.9 , =benign"; - @Deprecated - public static final VariantQueryParam ANNOT_POLYPHEN - = new VariantQueryParam("polyphen", TEXT_ARRAY, ANNOT_POLYPHEN_DESCR); - - @Deprecated - public static final String ANNOT_SIFT_DESCR - = "Sift, protein substitution score. [<|>|<=|>=]{number} or [~=|=|]{description} e.g. >0.1 , ~=tolerant"; - @Deprecated - public static final VariantQueryParam ANNOT_SIFT - = new VariantQueryParam("sift", TEXT_ARRAY, ANNOT_SIFT_DESCR); - - public static final String ANNOT_PROTEIN_SUBSTITUTION_DESCR - = "Protein substitution scores include SIFT and PolyPhen. You can query using the score {protein_score}[<|>|<=|>=]{number}" - + " or the description {protein_score}[~=|=]{description} e.g. polyphen>0.1,sift=tolerant"; public static final VariantQueryParam ANNOT_PROTEIN_SUBSTITUTION = new VariantQueryParam("proteinSubstitution", TEXT_ARRAY, ANNOT_PROTEIN_SUBSTITUTION_DESCR); - - public static final String ANNOT_CONSERVATION_DESCR - = "Filter by conservation score: {conservation_score}[<|>|<=|>=]{number} e.g. 
phastCons>0.5,phylop<0.1,gerp>0.1"; public static final VariantQueryParam ANNOT_CONSERVATION = new VariantQueryParam("conservation", TEXT_ARRAY, ANNOT_CONSERVATION_DESCR); - - public static final String ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR - = "Alternate Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. " - + ParamConstants.POP_FREQ_1000G + ":ALL<0.01"; public static final VariantQueryParam ANNOT_POPULATION_ALTERNATE_FREQUENCY = new VariantQueryParam("populationFrequencyAlt", TEXT_ARRAY, ANNOT_POPULATION_ALTERNATE_FREQUENCY_DESCR); - - public static final String ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR - = "Reference Population Frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. " - + ParamConstants.POP_FREQ_1000G + ":ALL<0.01"; public static final VariantQueryParam ANNOT_POPULATION_REFERENCE_FREQUENCY = new VariantQueryParam("populationFrequencyRef", TEXT_ARRAY, ANNOT_POPULATION_REFERENCE_FREQUENCY_DESCR); - - public static final String ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR - = "Population minor allele frequency: {study}:{population}[<|>|<=|>=]{number}. e.g. " - + ParamConstants.POP_FREQ_1000G + ":ALL<0.01"; public static final VariantQueryParam ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY = new VariantQueryParam("populationFrequencyMaf", TEXT_ARRAY, ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY_DESCR); - - public static final String ANNOT_TRANSCRIPT_FLAG_DESCR - = "List of transcript flags. e.g. canonical, CCDS, basic, LRG, MANE Select, MANE Plus Clinical, EGLH_HaemOnc, TSO500"; public static final VariantQueryParam ANNOT_TRANSCRIPT_FLAG = new VariantQueryParam("transcriptFlag", TEXT_ARRAY, ANNOT_TRANSCRIPT_FLAG_DESCR); - - public static final String ANNOT_GENE_TRAIT_ID_DESCR - = "List of gene trait association id. e.g. \"umls:C0007222\" , \"OMIM:269600\""; public static final VariantQueryParam ANNOT_GENE_TRAIT_ID = new VariantQueryParam("geneTraitId", TEXT_ARRAY, ANNOT_GENE_TRAIT_ID_DESCR); - - @Deprecated - public static final String ANNOT_GENE_TRAIT_NAME_DESCR - = "List of gene trait association names. e.g. 
Cardiovascular Diseases"; - @Deprecated - public static final VariantQueryParam ANNOT_GENE_TRAIT_NAME - = new VariantQueryParam("geneTraitName", TEXT_ARRAY, ANNOT_GENE_TRAIT_NAME_DESCR); - - public static final String ANNOT_TRAIT_DESCR - = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,..."; public static final VariantQueryParam ANNOT_TRAIT = new VariantQueryParam("trait", TEXT_ARRAY, ANNOT_TRAIT_DESCR); - - public static final String ANNOT_CLINICAL_DESCR - = "Clinical source: clinvar, cosmic"; public static final VariantQueryParam ANNOT_CLINICAL = new VariantQueryParam("clinical", TEXT_ARRAY, ANNOT_CLINICAL_DESCR); - - public static final String ANNOT_CLINICAL_SIGNIFICANCE_DESCR - = "Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic"; public static final VariantQueryParam ANNOT_CLINICAL_SIGNIFICANCE = new VariantQueryParam("clinicalSignificance", TEXT_ARRAY, ANNOT_CLINICAL_SIGNIFICANCE_DESCR); - - public static final String ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR - = "Clinical confirmed status"; public static final VariantQueryParam ANNOT_CLINICAL_CONFIRMED_STATUS = new VariantQueryParam("clinicalConfirmedStatus", BOOLEAN, ANNOT_CLINICAL_CONFIRMED_STATUS_DESCR); - - @Deprecated - public static final String ANNOT_CLINVAR_DESCR - = "List of ClinVar accessions"; - @Deprecated - public static final VariantQueryParam ANNOT_CLINVAR = new VariantQueryParam("clinvar", TEXT_ARRAY, ANNOT_CLINVAR_DESCR); - - @Deprecated - public static final String ANNOT_COSMIC_DESCR - = "List of COSMIC mutation IDs."; - @Deprecated - public static final VariantQueryParam ANNOT_COSMIC = new VariantQueryParam("cosmic", TEXT_ARRAY, ANNOT_COSMIC_DESCR); - - @Deprecated - public static final String ANNOT_HPO_DESCR - = "List of HPO terms. e.g. \"HP:0000545,HP:0002812\""; - @Deprecated - public static final VariantQueryParam ANNOT_HPO = new VariantQueryParam("hpo", TEXT_ARRAY, ANNOT_HPO_DESCR); - - public static final String ANNOT_GO_DESCR - = "List of GO (Gene Ontology) terms. e.g. \"GO:0002020\""; public static final VariantQueryParam ANNOT_GO = new VariantQueryParam("go", TEXT_ARRAY, ANNOT_GO_DESCR); - - public static final String ANNOT_EXPRESSION_DESCR - = "List of tissues of interest. e.g. \"lung\""; public static final VariantQueryParam ANNOT_EXPRESSION = new VariantQueryParam("expression", TEXT_ARRAY, ANNOT_EXPRESSION_DESCR); - - public static final String ANNOT_GENE_ROLE_IN_CANCER_DESCR - = ""; public static final VariantQueryParam ANNOT_GENE_ROLE_IN_CANCER = new VariantQueryParam("geneRoleInCancer", TEXT_ARRAY, ANNOT_GENE_ROLE_IN_CANCER_DESCR); - - public static final String ANNOT_PROTEIN_KEYWORD_DESCR - = "List of Uniprot protein variant annotation keywords"; public static final VariantQueryParam ANNOT_PROTEIN_KEYWORD = new VariantQueryParam("proteinKeyword", TEXT_ARRAY, ANNOT_PROTEIN_KEYWORD_DESCR); - - public static final String ANNOT_DRUG_DESCR - = "List of drug names"; public static final VariantQueryParam ANNOT_DRUG = new VariantQueryParam("drug", TEXT_ARRAY, ANNOT_DRUG_DESCR); - - public static final String ANNOT_FUNCTIONAL_SCORE_DESCR - = "Functional score: {functional_score}[<|>|<=|>=]{number} e.g. 
cadd_scaled>5.2 , cadd_raw<=0.3"; public static final VariantQueryParam ANNOT_FUNCTIONAL_SCORE = new VariantQueryParam("functionalScore", TEXT_ARRAY, ANNOT_FUNCTIONAL_SCORE_DESCR); - - public static final String CUSTOM_ANNOTATION_DESCR - = "Custom annotation: {key}[<|>|<=|>=]{number} or {key}[~=|=]{text}"; public static final VariantQueryParam CUSTOM_ANNOTATION = new VariantQueryParam("customAnnotation", TEXT_ARRAY, CUSTOM_ANNOTATION_DESCR); - - public static final String UNKNOWN_GENOTYPE_DESCR - = "Returned genotype for unknown genotypes. Common values: [0/0, 0|0, ./.]"; public static final VariantQueryParam UNKNOWN_GENOTYPE = new VariantQueryParam("unknownGenotype", TEXT, UNKNOWN_GENOTYPE_DESCR); + public static final VariantQueryParam RELEASE = new VariantQueryParam("release", INTEGER, RELEASE_DESCR); + public static final VariantQueryParam SOURCE = new VariantQueryParam("source", TEXT, SOURCE_DESCR); - - public static final String RELEASE_DESCR - = ""; - public static final VariantQueryParam RELEASE - = new VariantQueryParam("release", INTEGER, RELEASE_DESCR); + @Deprecated + public static final VariantQueryParam ANNOT_GENE_TRAIT_NAME + = new VariantQueryParam("geneTraitName", TEXT_ARRAY, ANNOT_GENE_TRAIT_NAME_DESCR); + @Deprecated + public static final VariantQueryParam ANNOT_POLYPHEN + = new VariantQueryParam("polyphen", TEXT_ARRAY, ANNOT_POLYPHEN_DESCR); + @Deprecated + public static final VariantQueryParam ANNOT_SIFT + = new VariantQueryParam("sift", TEXT_ARRAY, ANNOT_SIFT_DESCR); + @Deprecated + public static final VariantQueryParam ANNOT_CLINVAR = new VariantQueryParam("clinvar", TEXT_ARRAY, ANNOT_CLINVAR_DESCR); + @Deprecated + public static final VariantQueryParam ANNOT_COSMIC = new VariantQueryParam("cosmic", TEXT_ARRAY, ANNOT_COSMIC_DESCR); + @Deprecated + public static final VariantQueryParam ANNOT_HPO = new VariantQueryParam("hpo", TEXT_ARRAY, ANNOT_HPO_DESCR); private VariantQueryParam(String key, Type type, String description) { this.key = key; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java index 8468ab34317..2ee3b0b59f2 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/ParsedVariantQuery.java @@ -39,6 +39,7 @@ public class ParsedVariantQuery { private List regions; private List> clinicalCombination; private List clinicalCombinationList; + private VariantQuerySource source; // private VariantAnnotationQuery annotationQuery; @@ -71,6 +72,7 @@ public ParsedVariantQuery(ParsedVariantQuery other) { this.regions = new ArrayList<>(other.regions); this.clinicalCombination = new ArrayList<>(other.clinicalCombination); this.clinicalCombinationList = new ArrayList<>(other.clinicalCombinationList); + this.source = other.source; } public Query getInputQuery() { @@ -239,6 +241,15 @@ public ParsedVariantQuery setClinicalCombinationList(List clinicalCombin return this; } + public VariantQuerySource getSource() { + return source; + } + + public ParsedVariantQuery setSource(VariantQuerySource source) { + this.source = source; + return this; + } + public static class VariantStudyQuery { private ParsedQuery studies; private ParsedQuery>> genotypes; diff --git 
a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java index 641e365a51d..0807453b5da 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java @@ -169,6 +169,7 @@ public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boo variantQuery.setApproximateCountSamplingSize(options.getInt( VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(), VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.defaultValue())); + variantQuery.setSource(VariantQuerySource.get(inputQuery)); variantQuery.setProjection(projectionParser.parseVariantQueryProjection(inputQuery, options)); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQuerySource.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQuerySource.java new file mode 100644 index 00000000000..91d3d438e3f --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQuerySource.java @@ -0,0 +1,39 @@ +package org.opencb.opencga.storage.core.variant.query; + +import org.opencb.commons.datastore.core.Query; + +import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.SOURCE; + +public enum VariantQuerySource { +// SECONDARY_ANNOTATION_INDEX, + SECONDARY_SAMPLE_INDEX, + VARIANT_INDEX; + + + public boolean isSecondary() { + return this != VARIANT_INDEX; + } + + public static VariantQuerySource get(Query query) { + if (query == null) { + return VARIANT_INDEX; + } + return get(query.getString(SOURCE.key(), null)); + } + + public static VariantQuerySource get(String source) { + if (source == null) { + return VARIANT_INDEX; + } + switch (source.toLowerCase().replace("_", "").replace("-", "")){ + case "variantindex": + return VARIANT_INDEX; + case "secondarysampleindex": + return SECONDARY_SAMPLE_INDEX; +// case "secondaryannotationindex": +// return SECONDARY_ANNOTATION_INDEX; + default: + throw new IllegalArgumentException("Unknown VariantQuerySource " + source); + } + } +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java index 9cf773d001f..e45d7e4dc73 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java @@ -166,7 +166,8 @@ public final class VariantQueryUtils { UNKNOWN_GENOTYPE, SAMPLE_METADATA, SAMPLE_LIMIT, - SAMPLE_SKIP + SAMPLE_SKIP, + SOURCE ))); public static final String SKIP_MISSING_GENES = "skipMissingGenes"; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java index 26d53e89e5c..c75fc3e4afa 100644 --- 
a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java @@ -2,7 +2,6 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -10,6 +9,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQuerySource; import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.QUERY_DEFAULT_TIMEOUT; import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.QUERY_MAX_TIMEOUT; @@ -63,14 +63,27 @@ public static void setDefaultTimeout(QueryOptions queryOptions, ObjectMap config queryOptions.put(QueryOptions.TIMEOUT, timeout); } + public final boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { + boolean canUseThisExecutor = canUseThisExecutor(variantQuery, variantQuery.getInputOptions()); + if (canUseThisExecutor) { + if (variantQuery.getSource().isSecondary()) { + // Querying for a secondary index source. This executor can only be used if the source is the same + if (getSource() != variantQuery.getSource()) { + canUseThisExecutor = false; + } + } + } + return canUseThisExecutor; + } + /** * Determine if this VariantQueryExecutor can run the given query. 
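With the final canUseThisExecutor(ParsedVariantQuery) wrapper above, executor selection now also compares each executor's getSource() with the source of the parsed query, so a secondary-index query can only be served by an executor that declares that source. The following is a hedged sketch of the caller-side flow, assuming an already configured engine instance and hypothetical study/sample names; it mirrors the pattern used by the tests added later in this series rather than defining any new API.

    import org.opencb.commons.datastore.core.QueryOptions;
    import org.opencb.opencga.storage.core.variant.VariantStorageEngine;
    import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
    import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery;
    import org.opencb.opencga.storage.core.variant.query.VariantQuerySource;
    import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor;

    public class ExecutorSelectionExample {
        // variantStorageEngine is assumed to be an already initialized engine.
        public static VariantQueryExecutor pickSecondaryIndexExecutor(VariantStorageEngine variantStorageEngine) {
            ParsedVariantQuery parsedQuery = variantStorageEngine.parseQuery(
                    new VariantQuery()
                            .study("study_1")       // hypothetical study id
                            .sample("NA12877")      // hypothetical sample name
                            .source(VariantQuerySource.SECONDARY_SAMPLE_INDEX),
                    new QueryOptions());
            // Only executors whose getSource() is SECONDARY_SAMPLE_INDEX can match here;
            // SampleIndexOnlyVariantQueryExecutor accepts the query or throws a
            // VariantQueryException if the filters cannot be covered by the index alone.
            return variantStorageEngine.getVariantQueryExecutor(parsedQuery);
        }
    }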
- * @param query Query to execute + * @param variantQuery Query to execute * @param options Options for the query * @return True if this variant query executor is valid for the query * @throws StorageEngineException if there is an error */ - public abstract boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException; + protected abstract boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException; protected abstract Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException; @@ -82,6 +95,10 @@ protected String getStorageEngineId() { return storageEngineId; } + protected VariantQuerySource getSource() { + return VariantQuerySource.VARIANT_INDEX; + } + protected ObjectMap getOptions() { return options; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 0c9420f2eda..43028f7220a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -11,6 +11,7 @@ import org.opencb.biodata.models.variant.avro.OriginalCall; import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.biodata.tools.commons.Converter; +import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -27,6 +28,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; +import org.opencb.opencga.storage.core.variant.query.VariantQuerySource; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; @@ -89,9 +91,16 @@ public SampleIndexOnlyVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, Sam } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) { + VariantQuery query = variantQuery.getQuery(); + if (variantQuery.getSource() == VariantQuerySource.SECONDARY_SAMPLE_INDEX) { + if (SampleIndexQueryParser.validSampleIndexQuery(query) && isQueryCovered(query)) { + return true; + } else { + throw new VariantQueryException("Unable to apply given filter using only the secondary sample index."); + } + } if (SampleIndexQueryParser.validSampleIndexQuery(query)) { - if (isFullyCoveredQuery(query, options)) { return true; } @@ -113,6 +122,10 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); logger.info("HBase SampleIndex, skip variants table"); + if 
(variantQuery.getSource() == VariantQuerySource.SECONDARY_SAMPLE_INDEX) { + variantQuery.getEvents().add(new Event(Event.Type.INFO, "Using only the secondary sample index. Skip main variants index." + + " Results might be partial.")); + } boolean count; Future asyncCountFuture; @@ -148,6 +161,11 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator } } + @Override + protected VariantQuerySource getSource() { + return VariantQuerySource.SECONDARY_SAMPLE_INDEX; + } + private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery, ParsedVariantQuery parsedQuery) { QueryOptions options = parsedQuery.getInputOptions(); VariantDBIterator variantIterator; @@ -163,7 +181,8 @@ private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery } catch (IOException e) { throw VariantQueryException.internalException(e).setQuery(parsedQuery.getInputQuery()); } - boolean includeAll = parsedQuery.getInputQuery().getBoolean("includeAllFromSampleIndex", false); + boolean includeAll = parsedQuery.getSource() == VariantQuerySource.SECONDARY_SAMPLE_INDEX + || parsedQuery.getInputQuery().getBoolean("includeAllFromSampleIndex", false); SampleVariantIndexEntryToVariantConverter converter = new SampleVariantIndexEntryToVariantConverter( parsedQuery, sampleIndexQuery, dbAdaptor.getMetadataManager(), includeAll); variantIterator = VariantDBIterator.wrapper(Iterators.transform(rawIterator, converter::convert)); @@ -182,13 +201,21 @@ protected boolean shouldGetCount(QueryOptions options, boolean iterator) { private boolean isFullyCoveredQuery(Query inputQuery, QueryOptions options) { Query query = new Query(inputQuery); -// ParsedVariantQuery parsedVariantQuery = variantQueryProjectionParser.parseQuery(query, options, true); + SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); + return isQueryCovered(sampleIndexQuery) + && isIncludeCovered(sampleIndexQuery, inputQuery, options); + } + + private boolean isQueryCovered(Query inputQuery) { + Query query = new Query(inputQuery); + SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(query); - return isQueryCovered(query) && isIncludeCovered(sampleIndexQuery, inputQuery, options); + return isQueryCovered(sampleIndexQuery); } - private boolean isQueryCovered(Query query) { + private boolean isQueryCovered(SampleIndexQuery sampleIndexQuery) { + Query query = sampleIndexQuery.getUncoveredQuery(); if (VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_MENDELIAN_ERROR) || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO) || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO_STRICT)) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index c7660e2475a..f00210c8b95 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -26,7 +26,6 @@ import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; import 
org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; -import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.core.testclassification.duration.LongTests; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; @@ -38,9 +37,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; -import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; -import org.opencb.opencga.storage.core.variant.query.Values; -import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; +import org.opencb.opencga.storage.core.variant.query.*; import org.opencb.opencga.storage.core.variant.query.executors.VariantQueryExecutor; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageEngine; @@ -1202,6 +1199,30 @@ public void testSampleIndexOnlyVariantQueryExecutor() { } return v; }); + + testSampleIndexOnlyVariantQueryExecutor( + new VariantQuery() + .study(STUDY_NAME_6) + .sample("NA19600") + .includeGenotype(true) + .source(VariantQuerySource.SECONDARY_SAMPLE_INDEX), + new QueryOptions(), + SampleIndexOnlyVariantQueryExecutor.class, + av -> { + assertEquals(av.toString(), 1, av.getStudies().get(0).getFiles().size()); + for (FileEntry fe : av.getStudies().get(0).getFiles()) { + assertNotNull(fe.getData().get(StudyEntry.FILTER)); + fe.setData(Collections.emptyMap()); + } + return av; + }, + ev -> { + for (FileEntry fe : ev.getStudies().get(0).getFiles()) { + fe.setData(Collections.emptyMap()); + } + ev.setAnnotation(null); + return ev; + }); } private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOptions options, Class expected) { @@ -1209,7 +1230,13 @@ private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOp } private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOptions options, Class expected, - Function mapper) { + Function expectedVariantMapper) { + testSampleIndexOnlyVariantQueryExecutor(query, options, expected, expectedVariantMapper, v -> v); + } + + private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOptions options, Class expected, + Function actualVariantMapper, + Function expectedVariantMapper) { ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); VariantQueryExecutor variantQueryExecutor = variantStorageEngine.getVariantQueryExecutor(variantQuery); @@ -1243,7 +1270,8 @@ private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOp for (int i = 0; i < actualVariants.size(); i++) { Variant av = actualVariants.get(i); Variant ev = expectedVariants.get(i); - mapper.apply(av); + actualVariantMapper.apply(av); + expectedVariantMapper.apply(ev); if (!ev.getStudies().isEmpty()) { if (av.getLengthAlternate() == 0 || av.getLengthReference() == 0) { // System.out.println("-------" + av + "----------"); @@ -1259,8 +1287,8 @@ private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOp assertEquals(ev, av); } catch (AssertionError error) { System.out.println("-------" + av + "----------"); - System.out.println("DBAdaptor " + ev.toJson()); - System.out.println("Actual " + 
av.toJson()); + System.out.println("Expected (DBAdaptor) " + ev.toJson()); + System.out.println("Actual " + av.toJson()); throw error; } } From 7fac8bd1455ea3e4ab6bb55298cabf7530f5cff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 10 Sep 2024 08:52:32 +0100 Subject: [PATCH 08/19] storage: Fix NPE building sample index. #TASK-6765 --- .../hadoop/variant/index/sample/SampleIndexDriver.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java index ce25d55819b..7e053fc9332 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java @@ -41,6 +41,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.*; import java.util.stream.Collectors; @@ -510,7 +511,13 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t if (filePosition >= 0) { schema.getFileIndex().getFilePositionIndex().write(filePosition, sampleFileIndex); } - builder.add(gt, new SampleVariantIndexEntry(variant, sampleFileIndex, indexEntry.getFileData().get(0))); + ByteBuffer fileData; + if (indexEntry.getFileData().isEmpty()) { + fileData = null; + } else { + fileData = indexEntry.getFileData().get(0); + } + builder.add(gt, new SampleVariantIndexEntry(variant, sampleFileIndex, fileData)); countSampleGt(context, sampleId, gt); } } From 40378838f7b9c2e5c7cc2f382d714be87c4a2c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 11 Sep 2024 15:05:53 +0100 Subject: [PATCH 09/19] storage: Ensure SampleIndexConfiguration always exists. Add migration. 
#TASK-6765 --- .../manager/VariantStorageManager.java | 7 ++- .../AnalysisVariantCommandExecutor.java | 1 + .../AnalysisClinicalCommandOptions.java | 2 +- .../AnalysisVariantCommandOptions.java | 5 +- .../app/migrations/StorageMigrationTool.java | 2 +- .../storage/AddAllelesColumnToPhoenix.java | 2 +- ...llegalConcurrentFileLoadingsMigration.java | 2 +- ...mpleIndexConfigurationIsAlwaysDefined.java | 53 +++++++++++++++++++ .../catalog/managers/AuditManager.java | 13 +++++ .../catalog/managers/StudyManager.java | 27 +++++++--- .../src/main/R/R/Clinical-methods.R | 2 +- opencga-client/src/main/R/R/Variant-methods.R | 2 +- .../rest/clients/ClinicalAnalysisClient.java | 9 ++-- .../client/rest/clients/VariantClient.java | 9 ++-- .../src/main/javascript/ClinicalAnalysis.js | 6 ++- opencga-client/src/main/javascript/Variant.js | 6 ++- .../rest_clients/clinical_analysis_client.py | 13 +++-- .../pyopencga/rest_clients/variant_client.py | 13 +++-- .../core/cellbase/CellBaseValidator.java | 10 ++++ .../storage/SampleIndexConfiguration.java | 10 +++- .../core/models/study/StudyInternal.java | 5 +- .../configuration/StudyConfiguration.java | 5 +- .../models/variant/VariantQueryParams.java | 4 +- .../opencga/server/rest/MetaWSServer.java | 8 ++- .../VariantStorageMetadataManager.java | 43 ++++++++++----- .../core/metadata/models/StudyMetadata.java | 12 +++-- .../core/variant/VariantStorageEngine.java | 16 ++++++ .../core/variant/VariantStoragePipeline.java | 18 ++----- .../storage/hadoop/app/SampleIndexMain.java | 26 ++++++--- .../variant/HadoopVariantStorageEngine.java | 1 + .../SampleIndexOnlyVariantQueryExecutor.java | 4 +- .../index/sample/SampleIndexDBAdaptor.java | 6 ++- .../index/sample/SampleIndexSchema.java | 2 +- 33 files changed, 257 insertions(+), 87 deletions(-) create mode 100644 opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index 6fb44855fb7..9823ca22ffd 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -541,9 +541,12 @@ public OpenCGAResult configureSampleIndex(String studyStr, SampleIndexConfi boolean skipRebuild, String token) throws CatalogException, StorageEngineException { return secureOperation("configure", studyStr, new ObjectMap(), token, engine -> { - String version = engine.getCellBaseUtils().getCellBaseClient().getClientConfiguration().getVersion(); - sampleIndexConfiguration.validate(version); + String cellbaseVersion = engine.getCellBaseUtils().getVersionFromServer(); + sampleIndexConfiguration.validate(cellbaseVersion); String studyFqn = getStudyFqn(studyStr, token); + if (!engine.getMetadataManager().studyExists(studyFqn)) { + engine.getMetadataManager().createStudy(studyFqn, cellbaseVersion); + } engine.getMetadataManager().addSampleIndexConfiguration(studyFqn, sampleIndexConfiguration, true); catalogManager.getStudyManager() diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java index 5b2e21cee46..9e57f8c8e3c 100644 --- 
a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/AnalysisVariantCommandExecutor.java @@ -551,6 +551,7 @@ private RestResponse runExport() throws Exception { putNestedIfNotEmpty(beanParams, "proteinKeyword",commandOptions.proteinKeyword, true); putNestedIfNotEmpty(beanParams, "drug",commandOptions.drug, true); putNestedIfNotEmpty(beanParams, "customAnnotation",commandOptions.customAnnotation, true); + putNestedIfNotEmpty(beanParams, "source",commandOptions.source, true); putNestedIfNotEmpty(beanParams, "unknownGenotype",commandOptions.unknownGenotype, true); putNestedIfNotNull(beanParams, "sampleMetadata",commandOptions.sampleMetadata, true); putNestedIfNotNull(beanParams, "sort",commandOptions.sort, true); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java index 90870dfc8d6..33f2c3e414d 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisClinicalCommandOptions.java @@ -1937,7 +1937,7 @@ public class QueryVariantCommandOptions { @Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean panelIntersection = false; - @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.", required = false, arity = 1) + @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. 
As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.", required = false, arity = 1) public String source; @Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java index 7d97404fd6c..7d2911599c1 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/AnalysisVariantCommandOptions.java @@ -709,6 +709,9 @@ public class RunExportCommandOptions { @Parameter(names = {"--custom-annotation"}, description = "Custom annotation: {key}[<|>|<=|>=]{number} or {key}[~=|=]{text}", required = false, arity = 1) public String customAnnotation; + @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.", required = false, arity = 1) + public String source; + @Parameter(names = {"--unknown-genotype"}, description = "Returned genotype for unknown genotypes. Common values: [0/0, 0|0, ./.]", required = false, arity = 1) public String unknownGenotype; @@ -1956,7 +1959,7 @@ public class QueryCommandOptions { @Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0) public boolean panelIntersection = false; - @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.", required = false, arity = 1) + @Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. 
As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.", required = false, arity = 1) public String source; @Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/StorageMigrationTool.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/StorageMigrationTool.java index c9aeee6c7e2..f07666d7eb8 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/StorageMigrationTool.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/StorageMigrationTool.java @@ -46,7 +46,7 @@ protected final VariantStorageEngine getVariantStorageEngineByProject(String pro * @return List of projects * @throws Exception on error */ - protected final List getVariantStorageProjects(String organizationId) throws Exception { + protected final List getVariantStorageProjects() throws Exception { Set projects = new LinkedHashSet<>(); for (String studyFqn : getVariantStorageStudies(organizationId)) { diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/AddAllelesColumnToPhoenix.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/AddAllelesColumnToPhoenix.java index 4efa260d965..c71f0483427 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/AddAllelesColumnToPhoenix.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/AddAllelesColumnToPhoenix.java @@ -11,7 +11,7 @@ public class AddAllelesColumnToPhoenix extends StorageMigrationTool { @Override protected void run() throws Exception { - for (String project : getVariantStorageProjects(organizationId)) { + for (String project : getVariantStorageProjects()) { VariantStorageEngine engine = getVariantStorageEngineByProject(project); if (engine.getStorageEngineId().equals("hadoop")) { logger.info("Adding missing columns (if any) for project " + project); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java index 132e1357f1b..1b916a8316a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v2_12_5/storage/DetectIllegalConcurrentFileLoadingsMigration.java @@ -42,7 +42,7 @@ public class DetectIllegalConcurrentFileLoadingsMigration extends StorageMigrati @Override protected void run() throws Exception { - for (String project : getVariantStorageProjects(organizationId)) { + for (String project : getVariantStorageProjects()) { VariantStorageEngine engine = getVariantStorageEngineByProject(project); if (!engine.getStorageEngineId().equals("hadoop")) { continue; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java new file mode 100644 index 00000000000..dceb209fd9a --- /dev/null +++ 
b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java @@ -0,0 +1,53 @@ +package org.opencb.opencga.app.migrations.v3.v3_4_0.storage; + +import org.opencb.opencga.app.migrations.StorageMigrationTool; +import org.opencb.opencga.catalog.migration.Migration; +import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; +import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.variant.VariantStorageEngine; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +@Migration(id = "ensure_sample_index_configuration_is_defined", + description = "Ensure that the SampleIndexConfiguration object is correctly defined. #TASK-6765", version = "3.4.0", + language = Migration.MigrationLanguage.JAVA, + domain = Migration.MigrationDomain.STORAGE, + patch = 1, + date = 20240910) +public class EnsureSampleIndexConfigurationIsAlwaysDefined extends StorageMigrationTool { + + @Override + protected void run() throws Exception { + + for (String variantStorageProject : getVariantStorageProjects()) { + VariantStorageEngine engine = getVariantStorageEngineByProject(variantStorageProject); + if (engine.getMetadataManager().exists()) { + for (Integer studyId : engine.getMetadataManager().getStudyIds()) { + StudyMetadata studyMetadata = engine.getMetadataManager().getStudyMetadata(studyId); + List configurations = studyMetadata.getSampleIndexConfigurations(); + if (configurations == null || configurations.isEmpty()) { + configurations = new ArrayList<>(1); + logger.info("Creating default SampleIndexConfiguration for study '" + studyMetadata.getName() + "' (" + studyId + ")"); + configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned( + preFileDataConfiguration(), + StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION, + Date.from(Instant.now()), StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE)); + studyMetadata.setSampleIndexConfigurations(configurations); + } + } + } + } + } + + + public static SampleIndexConfiguration preFileDataConfiguration() { + // If missing, it was assuming cellbase v5 + SampleIndexConfiguration sampleIndexConfiguration = SampleIndexConfiguration.defaultConfiguration(false); + sampleIndexConfiguration.getFileDataConfiguration().setIncludeOriginalCall(false); + sampleIndexConfiguration.getFileDataConfiguration().setIncludeSecondaryAlternates(false); + return sampleIndexConfiguration; + } +} diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java index 6c25baa36ff..c16b58ca166 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/AuditManager.java @@ -20,6 +20,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.datastore.core.result.Error; import org.opencb.opencga.catalog.auth.authorization.AuthorizationManager; import org.opencb.opencga.catalog.db.DBAdaptorFactory; import org.opencb.opencga.catalog.db.api.AuditDBAdaptor; @@ -183,6 +184,18 @@ public void audit(String organizationId, String userId, Enums.Action action, Enu audit(organizationId, userId, action, resource, resourceId, resourceUuid, studyId, studyUuid, 
params, status, new ObjectMap()); } + public void auditError(String organizationId, String userId, Enums.Action action, Enums.Resource resource, String resourceId, + String resourceUuid, String studyId, String studyUuid, ObjectMap params, Exception e) { + Error error; + if (e instanceof CatalogException) { + error = ((CatalogException) e).getError(); + } else { + error = new Error(0, "", e.getMessage()); + } + AuditRecord.Status status = new AuditRecord.Status(AuditRecord.Status.Result.ERROR, error); + audit(organizationId, userId, action, resource, resourceId, resourceUuid, studyId, studyUuid, params, status, new ObjectMap()); + } + public void audit(String organizationId, String userId, Enums.Action action, Enums.Resource resource, String resourceId, String resourceUuid, String studyId, String studyUuid, ObjectMap params, AuditRecord.Status status, ObjectMap attributes) { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java index 848cb8e5e2d..421b4ba5a8f 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/StudyManager.java @@ -42,6 +42,7 @@ import org.opencb.opencga.catalog.utils.ParamUtils; import org.opencb.opencga.catalog.utils.UuidUtils; import org.opencb.opencga.core.api.ParamConstants; +import org.opencb.opencga.core.cellbase.CellBaseValidator; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.config.Configuration; @@ -406,7 +407,6 @@ public OpenCGAResult create(String projectStr, Study study, QueryOptions String organizationId = catalogFqn.getOrganizationId(); String userId = tokenPayload.getUserId(organizationId); Project project = catalogManager.getProjectManager().resolveId(catalogFqn, null, tokenPayload).first(); - ObjectMap auditParams = new ObjectMap() .append("projectId", projectStr) .append("study", study) @@ -417,7 +417,16 @@ public OpenCGAResult create(String projectStr, Study study, QueryOptions options = ParamUtils.defaultObject(options, QueryOptions::new); authorizationManager.checkIsAtLeastOrganizationOwnerOrAdmin(organizationId, userId); - + String cellbaseVersion; + if (project.getCellbase() == null || StringUtils.isEmpty(project.getCellbase().getUrl())) { + CellBaseValidator cellBaseValidator = new CellBaseValidator( + project.getCellbase(), + project.getOrganism().getScientificName(), + project.getOrganism().getAssembly()); + cellbaseVersion = cellBaseValidator.getVersionFromServer(); + } else { + cellbaseVersion = null; + } long projectUid = project.getUid(); // Initialise fields @@ -427,7 +436,7 @@ public OpenCGAResult create(String projectStr, Study study, QueryOptions study.setType(ParamUtils.defaultObject(study.getType(), StudyType::init)); study.setSources(ParamUtils.defaultObject(study.getSources(), Collections::emptyList)); study.setDescription(ParamUtils.defaultString(study.getDescription(), "")); - study.setInternal(StudyInternal.init()); + study.setInternal(StudyInternal.init(cellbaseVersion)); study.setStatus(ParamUtils.defaultObject(study.getStatus(), Status::new)); study.setCreationDate(ParamUtils.checkDateOrGetCurrentDate(study.getCreationDate(), StudyDBAdaptor.QueryParams.CREATION_DATE.key())); @@ -496,10 +505,14 @@ public OpenCGAResult create(String projectStr, Study study, QueryOptions 
result.setResults(Arrays.asList(study)); } return result; - } catch (CatalogException e) { - auditManager.auditCreate(organizationId, userId, Enums.Resource.STUDY, study.getId(), "", study.getId(), "", auditParams, - new AuditRecord.Status(AuditRecord.Status.Result.ERROR, e.getError())); - throw e; + } catch (Exception e) { + auditManager.auditError(organizationId, userId, Enums.Action.CREATE, Enums.Resource.STUDY, study.getId(), + "", study.getId(), "", auditParams, e); + if (e instanceof CatalogException) { + throw (CatalogException) e; + } else { + throw new CatalogException("Error creating study '" + study.getId() + "'", e); + } } } diff --git a/opencga-client/src/main/R/R/Clinical-methods.R b/opencga-client/src/main/R/R/Clinical-methods.R index 7860e7ee0f5..0c21fe15568 100644 --- a/opencga-client/src/main/R/R/Clinical-methods.R +++ b/opencga-client/src/main/R/R/Clinical-methods.R @@ -626,7 +626,7 @@ setMethod("clinicalClient", "OpencgaR", function(OpencgaR, annotationSet, clinic #' @param panelRoleInCancer Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]. #' @param panelFeatureType Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. #' @param panelIntersection Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel. - #' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. + #' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial. #' @param trait List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... queryVariant=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="clinical/variant", subcategoryId=NULL, action="query", params=params, httpMethod="GET", diff --git a/opencga-client/src/main/R/R/Variant-methods.R b/opencga-client/src/main/R/R/Variant-methods.R index 0686020512d..58ff6b3b6fb 100644 --- a/opencga-client/src/main/R/R/Variant-methods.R +++ b/opencga-client/src/main/R/R/Variant-methods.R @@ -538,7 +538,7 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N #' @param panelRoleInCancer Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]. #' @param panelFeatureType Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. #' @param panelIntersection Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel. 
- #' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. + #' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial. #' @param trait List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... query=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="variant", subcategoryId=NULL, action="query", params=params, httpMethod="GET", as.queryParam=NULL, ...), diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java index 501c2c52407..fb2455f8114 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/ClinicalAnalysisClient.java @@ -822,10 +822,11 @@ public RestResponse search(ObjectMap params) throws ClientExce * panelFeatureType: Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. * panelIntersection: Intersect panel genes and regions with given genes and regions from que input query. This will prevent * returning variants from regions out of the panel. - * source: Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), - * 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved - * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing - * or be partial. + * source: Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and + * 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary + * index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the + * secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, + * some data might be missing or be partial. * trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @return a RestResponse object. * @throws ClientException ClientException if there is any server error. 
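For reviewers who want to try the updated 'source' parameter, a minimal sketch (not part of this patch) is shown below in Java, the document's main language. Only the "source" key and its accepted values ('variant_index', 'secondary_sample_index') come from the descriptions in the hunks above; the client class and method names in the trailing comment are assumptions based on the javadoc of this hunk, not confirmed API.

    // Sketch only: build the query parameters that select the secondary sample index as data source.
    import org.opencb.commons.datastore.core.ObjectMap;

    public class SecondarySampleIndexQueryExample {
        public static ObjectMap buildParams() {
            return new ObjectMap()
                    .append("study", "myStudy")                  // illustrative study identifier
                    .append("sample", "NA12877")                 // filter resolvable by the sample index
                    .append("source", "secondary_sample_index"); // fetch exclusively from the secondary index
        }
        // Hypothetical call (names assumed): openCGAClient.getClinicalAnalysisClient().queryVariant(buildParams());
        // As documented above, 'include'/'exclude' are ignored, an exception is raised if the query
        // cannot be fully resolved by the secondary index, and the returned variants may be partial.
    }
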
diff --git a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java index b78d73dda9b..572907688d6 100644 --- a/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java +++ b/opencga-client/src/main/java/org/opencb/opencga/client/rest/clients/VariantClient.java @@ -805,10 +805,11 @@ public RestResponse runPlink(PlinkWrapperParams data, ObjectMap params) thr * panelFeatureType: Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ]. * panelIntersection: Intersect panel genes and regions with given genes and regions from que input query. This will prevent * returning variants from regions out of the panel. - * source: Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), - * 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved - * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing - * or be partial. + * source: Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and + * 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary + * index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the + * secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, + * some data might be missing or be partial. * trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @return a RestResponse object. * @throws ClientException ClientException if there is any server error. diff --git a/opencga-client/src/main/javascript/ClinicalAnalysis.js b/opencga-client/src/main/javascript/ClinicalAnalysis.js index d1b7228f7ed..125efe0e84b 100644 --- a/opencga-client/src/main/javascript/ClinicalAnalysis.js +++ b/opencga-client/src/main/javascript/ClinicalAnalysis.js @@ -720,8 +720,10 @@ export default class ClinicalAnalysis extends OpenCGAParentClass { * @param {Boolean} [params.panelIntersection] - Intersect panel genes and regions with given genes and regions from que input query. * This will prevent returning variants from regions out of the panel. * @param {String} [params.source] - Select the variant data source from where to fetch the data. Accepted values are 'variant_index' - * (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved - * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. + * (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary + * index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, + * an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or + * be partial. * @param {String} [params.trait] - List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @returns {Promise} Promise object in the form of RestResponse instance. 
*/ diff --git a/opencga-client/src/main/javascript/Variant.js b/opencga-client/src/main/javascript/Variant.js index ae291e7a9e5..b6664ce7c8f 100644 --- a/opencga-client/src/main/javascript/Variant.js +++ b/opencga-client/src/main/javascript/Variant.js @@ -697,8 +697,10 @@ export default class Variant extends OpenCGAParentClass { * @param {Boolean} [params.panelIntersection] - Intersect panel genes and regions with given genes and regions from que input query. * This will prevent returning variants from regions out of the panel. * @param {String} [params.source] - Select the variant data source from where to fetch the data. Accepted values are 'variant_index' - * (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved - * exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial. + * (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary + * index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, + * an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or + * be partial. * @param {String} [params.trait] - List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... * @returns {Promise} Promise object in the form of RestResponse instance. */ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py index 976de7948d5..ef5aa308611 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/clinical_analysis_client.py @@ -919,11 +919,14 @@ def query_variant(self, **options): given genes and regions from que input query. This will prevent returning variants from regions out of the panel. :param str source: Select the variant data source from where to fetch - the data. Accepted values are 'variant_index' (default), - 'secondary_annotation_index' and 'secondary_sample_index'. When - selecting a secondary_index, the data will be retrieved exclusively - from that secondary index, and the 'include/exclude' parameters - will be ignored. Some data might be missing or be partial. + the data. Accepted values are 'variant_index' (default) and + 'secondary_sample_index'. When selecting a secondary_index, the + data will be retrieved exclusively from that secondary index, and + the 'include/exclude' parameters will be ignored. If the given + query can not be fully resolved using the secondary index, an + exception will be raised. As the returned variants will only + contain data from the secondary_index, some data might be missing + or be partial. :param str trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... 
""" diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py index 24aa650aa41..fa2087eb567 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py @@ -950,11 +950,14 @@ def query(self, **options): given genes and regions from que input query. This will prevent returning variants from regions out of the panel. :param str source: Select the variant data source from where to fetch - the data. Accepted values are 'variant_index' (default), - 'secondary_annotation_index' and 'secondary_sample_index'. When - selecting a secondary_index, the data will be retrieved exclusively - from that secondary index, and the 'include/exclude' parameters - will be ignored. Some data might be missing or be partial. + the data. Accepted values are 'variant_index' (default) and + 'secondary_sample_index'. When selecting a secondary_index, the + data will be retrieved exclusively from that secondary index, and + the 'include/exclude' parameters will be ignored. If the given + query can not be fully resolved using the secondary index, an + exception will be raised. As the returned variants will only + contain data from the secondary_index, some data might be missing + or be partial. :param str trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... """ diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java b/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java index 4dd5543e9b8..f5ff4e965d4 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/cellbase/CellBaseValidator.java @@ -280,6 +280,11 @@ public static boolean supportsApiKey(String serverVersion) { return VersionUtils.isMinVersion("5.4.0", serverVersion); } + /** + * Get the major version of a version string. Does not include the starting "v". + * @return Major version + * @throws IOException if unable to get version from server + */ public String getVersionFromServerMajor() throws IOException { return major(getVersionFromServer()); } @@ -290,6 +295,11 @@ public String getVersionFromServerMajorMinor() throws IOException { return serverVersion; } + /** + * Get the major version of a version string. 
+ * @param version Version string in the form "major.minor.patch" + * @return Major version + */ private static String major(String version) { // return String.valueOf(new VersionUtils.Version(version).getMajor()); return version.split("\\.")[0]; diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java index cb89309369c..6003423b0fc 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java @@ -4,6 +4,7 @@ import org.apache.commons.collections4.CollectionUtils; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.avro.ClinicalSignificance; +import org.opencb.commons.utils.VersionUtils; import org.opencb.opencga.core.api.ParamConstants; import java.beans.ConstructorProperties; @@ -21,10 +22,17 @@ public class SampleIndexConfiguration { private final FileDataConfiguration fileDataConfiguration = new FileDataConfiguration(); private final AnnotationIndexConfiguration annotationIndexConfiguration = new AnnotationIndexConfiguration(); + + // Test only + @Deprecated public static SampleIndexConfiguration defaultConfiguration() { return defaultConfiguration(false); } + public static SampleIndexConfiguration defaultConfiguration(String cellbaseVersion) { + return defaultConfiguration(new VersionUtils.Version(cellbaseVersion).getMajor() == 4); + } + public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) { SampleIndexConfiguration sampleIndexConfiguration = new SampleIndexConfiguration() .addPopulation(new Population(cellbaseV4 @@ -181,7 +189,7 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) } public void validate(String cellbaseVersion) { - addMissingValues(defaultConfiguration("v4".equalsIgnoreCase(cellbaseVersion))); + addMissingValues(defaultConfiguration(cellbaseVersion)); for (IndexFieldConfiguration customField : fileIndexConfiguration.getCustomFields()) { customField.validate(); diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java index d87b6cb7f6a..c3c840f383e 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/StudyInternal.java @@ -49,8 +49,9 @@ public StudyInternal(InternalStatus status, String registrationDate, String modi this.configuration = configuration; } - public static StudyInternal init() { - return new StudyInternal(new InternalStatus(), TimeUtils.getTime(), TimeUtils.getTime(), StudyIndex.init(), StudyConfiguration.init()); + public static StudyInternal init(String cellbaseVersion) { + return new StudyInternal(new InternalStatus(), TimeUtils.getTime(), TimeUtils.getTime(), StudyIndex.init(), + StudyConfiguration.init(cellbaseVersion)); } @Override diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/configuration/StudyConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/configuration/StudyConfiguration.java index 9f01df828c7..4fd3db9326f 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/study/configuration/StudyConfiguration.java +++ 
b/opencga-core/src/main/java/org/opencb/opencga/core/models/study/configuration/StudyConfiguration.java @@ -17,9 +17,10 @@ public StudyConfiguration(ClinicalAnalysisStudyConfiguration clinical, StudyVari this.variantEngine = variantEngine; } - public static StudyConfiguration init() { + public static StudyConfiguration init(String cellbaseVersion) { return new StudyConfiguration(ClinicalAnalysisStudyConfiguration.defaultConfiguration(), - new StudyVariantEngineConfiguration(new ObjectMap(), SampleIndexConfiguration.defaultConfiguration())); + new StudyVariantEngineConfiguration(new ObjectMap(), + cellbaseVersion == null ? null : SampleIndexConfiguration.defaultConfiguration(cellbaseVersion))); } @Override diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java index 3a018796c21..32f889f9515 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantQueryParams.java @@ -202,7 +202,7 @@ public class VariantQueryParams extends BasicVariantQueryParams { public static final String RELEASE_DESCR = ""; public static final String SOURCE_DESCR = "Select the variant data source from where to fetch the data." - + " Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. " + + " Accepted values are 'variant_index' (default) and 'secondary_sample_index'. " + "When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, " + "and the 'include/exclude' parameters will be ignored. " + "If the given query can not be fully resolved using the secondary index, an exception will be raised. " @@ -343,7 +343,7 @@ public class VariantQueryParams extends BasicVariantQueryParams { private String drug; @DataField(description = CUSTOM_ANNOTATION_DESCR) private String customAnnotation; - @DataField(since = "3.2.1", description = SOURCE_DESCR) + @DataField(since = "3.4.0", description = SOURCE_DESCR) private String source; @DataField(description = UNKNOWN_GENOTYPE_DESCR) diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java index 6569dd3effe..527f213f5f9 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java @@ -144,7 +144,13 @@ public Response api(@ApiParam(value = "List of categories to get API from") @Que // Check if some categories have been selected if (StringUtils.isNotEmpty(categoryStr)) { for (String category : categoryStr.split(",")) { - classes.add(classMap.get(category)); + Class clazz = classMap.get(category); + if (clazz != null) { + classes.add(clazz); + } else { + return createErrorResponse("meta/api", + "Category '" + category + "' not found. 
Available categories: " + String.join(",", classMap.keySet())); + } } } else { // Get API for all categories diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 680d30a20db..c91a63a8e36 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -213,13 +213,19 @@ public Lock lockStudy(int studyId, long lockDuration, long timeout, String lockN return studyDBAdaptor.lock(studyId, lockDuration, timeout, lockName); } + // Test purposes only + @Deprecated public StudyMetadata createStudy(String studyName) throws StorageEngineException { + return createStudy(studyName, "v5"); + } + + public StudyMetadata createStudy(String studyName, String cellbaseVersion) throws StorageEngineException { updateProjectMetadata(projectMetadata -> { if (!getStudies().containsKey(studyName)) { StudyMetadata studyMetadata = new StudyMetadata(newStudyId(), studyName); + initSampleIndexConfigurationIfNeeded(studyMetadata, cellbaseVersion); unsecureUpdateStudyMetadata(studyMetadata); } - return projectMetadata; }); return getStudyMetadata(studyName); } @@ -229,7 +235,7 @@ public StudyMetadata.SampleIndexConfigurationVersioned addSampleIndexConfigurati Integer idOrNull = getStudyIdOrNull(study); int studyId; if (idOrNull == null) { - studyId = createStudy(study).getId(); + studyId = createStudy(study, null).getId(); } else { studyId = idOrNull; } @@ -237,17 +243,14 @@ public StudyMetadata.SampleIndexConfigurationVersioned addSampleIndexConfigurati ? 
StudyMetadata.SampleIndexConfigurationVersioned.Status.STAGING : StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE; return updateStudyMetadata(studyId, studyMetadata -> { - List configurations = studyMetadata.getSampleIndexConfigurations(); - if (configurations == null || configurations.isEmpty()) { - configurations = new ArrayList<>(2); - configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned( - SampleIndexConfiguration.defaultConfiguration(), - StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION, - Date.from(Instant.now()), StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE)); - studyMetadata.setSampleIndexConfigurations(configurations); + int version; + if (CollectionUtils.isEmpty(studyMetadata.getSampleIndexConfigurations())) { + studyMetadata.setSampleIndexConfigurations(new ArrayList<>(1)); + version = StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION + 1; + } else { + version = studyMetadata.getSampleIndexConfigurationLatest().getVersion() + 1; } - int version = studyMetadata.getSampleIndexConfigurationLatest().getVersion() + 1; - configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned( + studyMetadata.getSampleIndexConfigurations().add(new StudyMetadata.SampleIndexConfigurationVersioned( configuration, version, Date.from(Instant.now()), @@ -255,6 +258,22 @@ public StudyMetadata.SampleIndexConfigurationVersioned addSampleIndexConfigurati }).getSampleIndexConfigurationLatest(); } + private static void initSampleIndexConfigurationIfNeeded(StudyMetadata studyMetadata, String cellbaseVersion) { + List configurations = studyMetadata.getSampleIndexConfigurations(); + if (cellbaseVersion == null) { + logger.info("CellBase version not provided. Skipping SampleIndexConfiguration initialization"); + return; + } + if (configurations == null || configurations.isEmpty()) { + configurations = new ArrayList<>(2); + configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned( + SampleIndexConfiguration.defaultConfiguration(cellbaseVersion), + StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION, + Date.from(Instant.now()), StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE)); + studyMetadata.setSampleIndexConfigurations(configurations); + } + } + public boolean studyExists(String studyName) { return exists() && getStudyIdOrNull(studyName) != null; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/StudyMetadata.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/StudyMetadata.java index 8968fb5f10a..f067856ad64 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/StudyMetadata.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/StudyMetadata.java @@ -12,7 +12,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.time.Instant; import java.util.*; import java.util.stream.Collectors; @@ -141,8 +140,10 @@ public SampleIndexConfigurationVersioned getSampleIndexConfigurationLatest() { public SampleIndexConfigurationVersioned getSampleIndexConfigurationLatest(boolean includeStagingSchemas) { if (sampleIndexConfigurations == null || sampleIndexConfigurations.isEmpty()) { - return new SampleIndexConfigurationVersioned(SampleIndexConfiguration.defaultConfiguration(), - DEFAULT_SAMPLE_INDEX_VERSION, Date.from(Instant.now()), SampleIndexConfigurationVersioned.Status.ACTIVE); + // This should never happen. 
+ // Might need to run EnsureSampleIndexConfigurationIsAlwaysDefined migration + logger.warn("No SampleIndexConfiguration found on study '" + getName() + "'. This should not happen."); + return null; } else { SampleIndexConfigurationVersioned conf = null; for (SampleIndexConfigurationVersioned thisConf : sampleIndexConfigurations) { @@ -166,8 +167,9 @@ public List getSampleIndexConfigurations() { public SampleIndexConfigurationVersioned getSampleIndexConfiguration(int version) { if (sampleIndexConfigurations == null || sampleIndexConfigurations.isEmpty()) { if (version == DEFAULT_SAMPLE_INDEX_VERSION) { - return new SampleIndexConfigurationVersioned(SampleIndexConfiguration.defaultConfiguration(), - DEFAULT_SAMPLE_INDEX_VERSION, Date.from(Instant.now()), SampleIndexConfigurationVersioned.Status.ACTIVE); + logger.warn("No SampleIndexConfiguration found in study '" + getName() + "' for default version " + version + ". " + + "This should not happen."); + return null; } else { return null; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index e9081225b82..676d1faae47 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -304,6 +304,7 @@ protected VariantExporter newVariantExporter(VariantMetadataFactory metadataFact @Override public List index(List inputFiles, URI outdirUri, boolean doExtract, boolean doTransform, boolean doLoad) throws StorageEngineException { + createStudyIfNeeded(); List results = super.index(inputFiles, outdirUri, doExtract, doTransform, doLoad); if (doLoad) { annotateLoadedFiles(outdirUri, inputFiles, results, getOptions()); @@ -1462,6 +1463,21 @@ protected List initVariantAggregationExecutors() { return executors; } + protected void createStudyIfNeeded() throws StorageEngineException { + String studyName = getOptions().getString(VariantStorageOptions.STUDY.key(), VariantStorageOptions.STUDY.defaultValue()); + StudyMetadata studyMetadata = getMetadataManager().getStudyMetadata(studyName); + if (studyMetadata == null) { + logger.info("Creating a new StudyMetadata '{}'", studyName); + String cellbaseVersion; + try { + cellbaseVersion = getCellBaseUtils().getVersionFromServer(); + } catch (IOException e) { + throw new StorageEngineException("Unable to get CellBase version", e); + } + getMetadataManager().createStudy(studyName, cellbaseVersion); + } + } + /** * Determine which {@link VariantQueryExecutor} should be used to execute the given query. 
* diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java index 5b37d2512b1..618693421ae 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStoragePipeline.java @@ -164,7 +164,7 @@ public URI preTransform(URI input) throws StorageEngineException, IOException, F privateStudyId = -1; } else { VariantStorageMetadataManager smm = dbAdaptor.getMetadataManager(); - ensureStudyMetadataExists(null); + ensureStudyMetadataExists(); StudyMetadata studyMetadata = smm.updateStudyMetadata(study, existingStudyMetadata -> { if (existingStudyMetadata.getAggregation() == null) { @@ -502,7 +502,7 @@ public URI postTransform(URI input) throws IOException, FileFormatException { */ @Override public URI preLoad(URI input, URI output) throws StorageEngineException { - getOrCreateStudyMetadata(); + ensureStudyMetadataExists(); int studyId = getStudyId(); int currentRelease = getMetadataManager().getAndUpdateProjectMetadata(options).getRelease(); @@ -794,18 +794,10 @@ public VariantFileMetadata readVariantFileMetadata(URI input) throws StorageEngi /* StudyMetadata utils methods */ /* --------------------------------------- */ - protected StudyMetadata getOrCreateStudyMetadata() throws StorageEngineException { - return ensureStudyMetadataExists(getStudyMetadata()); - } - - protected StudyMetadata ensureStudyMetadataExists(StudyMetadata studyMetadata) throws StorageEngineException { + protected StudyMetadata ensureStudyMetadataExists() throws StorageEngineException { + StudyMetadata studyMetadata = getStudyMetadata(); if (studyMetadata == null) { - studyMetadata = getStudyMetadata(); - if (studyMetadata == null) { - String studyName = options.getString(VariantStorageOptions.STUDY.key(), VariantStorageOptions.STUDY.defaultValue()); - logger.info("Creating a new StudyMetadata '{}'", studyName); - studyMetadata = getMetadataManager().createStudy(studyName); - } + throw new StorageEngineException("StudyMetadata not found"); } // privateStudyMetadata = studyMetadata; setStudyId(studyMetadata.getId()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java index 6a2af48a917..16f0083184e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java @@ -12,19 +12,18 @@ import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.io.bit.BitInputStream; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; +import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.utils.iterators.CloseableIterator; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import 
org.opencb.opencga.storage.hadoop.utils.HBaseManager; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleIndexQuery; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexEntry; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleVariantIndexEntry; +import org.opencb.opencga.storage.hadoop.variant.index.sample.*; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; import org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator; import java.nio.ByteBuffer; +import java.util.HashMap; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.TimeUnit; @@ -117,7 +116,8 @@ public void run(String[] args) throws Exception { System.out.println(" query-detailed [--study ] [--sample ] [--region ] [--quiet] " + "[query...]"); System.out.println(" query-raw [--study ] [--sample ] [--quiet]"); - System.out.println(" index-stats [--study ] [--sample ]"); + System.out.println(" index-stats [--study ] [--sample ] [--region ] " + + "[--version ]"); break; } System.err.println("--------------------------"); @@ -197,12 +197,22 @@ private void indexStats(SampleIndexDBAdaptor dbAdaptor, ObjectMap argsMap) throw Map counts = new TreeMap<>(); Map> countsByGt = new TreeMap<>(); - try (CloseableIterator iterator = dbAdaptor.rawIterator(studyId, sampleId, region)) { + SampleIndexSchema schema; + if (argsMap.containsKey("version")) { + StudyMetadata.SampleIndexConfigurationVersioned sampleIndexConfiguration + = metadataManager.getStudyMetadata(studyId).getSampleIndexConfiguration(argsMap.getInt("version")); + schema = new SampleIndexSchema(sampleIndexConfiguration.getConfiguration(), sampleIndexConfiguration.getVersion()); + } else { + schema = dbAdaptor.getSchemaFactory().getSchema(studyId, sampleId, false); + } + try (CloseableIterator iterator = dbAdaptor.rawIterator(studyId, sampleId, region, schema)) { while (iterator.hasNext()) { SampleIndexEntry entry = iterator.next(); for (SampleIndexEntry.SampleIndexGtEntry gtEntry : entry.getGts().values()) { String gt = gtEntry.getGt(); - addLength(gt, counts, countsByGt, "variants", new BitInputStream( + counts.merge("variants", gtEntry.getCount(), Integer::sum); + countsByGt.computeIfAbsent(gt, k -> new HashMap<>()).merge(gt + "_variants", gtEntry.getCount(), Integer::sum); + addLength(gt, counts, countsByGt, "variant_array", new BitInputStream( gtEntry.getVariants(), gtEntry.getVariantsOffset(), gtEntry.getVariantsLength())); addLength(gt, counts, countsByGt, "fileIndex", gtEntry.getFileIndexStream()); addLength(gt, counts, countsByGt, "fileDataIndex", gtEntry.getFileDataIndexBuffer()); @@ -223,6 +233,8 @@ private void indexStats(SampleIndexDBAdaptor dbAdaptor, ObjectMap argsMap) throw } } print(new ObjectMap() + .append("version", schema.getVersion()) + .append("configuration", schema.getConfiguration()) .append("total_bytes", bytes) .append("total_size", IOUtils.humanReadableByteCount(bytes, true)) .append("counts", counts) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 
e0cbbfb3963..83f39a0fb56 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -167,6 +167,7 @@ public List index(List inputFiles, URI outdirUri, bo if (inputFiles.size() == 1 || !doLoad) { return super.index(inputFiles, outdirUri, doExtract, doTransform, doLoad); } + createStudyIfNeeded(); final int nThreadArchive = getOptions().getInt(HADOOP_LOAD_FILES_IN_PARALLEL.key(), HADOOP_LOAD_FILES_IN_PARALLEL.defaultValue()); ObjectMap extraOptions = new ObjectMap(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 43028f7220a..840e1e1f7d1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -123,8 +123,8 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator logger.info("HBase SampleIndex, skip variants table"); if (variantQuery.getSource() == VariantQuerySource.SECONDARY_SAMPLE_INDEX) { - variantQuery.getEvents().add(new Event(Event.Type.INFO, "Using only the secondary sample index. Skip main variants index." + - " Results might be partial.")); + variantQuery.getEvents().add(new Event(Event.Type.INFO, "Using only the secondary sample index. Skip main variants index." 
+ + " Results might be partial.")); } boolean count; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 8218ca08a6c..609f832afcd 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -233,7 +233,11 @@ public CloseableIterator rawIterator(int study, int sample) th } public CloseableIterator rawIterator(int study, int sample, Region region) throws IOException { - SampleIndexSchema schema = schemaFactory.getSchema(study, sample, false); + return rawIterator(study, sample, region, schemaFactory.getSchema(study, sample, false)); + } + + public CloseableIterator rawIterator(int study, int sample, Region region, SampleIndexSchema schema) + throws IOException { String tableName = getSampleIndexTableName(study, schema.getVersion()); return hBaseManager.act(tableName, table -> { Scan scan = new Scan(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index 2d4d700cf76..bd90329a739 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -164,7 +164,7 @@ public SampleIndexSchema(SampleIndexConfiguration configuration, int version) { * @return Default schema */ public static SampleIndexSchema defaultSampleIndexSchema() { - SampleIndexConfiguration sampleIndexConfiguration = SampleIndexConfiguration.defaultConfiguration(); + SampleIndexConfiguration sampleIndexConfiguration = SampleIndexConfiguration.defaultConfiguration(false); return new SampleIndexSchema(sampleIndexConfiguration, StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION); } From 98be1595e89d01bc4608a045a113f5bc471e6727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 11 Sep 2024 15:55:25 +0100 Subject: [PATCH 10/19] app: Fix migration EnsureSampleIndexConfigurationIsAlwaysDefined. 
#TASK-6765 --- ...mpleIndexConfigurationIsAlwaysDefined.java | 23 +++++++++++-------- .../catalog/migration/MigrationManager.java | 3 ++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java index dceb209fd9a..2f336ed8896 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java @@ -1,5 +1,6 @@ package org.opencb.opencga.app.migrations.v3.v3_4_0.storage; +import org.apache.commons.collections4.CollectionUtils; import org.opencb.opencga.app.migrations.StorageMigrationTool; import org.opencb.opencga.catalog.migration.Migration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; @@ -27,15 +28,19 @@ protected void run() throws Exception { if (engine.getMetadataManager().exists()) { for (Integer studyId : engine.getMetadataManager().getStudyIds()) { StudyMetadata studyMetadata = engine.getMetadataManager().getStudyMetadata(studyId); - List configurations = studyMetadata.getSampleIndexConfigurations(); - if (configurations == null || configurations.isEmpty()) { - configurations = new ArrayList<>(1); - logger.info("Creating default SampleIndexConfiguration for study '" + studyMetadata.getName() + "' (" + studyId + ")"); - configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned( - preFileDataConfiguration(), - StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION, - Date.from(Instant.now()), StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE)); - studyMetadata.setSampleIndexConfigurations(configurations); + if (CollectionUtils.isEmpty(studyMetadata.getSampleIndexConfigurations())) { + engine.getMetadataManager().updateStudyMetadata(studyId, sm -> { + if (CollectionUtils.isEmpty(sm.getSampleIndexConfigurations())) { + List configurations = new ArrayList<>(1); + logger.info("Creating default SampleIndexConfiguration for study '" + studyMetadata.getName() + "'" + + " (" + studyId + ")"); + configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned( + preFileDataConfiguration(), + StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION, + Date.from(Instant.now()), StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE)); + sm.setSampleIndexConfigurations(configurations); + } + }); } } } diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationManager.java index fa8fd7b9ee0..d0893232b65 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/migration/MigrationManager.java @@ -239,7 +239,8 @@ private MigrationSummary getMigrationSummary(String organizationId) throws Catal p -> p.getValue().getStatus(), () -> new EnumMap<>(MigrationRun.MigrationStatus.class), Collectors.counting()))) - .setVersionCount(runs.stream().collect(Collectors.groupingBy(p -> p.getKey().version(), Collectors.counting()))); + .setVersionCount(runs.stream().collect(Collectors.groupingBy(p -> p.getKey().version(), + TreeMap::new, Collectors.counting()))); long toBeApplied = 
migrationSummary .getStatusCount() From 1f92650f0acd2734ffe5471426e11f43cf5cfd90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 20 Sep 2024 13:26:05 +0100 Subject: [PATCH 11/19] storage: Fix compilation issues #TASK-6765 --- ...nsureSampleIndexConfigurationIsAlwaysDefined.java | 2 +- opencga-client/src/main/R/R/Clinical-methods.R | 2 -- .../executors/BreakendVariantQueryExecutor.java | 4 +++- .../executors/CompoundHeterozygousQueryExecutor.java | 9 +++------ .../executors/DBAdaptorVariantQueryExecutor.java | 12 +++++++----- .../query/executors/NoOpVariantQueryExecutor.java | 4 +++- .../query/executors/VariantQueryExecutor.java | 8 +++++++- .../SamplesSearchIndexVariantQueryExecutor.java | 4 +++- .../search/SearchIndexVariantQueryExecutor.java | 8 +++----- .../query/executors/VariantQueryExecutorTest.java | 8 ++++---- .../HBaseColumnIntersectVariantQueryExecutor.java | 5 +++-- .../SampleIndexMendelianErrorQueryExecutor.java | 9 ++++----- .../index/SampleIndexVariantQueryExecutor.java | 4 ++-- 13 files changed, 43 insertions(+), 36 deletions(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java index 2f336ed8896..a18a4218aa5 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/migrations/v3/v3_4_0/storage/EnsureSampleIndexConfigurationIsAlwaysDefined.java @@ -13,7 +13,7 @@ import java.util.List; @Migration(id = "ensure_sample_index_configuration_is_defined", - description = "Ensure that the SampleIndexConfiguration object is correctly defined. #TASK-6765", version = "3.4.0", + description = "Ensure that the SampleIndexConfiguration object is correctly defined. 
#TASK-6765", version = "4.0.0", language = Migration.MigrationLanguage.JAVA, domain = Migration.MigrationDomain.STORAGE, patch = 1, diff --git a/opencga-client/src/main/R/R/Clinical-methods.R b/opencga-client/src/main/R/R/Clinical-methods.R index 0c21fe15568..d8b87cea0db 100644 --- a/opencga-client/src/main/R/R/Clinical-methods.R +++ b/opencga-client/src/main/R/R/Clinical-methods.R @@ -42,8 +42,6 @@ #' | queryRgaVariant | /{apiVersion}/analysis/clinical/rga/variant/query | include, exclude, limit, skip, count, includeIndividual, skipIndividual, limitIndividual, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study | #' | summaryRgaVariant | /{apiVersion}/analysis/clinical/rga/variant/summary | limit, skip, count, sampleId, individualId, sex, phenotypes, disorders, numParents, geneId, geneName, chromosome, start, end, transcriptId, variants, dbSnps, knockoutType, filter, type, clinicalSignificance, populationFrequency, consequenceType, study | #' | search | /{apiVersion}/analysis/clinical/search | include, exclude, limit, skip, count, flattenAnnotations, study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, dueDate, qualityControlSummary, release, status, internalStatus, annotation, deleted | -#' | queryVariant | /{apiVersion}/analysis/clinical/variant/query | include, exclude, limit, skip, count, approximateCount, approximateCountSamplingSize, savedFilter, includeInterpretation, id, region, type, study, file, filter, qual, fileData, sample, sampleData, sampleAnnotation, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, trait | -#' | search | /{apiVersion}/analysis/clinical/search | include, exclude, limit, skip, count, flattenAnnotations, study, id, uuid, type, disorder, files, sample, individual, proband, probandSamples, family, familyMembers, familyMemberSamples, panels, locked, analystId, priority, flags, creationDate, modificationDate, dueDate, qualityControlSummary, release, snapshot, status, internalStatus, annotation, deleted | #' | queryVariant | /{apiVersion}/analysis/clinical/variant/query | include, exclude, limit, skip, count, approximateCount, approximateCountSamplingSize, savedFilter, includeInterpretation, id, region, type, study, file, filter, qual, fileData, sample, sampleData, sampleAnnotation, cohort, cohortStatsRef, cohortStatsAlt, cohortStatsMaf, cohortStatsMgf, cohortStatsPass, missingAlleles, missingGenotypes, score, family, familyDisorder, familySegregation, familyMembers, familyProband, gene, ct, xref, biotype, proteinSubstitution, conservation, populationFrequencyAlt, populationFrequencyRef, populationFrequencyMaf, transcriptFlag, geneTraitId, go, expression, proteinKeyword, drug, functionalScore, 
clinical, clinicalSignificance, clinicalConfirmedStatus, customAnnotation, panel, panelModeOfInheritance, panelConfidence, panelRoleInCancer, panelFeatureType, panelIntersection, source, trait | #' | acl | /{apiVersion}/analysis/clinical/{clinicalAnalyses}/acl | clinicalAnalyses[*], study, member, silent | #' | delete | /{apiVersion}/analysis/clinical/{clinicalAnalyses}/delete | study, force, clinicalAnalyses[*] | diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java index 6eb237ea4b3..bc40c5b6418 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java @@ -14,6 +14,7 @@ import org.opencb.commons.datastore.core.QueryParam; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; @@ -40,7 +41,8 @@ public BreakendVariantQueryExecutor(String storageEngineId, ObjectMap options, } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); return query.getString(VariantQueryParam.TYPE.key()).equals(VariantType.BREAKEND.name()) && VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java index c6f4b87a5f0..05a764a3e9d 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java @@ -12,10 +12,7 @@ import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; -import org.opencb.opencga.storage.core.variant.adaptors.VariantField; -import org.opencb.opencga.storage.core.variant.adaptors.VariantIterable; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.adaptors.iterators.UnionMultiVariantKeyIterator; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import 
org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIteratorWithCounts; @@ -64,8 +61,8 @@ public CompoundHeterozygousQueryExecutor(VariantStorageMetadataManager metadataM } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { - return isValidParam(query, VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS); + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + return isValidParam(variantQuery.getQuery(), VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS); } @Override diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java index 474cbc3fa9f..f83d938534c 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java @@ -1,12 +1,14 @@ package org.opencb.opencga.storage.core.variant.query.executors; import org.opencb.biodata.models.variant.Variant; -import org.opencb.commons.datastore.core.*; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.datastore.core.QueryParam; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; -import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; -import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; +import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,9 +54,9 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { for (QueryParam unsupportedParam : UNSUPPORTED_PARAMS) { - if (VariantQueryUtils.isValidParam(query, unsupportedParam)) { + if (VariantQueryUtils.isValidParam(variantQuery.getQuery(), unsupportedParam)) { logger.warn("Unsupported variant query param {} in {}", unsupportedParam.key(), DBAdaptorVariantQueryExecutor.class.getSimpleName()); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java index e286b4a07ce..5f2da2c2056 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java @@ -8,6 +8,7 @@ import 
org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.*; @@ -34,7 +35,8 @@ public NoOpVariantQueryExecutor(VariantStorageMetadataManager metadataManager, S } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); boolean sampleQuery = false; String sample = null; if (VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE)) { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java index c75fc3e4afa..c08f2626afb 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java @@ -63,6 +63,12 @@ public static void setDefaultTimeout(QueryOptions queryOptions, ObjectMap config queryOptions.put(QueryOptions.TIMEOUT, timeout); } + /** + * Determine if this VariantQueryExecutor can run the given query. + * @param variantQuery Query to execute + * @return True if this variant query executor is valid for the query + * @throws StorageEngineException if there is an error + */ public final boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { boolean canUseThisExecutor = canUseThisExecutor(variantQuery, variantQuery.getInputOptions()); if (canUseThisExecutor) { @@ -77,7 +83,7 @@ public final boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws } /** - * Determine if this VariantQueryExecutor can run the given query. + * Internal method to determine if this VariantQueryExecutor can run the given query. 
* @param variantQuery Query to execute * @param options Options for the query * @return True if this variant query executor is valid for the query diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java index 200b2eb463d..cfa794cbe10 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java @@ -7,6 +7,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager; @@ -28,7 +29,8 @@ public SamplesSearchIndexVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Varian } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); String samplesCollection = inferSpecificSearchIndexSamplesCollection(query, options, getMetadataManager(), dbName); return samplesCollection != null && searchActiveAndAlive(samplesCollection); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java index 12c86cc4e2b..bd6b8e6437f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java @@ -11,10 +11,7 @@ import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.exceptions.VariantSearchException; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; -import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; -import org.opencb.opencga.storage.core.variant.adaptors.VariantField; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; @@ -76,7 +73,8 @@ public SearchIndexVariantQueryExecutor setIntersectParamsThreshold(int intersect } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException { + public boolean 
canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + VariantQuery query = variantQuery.getQuery(); return doQuerySearchManager(query, options) || doIntersectWithSearch(query, options); } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java index 1fc7617422c..cd56447fcd3 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java @@ -213,15 +213,15 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, logger.info("########## TEST QUERY :" + query.toJson()); logger.info("####################################################"); logger.info("## Allowed VariantQueryExecutors:"); + ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { - if (variantQueryExecutor.canUseThisExecutor(query, options)) { + if (variantQueryExecutor.canUseThisExecutor(variantQuery, options)) { logger.info("## - " + variantQueryExecutor.getClass().getSimpleName()); } } logger.info("## Using DBAdaptorVariantQueryExecutor for expected results"); - Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(query, options)); + Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(variantQuery, options)); - ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); VariantQueryResult expected = dbQueryExecutor.get(variantQuery); VariantQueryResult unfilteredResult = null; @@ -265,7 +265,7 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, } for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { - if (variantQueryExecutor.canUseThisExecutor(query, options)) { + if (variantQueryExecutor.canUseThisExecutor(variantQuery, options)) { logger.info(""); logger.info("###################"); logger.info("### Testing " + variantQueryExecutor.getClass().getSimpleName()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java index c672e94fb58..ceb52e5eeaa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java @@ -7,6 +7,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery; import org.opencb.opencga.storage.core.variant.query.ParsedQuery; import 
org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryResult; @@ -43,8 +44,8 @@ public HBaseColumnIntersectVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Stri } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { - + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) { + VariantQuery query = variantQuery.getQuery(); if (!options.getBoolean(HBASE_COLUMN_INTERSECT, ACTIVE_BY_DEFAULT)) { // HBase column intersect not active return false; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java index 4dd50c9eacf..5063ca1fe4b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java @@ -9,7 +9,6 @@ import org.opencb.biodata.tools.pedigree.MendelianError; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.Trio; @@ -33,10 +32,10 @@ public SampleIndexMendelianErrorQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { - if (VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_MENDELIAN_ERROR) - || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO) - || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO_STRICT)) { + public boolean canUseThisExecutor(ParsedVariantQuery query, QueryOptions options) { + if (VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_MENDELIAN_ERROR) + || VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_DE_NOVO) + || VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_DE_NOVO_STRICT)) { return super.canUseThisExecutor(query, options); } else { return false; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java index 6175128018d..03e8a5fe880 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java @@ -58,9 +58,9 @@ public SampleIndexVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, SampleI } @Override - public boolean canUseThisExecutor(Query query, QueryOptions options) { + 
public boolean canUseThisExecutor(ParsedVariantQuery query, QueryOptions options) { if (options.getBoolean(SAMPLE_INDEX_INTERSECT, true)) { - return SampleIndexQueryParser.validSampleIndexQuery(query); + return SampleIndexQueryParser.validSampleIndexQuery(query.getQuery()); } return false; } From b02d45d4025ccb711e4cbe4c52ae570a45ea0825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 18 Oct 2024 15:23:04 +0100 Subject: [PATCH 12/19] storage: Rename schema "entry" with "document". Create parent schema class. #TASK-6765 --- .../variant/index/core/AbstractField.java | 33 +++++ .../variant/index/core/AbstractSchema.java | 15 ++ .../variant/index/core/DataFieldBase.java | 26 +--- .../hadoop/variant/index/core/DataSchema.java | 139 +++++++++--------- .../index/core/FixedSizeIndexSchema.java | 10 +- .../hadoop/variant/index/core/IndexField.java | 25 +--- .../variant/index/core/IndexSchema.java | 28 +++- ...taIndexSchema.java => FileDataSchema.java} | 4 +- .../sample/HBaseToSampleIndexConverter.java | 6 +- .../sample/SampleIndexEntryPutBuilder.java | 8 +- .../index/sample/SampleIndexSchema.java | 23 ++- .../sample/SampleIndexVariantBiConverter.java | 6 +- .../SampleVariantIndexEntryConverter.java | 2 +- .../variant/index/core/DataSchemaTest.java | 87 ++++++----- ...chemaTest.java => FileDataSchemaTest.java} | 4 +- 15 files changed, 223 insertions(+), 193 deletions(-) create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java create mode 100644 opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{FileDataIndexSchema.java => FileDataSchema.java} (98%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{FileDataIndexSchemaTest.java => FileDataSchemaTest.java} (93%) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java new file mode 100644 index 00000000000..bbc53dc0c43 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java @@ -0,0 +1,33 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +public abstract class AbstractField { + + protected final IndexFieldConfiguration configuration; + + protected AbstractField(IndexFieldConfiguration configuration) { + this.configuration = configuration; + } + + public String getId() { + return configuration.getId(); + } + + public IndexFieldConfiguration.Source getSource() { + return configuration.getSource(); + } + + public String getKey() { + return configuration.getKey(); + } + + public IndexFieldConfiguration getConfiguration() { + return configuration; + } + + public IndexFieldConfiguration.Type getType() { + return configuration.getType(); + } + +} diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java new file mode 100644 index 00000000000..09dc7e238f1 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java @@ -0,0 +1,15 @@ +package org.opencb.opencga.storage.hadoop.variant.index.core; + +import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; + +import java.util.List; + +public abstract class AbstractSchema { + + public abstract List getFields(); + + public FIELD getField(IndexFieldConfiguration.Source source, String key) { + return getFields().stream().filter(i -> i.getSource() == source && i.getKey().equals(key)).findFirst().orElse(null); + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java index 59ad0ad59f3..ad1769d6ff9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java @@ -15,32 +15,10 @@ * The ByteBuffer contains a set of entries, each entry contains a set of fields. * @param */ -public abstract class DataFieldBase { - - private final IndexFieldConfiguration configuration; +public abstract class DataFieldBase extends AbstractField { public DataFieldBase(IndexFieldConfiguration configuration) { - this.configuration = configuration; - } - - public String getId() { - return configuration.getId(); - } - - public IndexFieldConfiguration.Source getSource() { - return configuration.getSource(); - } - - public String getKey() { - return configuration.getKey(); - } - - public IndexFieldConfiguration getConfiguration() { - return configuration; - } - - public IndexFieldConfiguration.Type getType() { - return configuration.getType(); + super(configuration); } public void move(ByteBuffer bb) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java index eb4bf0aee83..02da5c50288 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java @@ -13,159 +13,156 @@ * This class contains the schema of the data stored in the index. The schema is defined by a set of fields. *
* The generated data is stored in a ByteBuffer, and this class is used to read and write the data. - * The ByteBuffer contains a set of entries, each entry contains a set of fields. + * The ByteBuffer contains a list of documents, each document contains a set of fieldValues. *
- * The fields of each entry are stored in the same order as they are added to the schema. + * The fields of each document are stored in the same order as they are added to the schema. *
* - ByteBuffer - * - Entry 1 - * - Entry length - * - Field 1 + * - Doc 1 + * - Doc length + * - FieldValue 1 * - ... - * - Field n + * - FieldValue n * - ... - * - Entry n + * - Doc n */ -public abstract class DataSchema { +public abstract class DataSchema extends AbstractSchema> { private final List> fields; - protected final DataField entryLengthField; - private ByteBuffer defaultEntry; + protected final DataField documentLengthField; + private ByteBuffer defaultDocument; // private boolean sparse = false; public DataSchema() { fields = new ArrayList<>(); - entryLengthField = new VarIntDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "ENTRY_LENGTH", null)); - defaultEntry = ByteBuffer.allocate(0); + documentLengthField = new VarIntDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "DOC_LENGTH", null)); + defaultDocument = ByteBuffer.allocate(0); } protected void addField(DataFieldBase field) { fields.add(field); - ExposedByteArrayOutputStream defaultEntryStream = new ExposedByteArrayOutputStream(); + ExposedByteArrayOutputStream defaultDocumentStream = new ExposedByteArrayOutputStream(); for (DataFieldBase dataField : fields) { - writeDefaultValue(dataField, defaultEntryStream); + writeDefaultValue(dataField, defaultDocumentStream); } - defaultEntry = defaultEntryStream.toByteByffer().asReadOnlyBuffer(); + defaultDocument = defaultDocumentStream.toByteByffer().asReadOnlyBuffer(); } - private static void writeDefaultValue(DataFieldBase dataField, ByteArrayOutputStream defaultEntry) { + private static void writeDefaultValue(DataFieldBase dataField, ByteArrayOutputStream documentStream) { T defaultValue = dataField.getDefault(); - dataField.write(defaultValue, defaultEntry); - } - - public DataFieldBase getField(IndexFieldConfiguration.Source source, String key) { - return fields.stream().filter(i -> i.getSource() == source && i.getKey().equals(key)).findFirst().orElse(null); + dataField.write(defaultValue, documentStream); } + @Override public List> getFields() { return fields; } - public void writeEntry(ByteBuffer buffer, ByteBuffer entryBuffer) { - entryBuffer.rewind(); - if (isDefaultEntry(entryBuffer)) { - // This is the default entry - entryLengthField.write(0, buffer); + public void writeDocument(ByteBuffer buffer, ByteBuffer docBuffer) { + docBuffer.rewind(); + if (isDefaultDocument(docBuffer)) { + // This is the default document + documentLengthField.write(0, buffer); return; } - int entryLength = entryBuffer.limit(); - entryLengthField.write(entryLength, buffer); - buffer.put(entryBuffer.array(), buffer.arrayOffset(), entryLength); + int documentLength = docBuffer.limit(); + documentLengthField.write(documentLength, buffer); + buffer.put(docBuffer.array(), buffer.arrayOffset(), documentLength); } - public void writeEntry(ByteArrayOutputStream stream, ByteBuffer entryBuffer) { - entryBuffer.rewind(); - if (isDefaultEntry(entryBuffer)) { - // This is the default entry - entryLengthField.write(0, stream); + public void writeDocument(ByteArrayOutputStream stream, ByteBuffer docBuffer) { + docBuffer.rewind(); + if (isDefaultDocument(docBuffer)) { + // This is the default document + documentLengthField.write(0, stream); return; } - int entryLength = entryBuffer.limit(); - entryLengthField.write(entryLength, stream); - stream.write(entryBuffer.array(), entryBuffer.arrayOffset(), entryLength); + int docLength = docBuffer.limit(); + documentLengthField.write(docLength, stream); + stream.write(docBuffer.array(), docBuffer.arrayOffset(), 
docLength); } - private boolean isDefaultEntry(ByteBuffer entryBuffer) { - return defaultEntry.limit() == entryBuffer.limit() - && defaultEntry.compareTo(entryBuffer) == 0; + private boolean isDefaultDocument(ByteBuffer docBuffer) { + return defaultDocument.limit() == docBuffer.limit() + && defaultDocument.compareTo(docBuffer) == 0; } - public ByteBuffer readEntry(ByteBuffer buffer, int entryPosition) { + public ByteBuffer readDocument(ByteBuffer buffer, int docPosition) { try { buffer.rewind(); - for (int i = 0; i < entryPosition; i++) { + for (int i = 0; i < docPosition; i++) { if (!buffer.hasRemaining()) { return ByteBuffer.allocate(0); } - int entryLength = entryLengthField.readAndDecode(buffer); - buffer.position(buffer.position() + entryLength); + int docLength = documentLengthField.readAndDecode(buffer); + buffer.position(buffer.position() + docLength); } - return readNextEntry(buffer); + return readNextDocument(buffer); } catch (Exception e) { throw e; } } - public ByteBuffer readNextEntry(ByteBuffer buffer) { + public ByteBuffer readNextDocument(ByteBuffer buffer) { try { if (!buffer.hasRemaining()) { return ByteBuffer.allocate(0); } - int entryLength = entryLengthField.readAndDecode(buffer); - if (entryLength == 0) { - return defaultEntry; + int docLength = documentLengthField.readAndDecode(buffer); + if (docLength == 0) { + return defaultDocument; } - ByteBuffer elementBuffer = ByteBuffer.allocate(entryLength); - buffer.get(elementBuffer.array(), elementBuffer.arrayOffset(), entryLength); - elementBuffer.rewind(); - return elementBuffer; + ByteBuffer docBuffer = ByteBuffer.allocate(docLength); + buffer.get(docBuffer.array(), docBuffer.arrayOffset(), docLength); + docBuffer.rewind(); + return docBuffer; } catch (Exception e) { throw e; } } - public T readFieldAndDecode(ByteBuffer buffer, DataField field) { - buffer.rewind(); + public T readFieldAndDecode(ByteBuffer docBuffer, DataField field) { + docBuffer.rewind(); for (DataFieldBase thisField : fields) { - if (thisField == entryLengthField) { - // Skip entry length field + if (thisField == documentLengthField) { + // Skip document length field continue; } else if (thisField == field) { - return field.readAndDecode(buffer); + return field.readAndDecode(docBuffer); } else { - thisField.move(buffer); + thisField.move(docBuffer); } } throw new IllegalArgumentException("Unknown field " + field); } - public T readFieldAndDecode(ByteBuffer buffer, DataFieldWithContext field, C context) { - buffer.rewind(); + public T readFieldAndDecode(ByteBuffer docBuffer, DataFieldWithContext field, C context) { + docBuffer.rewind(); for (DataFieldBase thisField : fields) { - if (thisField == entryLengthField) { - // Skip entry length field + if (thisField == documentLengthField) { + // Skip document length field continue; } else if (thisField == field) { - return field.readAndDecode(context, buffer); + return field.readAndDecode(context, docBuffer); } else { - thisField.move(buffer); + thisField.move(docBuffer); } } throw new IllegalArgumentException("Unknown field " + field); } - public ByteBuffer readField(ByteBuffer buffer, DataFieldBase field) { - buffer.rewind(); + public ByteBuffer readField(ByteBuffer docBuffer, DataFieldBase field) { + docBuffer.rewind(); for (DataFieldBase thisField : fields) { - if (thisField == entryLengthField) { - // Skip entry length field + if (thisField == documentLengthField) { + // Skip document length field continue; } else if (thisField == field) { - return field.read(buffer); + return field.read(docBuffer); } 
else { - thisField.move(buffer); + thisField.move(docBuffer); } } throw new IllegalArgumentException("Unknown field " + field); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java index 0866a687797..878b544b664 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/FixedSizeIndexSchema.java @@ -30,21 +30,21 @@ public int getBitsLength() { } /** - * Read index element from the bit input stream. + * Read index document from the bit input stream. * @param buffer BitBuffer - * @param i element position + * @param i document position * @return BitBuffer containing all fields from the index. */ - public BitBuffer readEntry(BitBuffer buffer, int i) { + public BitBuffer readDocument(BitBuffer buffer, int i) { return buffer.getBitBuffer(i * indexSizeBits, indexSizeBits); } /** - * Read next index element from the bit input stream. + * Read next index document from the bit input stream. * @param stream BitBuffer * @return BitBuffer containing all fields from the index. */ - public BitBuffer readEntry(BitInputStream stream) { + public BitBuffer readDocument(BitInputStream stream) { return stream.readBitBuffer(getBitsLength()); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java index ae0715ff52e..0d5b00758e9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java @@ -17,36 +17,15 @@ import java.util.function.Function; import java.util.stream.Collectors; -public abstract class IndexField { +public abstract class IndexField extends AbstractField { - private final IndexFieldConfiguration configuration; private final int bitOffset; public IndexField(IndexFieldConfiguration configuration, int bitOffset) { - this.configuration = configuration; + super(configuration); this.bitOffset = bitOffset; } - public String getId() { - return configuration.getId(); - } - - public IndexFieldConfiguration.Source getSource() { - return configuration.getSource(); - } - - public String getKey() { - return configuration.getKey(); - } - - public IndexFieldConfiguration getConfiguration() { - return configuration; - } - - public IndexFieldConfiguration.Type getType() { - return configuration.getType(); - } - public abstract int getBitLength(); public int getBitOffset() { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexSchema.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexSchema.java index ef13b30b7ec..c3b55725ade 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexSchema.java @@ -1,13 +1,32 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFilter; import java.util.List; -public abstract class IndexSchema { +/** + * Index schema. + * Contains a list of fields that define the schema of the index. + *
+ * The main purpose of an index is to filter data. + * This schema is used to build the filters and to read the data from the index. + *
+ Each input record produces a document in the index. Each document contains a set of fields. + The generated documents are stored in a BitBuffer or BitInputStream. + *
+ * The fields of each document are stored in the same order as they are added to the schema. + *
+ * - BitBuffer + * - Doc 1 + * - FieldValue 1 + * - ... + * - FieldValue n + * - ... + * - Doc n + */ +public abstract class IndexSchema extends AbstractSchema> { protected List> fields; @@ -18,10 +37,7 @@ public IndexSchema(List> fields) { this.fields = fields; } - public IndexField getField(IndexFieldConfiguration.Source source, String key) { - return fields.stream().filter(i -> i.getSource().equals(source) && i.getKey().equals(key)).findFirst().orElse(null); - } - + @Override public List> getFields() { return fields; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java similarity index 98% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java index 371657ae3bf..246b2bfa581 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java @@ -17,13 +17,13 @@ import java.util.Collections; import java.util.List; -public class FileDataIndexSchema extends DataSchema { +public class FileDataSchema extends DataSchema { private final DataFieldWithContext originalCallField; private final DataFieldWithContext> secondaryAlternatesField; private final SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration; - public FileDataIndexSchema(SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration) { + public FileDataSchema(SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration) { this.fileDataConfiguration = fileDataConfiguration; if (fileDataConfiguration.isIncludeOriginalCall()) { originalCallField = new VarBinaryDataField( diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java index 033105e49a9..d6e8be6f22b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java @@ -28,7 +28,7 @@ public class HBaseToSampleIndexConverter implements Converter> convertToMapSampleVariantIn for (Variant variant : map.get(gt)) { BitBuffer fileIndexEntry; do { - fileIndexEntry = fileIndexSchema.readEntry(fileIndexStream); + fileIndexEntry = fileIndexSchema.readDocument(fileIndexStream); ByteBuffer fileDataEntry; if (fileDataBuffer == null) { fileDataEntry = null; } else { - fileDataEntry = fileDataSchema.readNextEntry(fileDataBuffer); + fileDataEntry = 
fileDataSchema.readNextDocument(fileDataBuffer); } values.add(new SampleVariantIndexEntry(variant, fileIndexEntry, fileDataEntry)); } while (this.fileIndexSchema.isMultiFile(fileIndexEntry)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java index 20e04c2c401..029aa655f72 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java @@ -25,7 +25,7 @@ public class SampleIndexEntryPutBuilder { private static final byte[] COLUMN_FAMILY = GenomeHelper.COLUMN_FAMILY_BYTES; private final SampleIndexSchema schema; private final FileIndexSchema fileIndexSchema; - private final FileDataIndexSchema fileDataIndexSchema; + private final FileDataSchema fileDataSchema; private final SampleVariantIndexEntry.SampleVariantIndexEntryComparator comparator; private final boolean orderedInput; private final boolean multiFileSample; @@ -58,7 +58,7 @@ public SampleIndexEntryPutBuilder(int sampleId, String chromosome, int position, variantConverter = new SampleIndexVariantBiConverter(schema); this.schema = schema; fileIndexSchema = this.schema.getFileIndex(); - fileDataIndexSchema = this.schema.getFileData(); + fileDataSchema = this.schema.getFileData(); comparator = new SampleVariantIndexEntry.SampleVariantIndexEntryComparator(schema); } @@ -170,7 +170,7 @@ public void build(Put put) { } offset = fileIndexBuffer.setBitBuffer(gtEntry.getFileIndex(), offset); if (!gtEntry.getFileData().isEmpty()) { - fileDataIndexSchema.writeEntry(fileDataIndexBuffer, gtEntry.getFileData().get(0)); + fileDataSchema.writeDocument(fileDataIndexBuffer, gtEntry.getFileData().get(0)); } prev = gtEntry; } @@ -407,7 +407,7 @@ private void partialBuild(boolean flush) { offset += fileIndexSchema.getBitsLength(); prev = gtEntry; if (!gtEntry.getFileData().isEmpty()) { - fileDataIndexSchema.writeEntry(fileDataBuffer, gtEntry.getFileData().get(0)); + fileDataSchema.writeDocument(fileDataBuffer, gtEntry.getFileData().get(0)); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index bd90329a739..a0e4ff5905f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -20,8 +20,23 @@ import static org.apache.hadoop.hbase.util.Bytes.SIZEOF_INT; /** - * Define RowKey, column names, and fields. Used to build the sample index. + * Define RowKey, column names, and individual schemas. Used to build the sample index. * + * {@link SampleIndexEntry}: HBase row. 
Contains all the information from a sample in a specific region. + * {@link SampleIndexEntry.SampleIndexGtEntry}: HBase columns grouped by genotype. + * {@link SampleIndexEntryIterator}: Iterator over the variants of a {@link SampleIndexEntry} + * {@link SampleVariantIndexEntry}: Logical view over an entry for a specific variant and corresponding keys + *
+ * - Row : {SAMPLE_ID}_{CHROMOSOME}_{BATCH_START} + * - Variants columns: {GT} -> [{variant1}, {variant2}, {variant3}, ...] + * - Genotype columns: _{key}_{GT} -> [{doc1}, {doc2}, {doc3}, ...] + * - doc1 = [{fieldValue1}, {fieldValue2}, {fieldValue3}, ...] + * - doc2 = [{fieldValue1}, {fieldValue2}, {fieldValue3}, ...] + * - Meta columns: _{key} -> [{doc1}, {doc2}, {doc3}, ...] + *
+ * Documents from genotype columns are ordered in the same way as the variants in the variants cell. + * Each variant is associated with a list of documents, one from each genotype column. + *
* Created on 11/04/19. * * @author Jacobo Coll <jacobo167@gmail.com> @@ -131,7 +146,7 @@ public final class SampleIndexSchema { private final int version; private final SampleIndexConfiguration configuration; private final FileIndexSchema fileIndex; - private final FileDataIndexSchema fileData; + private final FileDataSchema fileData; private final PopulationFrequencyIndexSchema popFreqIndex; private final ConsequenceTypeIndexSchema ctIndex; private final BiotypeIndexSchema biotypeIndex; @@ -144,7 +159,7 @@ public SampleIndexSchema(SampleIndexConfiguration configuration, int version) { this.version = version; this.configuration = configuration; fileIndex = new FileIndexSchema(configuration.getFileIndexConfiguration()); - fileData = new FileDataIndexSchema(configuration.getFileDataConfiguration()); + fileData = new FileDataSchema(configuration.getFileDataConfiguration()); // annotationSummaryIndexSchema = new AnnotationSummaryIndexSchema(); ctIndex = new ConsequenceTypeIndexSchema(configuration.getAnnotationIndexConfiguration().getConsequenceType()); biotypeIndex = new BiotypeIndexSchema(configuration.getAnnotationIndexConfiguration().getBiotype()); @@ -208,7 +223,7 @@ public FileIndexSchema getFileIndex() { return fileIndex; } - public FileDataIndexSchema getFileData() { + public FileDataSchema getFileData() { return fileData; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java index b3a41a9b3b8..1b26888615e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java @@ -302,11 +302,11 @@ public BitBuffer nextMultiFileIndexEntry() { } private BitBuffer getFileIndex(int i) { - return schema.getFileIndex().readEntry(fileIndex, i); + return schema.getFileIndex().readDocument(fileIndex, i); } private ByteBuffer getFileDataIndex(int i) { - return schema.getFileData().readEntry(fileDataIndex, i); + return schema.getFileData().readDocument(fileDataIndex, i); } @Override @@ -374,7 +374,7 @@ public AnnotationIndexEntry nextAnnotationIndexEntry() { if (clinical) { int nextClinical = nextClinicalIndex(); // TODO: Reuse BitBuffer - annotationIndexEntry.setClinicalIndex(schema.getClinicalIndexSchema().readEntry(clinicalIndex, nextClinical)); + annotationIndexEntry.setClinicalIndex(schema.getClinicalIndexSchema().readDocument(clinicalIndex, nextClinical)); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java index 8916ad975fe..7b2324102bc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java @@ -21,7 +21,7 @@ public class SampleVariantIndexEntryConverter { private final FileIndexSchema fileIndex; - private final FileDataIndexSchema fileDataSchema; + private final FileDataSchema fileDataSchema; public SampleVariantIndexEntryConverter(SampleIndexSchema configuration) { fileIndex = configuration.getFileIndex(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java index 33844f6ff01..298ba2f0b9a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java @@ -42,7 +42,7 @@ public void readWriteDefault() { ByteBuffer byteByffer = stream.toByteByffer(); ExposedByteArrayOutputStream stream2 = new ExposedByteArrayOutputStream(); - dataSchema.writeEntry(stream2, byteByffer); + dataSchema.writeDocument(stream2, byteByffer); Assert.assertEquals(1, stream2.toByteByffer().limit()); Assert.assertEquals(0, stream2.toByteByffer().get()); } @@ -50,71 +50,68 @@ public void readWriteDefault() { @Test public void readWrite() { - ByteBuffer bbEntry1 = ByteBuffer.allocate(100); - ByteBuffer bbEntry2 = ByteBuffer.allocate(100); + ByteBuffer bbDoc1 = ByteBuffer.allocate(100); + ByteBuffer bbDoc2 = ByteBuffer.allocate(100); ByteBuffer bb = ByteBuffer.allocate(100); - key1.write("key1_value", bbEntry1); - key2.write("key2_value", bbEntry1); - key3.write(1234255, bbEntry1); - key4.write("key4_value", bbEntry1); - bbEntry1.limit(bbEntry1.position()); + key1.write("key1_value", bbDoc1); + key2.write("key2_value", bbDoc1); + key3.write(1234255, bbDoc1); + key4.write("key4_value", bbDoc1); + bbDoc1.limit(bbDoc1.position()); - key1.write("key1_value", bbEntry2); - key2.write("key2_value", bbEntry2); - key3.write(32, bbEntry2); - key4.write("key4_value", bbEntry2); - bbEntry2.limit(bbEntry2.position()); + key1.write("key1_value", bbDoc2); + key2.write("key2_value", bbDoc2); + key3.write(32, bbDoc2); + key4.write("key4_value", bbDoc2); + bbDoc2.limit(bbDoc2.position()); - dataSchema.writeEntry(bb, bbEntry1); - dataSchema.writeEntry(bb, bbEntry2); + dataSchema.writeDocument(bb, bbDoc1); + dataSchema.writeDocument(bb, bbDoc2); - bbEntry1.rewind(); - bbEntry2.rewind(); + bbDoc1.rewind(); + bbDoc2.rewind(); bb.rewind(); -// System.out.println("Bytes.toStringBinary(bbEntry) = " + Bytes.toStringBinary(bbEntry)); -// System.out.println("Bytes.toStringBinary(bbEntry) = " + Bytes.toStringBinary(bb)); - // Read entries sequentially - ByteBuffer readEntry = dataSchema.readNextEntry(bb); - checkEntry(bbEntry1, readEntry, 1234255); + ByteBuffer readDoc = dataSchema.readNextDocument(bb); + checkDocument(bbDoc1, readDoc, 1234255); - ByteBuffer readEntry2 = dataSchema.readNextEntry(bb); - checkEntry(bbEntry2, readEntry2, 32); + ByteBuffer readDoc2 = dataSchema.readNextDocument(bb); + checkDocument(bbDoc2, readDoc2, 32); // Read entries random - readEntry2 = dataSchema.readEntry(bb, 1); - checkEntry(bbEntry2, readEntry2, 32); + 
readDoc2 = dataSchema.readDocument(bb, 1); + checkDocument(bbDoc2, readDoc2, 32); - readEntry = dataSchema.readEntry(bb, 0); - checkEntry(bbEntry1, readEntry, 1234255); + readDoc = dataSchema.readDocument(bb, 0); + checkDocument(bbDoc1, readDoc, 1234255); } - private void checkEntry(ByteBuffer expected, ByteBuffer readEntry, int key3NumberValue) { - Assert.assertEquals(expected, readEntry); -// System.out.println("Bytes.toStringBinary(readEntry) = " + Bytes.toStringBinary(readEntry)); + private void checkDocument(ByteBuffer expected, ByteBuffer readDoc, int key3NumberValue) { + Assert.assertEquals(expected, readDoc); +// System.out.println("Bytes.toStringBinary(readDoc) = " + Bytes.toStringBinary(readDoc)); // Sequential field read order - Assert.assertEquals("key1_value", key1.readAndDecode(readEntry)); - Assert.assertEquals("key2_value", key2.readAndDecode(readEntry)); - Assert.assertEquals(key3NumberValue, key3.readAndDecode(readEntry).intValue()); - Assert.assertEquals("key4_value", key4.readAndDecode(readEntry)); + Assert.assertEquals("key1_value", key1.readAndDecode(readDoc)); + Assert.assertEquals("key2_value", key2.readAndDecode(readDoc)); + Assert.assertEquals(key3NumberValue, key3.readAndDecode(readDoc).intValue()); + Assert.assertEquals("key4_value", key4.readAndDecode(readDoc)); - readEntry.rewind(); + readDoc.rewind(); // Wrong order. - Assert.assertEquals("key1_value", key4.readAndDecode(readEntry)); - Assert.assertEquals("key2_value", key1.readAndDecode(readEntry)); - Assert.assertEquals(key3NumberValue, key3.readAndDecode(readEntry).intValue()); - Assert.assertEquals("key4_value", key2.readAndDecode(readEntry)); + Assert.assertEquals("key1_value", key4.readAndDecode(readDoc)); + Assert.assertEquals("key2_value", key1.readAndDecode(readDoc)); + Assert.assertEquals(key3NumberValue, key3.readAndDecode(readDoc).intValue()); + Assert.assertEquals("key4_value", key2.readAndDecode(readDoc)); - readEntry.rewind(); + readDoc.rewind(); // Random field access order - Assert.assertEquals("key4_value", dataSchema.readFieldAndDecode(readEntry, key4)); - Assert.assertEquals("key1_value", dataSchema.readFieldAndDecode(readEntry, key1)); - Assert.assertEquals(key3NumberValue, dataSchema.readFieldAndDecode(readEntry, key3).intValue()); - Assert.assertEquals("key2_value", dataSchema.readFieldAndDecode(readEntry, key2)); + Assert.assertEquals("key4_value", dataSchema.readFieldAndDecode(readDoc, key4)); + Assert.assertEquals("key1_value", dataSchema.readFieldAndDecode(readDoc, key1)); + Assert.assertEquals(key3NumberValue, dataSchema.readFieldAndDecode(readDoc, key3).intValue()); + Assert.assertEquals("key2_value", dataSchema.readFieldAndDecode(readDoc, key2)); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchemaTest.java similarity index 93% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchemaTest.java index 4d93133a1d3..a7d14f6e6a8 100644 --- 
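Reviewer note (not part of the patch): a minimal Java sketch of the write/read round trip that the renamed DataSchemaTest exercises above. The concrete DataSchema subclass, the field registration and the field types are not shown in this hunk, so the two-field schema and the VarCharDataField/IntegerDataField choices below are assumptions; only methods visible in the diff (write, writeDocument, readNextDocument, readDocument, readAndDecode, readFieldAndDecode) are used.

```java
import java.nio.ByteBuffer;
import org.opencb.opencga.storage.hadoop.variant.index.core.DataSchema;
import org.opencb.opencga.storage.hadoop.variant.index.core.IntegerDataField;
import org.opencb.opencga.storage.hadoop.variant.index.core.VarCharDataField;

// Sketch only. Assumes a schema whose only registered fields are key1 and key3, in that order.
static void documentRoundTrip(DataSchema schema, VarCharDataField key1, IntegerDataField key3) {
    ByteBuffer doc = ByteBuffer.allocate(100);
    key1.write("key1_value", doc);              // encode each field into one document buffer
    key3.write(1234255, doc);
    doc.limit(doc.position());

    ByteBuffer page = ByteBuffer.allocate(100);
    schema.writeDocument(page, doc);            // append the document to the shared buffer
    page.rewind();

    ByteBuffer readDoc = schema.readNextDocument(page);      // sequential document access
    // ByteBuffer readDoc = schema.readDocument(page, 0);    // or random access by index
    String key1Value = key1.readAndDecode(readDoc);          // field-order read
    readDoc.rewind();
    int key3Value = schema.readFieldAndDecode(readDoc, key3).intValue();  // random field access
    System.out.println(key1Value + " / " + key3Value);
}
```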
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchemaTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchemaTest.java @@ -14,7 +14,7 @@ import static org.junit.Assert.*; @Category(ShortTests.class) -public class FileDataIndexSchemaTest { +public class FileDataSchemaTest { @Before public void setUp() throws Exception { @@ -34,7 +34,7 @@ public void testOriginalCallConverter() { } private static void testOriginalCallEncoding(Variant variant, OriginalCall expected) { - FileDataIndexSchema.VariantOriginalCallToBytesConverter cpair = new FileDataIndexSchema.VariantOriginalCallToBytesConverter(); + FileDataSchema.VariantOriginalCallToBytesConverter cpair = new FileDataSchema.VariantOriginalCallToBytesConverter(); Pair pair = cpair.to(Pair.of(variant, expected)); System.out.println("Bytes2 length : " + pair.getValue().limit()); From f5fd1aa1fad57ac0d0b81dd198fba7fba80152e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 18 Oct 2024 15:25:08 +0100 Subject: [PATCH 13/19] storage: Rename IndexFieldConfiguration with FieldConfiguration #TASK-6765 --- ...iguration.java => FieldConfiguration.java} | 24 ++--- .../storage/SampleIndexConfiguration.java | 98 +++++++++---------- .../storage/FieldConfigurationTest.java | 28 ++++++ .../storage/IndexFieldConfigurationTest.java | 28 ------ .../query/executors/accumulators/Range.java | 10 +- .../executors/accumulators/RangeTest.java | 6 +- .../SampleIndexOnlyVariantQueryExecutor.java | 6 +- .../index/annotation/BiotypeIndexSchema.java | 4 +- .../index/annotation/ClinicalIndexSchema.java | 4 +- .../ConsequenceTypeIndexSchema.java | 4 +- .../PopulationFrequencyIndexSchema.java | 4 +- .../annotation/TranscriptFlagIndexSchema.java | 4 +- .../variant/index/core/AbstractField.java | 12 +-- .../variant/index/core/AbstractSchema.java | 4 +- .../index/core/CategoricalIndexField.java | 10 +- .../CategoricalMultiValuedIndexField.java | 10 +- .../hadoop/variant/index/core/DataField.java | 4 +- .../variant/index/core/DataFieldBase.java | 4 +- .../index/core/DataFieldWithContext.java | 4 +- .../hadoop/variant/index/core/DataSchema.java | 4 +- .../hadoop/variant/index/core/IndexField.java | 4 +- .../variant/index/core/IntegerDataField.java | 4 +- .../variant/index/core/RangeIndexField.java | 8 +- .../index/core/VarBinaryDataField.java | 4 +- .../variant/index/core/VarCharDataField.java | 4 +- .../variant/index/core/VarIntDataField.java | 4 +- .../variant/index/core/VarSIntDataField.java | 4 +- .../index/core/VariableWidthDataField.java | 4 +- .../index/query/SampleFileIndexQuery.java | 8 +- .../variant/index/sample/FileDataSchema.java | 6 +- .../variant/index/sample/FileIndexSchema.java | 26 ++--- .../index/sample/SampleIndexQueryParser.java | 12 +-- .../SampleVariantIndexEntryConverter.java | 6 +- .../index/core/CategoricalIndexFieldTest.java | 32 +++--- .../CombinationTripleIndexSchemaTest.java | 6 +- .../variant/index/core/DataSchemaTest.java | 10 +- .../index/core/RangeIndexFieldTest.java | 38 +++---- .../filters/RangeIndexFieldFilterTest.java | 10 +- .../sample/SampleIndexQueryParserTest.java | 44 ++++----- .../variant/index/sample/SampleIndexTest.java | 12 +-- .../SampleVariantIndexEntryConverterTest.java | 14 +-- 41 files changed, 266 insertions(+), 266 deletions(-) rename 
opencga-core/src/main/java/org/opencb/opencga/core/config/storage/{IndexFieldConfiguration.java => FieldConfiguration.java} (89%) create mode 100644 opencga-core/src/test/java/org/opencb/opencga/core/config/storage/FieldConfigurationTest.java delete mode 100644 opencga-core/src/test/java/org/opencb/opencga/core/config/storage/IndexFieldConfigurationTest.java diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/FieldConfiguration.java similarity index 89% rename from opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java rename to opencga-core/src/main/java/org/opencb/opencga/core/config/storage/FieldConfiguration.java index 12578efab7a..98323eca559 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/IndexFieldConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/FieldConfiguration.java @@ -7,7 +7,7 @@ import java.beans.ConstructorProperties; import java.util.*; -public class IndexFieldConfiguration { +public class FieldConfiguration { protected final Source source; protected final String key; protected Type type; @@ -16,7 +16,7 @@ public class IndexFieldConfiguration { protected Map> valuesMapping; protected boolean nullable = true; - public IndexFieldConfiguration(IndexFieldConfiguration other) { + public FieldConfiguration(FieldConfiguration other) { this.source = other.source; this.key = other.key; this.type = other.type; @@ -26,17 +26,17 @@ public IndexFieldConfiguration(IndexFieldConfiguration other) { } @ConstructorProperties({"source", "key", "type"}) - protected IndexFieldConfiguration(Source source, String key, Type type) { + protected FieldConfiguration(Source source, String key, Type type) { this.source = source; this.key = key; this.type = type; } - public IndexFieldConfiguration(Source source, String key, double[] thresholds) { + public FieldConfiguration(Source source, String key, double[] thresholds) { this(source, key, thresholds, Type.RANGE_LT); } - public IndexFieldConfiguration(Source source, String key, double[] thresholds, Type rangeType) { + public FieldConfiguration(Source source, String key, double[] thresholds, Type rangeType) { this.key = key; this.source = source; this.type = rangeType; @@ -44,7 +44,7 @@ public IndexFieldConfiguration(Source source, String key, double[] thresholds, T this.values = null; } - public IndexFieldConfiguration(Source source, String key, Type type, String... values) { + public FieldConfiguration(Source source, String key, Type type, String... values) { this.key = key; this.source = source; this.type = type; @@ -69,7 +69,7 @@ public Type getType() { return type; } - public IndexFieldConfiguration setType(Type type) { + public FieldConfiguration setType(Type type) { this.type = type; return this; } @@ -78,7 +78,7 @@ public double[] getThresholds() { return thresholds; } - public IndexFieldConfiguration setThresholds(double[] thresholds) { + public FieldConfiguration setThresholds(double[] thresholds) { this.thresholds = thresholds; return this; } @@ -87,7 +87,7 @@ public String[] getValues() { return values; } - public IndexFieldConfiguration setValues(String... values) { + public FieldConfiguration setValues(String... 
values) { this.values = values; return this; } @@ -96,7 +96,7 @@ public Map> getValuesMapping() { return valuesMapping; } - public IndexFieldConfiguration setValuesMapping(Map> valuesMapping) { + public FieldConfiguration setValuesMapping(Map> valuesMapping) { this.valuesMapping = valuesMapping; return this; } @@ -105,7 +105,7 @@ public boolean getNullable() { return nullable; } - public IndexFieldConfiguration setNullable(boolean nullable) { + public FieldConfiguration setNullable(boolean nullable) { this.nullable = nullable; return this; } @@ -223,7 +223,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - IndexFieldConfiguration that = (IndexFieldConfiguration) o; + FieldConfiguration that = (FieldConfiguration) o; return source == that.source && Objects.equals(key, that.key) && type == that.type diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java index 6003423b0fc..2a60326264e 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java @@ -39,15 +39,15 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) ? ParamConstants.POP_FREQ_1000G_CB_V4 : ParamConstants.POP_FREQ_1000G_CB_V5, "ALL")) .addPopulation(new Population(ParamConstants.POP_FREQ_GNOMAD_GENOMES, "ALL")) - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.FILE, + .addFileIndexField(new FieldConfiguration( + FieldConfiguration.Source.FILE, StudyEntry.FILTER, - IndexFieldConfiguration.Type.CATEGORICAL, + FieldConfiguration.Type.CATEGORICAL, VCFConstants.PASSES_FILTERS_v4)) - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL, QUAL_THRESHOLDS).setNullable(false)) - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY, DP_THRESHOLDS_NULLABLE).setNullable(true)); + .addFileIndexField(new FieldConfiguration( + FieldConfiguration.Source.FILE, StudyEntry.QUAL, QUAL_THRESHOLDS).setNullable(false)) + .addFileIndexField(new FieldConfiguration( + FieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY, DP_THRESHOLDS_NULLABLE).setNullable(true)); sampleIndexConfiguration.getFileIndexConfiguration() .setFilePositionBits(DEFAULT_FILE_POSITION_SIZE_BITS); @@ -55,9 +55,9 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) .setIncludeOriginalCall(true) .setIncludeSecondaryAlternates(true); - IndexFieldConfiguration biotypeConfiguration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.ANNOTATION, + FieldConfiguration biotypeConfiguration = new FieldConfiguration(FieldConfiguration.Source.ANNOTATION, "biotype", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) + FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) .setValues( NONSENSE_MEDIATED_DECAY, LINCRNA, @@ -103,10 +103,10 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) )); sampleIndexConfiguration.getAnnotationIndexConfiguration().setBiotype(biotypeConfiguration); - IndexFieldConfiguration consequenceType = new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, + FieldConfiguration consequenceType = new FieldConfiguration( + FieldConfiguration.Source.ANNOTATION, 
"consequenceType", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) + FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) .setValues( MISSENSE_VARIANT, FRAMESHIFT_VARIANT, @@ -137,10 +137,10 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) sampleIndexConfiguration.getAnnotationIndexConfiguration().setConsequenceType(consequenceType); sampleIndexConfiguration.getAnnotationIndexConfiguration().setTranscriptFlagIndexConfiguration( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, + new FieldConfiguration( + FieldConfiguration.Source.ANNOTATION, "transcriptFlag", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, + FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, "canonical", "MANE Select", "MANE Plus Clinical", @@ -153,18 +153,18 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) sampleIndexConfiguration.getAnnotationIndexConfiguration().setTranscriptCombination(true); sampleIndexConfiguration.getAnnotationIndexConfiguration().setClinicalSource( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, "clinicalSource", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, + new FieldConfiguration( + FieldConfiguration.Source.ANNOTATION, "clinicalSource", + FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, "clinvar", "cosmic") .setNullable(false) ); sampleIndexConfiguration.getAnnotationIndexConfiguration().setClinicalSignificance( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, "clinicalSignificance", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, + new FieldConfiguration( + FieldConfiguration.Source.ANNOTATION, "clinicalSignificance", + FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, "clinvar_" + ClinicalSignificance.benign.toString(), "clinvar_" + ClinicalSignificance.likely_benign.toString(), "clinvar_" + ClinicalSignificance.uncertain_significance.toString(), @@ -191,10 +191,10 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) public void validate(String cellbaseVersion) { addMissingValues(defaultConfiguration(cellbaseVersion)); - for (IndexFieldConfiguration customField : fileIndexConfiguration.getCustomFields()) { + for (FieldConfiguration customField : fileIndexConfiguration.getCustomFields()) { customField.validate(); } - for (IndexFieldConfiguration configuration : annotationIndexConfiguration.getPopulationFrequency().toIndexFieldConfiguration()) { + for (FieldConfiguration configuration : annotationIndexConfiguration.getPopulationFrequency().toIndexFieldConfiguration()) { configuration.validate(); } annotationIndexConfiguration.biotype.validate(); @@ -296,7 +296,7 @@ public String toString() { public static class FileIndexConfiguration { - private final List customFields = new ArrayList<>(); + private final List customFields = new ArrayList<>(); private int filePositionBits = DEFAULT_FILE_POSITION_SIZE_BITS; private boolean fixedFieldsFirst = true; @@ -308,12 +308,12 @@ public FileIndexConfiguration(int filePositionBits, boolean fixedFieldsFirst) { this.fixedFieldsFirst = fixedFieldsFirst; } - public List getCustomFields() { + public List getCustomFields() { return customFields; } - public IndexFieldConfiguration getCustomField(IndexFieldConfiguration.Source source, String key) { - for (IndexFieldConfiguration s : customFields) { + public FieldConfiguration getCustomField(FieldConfiguration.Source source, String key) { + for (FieldConfiguration s : customFields) { if (s.getKey().equals(key) && 
s.getSource() == source) { return s; } @@ -371,11 +371,11 @@ public String toString() { public static class AnnotationIndexConfiguration { private PopulationFrequencyIndexConfiguration populationFrequency = new PopulationFrequencyIndexConfiguration(); - private IndexFieldConfiguration biotype; - private IndexFieldConfiguration consequenceType; - private IndexFieldConfiguration clinicalSource; - private IndexFieldConfiguration clinicalSignificance; - private IndexFieldConfiguration transcriptFlagIndexConfiguration; + private FieldConfiguration biotype; + private FieldConfiguration consequenceType; + private FieldConfiguration clinicalSource; + private FieldConfiguration clinicalSignificance; + private FieldConfiguration transcriptFlagIndexConfiguration; private Boolean transcriptCombination; public PopulationFrequencyIndexConfiguration getPopulationFrequency() { @@ -387,46 +387,46 @@ public AnnotationIndexConfiguration setPopulationFrequency(PopulationFrequencyIn return this; } - public IndexFieldConfiguration getBiotype() { + public FieldConfiguration getBiotype() { return biotype; } - public AnnotationIndexConfiguration setBiotype(IndexFieldConfiguration biotype) { + public AnnotationIndexConfiguration setBiotype(FieldConfiguration biotype) { this.biotype = biotype; return this; } - public IndexFieldConfiguration getConsequenceType() { + public FieldConfiguration getConsequenceType() { return consequenceType; } - public AnnotationIndexConfiguration setConsequenceType(IndexFieldConfiguration consequenceType) { + public AnnotationIndexConfiguration setConsequenceType(FieldConfiguration consequenceType) { this.consequenceType = consequenceType; return this; } - public IndexFieldConfiguration getTranscriptFlagIndexConfiguration() { + public FieldConfiguration getTranscriptFlagIndexConfiguration() { return transcriptFlagIndexConfiguration; } - public void setTranscriptFlagIndexConfiguration(IndexFieldConfiguration transcriptFlagIndexConfiguration) { + public void setTranscriptFlagIndexConfiguration(FieldConfiguration transcriptFlagIndexConfiguration) { this.transcriptFlagIndexConfiguration = transcriptFlagIndexConfiguration; } - public IndexFieldConfiguration getClinicalSource() { + public FieldConfiguration getClinicalSource() { return clinicalSource; } - public AnnotationIndexConfiguration setClinicalSource(IndexFieldConfiguration clinicalSource) { + public AnnotationIndexConfiguration setClinicalSource(FieldConfiguration clinicalSource) { this.clinicalSource = clinicalSource; return this; } - public IndexFieldConfiguration getClinicalSignificance() { + public FieldConfiguration getClinicalSignificance() { return clinicalSignificance; } - public AnnotationIndexConfiguration setClinicalSignificance(IndexFieldConfiguration clinicalSignificance) { + public AnnotationIndexConfiguration setClinicalSignificance(FieldConfiguration clinicalSignificance) { this.clinicalSignificance = clinicalSignificance; return this; } @@ -502,17 +502,17 @@ public PopulationFrequencyIndexConfiguration addPopulation(Population population return this; } - public List toIndexFieldConfiguration() { - List indexFieldConfigurations = new ArrayList<>(populations.size()); + public List toIndexFieldConfiguration() { + List fieldConfigurations = new ArrayList<>(populations.size()); for (Population population : populations) { - indexFieldConfigurations.add(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, + fieldConfigurations.add(new FieldConfiguration( + FieldConfiguration.Source.ANNOTATION, 
population.getKey(), - IndexFieldConfiguration.Type.RANGE_LT) + FieldConfiguration.Type.RANGE_LT) .setNullable(false) .setThresholds(thresholds)); } - return indexFieldConfigurations; + return fieldConfigurations; } @Override @@ -617,7 +617,7 @@ public FileDataConfiguration getFileDataConfiguration() { return fileDataConfiguration; } - public SampleIndexConfiguration addFileIndexField(IndexFieldConfiguration fileIndex) { + public SampleIndexConfiguration addFileIndexField(FieldConfiguration fileIndex) { if (fileIndexConfiguration.getCustomFields().contains(fileIndex)) { throw new IllegalArgumentException("Duplicated file index '" + fileIndex.getKey() + "' in SampleIndexConfiguration"); diff --git a/opencga-core/src/test/java/org/opencb/opencga/core/config/storage/FieldConfigurationTest.java b/opencga-core/src/test/java/org/opencb/opencga/core/config/storage/FieldConfigurationTest.java new file mode 100644 index 00000000000..1b2f0a03740 --- /dev/null +++ b/opencga-core/src/test/java/org/opencb/opencga/core/config/storage/FieldConfigurationTest.java @@ -0,0 +1,28 @@ +package org.opencb.opencga.core.config.storage; + +import com.fasterxml.jackson.core.JsonProcessingException; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.testclassification.duration.ShortTests; + +import static org.junit.Assert.*; + +@Category(ShortTests.class) +public class FieldConfigurationTest { + + @Test + public void testDeserialize() throws JsonProcessingException { + assertEquals(FieldConfiguration.Type.RANGE_LT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE\"", FieldConfiguration.Type.class)); + assertEquals(FieldConfiguration.Type.RANGE_LT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_LT\"", FieldConfiguration.Type.class)); + assertEquals(FieldConfiguration.Type.RANGE_LT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_GE\"", FieldConfiguration.Type.class)); + + assertEquals(FieldConfiguration.Type.RANGE_GT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_GT\"", FieldConfiguration.Type.class)); + assertEquals(FieldConfiguration.Type.RANGE_GT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_LE\"", FieldConfiguration.Type.class)); + assertEquals(FieldConfiguration.Type.RANGE_GT, JacksonUtils.getDefaultObjectMapper().readValue("\"rangeLe\"", FieldConfiguration.Type.class)); + + assertEquals(FieldConfiguration.Type.CATEGORICAL, JacksonUtils.getDefaultObjectMapper().readValue("\"Categorical\"", FieldConfiguration.Type.class)); + assertEquals(FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, JacksonUtils.getDefaultObjectMapper().readValue("\"CategoricalMultiValue\"", FieldConfiguration.Type.class)); + } + +} \ No newline at end of file diff --git a/opencga-core/src/test/java/org/opencb/opencga/core/config/storage/IndexFieldConfigurationTest.java b/opencga-core/src/test/java/org/opencb/opencga/core/config/storage/IndexFieldConfigurationTest.java deleted file mode 100644 index fe3efc3e89e..00000000000 --- a/opencga-core/src/test/java/org/opencb/opencga/core/config/storage/IndexFieldConfigurationTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.opencb.opencga.core.config.storage; - -import com.fasterxml.jackson.core.JsonProcessingException; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.common.JacksonUtils; -import org.opencb.opencga.core.testclassification.duration.ShortTests; - -import static 
org.junit.Assert.*; - -@Category(ShortTests.class) -public class IndexFieldConfigurationTest { - - @Test - public void testDeserialize() throws JsonProcessingException { - assertEquals(IndexFieldConfiguration.Type.RANGE_LT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE\"", IndexFieldConfiguration.Type.class)); - assertEquals(IndexFieldConfiguration.Type.RANGE_LT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_LT\"", IndexFieldConfiguration.Type.class)); - assertEquals(IndexFieldConfiguration.Type.RANGE_LT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_GE\"", IndexFieldConfiguration.Type.class)); - - assertEquals(IndexFieldConfiguration.Type.RANGE_GT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_GT\"", IndexFieldConfiguration.Type.class)); - assertEquals(IndexFieldConfiguration.Type.RANGE_GT, JacksonUtils.getDefaultObjectMapper().readValue("\"RANGE_LE\"", IndexFieldConfiguration.Type.class)); - assertEquals(IndexFieldConfiguration.Type.RANGE_GT, JacksonUtils.getDefaultObjectMapper().readValue("\"rangeLe\"", IndexFieldConfiguration.Type.class)); - - assertEquals(IndexFieldConfiguration.Type.CATEGORICAL, JacksonUtils.getDefaultObjectMapper().readValue("\"Categorical\"", IndexFieldConfiguration.Type.class)); - assertEquals(IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, JacksonUtils.getDefaultObjectMapper().readValue("\"CategoricalMultiValue\"", IndexFieldConfiguration.Type.class)); - } - -} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/Range.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/Range.java index 3e6898f24b4..4ff88fc9e6a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/Range.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/Range.java @@ -1,7 +1,7 @@ package org.opencb.opencga.storage.core.variant.query.executors.accumulators; import org.apache.commons.lang3.StringUtils; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.util.ArrayList; import java.util.LinkedList; @@ -77,18 +77,18 @@ public Range(N start, boolean startInclusive, N end, boolean endInclusive) { s = sb.toString(); } - public static List> buildRanges(IndexFieldConfiguration index) { + public static List> buildRanges(FieldConfiguration index) { return buildRanges(index, null, null); } - public static List> buildRanges(IndexFieldConfiguration index, Double min, Double max) { + public static List> buildRanges(FieldConfiguration index, Double min, Double max) { List> ranges = new LinkedList<>(); if (index.getNullable()) { ranges.add(new Range.NA<>()); } double[] thresholds = index.getThresholds(); - boolean startInclusive = index.getType() == IndexFieldConfiguration.Type.RANGE_LT; - boolean endInclusive = index.getType() == IndexFieldConfiguration.Type.RANGE_GT; + boolean startInclusive = index.getType() == FieldConfiguration.Type.RANGE_LT; + boolean endInclusive = index.getType() == FieldConfiguration.Type.RANGE_GT; ranges.add(new Range<>(min, false, thresholds[0], endInclusive)); for (int i = 1; i < thresholds.length; i++) { ranges.add(new Range<>(thresholds[i - 1], startInclusive, thresholds[i], endInclusive)); diff --git 
a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/RangeTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/RangeTest.java index 571d09e8027..7821b45da00 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/RangeTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/accumulators/RangeTest.java @@ -2,7 +2,7 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.testclassification.duration.ShortTests; import java.util.Arrays; @@ -26,7 +26,7 @@ public void testParse() { @Test public void buildFromIndex() { - IndexFieldConfiguration configuration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "QUAL", new double[]{20d, 30d, 250d}, IndexFieldConfiguration.Type.RANGE_LT); + FieldConfiguration configuration = new FieldConfiguration(FieldConfiguration.Source.FILE, "QUAL", new double[]{20d, 30d, 250d}, FieldConfiguration.Type.RANGE_LT); List> ranges = Range.buildRanges(configuration); assertEquals(Arrays.asList( Range.parse("NA"), @@ -36,7 +36,7 @@ public void buildFromIndex() { Range.parse("[250.0, inf)") ), ranges); - configuration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "QUAL", new double[]{20d, 30d, 250d}, IndexFieldConfiguration.Type.RANGE_GT); + configuration = new FieldConfiguration(FieldConfiguration.Source.FILE, "QUAL", new double[]{20d, 30d, 250d}, FieldConfiguration.Type.RANGE_GT); ranges = Range.buildRanges(configuration); assertEquals(Arrays.asList( Range.parse("NA"), diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 840e1e1f7d1..fbfaa8634be 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -18,7 +18,7 @@ import org.opencb.commons.run.Task; import org.opencb.opencga.core.common.BatchUtils; import org.opencb.opencga.core.common.TimeUtils; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; @@ -402,9 +402,9 @@ enum FamilyRole { if (includeAll) { filterField = schema.getFileIndex() - .getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER); + .getCustomField(FieldConfiguration.Source.FILE, StudyEntry.FILTER); qualField = schema.getFileIndex() - .getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL); + 
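Reviewer note, illustrative only: the buildRanges hunk above keeps the inclusivity logic (startInclusive when the type is RANGE_LT, endInclusive when it is RANGE_GT), so the same thresholds declared as RANGE_GT turn each [s, e) bucket into (s, e]. Generics are collapsed in the diff text, so the List<Range<Double>> declaration below is an assumption.

```java
import java.util.List;
import org.opencb.opencga.core.config.storage.FieldConfiguration;
import org.opencb.opencga.storage.core.variant.query.executors.accumulators.Range;

// Same QUAL thresholds as RangeTest, but with RANGE_GT: buckets become (s, e].
FieldConfiguration qualGt = new FieldConfiguration(
        FieldConfiguration.Source.FILE, "QUAL", new double[]{20d, 30d, 250d},
        FieldConfiguration.Type.RANGE_GT);
List<Range<Double>> ranges = Range.buildRanges(qualGt);   // NA, (-inf, 20.0], (20.0, 30.0], ...
```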
.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.QUAL); } else { filterField = null; qualField = null; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/BiotypeIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/BiotypeIndexSchema.java index b9d14300bbd..0353f3076c6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/BiotypeIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/BiotypeIndexSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.annotation; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.hadoop.variant.index.core.CategoricalMultiValuedIndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.SingleFieldIndexSchema; @@ -8,7 +8,7 @@ public class BiotypeIndexSchema extends SingleFieldIndexSchema> { - public BiotypeIndexSchema(IndexFieldConfiguration configuration) { + public BiotypeIndexSchema(FieldConfiguration configuration) { super(new CategoricalMultiValuedIndexField<>(configuration, 0, configuration.getValues(), configuration.getValuesMapping())); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ClinicalIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ClinicalIndexSchema.java index cced10852ab..facb2b74cbe 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ClinicalIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ClinicalIndexSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.annotation; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.hadoop.variant.index.core.CategoricalMultiValuedIndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.FixedSizeIndexSchema; @@ -11,7 +11,7 @@ public class ClinicalIndexSchema extends FixedSizeIndexSchema { private final CategoricalMultiValuedIndexField sourceField; private final CategoricalMultiValuedIndexField clinicalSignificanceField; - public ClinicalIndexSchema(IndexFieldConfiguration sourceConfiguration, IndexFieldConfiguration clinicalConfiguration) { + public ClinicalIndexSchema(FieldConfiguration sourceConfiguration, FieldConfiguration clinicalConfiguration) { sourceField = new CategoricalMultiValuedIndexField<>(sourceConfiguration, 0, sourceConfiguration.getValues()); clinicalSignificanceField = new CategoricalMultiValuedIndexField<>( clinicalConfiguration, sourceField.getBitLength(), clinicalConfiguration.getValues()); diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ConsequenceTypeIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ConsequenceTypeIndexSchema.java index ff27bc5c3f6..ebd74958997 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ConsequenceTypeIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/ConsequenceTypeIndexSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.annotation; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.hadoop.variant.index.core.CategoricalMultiValuedIndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.SingleFieldIndexSchema; @@ -8,7 +8,7 @@ public class ConsequenceTypeIndexSchema extends SingleFieldIndexSchema> { - public ConsequenceTypeIndexSchema(IndexFieldConfiguration ctConfiguration) { + public ConsequenceTypeIndexSchema(FieldConfiguration ctConfiguration) { super(new CategoricalMultiValuedIndexField<>(ctConfiguration, 0, ctConfiguration.getValues(), ctConfiguration.getValuesMapping())); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/PopulationFrequencyIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/PopulationFrequencyIndexSchema.java index 9073dd5bc40..cd47629f077 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/PopulationFrequencyIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/PopulationFrequencyIndexSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.annotation; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.storage.hadoop.variant.index.core.FixedSizeIndexSchema; import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; @@ -21,7 +21,7 @@ public PopulationFrequencyIndexSchema(SampleIndexConfiguration.PopulationFrequen this.populationFrequencyIndexConfiguration = populationFrequencyIndexConfiguration; populations = new HashMap<>(); int bitOffset = 0; - for (IndexFieldConfiguration configuration : populationFrequencyIndexConfiguration.toIndexFieldConfiguration()) { + for (FieldConfiguration configuration : populationFrequencyIndexConfiguration.toIndexFieldConfiguration()) { IndexField field; switch (configuration.getType()) { case RANGE_LT: diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/TranscriptFlagIndexSchema.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/TranscriptFlagIndexSchema.java index a8ae73ae008..b0ce06d761a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/TranscriptFlagIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/TranscriptFlagIndexSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.annotation; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.hadoop.variant.index.core.CategoricalMultiValuedIndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.SingleFieldIndexSchema; @@ -8,7 +8,7 @@ public class TranscriptFlagIndexSchema extends SingleFieldIndexSchema> { - public TranscriptFlagIndexSchema(IndexFieldConfiguration transcriptFlagConfiguration) { + public TranscriptFlagIndexSchema(FieldConfiguration transcriptFlagConfiguration) { super(new CategoricalMultiValuedIndexField<>( transcriptFlagConfiguration, 0, transcriptFlagConfiguration.getValues(), transcriptFlagConfiguration.getValuesMapping())); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java index bbc53dc0c43..87dae48f7be 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractField.java @@ -1,12 +1,12 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; public abstract class AbstractField { - protected final IndexFieldConfiguration configuration; + protected final FieldConfiguration configuration; - protected AbstractField(IndexFieldConfiguration configuration) { + protected AbstractField(FieldConfiguration configuration) { this.configuration = configuration; } @@ -14,7 +14,7 @@ public String getId() { return configuration.getId(); } - public IndexFieldConfiguration.Source getSource() { + public FieldConfiguration.Source getSource() { return configuration.getSource(); } @@ -22,11 +22,11 @@ public String getKey() { return configuration.getKey(); } - public IndexFieldConfiguration getConfiguration() { + public FieldConfiguration getConfiguration() { return configuration; } - public IndexFieldConfiguration.Type getType() { + public FieldConfiguration.Type getType() { return configuration.getType(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java index 09dc7e238f1..f4a205fa9b3 100644 --- 
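Reviewer note, illustrative only: after the rename, the single-field annotation schemas are constructed directly from a FieldConfiguration, e.g. for transcript flags. The flag values below are made up for the example; the real defaults live in SampleIndexConfiguration.

```java
import org.opencb.opencga.core.config.storage.FieldConfiguration;
import org.opencb.opencga.storage.hadoop.variant.index.annotation.TranscriptFlagIndexSchema;

// Illustrative flag list, not the project defaults.
FieldConfiguration tfConf = new FieldConfiguration(
        FieldConfiguration.Source.ANNOTATION, "transcriptFlag",
        FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, "canonical", "basic", "CCDS");
TranscriptFlagIndexSchema tfSchema = new TranscriptFlagIndexSchema(tfConf);
```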
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/AbstractSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.util.List; @@ -8,7 +8,7 @@ public abstract class AbstractSchema { public abstract List getFields(); - public FIELD getField(IndexFieldConfiguration.Source source, String key) { + public FIELD getField(FieldConfiguration.Source source, String key) { return getFields().stream().filter(i -> i.getSource() == source && i.getKey().equals(key)).findFirst().orElse(null); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexField.java index 59e373257d7..2dd8caad82b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexField.java @@ -1,7 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; import org.apache.commons.lang3.StringUtils; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.variant.query.OpValue; import org.opencb.opencga.storage.hadoop.variant.index.IndexUtils; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter; @@ -22,15 +22,15 @@ public class CategoricalIndexField extends IndexField implements IndexCode private final int bitLength; private final IndexCodec codec; - public static CategoricalIndexField create(IndexFieldConfiguration configuration, int bitOffset) { + public static CategoricalIndexField create(FieldConfiguration configuration, int bitOffset) { return new CategoricalIndexField<>(configuration, bitOffset, configuration.getValues(), configuration.getValuesMapping()); } - public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values) { + public CategoricalIndexField(FieldConfiguration configuration, int bitOffset, T[] values) { this(configuration, bitOffset, values, null); } - public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values, Map> valuesMapping) { + public CategoricalIndexField(FieldConfiguration configuration, int bitOffset, T[] values, Map> valuesMapping) { super(configuration, bitOffset); int numValues; if (configuration.getNullable()) { @@ -43,7 +43,7 @@ public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffse this.bitLength = Math.max(1, IndexUtils.log2(numValues - 1) + 1); } - public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffset, int numValues, IndexCodec codec) { + public CategoricalIndexField(FieldConfiguration configuration, int bitOffset, int numValues, IndexCodec codec) { 
super(configuration, bitOffset); this.bitLength = IndexUtils.log2(numValues - 1) + 1; this.codec = codec; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalMultiValuedIndexField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalMultiValuedIndexField.java index e7bafeb4b11..184e2f01641 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalMultiValuedIndexField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalMultiValuedIndexField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.variant.query.OpValue; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.MaskIndexFieldFilter; @@ -22,7 +22,7 @@ public class CategoricalMultiValuedIndexField extends CategoricalIndexField createMultiValued(IndexFieldConfiguration configuration, int bitOffset) { + public static CategoricalMultiValuedIndexField createMultiValued(FieldConfiguration configuration, int bitOffset) { return new CategoricalMultiValuedIndexField<>( configuration, bitOffset, @@ -30,16 +30,16 @@ public static CategoricalMultiValuedIndexField createMultiValued(IndexFi configuration.getValuesMapping()); } - public CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values) { + public CategoricalMultiValuedIndexField(FieldConfiguration configuration, int bitOffset, T[] values) { this(configuration, bitOffset, values, (Map>) null); } - public CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values, + public CategoricalMultiValuedIndexField(FieldConfiguration configuration, int bitOffset, T[] values, Map> valuesMapping) { this(configuration, bitOffset, values, new MaskValueCodec<>(values, valuesMapping, configuration.getNullable())); } - private CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values, MaskValueCodec codec) { + private CategoricalMultiValuedIndexField(FieldConfiguration configuration, int bitOffset, T[] values, MaskValueCodec codec) { super(configuration, bitOffset, values.length, codec); bitLength = codec.numBits; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java index 6f1fc2ed6d4..54b025eef24 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataField.java @@ -2,7 +2,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.opencb.biodata.tools.commons.BiConverter; -import 
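Reviewer note: the bit-width computation kept by this hunk, Math.max(1, IndexUtils.log2(numValues - 1) + 1), is the usual "bits needed to address N codes" sizing. A minimal standalone sketch, assuming IndexUtils.log2 is a floor log2:

```java
// Bits a CategoricalIndexField needs to distinguish numValues codes
// (mirrors Math.max(1, IndexUtils.log2(numValues - 1) + 1)).
static int bitsFor(int numValues) {
    return Math.max(1, 32 - Integer.numberOfLeadingZeros(numValues - 1));
}
// bitsFor(2) == 1, bitsFor(3) == 2, bitsFor(4) == 2, bitsFor(5) == 3
```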
org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -15,7 +15,7 @@ */ public abstract class DataField extends DataFieldBase { - public DataField(IndexFieldConfiguration configuration) { + public DataField(FieldConfiguration configuration) { super(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java index ad1769d6ff9..3a6cc3e6811 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldBase.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -17,7 +17,7 @@ */ public abstract class DataFieldBase extends AbstractField { - public DataFieldBase(IndexFieldConfiguration configuration) { + public DataFieldBase(FieldConfiguration configuration) { super(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java index 5a8908f1ce8..70611e68aa1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataFieldWithContext.java @@ -1,7 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; import org.apache.commons.lang3.tuple.Pair; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -13,7 +13,7 @@ * @param Value type */ public abstract class DataFieldWithContext extends DataFieldBase> { - public DataFieldWithContext(IndexFieldConfiguration configuration) { + public DataFieldWithContext(FieldConfiguration configuration) { super(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java index 02da5c50288..8e173c0adb0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchema.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; import java.io.ByteArrayOutputStream; @@ -36,7 +36,7 @@ public abstract class DataSchema extends AbstractSchema> { public DataSchema() { fields = new ArrayList<>(); - documentLengthField = new VarIntDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "DOC_LENGTH", null)); + documentLengthField = new VarIntDataField(new FieldConfiguration(FieldConfiguration.Source.META, "DOC_LENGTH", null)); defaultDocument = ByteBuffer.allocate(0); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java index 0d5b00758e9..63e97aab3ea 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IndexField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.variant.query.OpValue; import org.opencb.opencga.storage.core.variant.query.Values; @@ -21,7 +21,7 @@ public abstract class IndexField extends AbstractField { private final int bitOffset; - public IndexField(IndexFieldConfiguration configuration, int bitOffset) { + public IndexField(FieldConfiguration configuration, int bitOffset) { super(configuration); this.bitOffset = bitOffset; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java index 944c69b6310..5600eff773b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/IntegerDataField.java @@ -1,7 +1,7 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; import org.apache.hadoop.hbase.util.Bytes; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -10,7 +10,7 @@ public class IntegerDataField extends DataField { - public IntegerDataField(IndexFieldConfiguration configuration) { + public IntegerDataField(FieldConfiguration configuration) { super(configuration); } diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexField.java index 5e37863cee7..16f09c92a4e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.variant.query.OpValue; import org.opencb.opencga.storage.core.variant.query.executors.accumulators.Range; import org.opencb.opencga.storage.hadoop.variant.index.IndexUtils; @@ -23,11 +23,11 @@ public class RangeIndexField extends IndexField { private final IndexCodec codec; private int numRanges; - public RangeIndexField(IndexFieldConfiguration configuration, int bitOffset) { + public RangeIndexField(FieldConfiguration configuration, int bitOffset) { this(configuration, bitOffset, Double.MIN_VALUE, MAX); } - public RangeIndexField(IndexFieldConfiguration configuration, int bitOffset, double minValue, double max) { + public RangeIndexField(FieldConfiguration configuration, int bitOffset, double minValue, double max) { super(configuration, bitOffset); this.thresholds = getConfiguration().getThresholds().clone(); min = minValue; @@ -42,7 +42,7 @@ public RangeIndexField(IndexFieldConfiguration configuration, int bitOffset, dou codec = new NonNullableRangeCodec(); } bitLength = Math.max(1, IndexUtils.log2(numRanges - 1) + 1); - if (configuration.getType().equals(IndexFieldConfiguration.Type.RANGE_GT)) { + if (configuration.getType().equals(FieldConfiguration.Type.RANGE_GT)) { // Add one DELTA to each value to invert ranges from [s, e) to (s, e], therefore the operation ">" is exact for (int i = 0; i < thresholds.length; i++) { thresholds[i] += DELTA; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java index d1729817039..24e739c0cad 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarBinaryDataField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -9,7 +9,7 @@ public class VarBinaryDataField extends VariableWidthDataField { private final VarIntDataField lengthField; - public VarBinaryDataField(IndexFieldConfiguration configuration) { + public VarBinaryDataField(FieldConfiguration configuration) { 
super(configuration); lengthField = new VarIntDataField(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java index 911f18404df..d244327b021 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarCharDataField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -16,7 +16,7 @@ public class VarCharDataField extends VariableWidthDataField { protected static final byte FIELD_SEPARATOR = (byte) 0; - public VarCharDataField(IndexFieldConfiguration configuration) { + public VarCharDataField(FieldConfiguration configuration) { super(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java index 585ba900d69..af82961daa4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarIntDataField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -14,7 +14,7 @@ public class VarIntDataField extends VariableWidthDataField { private static final int VALUE_MASK = 0b0111_1111; private static final int CONTINUATION_BIT_MASK = 0b1000_0000; - public VarIntDataField(IndexFieldConfiguration configuration) { + public VarIntDataField(FieldConfiguration configuration) { super(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java index 93755a6ba8b..b754ab8d7e3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VarSIntDataField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; 
import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; @@ -11,7 +11,7 @@ */ public class VarSIntDataField extends VarIntDataField { - public VarSIntDataField(IndexFieldConfiguration configuration) { + public VarSIntDataField(FieldConfiguration configuration) { super(configuration); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java index 8c940f6ffcb..a1ebdce41d7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/core/VariableWidthDataField.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.core; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; /** * Data field with variable data length. @@ -8,7 +8,7 @@ */ public abstract class VariableWidthDataField extends DataField { - public VariableWidthDataField(IndexFieldConfiguration configuration) { + public VariableWidthDataField(FieldConfiguration configuration) { super(configuration); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleFileIndexQuery.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleFileIndexQuery.java index 79276ef5a6f..49e07d200cb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleFileIndexQuery.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleFileIndexQuery.java @@ -1,6 +1,6 @@ package org.opencb.opencga.storage.hadoop.variant.index.query; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter; import org.opencb.opencga.storage.hadoop.variant.index.sample.FileIndexSchema; @@ -30,14 +30,14 @@ public List getFilters() { } public IndexFieldFilter getVariantTypeFilter() { - return getFilter(IndexFieldConfiguration.Source.VARIANT, FileIndexSchema.TYPE_KEY); + return getFilter(FieldConfiguration.Source.VARIANT, FileIndexSchema.TYPE_KEY); } public IndexFieldFilter getFilePositionFilter() { - return getFilter(IndexFieldConfiguration.Source.META, FileIndexSchema.FILE_POSITION_KEY); + return getFilter(FieldConfiguration.Source.META, FileIndexSchema.FILE_POSITION_KEY); } - public IndexFieldFilter getFilter(IndexFieldConfiguration.Source source, String key) { + public IndexFieldFilter getFilter(FieldConfiguration.Source source, String key) { return filters.stream() .filter(i -> i.getIndex().getSource().equals(source) && i.getIndex().getKey().equals(key)) .findFirst() diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java index 246b2bfa581..43dc9c289ef 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java @@ -6,7 +6,7 @@ import org.opencb.biodata.models.variant.avro.AlternateCoordinate; import org.opencb.biodata.models.variant.avro.OriginalCall; import org.opencb.biodata.tools.commons.BiConverter; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory; @@ -27,7 +27,7 @@ public FileDataSchema(SampleIndexConfiguration.FileDataConfiguration fileDataCon this.fileDataConfiguration = fileDataConfiguration; if (fileDataConfiguration.isIncludeOriginalCall()) { originalCallField = new VarBinaryDataField( - new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "ORIGINAL_CALL", null)) + new FieldConfiguration(FieldConfiguration.Source.FILE, "ORIGINAL_CALL", null)) .fromWithContext(new VariantOriginalCallToBytesConverter()); addField(originalCallField); } else { @@ -35,7 +35,7 @@ public FileDataSchema(SampleIndexConfiguration.FileDataConfiguration fileDataCon } if (fileDataConfiguration.isIncludeOriginalCall()) { secondaryAlternatesField = new VarBinaryDataField( - new IndexFieldConfiguration(IndexFieldConfiguration.Source.STUDY, "SECONDARY_ALTERNATES", null)) + new FieldConfiguration(FieldConfiguration.Source.STUDY, "SECONDARY_ALTERNATES", null)) .fromWithContext(new AlternateCoordinateToBytesConverter()); addField(secondaryAlternatesField); } else { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileIndexSchema.java index a383109139d..8ec4ba88348 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileIndexSchema.java @@ -3,7 +3,7 @@ import htsjdk.variant.vcf.VCFConstants; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.avro.VariantType; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; @@ -33,7 +33,7 @@ public 
FileIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileIndex fileIndexConfiguration.isFixedFieldsFirst()); } - public FileIndexSchema(List customFieldConfigurations, int filePositionSizeBits, boolean fixedFieldsFirst) { + public FileIndexSchema(List customFieldConfigurations, int filePositionSizeBits, boolean fixedFieldsFirst) { if (fixedFieldsFirst) { multiFileIndex = buildMultiFile(null); filePositionIndex = buildFilePositionIndexField(multiFileIndex, filePositionSizeBits); @@ -60,13 +60,13 @@ public FileIndexSchema(List customFieldConfigurations, this.fields.addAll(fixedFields); this.fields.addAll(customFields); customFieldsSourceSample = customFields.stream() - .filter(c -> c.getSource().equals(IndexFieldConfiguration.Source.SAMPLE)) + .filter(c -> c.getSource().equals(FieldConfiguration.Source.SAMPLE)) .collect(Collectors.toList()); updateIndexSizeBits(); } - public IndexField getCustomField(IndexFieldConfiguration.Source source, String key) { + public IndexField getCustomField(FieldConfiguration.Source source, String key) { return customFields.stream().filter(i -> i.getSource().equals(source) && i.getKey().equals(key)).findFirst().orElse(null); } @@ -125,7 +125,7 @@ public static void setFilePosition(BitBuffer fileIndex, int filePosition) { protected CategoricalIndexField buildMultiFile(IndexField prevIndex) { return new CategoricalIndexField<>( - new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "multiFile", IndexFieldConfiguration.Type.CATEGORICAL) + new FieldConfiguration(FieldConfiguration.Source.FILE, "multiFile", FieldConfiguration.Type.CATEGORICAL) .setNullable(false), prevIndex == null ? 0 : (prevIndex.getBitOffset() + prevIndex.getBitLength()), new Boolean[]{false, true}); @@ -133,8 +133,8 @@ protected CategoricalIndexField buildMultiFile(IndexField prevIndex) private CategoricalIndexField buildVariantTypeIndexField(IndexField prevIndex) { return new CategoricalIndexField<>( - new IndexFieldConfiguration(IndexFieldConfiguration.Source.VARIANT, TYPE_KEY, - IndexFieldConfiguration.Type.CATEGORICAL), + new FieldConfiguration(FieldConfiguration.Source.VARIANT, TYPE_KEY, + FieldConfiguration.Type.CATEGORICAL), prevIndex == null ? 0 : (prevIndex.getBitOffset() + prevIndex.getBitLength()), VariantTypeIndexCodec.TYPE_NUM_VALUES, new VariantTypeIndexCodec()); } @@ -142,8 +142,8 @@ private CategoricalIndexField buildVariantTypeIndexField(IndexField private static CategoricalIndexField buildFilePositionIndexField(IndexField prevIndex, int filePositionSize) { int maxValues = (1 << filePositionSize) - 1; return new CategoricalIndexField<>( - new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, FILE_POSITION_KEY, - IndexFieldConfiguration.Type.CATEGORICAL), + new FieldConfiguration(FieldConfiguration.Source.META, FILE_POSITION_KEY, + FieldConfiguration.Type.CATEGORICAL), prevIndex == null ? 
0 : (prevIndex.getBitOffset() + prevIndex.getBitLength()), maxValues, new IndexCodec() { @Override @@ -168,7 +168,7 @@ public boolean ambiguous(int code) { } private static List> buildCustomIndexFields(List> fixedIndexFields, - List configurations) { + List configurations) { int bitOffset = 0; for (IndexField indexField : fixedIndexFields) { if (indexField.getBitOffset() != bitOffset) { @@ -179,7 +179,7 @@ private static List> buildCustomIndexFields(List> list = new ArrayList<>(); - for (IndexFieldConfiguration conf : configurations) { + for (FieldConfiguration conf : configurations) { IndexField stringIndexField = buildCustomIndexField(conf, bitOffset); list.add(stringIndexField); bitOffset += stringIndexField.getBitLength(); @@ -187,7 +187,7 @@ private static List> buildCustomIndexFields(List buildCustomIndexField(IndexFieldConfiguration conf, int bitOffset) { + private static IndexField buildCustomIndexField(FieldConfiguration conf, int bitOffset) { switch (conf.getType()) { case RANGE_LT: case RANGE_GT: @@ -205,7 +205,7 @@ private static IndexField buildCustomIndexField(IndexFieldConfiguration case CATEGORICAL: return new CategoricalIndexField<>(conf, bitOffset, conf.getValues()); case CATEGORICAL_MULTI_VALUE: - if (conf.getSource() == IndexFieldConfiguration.Source.FILE && conf.getKey().equals(StudyEntry.FILTER)) { + if (conf.getSource() == FieldConfiguration.Source.FILE && conf.getKey().equals(StudyEntry.FILTER)) { return new CategoricalMultiValuedIndexField<>(conf, bitOffset, conf.getValues()) .from(s -> { if (s == null || s.isEmpty() || s.equals(VCFConstants.MISSING_VALUE_v4)) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java index 027d241282c..2fc267b7ff8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java @@ -8,7 +8,7 @@ import org.opencb.biodata.models.variant.annotation.ConsequenceTypeMappings; import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.commons.datastore.core.Query; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; @@ -329,7 +329,7 @@ public SampleIndexQuery parse(Query query) { // SampleData filter exists for this sample! // Check if ANY filter is covered by the index for (KeyOpValue entry : sampleDataFilter.getValues()) { - if (schema.getFileIndex().getField(IndexFieldConfiguration.Source.SAMPLE, entry.getKey()) != null) { + if (schema.getFileIndex().getField(FieldConfiguration.Source.SAMPLE, entry.getKey()) != null) { // This key is covered by the sample index. Do not discard this parent! 
discardParent = false; break; @@ -1082,7 +1082,7 @@ protected SampleFileIndexQuery parseFileQuery(SampleIndexSchema schema, Query qu if (isValidParam(query, FILTER)) { IndexField filterIndexField = schema.getFileIndex() - .getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER); + .getCustomField(FieldConfiguration.Source.FILE, StudyEntry.FILTER); if (filterIndexField != null) { Values filterValues = splitValue(query, FILTER); IndexFieldFilter indexFieldFilter = filterIndexField.buildFilter(filterValues.getOperation(), filterValues.getValues()); @@ -1095,7 +1095,7 @@ protected SampleFileIndexQuery parseFileQuery(SampleIndexSchema schema, Query qu if (isValidParam(query, QUAL)) { IndexField qualIndexField = schema.getFileIndex() - .getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL); + .getCustomField(FieldConfiguration.Source.FILE, StudyEntry.QUAL); if (qualIndexField != null) { OpValue opValue = parseOpValue(query.getString(QUAL.key())); IndexFieldFilter indexFieldFilter = qualIndexField.buildFilter(opValue); @@ -1128,7 +1128,7 @@ protected SampleFileIndexQuery parseFileQuery(SampleIndexSchema schema, Query qu } for (KeyOpValue keyOpValue : keyValues.getValues()) { IndexField fileDataIndexField = schema.getFileIndex() - .getCustomField(IndexFieldConfiguration.Source.FILE, keyOpValue.getKey()); + .getCustomField(FieldConfiguration.Source.FILE, keyOpValue.getKey()); if (fileDataIndexField == null) { // Unknown key fileDataCovered = false; @@ -1159,7 +1159,7 @@ protected SampleFileIndexQuery parseFileQuery(SampleIndexSchema schema, Query qu if (!sampleDataFilter.isEmpty() && sampleDataOp != QueryOperation.OR) { for (KeyOpValue keyOpValue : sampleDataFilter) { IndexField sampleDataIndexField = schema.getFileIndex() - .getCustomField(IndexFieldConfiguration.Source.SAMPLE, keyOpValue.getKey()); + .getCustomField(FieldConfiguration.Source.SAMPLE, keyOpValue.getKey()); if (sampleDataIndexField != null) { IndexFieldFilter indexFieldFilter = sampleDataIndexField.buildFilter(keyOpValue); filtersList.add(indexFieldFilter); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java index 7b2324102bc..d7200fcc234 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java @@ -8,7 +8,7 @@ import org.opencb.biodata.models.variant.avro.FileEntry; import org.opencb.biodata.models.variant.avro.OriginalCall; import org.opencb.biodata.models.variant.avro.VariantType; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; @@ -106,9 +106,9 @@ private BitBuffer createFileIndexValue(VariantType type, int filePosition, Funct for (IndexField fileDataIndexField : fileIndex.getCustomFields()) { String key = fileDataIndexField.getKey(); String value; - if 
(fileDataIndexField.getSource() == IndexFieldConfiguration.Source.FILE) { + if (fileDataIndexField.getSource() == FieldConfiguration.Source.FILE) { value = fileAttributes.apply(key); - } else if (fileDataIndexField.getSource() == IndexFieldConfiguration.Source.SAMPLE) { + } else if (fileDataIndexField.getSource() == FieldConfiguration.Source.SAMPLE) { value = sampleData.apply(key); } else { throw new IllegalArgumentException("Unable to build file index with index source " diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexFieldTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexFieldTest.java index feae2ca31e6..0ba6b5615bb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexFieldTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CategoricalIndexFieldTest.java @@ -4,7 +4,7 @@ import org.junit.Test; import org.junit.experimental.categories.Category; import org.opencb.biodata.models.variant.StudyEntry; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.io.bit.BitBuffer; @@ -17,8 +17,8 @@ import java.util.Set; import static org.junit.Assert.assertEquals; -import static org.opencb.opencga.core.config.storage.IndexFieldConfiguration.Source.FILE; -import static org.opencb.opencga.core.config.storage.IndexFieldConfiguration.Source.SAMPLE; +import static org.opencb.opencga.core.config.storage.FieldConfiguration.Source.FILE; +import static org.opencb.opencga.core.config.storage.FieldConfiguration.Source.SAMPLE; @Category(ShortTests.class) public class CategoricalIndexFieldTest { @@ -26,20 +26,20 @@ public class CategoricalIndexFieldTest { @Test public void testLength() { boolean nullable = false; - assertEquals(1, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1").setNullable(nullable), 0).getBitLength()); - assertEquals(1, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2").setNullable(nullable), 0).getBitLength()); - assertEquals(2, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3").setNullable(nullable), 0).getBitLength()); - assertEquals(2, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4").setNullable(nullable), 0).getBitLength()); - assertEquals(3, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5").setNullable(nullable), 0).getBitLength()); - assertEquals(3, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5", "6").setNullable(nullable), 0).getBitLength()); + assertEquals(1, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", 
FieldConfiguration.Type.CATEGORICAL, "1").setNullable(nullable), 0).getBitLength()); + assertEquals(1, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2").setNullable(nullable), 0).getBitLength()); + assertEquals(2, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3").setNullable(nullable), 0).getBitLength()); + assertEquals(2, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4").setNullable(nullable), 0).getBitLength()); + assertEquals(3, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5").setNullable(nullable), 0).getBitLength()); + assertEquals(3, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5", "6").setNullable(nullable), 0).getBitLength()); nullable = true; - assertEquals(1, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1").setNullable(nullable), 0).getBitLength()); - assertEquals(2, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2").setNullable(nullable), 0).getBitLength()); - assertEquals(2, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3").setNullable(nullable), 0).getBitLength()); - assertEquals(3, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4").setNullable(nullable), 0).getBitLength()); - assertEquals(3, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5").setNullable(nullable), 0).getBitLength()); - assertEquals(3, CategoricalIndexField.create(new IndexFieldConfiguration(SAMPLE, "K", IndexFieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5", "6").setNullable(nullable), 0).getBitLength()); + assertEquals(1, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1").setNullable(nullable), 0).getBitLength()); + assertEquals(2, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2").setNullable(nullable), 0).getBitLength()); + assertEquals(2, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3").setNullable(nullable), 0).getBitLength()); + assertEquals(3, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4").setNullable(nullable), 0).getBitLength()); + assertEquals(3, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5").setNullable(nullable), 0).getBitLength()); + assertEquals(3, CategoricalIndexField.create(new FieldConfiguration(SAMPLE, "K", FieldConfiguration.Type.CATEGORICAL, "1", "2", "3", "4", "5", "6").setNullable(nullable), 0).getBitLength()); } @Test @@ -67,7 +67,7 @@ public void testEncodeDecodeQual() { public void testEncodeDecodeFilter() { SampleIndexConfiguration indexConfiguration = SampleIndexConfiguration.defaultConfiguration(); indexConfiguration.getFileIndexConfiguration().getCustomField(FILE, StudyEntry.FILTER) - 
.setType(IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) + .setType(FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) .setValues("PASS", "noPass"); SampleIndexSchema indexSchema = new SampleIndexSchema(indexConfiguration, 0); IndexField field = indexSchema.getFileIndex().getCustomField(FILE, StudyEntry.FILTER); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CombinationTripleIndexSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CombinationTripleIndexSchemaTest.java index 7d441c300da..e379a3e3613 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CombinationTripleIndexSchemaTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/CombinationTripleIndexSchemaTest.java @@ -4,7 +4,7 @@ import org.apache.commons.lang3.tuple.Triple; import org.junit.Test; import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.io.bit.BitInputStream; @@ -170,8 +170,8 @@ private void testFilter(CombinationTripleIndexSchema schema, List buildIndex(String name, boolean nullable, String... values) { - return new CategoricalMultiValuedIndexField<>(new IndexFieldConfiguration(IndexFieldConfiguration.Source.ANNOTATION, + return new CategoricalMultiValuedIndexField<>(new FieldConfiguration(FieldConfiguration.Source.ANNOTATION, name, - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, values).setNullable(nullable), 0, values); + FieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, values).setNullable(nullable), 0, values); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java index 298ba2f0b9a..3d059f600ff 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/DataSchemaTest.java @@ -3,7 +3,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.storage.core.io.bit.ExposedByteArrayOutputStream; import java.nio.ByteBuffer; @@ -19,10 +19,10 @@ public class DataSchemaTest { @Before public void setUp() throws Exception { - key1 = new VarCharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key1", null)); - key2 = new VarCharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key2", null)); - key3 = new IntegerDataField(new 
IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key3", null)); - key4 = new VarCharDataField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.META, "key4", null)); + key1 = new VarCharDataField(new FieldConfiguration(FieldConfiguration.Source.META, "key1", null)); + key2 = new VarCharDataField(new FieldConfiguration(FieldConfiguration.Source.META, "key2", null)); + key3 = new IntegerDataField(new FieldConfiguration(FieldConfiguration.Source.META, "key3", null)); + key4 = new VarCharDataField(new FieldConfiguration(FieldConfiguration.Source.META, "key4", null)); dataSchema = new DataSchema() { { addField(key1); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexFieldTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexFieldTest.java index b7a7ec74252..3b15c8e6d71 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexFieldTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/RangeIndexFieldTest.java @@ -2,11 +2,11 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.testclassification.duration.ShortTests; import static org.junit.Assert.*; -import static org.opencb.opencga.core.config.storage.IndexFieldConfiguration.Source.SAMPLE; +import static org.opencb.opencga.core.config.storage.FieldConfiguration.Source.SAMPLE; @Category(ShortTests.class) public class RangeIndexFieldTest { @@ -14,21 +14,21 @@ public class RangeIndexFieldTest { @Test public void testLength() { boolean nullable = false; - assertEquals(1, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1}).setNullable(nullable), 0).getBitLength()); - assertEquals(2, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2}).setNullable(nullable), 0).getBitLength()); - assertEquals(2, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{2, 4, 5}).setNullable(nullable), 0).getBitLength()); - assertEquals(3, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{2, 4, 5, 6}).setNullable(nullable), 0).getBitLength()); - assertEquals(3, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 3, 4, 5, 6, 7}).setNullable(nullable), 0).getBitLength()); - assertEquals(4, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 3, 4, 5, 6, 7, 8}).setNullable(nullable), 0).getBitLength()); + assertEquals(1, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1}).setNullable(nullable), 0).getBitLength()); + assertEquals(2, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2}).setNullable(nullable), 0).getBitLength()); + assertEquals(2, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{2, 4, 5}).setNullable(nullable), 0).getBitLength()); + assertEquals(3, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{2, 4, 5, 6}).setNullable(nullable), 0).getBitLength()); + assertEquals(3, new RangeIndexField(new FieldConfiguration(SAMPLE, 
"K", new double[]{1, 2, 3, 4, 5, 6, 7}).setNullable(nullable), 0).getBitLength()); + assertEquals(4, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 3, 4, 5, 6, 7, 8}).setNullable(nullable), 0).getBitLength()); nullable = true; - assertEquals(2, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1}).setNullable(nullable), 0).getBitLength()); - assertEquals(2, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{2, 4}).setNullable(nullable), 0).getBitLength()); - assertEquals(3, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{2, 4, 5}).setNullable(nullable), 0).getBitLength()); - assertEquals(3, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3}).setNullable(nullable), 0).getBitLength()); - assertEquals(3, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}).setNullable(nullable), 0).getBitLength()); - assertEquals(3, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4, 5}).setNullable(nullable), 0).getBitLength()); - assertEquals(4, new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4, 5, 6}).setNullable(nullable), 0).getBitLength()); + assertEquals(2, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1}).setNullable(nullable), 0).getBitLength()); + assertEquals(2, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{2, 4}).setNullable(nullable), 0).getBitLength()); + assertEquals(3, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{2, 4, 5}).setNullable(nullable), 0).getBitLength()); + assertEquals(3, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3}).setNullable(nullable), 0).getBitLength()); + assertEquals(3, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}).setNullable(nullable), 0).getBitLength()); + assertEquals(3, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4, 5}).setNullable(nullable), 0).getBitLength()); + assertEquals(4, new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4, 5, 6}).setNullable(nullable), 0).getBitLength()); } @Test @@ -42,9 +42,9 @@ public void testNumericMethods() { @Test public void testGetRanges() { - System.out.println(new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, IndexFieldConfiguration.Type.RANGE_GT).setNullable(false), 0).getRanges()); - System.out.println(new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, IndexFieldConfiguration.Type.RANGE_GT).setNullable(true), 0).getRanges()); - System.out.println(new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, IndexFieldConfiguration.Type.RANGE_LT).setNullable(false), 0).getRanges()); - System.out.println(new RangeIndexField(new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, IndexFieldConfiguration.Type.RANGE_LT).setNullable(true), 0).getRanges()); + System.out.println(new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, FieldConfiguration.Type.RANGE_GT).setNullable(false), 0).getRanges()); + System.out.println(new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, FieldConfiguration.Type.RANGE_GT).setNullable(true), 0).getRanges()); + System.out.println(new RangeIndexField(new 
FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, FieldConfiguration.Type.RANGE_LT).setNullable(false), 0).getRanges()); + System.out.println(new RangeIndexField(new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3, 4}, FieldConfiguration.Type.RANGE_LT).setNullable(true), 0).getRanges()); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/filters/RangeIndexFieldFilterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/filters/RangeIndexFieldFilterTest.java index bad33903caf..b672bd9a865 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/filters/RangeIndexFieldFilterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/core/filters/RangeIndexFieldFilterTest.java @@ -2,13 +2,13 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.variant.query.OpValue; import org.opencb.opencga.storage.hadoop.variant.index.core.RangeIndexField; import static org.junit.Assert.*; -import static org.opencb.opencga.core.config.storage.IndexFieldConfiguration.Source.SAMPLE; +import static org.opencb.opencga.core.config.storage.FieldConfiguration.Source.SAMPLE; @Category(ShortTests.class) public class RangeIndexFieldFilterTest { @@ -35,7 +35,7 @@ public void testExactGtNullable() { public void testExactLt(boolean nullable) { RangeIndexField field = new RangeIndexField( - new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3}).setNullable(nullable), 0); + new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3}).setNullable(nullable), 0); assertFalse(field.buildFilter(new OpValue<>("==", 1d)).isExactFilter()); @@ -86,8 +86,8 @@ public void testExactLt(boolean nullable) { public void testExactGt(boolean nullable) { RangeIndexField field = new RangeIndexField( - new IndexFieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3}, - IndexFieldConfiguration.Type.RANGE_GT).setNullable(nullable), 0); + new FieldConfiguration(SAMPLE, "K", new double[]{1, 2, 2, 3}, + FieldConfiguration.Type.RANGE_GT).setNullable(nullable), 0); assertFalse(field.buildFilter(new OpValue<>("==", 1d)).isExactFilter()); assertFalse(field.buildFilter(new OpValue<>("==", 1.999d)).isExactFilter()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 99cf76e71cc..82a1e43a53b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -14,7 +14,7 @@ import 
org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.api.ParamConstants; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; import org.opencb.opencga.core.testclassification.duration.ShortTests; @@ -78,8 +78,8 @@ public void setUp() throws Exception { schema = new SampleIndexSchema(configuration, StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION); fileIndex = schema.getFileIndex(); - qualThresholds = fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).getConfiguration().getThresholds(); - dpThresholds = fileIndex.getCustomField(IndexFieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY).getConfiguration().getThresholds(); + qualThresholds = fileIndex.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.QUAL).getConfiguration().getThresholds(); + dpThresholds = fileIndex.getCustomField(FieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY).getConfiguration().getThresholds(); DummyVariantStorageMetadataDBAdaptorFactory.clear(); mm = new VariantStorageMetadataManager(new DummyVariantStorageMetadataDBAdaptorFactory()); @@ -358,7 +358,7 @@ public void parseFileQualTest() { } protected void checkQualFilter(String message, double minValueInclusive, double maxValueExclusive, SampleFileIndexQuery fileQuery) { - RangeIndexFieldFilter qualQuery = (RangeIndexFieldFilter) fileQuery.getFilter(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL); + RangeIndexFieldFilter qualQuery = (RangeIndexFieldFilter) fileQuery.getFilter(FieldConfiguration.Source.FILE, StudyEntry.QUAL); assertEquals(message, minValueInclusive, qualQuery.getMinValueInclusive(), 0); assertEquals(message, maxValueExclusive, qualQuery.getMaxValueExclusive(), 0); @@ -399,7 +399,7 @@ public void parseFileDPTest() { protected void checkDPFilter(String message, double minValueInclusive, double maxValueExclusive, SampleFileIndexQuery fileQuery) { RangeIndexFieldFilter dpFilter = getDPFilter(fileQuery); - IndexField dpField = fileIndex.getCustomField(IndexFieldConfiguration.Source.SAMPLE, "DP"); + IndexField dpField = fileIndex.getCustomField(FieldConfiguration.Source.SAMPLE, "DP"); assertEquals(message, minValueInclusive, dpFilter.getMinValueInclusive(), 0); assertEquals(message, maxValueExclusive, dpFilter.getMaxValueExclusive(), 0); @@ -408,7 +408,7 @@ protected void checkDPFilter(String message, double minValueInclusive, double ma } private RangeIndexFieldFilter getDPFilter(SampleFileIndexQuery fileQuery) { - return (RangeIndexFieldFilter) fileQuery.getFilter(IndexFieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY); + return (RangeIndexFieldFilter) fileQuery.getFilter(FieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY); } @Test @@ -1018,7 +1018,7 @@ public void parseSampleDataTest() { assertEquals(1, fileQueriesS1.size()); assertNotNull(fileQueriesS1.get(0).getVariantTypeFilter()); - assertFalse(fileQueriesS1.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertFalse(fileQueriesS1.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); assertNull(fileQueriesS1.get(0).getFilePositionFilter()); assertNull(sampleIndexQuery.getVariantTypes()); @@ -1043,13 +1043,13 @@ public void parseSampleDataTest_samples_and() { assertEquals(2, 
fileQueriesS1.get(0).getFilters().size()); assertFalse(fileQueriesS1.get(0).getVariantTypeFilter().isNoOp()); assertNull(fileQueriesS1.get(0).getFilePositionFilter()); - assertFalse(fileQueriesS1.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertFalse(fileQueriesS1.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); assertEquals(1, fileQueriesS2.size()); assertEquals(2, fileQueriesS2.get(0).getFilters().size()); assertFalse(fileQueriesS1.get(0).getVariantTypeFilter().isNoOp()); assertNull(fileQueriesS2.get(0).getFilePositionFilter()); - assertFalse(fileQueriesS2.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertFalse(fileQueriesS2.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); assertNull(sampleIndexQuery.getVariantTypes()); } @@ -1073,16 +1073,16 @@ public void parseSampleDataTest_samples_and_non_covered() { assertEquals(2, fileQueriesS1.get(0).getFilters().size()); assertNotNull(fileQueriesS1.get(0).getVariantTypeFilter()); assertNull(fileQueriesS1.get(0).getFilePositionFilter()); - assertFalse(fileQueriesS1.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); - assertEquals(7, ((RangeIndexFieldFilter) fileQueriesS1.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP")).getMinValueInclusive(), 0.000001); + assertFalse(fileQueriesS1.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertEquals(7, ((RangeIndexFieldFilter) fileQueriesS1.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP")).getMinValueInclusive(), 0.000001); Values fileQueriesS2 = sampleIndexQuery.getSampleFileIndexQuery("S2"); assertEquals(1, fileQueriesS2.size()); assertEquals(2, fileQueriesS2.get(0).getFilters().size()); assertNotNull(fileQueriesS1.get(0).getVariantTypeFilter()); assertNull(fileQueriesS2.get(0).getFilePositionFilter()); - assertFalse(fileQueriesS2.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); - assertEquals(12, ((RangeIndexFieldFilter) fileQueriesS2.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP")).getMinValueInclusive(), 0.000001); + assertFalse(fileQueriesS2.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertEquals(12, ((RangeIndexFieldFilter) fileQueriesS2.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP")).getMinValueInclusive(), 0.000001); assertNull(sampleIndexQuery.getVariantTypes()); } @@ -1102,13 +1102,13 @@ public void parseSampleDataTest_samples_or() { Values fileQueriesS1 = sampleIndexQuery.getSampleFileIndexQuery("S1"); assertEquals(1, fileQueriesS1.size()); assertNotNull(fileQueriesS1.get(0).getVariantTypeFilter()); - assertFalse(fileQueriesS1.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertFalse(fileQueriesS1.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); assertNull(fileQueriesS1.get(0).getFilePositionFilter()); Values fileQueriesS2 = sampleIndexQuery.getSampleFileIndexQuery("S2"); assertEquals(1, fileQueriesS2.size()); assertNotNull(fileQueriesS2.get(0).getVariantTypeFilter()); - assertFalse(fileQueriesS2.get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isNoOp()); + assertFalse(fileQueriesS2.get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isNoOp()); assertNull(fileQueriesS2.get(0).getFilePositionFilter()); assertNull(sampleIndexQuery.getVariantTypes()); @@ -1213,9 +1213,9 @@ public void parseFamilyQuery_dp() { assertEquals(new HashSet<>(Arrays.asList("fam1_child", "fam1_father", "fam1_mother")), 
indexQuery.getSamplesMap().keySet()); // Still using parent's filter assertEquals(1, indexQuery.getFatherFilterMap().size()); - assertTrue(indexQuery.getSampleFileIndexQuery("fam1_child").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); - assertTrue(indexQuery.getSampleFileIndexQuery("fam1_father").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); - assertTrue(indexQuery.getSampleFileIndexQuery("fam1_mother").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertTrue(indexQuery.getSampleFileIndexQuery("fam1_child").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertTrue(indexQuery.getSampleFileIndexQuery("fam1_father").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertTrue(indexQuery.getSampleFileIndexQuery("fam1_mother").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); assertEquals("", query.getString(SAMPLE_DATA.key())); } @@ -1233,8 +1233,8 @@ public void parseFamilyQuery_dp_partial() { assertEquals(new HashSet<>(Arrays.asList("fam1_child", "fam1_father")), indexQuery.getSamplesMap().keySet()); // Still using parent's filter assertEquals(1, indexQuery.getFatherFilterMap().size()); - assertTrue(indexQuery.getSampleFileIndexQuery("fam1_child").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); - assertTrue(indexQuery.getSampleFileIndexQuery("fam1_father").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertTrue(indexQuery.getSampleFileIndexQuery("fam1_child").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertTrue(indexQuery.getSampleFileIndexQuery("fam1_father").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); assertEquals("", query.getString(SAMPLE_DATA.key())); } @@ -1254,8 +1254,8 @@ public void parseFamilyQuery_dp_partial_no_exact() { assertEquals(new HashSet<>(Arrays.asList("fam1_child", "fam1_father")), indexQuery.getSamplesMap().keySet()); // Still using parent's filter assertEquals(1, indexQuery.getFatherFilterMap().size()); - assertTrue(indexQuery.getSampleFileIndexQuery("fam1_child").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); - assertFalse(indexQuery.getSampleFileIndexQuery("fam1_father").get(0).getFilter(IndexFieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertTrue(indexQuery.getSampleFileIndexQuery("fam1_child").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); + assertFalse(indexQuery.getSampleFileIndexQuery("fam1_father").get(0).getFilter(FieldConfiguration.Source.SAMPLE, "DP").isExactFilter()); assertEquals("fam1_father:DP>18", query.getString(SAMPLE_DATA.key())); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index f00210c8b95..7e7d7b1cfc1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -23,7 +23,7 @@ import 
org.opencb.commons.datastore.core.*; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.YesNoAuto; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.config.storage.SampleIndexConfiguration; import org.opencb.opencga.core.models.variant.VariantAnnotationConstants; import org.opencb.opencga.core.testclassification.duration.LongTests; @@ -156,7 +156,7 @@ public void load() throws Exception { .append(VariantStorageOptions.LOAD_SPLIT_DATA.key(), VariantStorageEngine.SplitData.MULTI); SampleIndexConfiguration configuration = SampleIndexConfiguration.defaultConfiguration() - .addFileIndexField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.SAMPLE, "DS", new double[]{0, 1, 2})); + .addFileIndexField(new FieldConfiguration(FieldConfiguration.Source.SAMPLE, "DS", new double[]{0, 1, 2})); configuration.getFileDataConfiguration() .setIncludeOriginalCall(null) .setIncludeSecondaryAlternates(null); @@ -177,8 +177,8 @@ public void load() throws Exception { // Study 3 - platinum metadataManager.addSampleIndexConfiguration(STUDY_NAME_3, SampleIndexConfiguration.defaultConfiguration() - .addFileIndexField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "culprit", - IndexFieldConfiguration.Type.CATEGORICAL, "DP", "FS", "MQ", "QD").setNullable(true)), true); + .addFileIndexField(new FieldConfiguration(FieldConfiguration.Source.FILE, "culprit", + FieldConfiguration.Type.CATEGORICAL, "DP", "FS", "MQ", "QD").setNullable(true)), true); params = new ObjectMap() .append(VariantStorageOptions.STUDY.key(), STUDY_NAME_3) @@ -210,7 +210,7 @@ public void load() throws Exception { // Study 6, multiallelic SampleIndexConfiguration sampleIndexConfiguration = SampleIndexConfiguration.defaultConfiguration(); - sampleIndexConfiguration.getFileIndexConfiguration().getCustomField(IndexFieldConfiguration.Source.FILE, "FILTER") + sampleIndexConfiguration.getFileIndexConfiguration().getCustomField(FieldConfiguration.Source.FILE, "FILTER") .setValues("PASS", "noPass", "noPass2"); engine.getMetadataManager().addSampleIndexConfiguration(STUDY_NAME_6, sampleIndexConfiguration, true); @@ -885,7 +885,7 @@ public void testAggregationCorrectnessCt() throws Exception { SampleIndexSchema schema = sampleIndexDBAdaptor.getSchemaLatest(STUDY_NAME_3); CategoricalMultiValuedIndexField field = schema.getCtIndex().getField(); - IndexFieldConfiguration ctConf = field.getConfiguration(); + FieldConfiguration ctConf = field.getConfiguration(); List cts = new ArrayList<>(); for (String ct : ctConf.getValues()) { if (!field.ambiguous(Collections.singletonList(ct))) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java index d7aeb1890ca..5199fbc4f45 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java @@ -7,7 +7,7 @@ import 
org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.avro.VariantType; -import org.opencb.opencga.core.config.storage.IndexFieldConfiguration; +import org.opencb.opencga.core.config.storage.FieldConfiguration; import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.io.bit.BitBuffer; @@ -48,30 +48,30 @@ public void testConvert() { converter.createFileIndexValue(0, 0, v("1:100-200:A:").addSample("s1", "0/1", ".").build())); fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); - fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER).write("PASS", bitBuffer); + fileIndex.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.FILTER).write("PASS", bitBuffer); assertEquals(bitBuffer, converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setFilter("PASS").build())); - fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.FILTER).write(null, bitBuffer); - fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).write("2000.0", bitBuffer); + fileIndex.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.FILTER).write(null, bitBuffer); + fileIndex.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.QUAL).write("2000.0", bitBuffer); assertEquals(bitBuffer, converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(2000.0).build())); - fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).write("10.0", bitBuffer); + fileIndex.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.QUAL).write("10.0", bitBuffer); assertEquals(bitBuffer, converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(10.0).build())); bitBuffer.clear(); fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); fileIndex.getFilePositionIndex().write(3, bitBuffer); - fileIndex.getCustomField(IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL).write("10.0", bitBuffer); + fileIndex.getCustomField(FieldConfiguration.Source.FILE, StudyEntry.QUAL).write("10.0", bitBuffer); assertEquals(bitBuffer, converter.createFileIndexValue(0, 3, v("1:100:A:C").addSample("s1", "0/1", ".").setQuality(10.0).build())); bitBuffer.clear(); fileIndex.getTypeIndex().write(VariantType.SNV, bitBuffer); for (Integer dp : IntStream.range(0, 60).toArray()) { - fileIndex.getCustomField(IndexFieldConfiguration.Source.SAMPLE, "DP").write(String.valueOf(dp), bitBuffer); + fileIndex.getCustomField(FieldConfiguration.Source.SAMPLE, "DP").write(String.valueOf(dp), bitBuffer); assertEquals(bitBuffer, converter.createFileIndexValue(0, 0, v("1:100:A:C").addSample("s1", "0/1", dp.toString()).build())); assertEquals(bitBuffer, From 499b5e15acd62a8df2a708bf438e0576a83bb273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 18 Oct 2024 15:44:25 +0100 Subject: [PATCH 14/19] storage: Rename SampleVariantIndexEntry to SampleIndexVariant and AnnotationIndexEntry to SampleIndexVariantAnnotation #TASK-6765 --- .../storage/hadoop/app/SampleIndexMain.java | 5 +- .../SampleIndexOnlyVariantQueryExecutor.java | 16 ++--- ...SampleIndexVariantAggregationExecutor.java | 26 +++---- ...java => SampleIndexVariantAnnotation.java} | 38 +++++----- ...ampleIndexVariantAnnotationConverter.java} | 10 +-- ...mpleIndexVariantAnnotationPutBuilder.java} | 8 +-- .../mr/SampleIndexAnnotationLoaderMapper.java | 22 +++--- 
...endelianErrorSampleIndexEntryIterator.java | 10 +-- .../AbstractSampleIndexEntryFilter.java | 67 ++++++++--------- .../sample/HBaseToSampleIndexConverter.java | 12 ++-- .../sample/RawSampleIndexEntryFilter.java | 14 ++-- ...RawSingleSampleIndexVariantDBIterator.java | 12 ++-- .../index/sample/SampleIndexDBAdaptor.java | 12 ++-- .../index/sample/SampleIndexDBLoader.java | 10 +-- .../index/sample/SampleIndexDriver.java | 14 ++-- .../sample/SampleIndexEntryIterator.java | 14 ++-- .../sample/SampleIndexEntryPutBuilder.java | 72 +++++++++---------- .../index/sample/SampleIndexQueryParser.java | 2 +- .../index/sample/SampleIndexSchema.java | 2 +- ...ndexEntry.java => SampleIndexVariant.java} | 30 ++++---- .../sample/SampleIndexVariantBiConverter.java | 50 ++++++------- ....java => SampleIndexVariantConverter.java} | 12 ++-- .../hadoop/variant/VariantHbaseTestUtils.java | 2 +- ...eIndexVariantAnnotationConverterTest.java} | 10 +-- ...endelianErrorSampleIndexConverterTest.java | 4 +- .../sample/SampleIndexEntryFilterTest.java | 14 ++-- .../SampleIndexEntryPutBuilderTest.java | 24 +++---- .../sample/SampleIndexQueryParserTest.java | 14 ++-- .../SampleIndexVariantBiConverterTest.java | 4 +- ...a => SampleIndexVariantConverterTest.java} | 6 +- ...yTest.java => SampleIndexVariantTest.java} | 14 ++-- 31 files changed, 276 insertions(+), 274 deletions(-) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/{AnnotationIndexEntry.java => SampleIndexVariantAnnotation.java} (83%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/{AnnotationIndexConverter.java => SampleIndexVariantAnnotationConverter.java} (96%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/{AnnotationIndexPutBuilder.java => SampleIndexVariantAnnotationPutBuilder.java} (91%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{SampleVariantIndexEntry.java => SampleIndexVariant.java} (83%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{SampleVariantIndexEntryConverter.java => SampleIndexVariantConverter.java} (94%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/{AnnotationIndexConverterTest.java => SampleIndexVariantAnnotationConverterTest.java} (96%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{SampleVariantIndexEntryConverterTest.java => SampleIndexVariantConverterTest.java} (95%) rename opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/{SampleVariantIndexEntryTest.java => SampleIndexVariantTest.java} (72%) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java index 16f0083184e..c1a3ff60073 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/app/SampleIndexMain.java @@ -34,8 +34,9 @@ public static void main(String[] args) { SampleIndexMain main = new SampleIndexMain(); try { main.run(args); - } catch (Exception e) { + } catch (Throwable e) { e.printStackTrace(); + System.exit(1); } } @@ -168,7 +169,7 @@ private void rawQuery(SampleIndexDBAdaptor dbAdaptor, ObjectMap argsMap) throws private void detailedQuery(SampleIndexDBAdaptor dbAdaptor, ObjectMap argsMap) throws Exception { SampleIndexQuery sampleIndexQuery = dbAdaptor.parseSampleIndexQuery(new Query(argsMap)); - CloseableIterator iterator = dbAdaptor.rawIterator(sampleIndexQuery); + CloseableIterator iterator = dbAdaptor.rawIterator(sampleIndexQuery); SampleIndexSchema schema = sampleIndexQuery.getSchema(); if (argsMap.getBoolean("quiet", false)) { print(Iterators.size(iterator)); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index fbfaa8634be..34a511f0fc3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -41,7 +41,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleVariantIndexEntry; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexVariant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -174,8 +174,8 @@ private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery variantIterator = sampleIndexDBAdaptor.iterator(sampleIndexQuery, options); variantIterator = variantIterator.map(v -> v.setId(v.toString())); } else { - logger.info("Using sample index raw iterator Iterator"); - CloseableIterator rawIterator; + logger.info("Using sample index raw iterator Iterator<{}>", SampleIndexVariant.class.getSimpleName()); + CloseableIterator rawIterator; try { rawIterator = sampleIndexDBAdaptor.rawIterator(sampleIndexQuery, options); } catch (IOException e) { @@ -183,7 +183,7 @@ private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery } boolean includeAll = parsedQuery.getSource() == VariantQuerySource.SECONDARY_SAMPLE_INDEX || parsedQuery.getInputQuery().getBoolean("includeAllFromSampleIndex", false); - SampleVariantIndexEntryToVariantConverter converter = new SampleVariantIndexEntryToVariantConverter( + SampleIndexVariantToVariantConverter converter = new SampleIndexVariantToVariantConverter( parsedQuery, sampleIndexQuery, dbAdaptor.getMetadataManager(), includeAll); variantIterator = VariantDBIterator.wrapper(Iterators.transform(rawIterator, converter::convert)); 
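// Illustrative sketch, not part of this patch: the hunk above wraps a raw
// SampleIndexVariant iterator into a Variant iterator by applying a converter
// to every element (Iterators.transform + converter::convert). A minimal,
// JDK-only version of that transform-iterator pattern is sketched below;
// the class and type names here are hypothetical and only stand in for the
// OpenCGA types.
import java.util.Iterator;
import java.util.function.Function;

final class TransformingIterator<A, B> implements Iterator<B> {
    private final Iterator<A> source;
    private final Function<A, B> convert;

    TransformingIterator(Iterator<A> source, Function<A, B> convert) {
        this.source = source;
        this.convert = convert;
    }

    @Override
    public boolean hasNext() {
        return source.hasNext();
    }

    @Override
    public B next() {
        // Convert each raw entry lazily, one element at a time, so the
        // wrapping iterator never materialises the whole result set.
        return convert.apply(source.next());
    }
}
// Usage (hypothetical): new TransformingIterator<>(rawEntries, entry -> toVariant(entry))
// mirrors what the executor does with the raw SampleIndexVariant stream.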
AddMissingDataTask task = new AddMissingDataTask( @@ -297,7 +297,7 @@ private boolean isIncludeCovered(SampleIndexQuery sampleIndexQuery, Query inputQ } - private static class SampleVariantIndexEntryToVariantConverter implements Converter { + private static class SampleIndexVariantToVariantConverter implements Converter { enum FamilyRole { MOTHER, @@ -320,8 +320,8 @@ enum FamilyRole { private final SampleIndexSchema schema; - SampleVariantIndexEntryToVariantConverter(ParsedVariantQuery parseQuery, SampleIndexQuery sampleIndexQuery, - VariantStorageMetadataManager metadataManager, boolean includeAll) { + SampleIndexVariantToVariantConverter(ParsedVariantQuery parseQuery, SampleIndexQuery sampleIndexQuery, + VariantStorageMetadataManager metadataManager, boolean includeAll) { schema = sampleIndexQuery.getSchema(); this.includeAll = includeAll; @@ -421,7 +421,7 @@ enum FamilyRole { } @Override - public Variant convert(SampleVariantIndexEntry entry) { + public Variant convert(SampleIndexVariant entry) { Variant v = entry.getVariant(); v.setId(v.toString()); if (includeStudy) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java index 973a922780b..0c09698a0bb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java @@ -24,9 +24,9 @@ import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleIndexQuery; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexVariant; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleVariantIndexEntry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,24 +111,24 @@ protected VariantQueryResult aggregation(Query query, QueryOptions o SampleIndexQuery sampleIndexQuery = sampleIndexDBAdaptor.parseSampleIndexQuery(new Query(query)); SampleIndexSchema schema = sampleIndexQuery.getSchema(); - List> accumulators = createAccumulators(schema, query, facet, filterTranscript); + List> accumulators = createAccumulators(schema, query, facet, filterTranscript); List fields = new ArrayList<>(); logger.info("Filter transcript = {}", filterTranscript); - try (CloseableIterator sampleVariantIndexEntryIterator = sampleIndexDBAdaptor.rawIterator(query)) { + try (CloseableIterator sampleIndexVariantIterator = sampleIndexDBAdaptor.rawIterator(query)) { // Init top level fields - for (FacetFieldAccumulator accumulator : accumulators) { + for (FacetFieldAccumulator accumulator : accumulators) { fields.add(accumulator.createField()); } // Loop long numMatches = 0; - while (sampleVariantIndexEntryIterator.hasNext()) { + while (sampleIndexVariantIterator.hasNext()) { numMatches++; - SampleVariantIndexEntry entry 
= sampleVariantIndexEntryIterator.next(); + SampleIndexVariant entry = sampleIndexVariantIterator.next(); for (int i = 0; i < accumulators.size(); i++) { - FacetFieldAccumulator accumulator = accumulators.get(i); + FacetFieldAccumulator accumulator = accumulators.get(i); FacetField field = fields.get(i); accumulator.accumulate(field, entry); } @@ -136,7 +136,7 @@ protected VariantQueryResult aggregation(Query query, QueryOptions o // Tear down and clean up results. for (int i = 0; i < accumulators.size(); i++) { - FacetFieldAccumulator accumulator = accumulators.get(i); + FacetFieldAccumulator accumulator = accumulators.get(i); FacetField field = fields.get(i); accumulator.evaluate(field); } @@ -146,19 +146,19 @@ protected VariantQueryResult aggregation(Query query, QueryOptions o } } - private List> createAccumulators( + private List> createAccumulators( SampleIndexSchema schema, Query query, String facet, boolean filterTranscript) { - List> list = new ArrayList<>(); + List> list = new ArrayList<>(); for (String f : facet.split(FACET_SEPARATOR)) { list.add(createAccumulator(schema, query, f, filterTranscript)); } return list; } - private FacetFieldAccumulator createAccumulator( + private FacetFieldAccumulator createAccumulator( SampleIndexSchema schema, Query query, String facet, boolean filterTranscript) { String[] split = facet.split(NESTED_FACET_SEPARATOR); - FacetFieldAccumulator accumulator = null; + FacetFieldAccumulator accumulator = null; Set ctFilter = new HashSet<>(VariantQueryUtils .parseConsequenceTypes(query.getAsStringList(VariantQueryParam.ANNOT_CONSEQUENCE_TYPE.key()))); @@ -188,7 +188,7 @@ private FacetFieldAccumulator createAccumulator( if (fieldKey.equalsIgnoreCase("depth") || fieldKey.equalsIgnoreCase("coverage")) { fieldKey = "dp"; } - FacetFieldAccumulator thisAccumulator = null; + FacetFieldAccumulator thisAccumulator = null; switch (fieldKey) { case CHROM_DENSITY: int step; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexEntry.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotation.java similarity index 83% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexEntry.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotation.java index 715b25e18a8..21bef9650cf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexEntry.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotation.java @@ -5,7 +5,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.core.CombinationTripleIndexSchema.CombinationTriple; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; -public class AnnotationIndexEntry { +public class SampleIndexVariantAnnotation { private boolean hasSummaryIndex; private byte summaryIndex; @@ -24,10 +24,10 @@ public class AnnotationIndexEntry { private BitBuffer clinicalIndex; - public AnnotationIndexEntry() { + public 
SampleIndexVariantAnnotation() { } - public AnnotationIndexEntry(AnnotationIndexEntry other) { + public SampleIndexVariantAnnotation(SampleIndexVariantAnnotation other) { this(); this.hasSummaryIndex = other.hasSummaryIndex; this.summaryIndex = other.summaryIndex; @@ -44,7 +44,7 @@ public AnnotationIndexEntry(AnnotationIndexEntry other) { this.clinicalIndex = other.clinicalIndex == null ? null : new BitBuffer(other.clinicalIndex); } - public AnnotationIndexEntry( + public SampleIndexVariantAnnotation( byte summaryIndex, boolean intergenic, int ctIndex, int btIndex, int tfIndex, CombinationTriple ctBtTfCombination, BitBuffer popFreqIndex, boolean hasClinical, BitBuffer clinicalIndex) { @@ -62,8 +62,8 @@ public AnnotationIndexEntry( this.popFreqIndex = popFreqIndex; } - public static AnnotationIndexEntry empty(SampleIndexSchema schema) { - return new AnnotationIndexEntry() + public static SampleIndexVariantAnnotation empty(SampleIndexSchema schema) { + return new SampleIndexVariantAnnotation() .setPopFreqIndex(new BitBuffer(schema.getPopFreqIndex().getBitsLength())) .setCtBtTfCombination(new CombinationTriple()); } @@ -72,7 +72,7 @@ public boolean hasSummaryIndex() { return hasSummaryIndex; } - public AnnotationIndexEntry setHasSummaryIndex(boolean hasSummaryIndex) { + public SampleIndexVariantAnnotation setHasSummaryIndex(boolean hasSummaryIndex) { this.hasSummaryIndex = hasSummaryIndex; return this; } @@ -81,7 +81,7 @@ public byte getSummaryIndex() { return summaryIndex; } - public AnnotationIndexEntry setSummaryIndex(byte summaryIndex) { + public SampleIndexVariantAnnotation setSummaryIndex(byte summaryIndex) { this.hasSummaryIndex = true; this.summaryIndex = summaryIndex; return this; @@ -91,7 +91,7 @@ public boolean isIntergenic() { return intergenic; } - public AnnotationIndexEntry setIntergenic(boolean intergenic) { + public SampleIndexVariantAnnotation setIntergenic(boolean intergenic) { this.intergenic = intergenic; return this; } @@ -100,7 +100,7 @@ public boolean hasCtIndex() { return hasCtIndex; } - public AnnotationIndexEntry setHasCtIndex(boolean hasCtIndex) { + public SampleIndexVariantAnnotation setHasCtIndex(boolean hasCtIndex) { this.hasCtIndex = hasCtIndex; return this; } @@ -109,7 +109,7 @@ public int getCtIndex() { return ctIndex; } - public AnnotationIndexEntry setCtIndex(int ctIndex) { + public SampleIndexVariantAnnotation setCtIndex(int ctIndex) { hasCtIndex = true; this.ctIndex = ctIndex; return this; @@ -119,7 +119,7 @@ public boolean hasBtIndex() { return hasBtIndex; } - public AnnotationIndexEntry setHasBtIndex(boolean hasBtIndex) { + public SampleIndexVariantAnnotation setHasBtIndex(boolean hasBtIndex) { this.hasBtIndex = hasBtIndex; return this; } @@ -128,7 +128,7 @@ public int getBtIndex() { return btIndex; } - public AnnotationIndexEntry setBtIndex(int btIndex) { + public SampleIndexVariantAnnotation setBtIndex(int btIndex) { setHasBtIndex(true); this.btIndex = btIndex; return this; @@ -138,7 +138,7 @@ public boolean hasTfIndex() { return hasTfIndex; } - public AnnotationIndexEntry setHasTfIndex(boolean hasTfIndex) { + public SampleIndexVariantAnnotation setHasTfIndex(boolean hasTfIndex) { this.hasTfIndex = hasTfIndex; return this; } @@ -147,7 +147,7 @@ public int getTfIndex() { return tfIndex; } - public AnnotationIndexEntry setTfIndex(int tfIndex) { + public SampleIndexVariantAnnotation setTfIndex(int tfIndex) { hasTfIndex = true; this.tfIndex = tfIndex; return this; @@ -157,7 +157,7 @@ public CombinationTriple getCtBtTfCombination() { return 
ctBtTfCombination; } - public AnnotationIndexEntry setCtBtTfCombination(CombinationTriple ctBtTfCombination) { + public SampleIndexVariantAnnotation setCtBtTfCombination(CombinationTriple ctBtTfCombination) { this.ctBtTfCombination = ctBtTfCombination; return this; } @@ -166,7 +166,7 @@ public BitBuffer getPopFreqIndex() { return popFreqIndex; } - public AnnotationIndexEntry setPopFreqIndex(BitBuffer popFreqIndex) { + public SampleIndexVariantAnnotation setPopFreqIndex(BitBuffer popFreqIndex) { this.popFreqIndex = popFreqIndex; return this; } @@ -175,7 +175,7 @@ public boolean hasClinical() { return hasClinical; } - public AnnotationIndexEntry setHasClinical(boolean hasClinical) { + public SampleIndexVariantAnnotation setHasClinical(boolean hasClinical) { this.hasClinical = hasClinical; return this; } @@ -184,7 +184,7 @@ public BitBuffer getClinicalIndex() { return clinicalIndex; } - public AnnotationIndexEntry setClinicalIndex(BitBuffer clinicalIndex) { + public SampleIndexVariantAnnotation setClinicalIndex(BitBuffer clinicalIndex) { this.clinicalIndex = clinicalIndex; return this; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java similarity index 96% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexConverter.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java index 33bc3a6173e..488ad7cc6cc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java @@ -21,7 +21,7 @@ /** * Created by jacobo on 04/01/19. 
*/ -public class AnnotationIndexConverter { +public class SampleIndexVariantAnnotationConverter { public static final double POP_FREQ_THRESHOLD_001 = 0.001; public static final Set BIOTYPE_SET; @@ -63,13 +63,13 @@ public class AnnotationIndexConverter { private final SampleIndexSchema schema; - public AnnotationIndexConverter(SampleIndexSchema schema) { + public SampleIndexVariantAnnotationConverter(SampleIndexSchema schema) { this.schema = schema; } - public AnnotationIndexEntry convert(VariantAnnotation variantAnnotation) { + public SampleIndexVariantAnnotation convert(VariantAnnotation variantAnnotation) { if (variantAnnotation == null) { - return AnnotationIndexEntry.empty(schema); + return SampleIndexVariantAnnotation.empty(schema); } byte b = 0; BitBuffer ctIndex = new BitBuffer(schema.getCtIndex().getBitsLength()); @@ -194,7 +194,7 @@ public AnnotationIndexEntry convert(VariantAnnotation variantAnnotation) { schema.getClinicalIndexSchema().getSourceField().write(new ArrayList<>(source), clinicalIndex); schema.getClinicalIndexSchema().getClinicalSignificanceField().write(combinations, clinicalIndex); } - return new AnnotationIndexEntry(b, intergenic, ctIndex.toInt(), btIndex.toInt(), tfIndex.toInt(), + return new SampleIndexVariantAnnotation(b, intergenic, ctIndex.toInt(), btIndex.toInt(), tfIndex.toInt(), ctBtTfCombination, popFreq, clinical, clinicalIndex); } protected void addPopFreqIndex(BitBuffer bitBuffer, PopulationFrequency populationFrequency) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexPutBuilder.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationPutBuilder.java similarity index 91% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexPutBuilder.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationPutBuilder.java index dc7f1ba12d0..d3ecf273652 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexPutBuilder.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationPutBuilder.java @@ -7,7 +7,7 @@ import java.io.ByteArrayOutputStream; -public class AnnotationIndexPutBuilder { +public class SampleIndexVariantAnnotationPutBuilder { private final ByteArrayOutputStream annotation; private final BitOutputStream biotype; @@ -19,11 +19,11 @@ public class AnnotationIndexPutBuilder { private final SampleIndexSchema indexSchema; private int numVariants; - public AnnotationIndexPutBuilder(SampleIndexSchema indexSchema) { + public SampleIndexVariantAnnotationPutBuilder(SampleIndexSchema indexSchema) { this(indexSchema, 50); } - public AnnotationIndexPutBuilder(SampleIndexSchema indexSchema, int size) { + public SampleIndexVariantAnnotationPutBuilder(SampleIndexSchema indexSchema, int size) { this.indexSchema = indexSchema; this.annotation = new ByteArrayOutputStream(size); this.biotype = new BitOutputStream(size / 4); @@ -35,7 +35,7 @@ public 
AnnotationIndexPutBuilder(SampleIndexSchema indexSchema, int size) { numVariants = 0; } - public AnnotationIndexPutBuilder add(AnnotationIndexEntry indexEntry) { + public SampleIndexVariantAnnotationPutBuilder add(SampleIndexVariantAnnotation indexEntry) { numVariants++; annotation.write(indexEntry.getSummaryIndex()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/mr/SampleIndexAnnotationLoaderMapper.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/mr/SampleIndexAnnotationLoaderMapper.java index 3e00a9d4ee1..4cf88ecddde 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/mr/SampleIndexAnnotationLoaderMapper.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/mr/SampleIndexAnnotationLoaderMapper.java @@ -9,9 +9,9 @@ import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; import org.opencb.opencga.storage.hadoop.variant.converters.VariantRow; import org.opencb.opencga.storage.hadoop.variant.converters.annotation.HBaseToVariantAnnotationConverter; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexPutBuilder; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotation; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationPutBuilder; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; import org.opencb.opencga.storage.hadoop.variant.mr.VariantMapReduceUtil; import org.opencb.opencga.storage.hadoop.variant.mr.VariantsTableMapReduceHelper; @@ -31,11 +31,11 @@ public class SampleIndexAnnotationLoaderMapper extends VariantTableSampleIndexOr private static final String FIRST_SAMPLE_ID = "SampleIndexAnnotationLoaderMapper.firstSampleId"; private static final String LAST_SAMPLE_ID = "SampleIndexAnnotationLoaderMapper.lastSampleId"; private byte[] family; - private Map[] annotationIndices; + private Map[] annotationIndices; private boolean hasGenotype; private boolean multiFileSamples; - private AnnotationIndexConverter converter; + private SampleIndexVariantAnnotationConverter converter; private int firstSampleId; private SampleIndexSchema schema; private HBaseToVariantAnnotationConverter annotationConverter; @@ -67,7 +67,7 @@ protected void setup(Context context) throws IOException, InterruptedException { annotationIndices[i] = new HashMap<>(); } schema = VariantMapReduceUtil.getSampleIndexSchema(context.getConfiguration()); - converter = new AnnotationIndexConverter(schema); + converter = new SampleIndexVariantAnnotationConverter(schema); annotationConverter = new HBaseToVariantAnnotationConverter(); } @@ -79,7 +79,7 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t if (variantAnnotation == null) { context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "variantsAnnotationNull").increment(1); } - AnnotationIndexEntry indexEntry = 
converter.convert(variantAnnotation); + SampleIndexVariantAnnotation indexEntry = converter.convert(variantAnnotation); // TODO Get stats given index values Set samples = multiFileSamples ? new HashSet<>(result.rawCells().length) : null; @@ -104,7 +104,7 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t if (samples == null || samples.add(sampleId + "_" + gt)) { if (validGt) { annotationIndices[sampleId - firstSampleId] - .computeIfAbsent(gt, k -> new AnnotationIndexPutBuilder(schema)).add(indexEntry); + .computeIfAbsent(gt, k -> new SampleIndexVariantAnnotationPutBuilder(schema)).add(indexEntry); } } }).walk(); @@ -118,12 +118,12 @@ public void flush(Context context, String chromosome, int position) throws IOExc protected void writeIndices(Context context, String chromosome, int position) throws IOException, InterruptedException { context.getCounter(VariantsTableMapReduceHelper.COUNTER_GROUP_NAME, "write_indices").increment(1); for (int i = 0; i < annotationIndices.length; i++) { - Map gtMap = annotationIndices[i]; + Map gtMap = annotationIndices[i]; int sampleId = i + firstSampleId; Put put = new Put(SampleIndexSchema.toRowKey(sampleId, chromosome, position)); - for (Map.Entry e : gtMap.entrySet()) { + for (Map.Entry e : gtMap.entrySet()) { String gt = e.getKey(); - AnnotationIndexPutBuilder value = e.getValue(); + SampleIndexVariantAnnotationPutBuilder value = e.getValue(); if (!value.isEmpty()) { value.buildAndReset(put, gt, family); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java index 4c3f6799d80..b30b918e403 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexEntryIterator.java @@ -3,7 +3,7 @@ import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.variant.Variant; import org.opencb.opencga.storage.core.io.bit.BitBuffer; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotation; import org.opencb.opencga.storage.hadoop.variant.index.sample.*; import java.nio.ByteBuffer; @@ -91,7 +91,7 @@ public byte nextParentsIndexEntry() { } @Override - public AnnotationIndexEntry nextAnnotationIndexEntry() { + public SampleIndexVariantAnnotation nextAnnotationIndexEntry() { SampleIndexEntryIterator it = getGtIterator(); return it == null ? 
null : it.nextAnnotationIndexEntry(); } @@ -141,8 +141,8 @@ public Variant nextVariant() { } @Override - public SampleVariantIndexEntry nextSampleVariantIndexEntry() { - AnnotationIndexEntry annotationIndexEntry = nextAnnotationIndexEntry(); + public SampleIndexVariant nextSampleIndexVariant() { + SampleIndexVariantAnnotation annotationIndex = nextAnnotationIndexEntry(); List filesIndex = new ArrayList<>(); List filesDataIndex = new ArrayList<>(); if (hasFileIndex()) { @@ -164,7 +164,7 @@ public SampleVariantIndexEntry nextSampleVariantIndexEntry() { parentsCode = nextParentsIndexEntry(); } Variant variant = next(); - return new SampleVariantIndexEntry(variant, filesIndex, filesDataIndex, genotype, annotationIndexEntry, parentsCode, meCode); + return new SampleIndexVariant(variant, filesIndex, filesDataIndex, genotype, annotationIndex, parentsCode, meCode); } @Override diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java index a6d0eb974bd..fb233211f7d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java @@ -8,8 +8,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.hadoop.variant.index.IndexUtils; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotation; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter; import org.opencb.opencga.storage.hadoop.variant.index.family.MendelianErrorSampleIndexEntryIterator; import org.opencb.opencga.storage.hadoop.variant.index.query.LocusQuery; @@ -275,25 +275,25 @@ private T filter(SampleIndexEntryIterator variants) { private T filter(SampleIndexEntryIterator variants, MutableInt expectedResultsFromAnnotation) { // Either call to next() or to skip(), but no both - AnnotationIndexEntry annotationIndexEntry; + SampleIndexVariantAnnotation annotationIndex; try { - annotationIndexEntry = variants.nextAnnotationIndexEntry(); + annotationIndex = variants.nextAnnotationIndexEntry(); } catch (RuntimeException e) { logger.error("Error reading AnnotationIndexEntry at " + variants.toString()); throw VariantQueryException.internalException(e); } // Test annotation index (if any) - if (annotationIndexEntry == null - || !annotationIndexEntry.hasSummaryIndex() - || testIndex(annotationIndexEntry.getSummaryIndex(), query.getAnnotationIndexMask(), query.getAnnotationIndex())) { + if (annotationIndex == null + || !annotationIndex.hasSummaryIndex() + || testIndex(annotationIndex.getSummaryIndex(), query.getAnnotationIndexMask(), query.getAnnotationIndex())) { 
expectedResultsFromAnnotation.decrement(); // Test other annotation index and popFreq (if any) - if (annotationIndexEntry == null - || filterClinicalFields(annotationIndexEntry) - && filterBtCtTfFields(annotationIndexEntry) - && filterPopFreq(annotationIndexEntry)) { + if (annotationIndex == null + || filterClinicalFields(annotationIndex) + && filterBtCtTfFields(annotationIndex) + && filterPopFreq(annotationIndex)) { // Test file index (if any) if (filterFile(variants)) { @@ -384,62 +384,63 @@ private boolean filterFile(BitBuffer fileIndex, SampleFileIndexQuery fileQuery) } public static boolean isNonIntergenic(byte summaryIndex) { - return IndexUtils.testIndex(summaryIndex, AnnotationIndexConverter.INTERGENIC_MASK, (byte) 0); + return IndexUtils.testIndex(summaryIndex, SampleIndexVariantAnnotationConverter.INTERGENIC_MASK, (byte) 0); } public static boolean isClinical(byte summaryIndex) { - return IndexUtils.testIndex(summaryIndex, AnnotationIndexConverter.CLINICAL_MASK, AnnotationIndexConverter.CLINICAL_MASK); + return IndexUtils.testIndex(summaryIndex, + SampleIndexVariantAnnotationConverter.CLINICAL_MASK, SampleIndexVariantAnnotationConverter.CLINICAL_MASK); } - private boolean filterPopFreq(AnnotationIndexEntry annotationIndexEntry) { - return query.getAnnotationIndexQuery().getPopulationFrequencyFilter().test(annotationIndexEntry.getPopFreqIndex()); + private boolean filterPopFreq(SampleIndexVariantAnnotation annotationIndex) { + return query.getAnnotationIndexQuery().getPopulationFrequencyFilter().test(annotationIndex.getPopFreqIndex()); } - private boolean filterClinicalFields(AnnotationIndexEntry annotationIndexEntry) { + private boolean filterClinicalFields(SampleIndexVariantAnnotation annotationIndex) { if (query.getAnnotationIndexQuery().getClinicalFilter().isNoOp()) { // No filter required return true; } - if (annotationIndexEntry == null || !annotationIndexEntry.hasSummaryIndex()) { + if (annotationIndex == null || !annotationIndex.hasSummaryIndex()) { // unable to filter by this field return true; } - if (!annotationIndexEntry.hasClinical()) { + if (!annotationIndex.hasClinical()) { return false; } // FIXME - return query.getAnnotationIndexQuery().getClinicalFilter().test(annotationIndexEntry.getClinicalIndex()); + return query.getAnnotationIndexQuery().getClinicalFilter().test(annotationIndex.getClinicalIndex()); } - private boolean filterBtCtTfFields(AnnotationIndexEntry annotationIndexEntry) { - if (annotationIndexEntry == null || !annotationIndexEntry.hasSummaryIndex()) { + private boolean filterBtCtTfFields(SampleIndexVariantAnnotation annotationIndex) { + if (annotationIndex == null || !annotationIndex.hasSummaryIndex()) { // Missing annotation. 
Unable to filter return true; } - if (annotationIndexEntry.isIntergenic()) { + if (annotationIndex.isIntergenic()) { // unable to filter by this field return true; } - if (annotationIndexEntry.hasBtIndex() - && !query.getAnnotationIndexQuery().getBiotypeFilter().test(annotationIndexEntry.getBtIndex())) { + if (annotationIndex.hasBtIndex() + && !query.getAnnotationIndexQuery().getBiotypeFilter().test(annotationIndex.getBtIndex())) { return false; } - if (annotationIndexEntry.hasCtIndex() - && !query.getAnnotationIndexQuery().getConsequenceTypeFilter().test(annotationIndexEntry.getCtIndex())) { + if (annotationIndex.hasCtIndex() + && !query.getAnnotationIndexQuery().getConsequenceTypeFilter().test(annotationIndex.getCtIndex())) { return false; } - if (annotationIndexEntry.hasTfIndex() - && !query.getAnnotationIndexQuery().getTranscriptFlagFilter().test(annotationIndexEntry.getTfIndex())) { + if (annotationIndex.hasTfIndex() + && !query.getAnnotationIndexQuery().getTranscriptFlagFilter().test(annotationIndex.getTfIndex())) { return false; } - if (annotationIndexEntry.getCtBtTfCombination().getMatrix() != null - && !query.getAnnotationIndexQuery().getCtBtTfFilter().test(annotationIndexEntry.getCtBtTfCombination(), - annotationIndexEntry.getCtIndex(), - annotationIndexEntry.getBtIndex(), - annotationIndexEntry.getTfIndex())) { + if (annotationIndex.getCtBtTfCombination().getMatrix() != null + && !query.getAnnotationIndexQuery().getCtBtTfFilter().test(annotationIndex.getCtBtTfCombination(), + annotationIndex.getCtIndex(), + annotationIndex.getBtIndex(), + annotationIndex.getTfIndex())) { return false; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java index d6e8be6f22b..7e95accbe2b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java @@ -125,15 +125,15 @@ public Map> convertToMap(Result result) { return map; } - public Map> convertToMapSampleVariantIndex(Result result) { + public Map> convertToMapSampleVariantIndex(Result result) { if (result == null || result.isEmpty()) { return Collections.emptyMap(); } Map> map = convertToMap(result); - Map> mapVariantFileIndex = new HashMap<>(); - SampleVariantIndexEntry.SampleVariantIndexEntryComparator comparator - = new SampleVariantIndexEntry.SampleVariantIndexEntryComparator(schema); + Map> mapVariantFileIndex = new HashMap<>(); + SampleIndexVariant.SampleIndexVariantComparator comparator + = new SampleIndexVariant.SampleIndexVariantComparator(schema); Map fileIndexMap = new HashMap<>(); Map fileDataMap = new HashMap<>(); for (Cell cell : result.rawCells()) { @@ -155,7 +155,7 @@ public Map> convertToMapSampleVariantIn } for (Map.Entry entry : fileIndexMap.entrySet()) { String gt = entry.getKey(); - TreeSet values = new TreeSet<>(comparator); + TreeSet values = new TreeSet<>(comparator); mapVariantFileIndex.put(gt, values); BitInputStream fileIndexStream = entry.getValue(); @@ -171,7 +171,7 @@ public Map> convertToMapSampleVariantIn } else { fileDataEntry = 
fileDataSchema.readNextDocument(fileDataBuffer); } - values.add(new SampleVariantIndexEntry(variant, fileIndexEntry, fileDataEntry)); + values.add(new SampleIndexVariant(variant, fileIndexEntry, fileDataEntry)); } while (this.fileIndexSchema.isMultiFile(fileIndexEntry)); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSampleIndexEntryFilter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSampleIndexEntryFilter.java index 9e3b9096446..10b019696ab 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSampleIndexEntryFilter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSampleIndexEntryFilter.java @@ -6,7 +6,7 @@ import java.util.Comparator; -public class RawSampleIndexEntryFilter extends AbstractSampleIndexEntryFilter { +public class RawSampleIndexEntryFilter extends AbstractSampleIndexEntryFilter { public RawSampleIndexEntryFilter(SingleSampleIndexQuery query) { super(query); @@ -17,22 +17,22 @@ public RawSampleIndexEntryFilter(SingleSampleIndexQuery query, LocusQuery region } @Override - protected SampleVariantIndexEntry getNext(SampleIndexEntryIterator variants) { - return variants.nextSampleVariantIndexEntry(); + protected SampleIndexVariant getNext(SampleIndexEntryIterator variants) { + return variants.nextSampleIndexVariant(); } @Override - protected Variant toVariant(SampleVariantIndexEntry v) { + protected Variant toVariant(SampleIndexVariant v) { return v.getVariant(); } @Override - protected boolean sameGenomicVariant(SampleVariantIndexEntry v1, SampleVariantIndexEntry v2) { + protected boolean sameGenomicVariant(SampleIndexVariant v1, SampleIndexVariant v2) { return v1.getVariant().sameGenomicVariant(v2.getVariant()); } @Override - protected Comparator getComparator() { - return Comparator.comparing(SampleVariantIndexEntry::getVariant, SampleIndexSchema.INTRA_CHROMOSOME_VARIANT_COMPARATOR); + protected Comparator getComparator() { + return Comparator.comparing(SampleIndexVariant::getVariant, SampleIndexSchema.INTRA_CHROMOSOME_VARIANT_COMPARATOR); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSingleSampleIndexVariantDBIterator.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSingleSampleIndexVariantDBIterator.java index 05e9a9ffc37..de25dbadee9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSingleSampleIndexVariantDBIterator.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/RawSingleSampleIndexVariantDBIterator.java @@ -16,9 +16,9 @@ import java.util.Collections; import java.util.Iterator; -public class RawSingleSampleIndexVariantDBIterator extends CloseableIterator { +public class RawSingleSampleIndexVariantDBIterator extends CloseableIterator { - private final Iterator iterator; + private final Iterator iterator; protected int count = 0; public 
RawSingleSampleIndexVariantDBIterator(Table table, SingleSampleIndexQuery query, SampleIndexSchema schema, @@ -31,7 +31,7 @@ public RawSingleSampleIndexVariantDBIterator(Table table, SingleSampleIndexQuery locusQueries = query.getLocusQueries(); } - Iterator> iterators = locusQueries.stream() + Iterator> iterators = locusQueries.stream() .map(locusQuery -> { // One scan per locus query Scan scan = dbAdaptor.parseIncludeAll(query, locusQuery); @@ -41,7 +41,7 @@ public RawSingleSampleIndexVariantDBIterator(Table table, SingleSampleIndexQuery ResultScanner scanner = table.getScanner(scan); addCloseable(scanner); Iterator resultIterator = scanner.iterator(); - Iterator> transform = Iterators.transform(resultIterator, + Iterator> transform = Iterators.transform(resultIterator, result -> { SampleIndexEntry sampleIndexEntry = converter.convert(result); return filter.filter(sampleIndexEntry).iterator(); @@ -54,7 +54,7 @@ public RawSingleSampleIndexVariantDBIterator(Table table, SingleSampleIndexQuery iterator = Iterators.concat(iterators); } - private RawSingleSampleIndexVariantDBIterator(Iterator iterator) { + private RawSingleSampleIndexVariantDBIterator(Iterator iterator) { this.iterator = iterator; } @@ -68,7 +68,7 @@ public boolean hasNext() { } @Override - public SampleVariantIndexEntry next() { + public SampleIndexVariant next() { return iterator.next(); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 609f832afcd..1eeb2c5886a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -269,15 +269,15 @@ public RawSingleSampleIndexVariantDBIterator rawIterator(String study, String sa return rawInternalIterator(sampleIndexQuery, sampleIndexQuery.getSchema()); } - public CloseableIterator rawIterator(Query query) throws IOException { + public CloseableIterator rawIterator(Query query) throws IOException { return rawIterator(parseSampleIndexQuery(query)); } - public CloseableIterator rawIterator(SampleIndexQuery query) throws IOException { + public CloseableIterator rawIterator(SampleIndexQuery query) throws IOException { return rawIterator(query, new QueryOptions()); } - public CloseableIterator rawIterator(SampleIndexQuery query, QueryOptions options) throws IOException { + public CloseableIterator rawIterator(SampleIndexQuery query, QueryOptions options) throws IOException { Map> samples = query.getSamplesMap(); if (samples.isEmpty()) { @@ -323,15 +323,15 @@ public CloseableIterator rawIterator(SampleIndexQuery q } } - final CloseableIterator iterator; + final CloseableIterator iterator; if (operation.equals(QueryOperation.OR)) { logger.info("Union of " + iterators.size() + " sample indexes"); iterator = new UnionMultiKeyIterator<>( - Comparator.comparing(SampleVariantIndexEntry::getVariant, VariantDBIterator.VARIANT_COMPARATOR), iterators); + Comparator.comparing(SampleIndexVariant::getVariant, VariantDBIterator.VARIANT_COMPARATOR), iterators); } else { logger.info("Intersection of " + iterators.size() + " sample 
indexes plus " + negatedIterators.size() + " negated indexes"); iterator = new IntersectMultiKeyIterator<>( - Comparator.comparing(SampleVariantIndexEntry::getVariant, VariantDBIterator.VARIANT_COMPARATOR), + Comparator.comparing(SampleIndexVariant::getVariant, VariantDBIterator.VARIANT_COMPARATOR), iterators, negatedIterators); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java index 584d1952295..da139e99f1d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBLoader.java @@ -42,7 +42,7 @@ public class SampleIndexDBLoader extends AbstractHBaseDataWriter { @@ -134,7 +134,7 @@ private SampleIndexEntryPutBuilder fetchIndex(IndexChunk indexChunk, Integer sam } } - public void addVariant(int sampleIdx, String gt, SampleVariantIndexEntry variantIndexEntry) { + public void addVariant(int sampleIdx, String gt, SampleIndexVariant variantIndexEntry) { SampleIndexEntryPutBuilder sampleEntry = get(sampleIdx); if (isMerging() && !multiFileIndex) { // Look for the variant in any genotype to avoid duplications @@ -203,8 +203,8 @@ protected List convert(List variants) { if (validVariant(variant) && validGenotype(gt)) { genotypes.add(gt); Chunk chunk = buffer.computeIfAbsent(indexChunk, Chunk::new); - SampleVariantIndexEntry indexEntry = sampleVariantIndexEntryConverter - .createSampleVariantIndexEntry(sampleIdx, fileIdxMap[sampleIdx], variant); + SampleIndexVariant indexEntry = sampleIndexVariantConverter + .createSampleIndexVariant(sampleIdx, fileIdxMap[sampleIdx], variant); chunk.addVariant(sampleIdx, gt, indexEntry); } sampleIdx++; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java index 7e053fc9332..2529ce0855d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java @@ -342,7 +342,7 @@ public static class SampleIndexerMapper extends VariantTableSampleIndexOrderMapp private static final String HAS_GENOTYPE = "SampleIndexerMapper.hasGenotype"; public static final int SAMPLES_TO_COUNT = 2; private Set samplesToCount; - private SampleVariantIndexEntryConverter sampleVariantIndexEntryConverter; + private SampleIndexVariantConverter sampleIndexVariantConverter; private Map fixedAttributesPositions; private Map sampleDataKeyPositions; private final Map samples = new HashMap<>(); @@ -362,7 +362,7 @@ protected void setup(Context context) throws IOException, InterruptedException { hasGenotype = context.getConfiguration().getBoolean(HAS_GENOTYPE, true); schema = 
VariantMapReduceUtil.getSampleIndexSchema(context.getConfiguration()); studyId = context.getConfiguration().getInt(STUDY_ID, -1); - sampleVariantIndexEntryConverter = new SampleVariantIndexEntryConverter(schema); + sampleIndexVariantConverter = new SampleIndexVariantConverter(schema); progressLogger = new ProgressLogger("Processing variants").setBatchSize(10000); int[] sampleIds = context.getConfiguration().getInts(SAMPLES); @@ -422,7 +422,7 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t progressLogger.increment(1, () -> "up to variant " + variant); // Get fileIndex for each file - Map sampleIndexentryMap = new HashMap<>(); + Map sampleIndexentryMap = new HashMap<>(); variantRow.forEachFile(fileColumn -> { if ((partialScan && !this.files.contains(fileColumn.getFileId())) || fileColumn.getStudyId() != studyId) { @@ -433,7 +433,7 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t // Map fileAttributes = HBaseToStudyEntryConverter.convertFileAttributes(fileColumn.raw(), // fixedAttributes, includeAttributes); - SampleVariantIndexEntry indexEntry = sampleVariantIndexEntryConverter.createSampleVariantIndexEntry(0, + SampleIndexVariant indexEntry = sampleIndexVariantConverter.createSampleIndexVariant(0, variant, fileColumn.getCall(), fileColumn.getSecondaryAlternates(), @@ -501,12 +501,12 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t // Add fileIndex value for this genotype boolean fileFound = false; for (Integer fileId : files) { - SampleVariantIndexEntry indexEntry = sampleIndexentryMap.get(fileId); + SampleIndexVariant indexEntry = sampleIndexentryMap.get(fileId); if (indexEntry != null) { fileFound = true; // Copy bit buffer BitBuffer sampleFileIndex = new BitBuffer(indexEntry.getFilesIndex().get(0)); - sampleVariantIndexEntryConverter.addSampleDataIndexValues(sampleFileIndex, sampleDataKeyPositions, + sampleIndexVariantConverter.addSampleDataIndexValues(sampleFileIndex, sampleDataKeyPositions, sampleColumn::getSampleData); if (filePosition >= 0) { schema.getFileIndex().getFilePositionIndex().write(filePosition, sampleFileIndex); @@ -517,7 +517,7 @@ protected void map(ImmutableBytesWritable key, Result result, Context context) t } else { fileData = indexEntry.getFileData().get(0); } - builder.add(gt, new SampleVariantIndexEntry(variant, sampleFileIndex, fileData)); + builder.add(gt, new SampleIndexVariant(variant, sampleFileIndex, fileData)); countSampleGt(context, sampleId, gt); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java index ae4033a78cc..853d4491d22 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryIterator.java @@ -2,7 +2,7 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.opencga.storage.core.io.bit.BitBuffer; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import 
org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotation; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -38,12 +38,12 @@ public interface SampleIndexEntryIterator extends Iterator { */ Variant nextVariant(); - default SampleVariantIndexEntry nextSampleVariantIndexEntry() { - AnnotationIndexEntry annotationIndexEntry = nextAnnotationIndexEntry(); - if (annotationIndexEntry != null) { + default SampleIndexVariant nextSampleIndexVariant() { + SampleIndexVariantAnnotation annotationIndex = nextAnnotationIndexEntry(); + if (annotationIndex != null) { // Make a copy of the AnnotationIndexEntry! // This object could be reused - annotationIndexEntry = new AnnotationIndexEntry(annotationIndexEntry); + annotationIndex = new SampleIndexVariantAnnotation(annotationIndex); } List filesIndex = new ArrayList<>(); List filesData = new ArrayList<>(); @@ -65,7 +65,7 @@ default SampleVariantIndexEntry nextSampleVariantIndexEntry() { } String genotype = nextGenotype(); Variant variant = next(); - return new SampleVariantIndexEntry(variant, filesIndex, filesData, genotype, annotationIndexEntry, parentsCode, null); + return new SampleIndexVariant(variant, filesIndex, filesData, genotype, annotationIndex, parentsCode, null); } /** @@ -112,7 +112,7 @@ default SampleVariantIndexEntry nextSampleVariantIndexEntry() { /** * @return the AnnotationIndexEntry of the next element. */ - AnnotationIndexEntry nextAnnotationIndexEntry(); + SampleIndexVariantAnnotation nextAnnotationIndexEntry(); int getApproxSize(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java index 029aa655f72..31ca7b55ff0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java @@ -26,7 +26,7 @@ public class SampleIndexEntryPutBuilder { private final SampleIndexSchema schema; private final FileIndexSchema fileIndexSchema; private final FileDataSchema fileDataSchema; - private final SampleVariantIndexEntry.SampleVariantIndexEntryComparator comparator; + private final SampleIndexVariant.SampleIndexVariantComparator comparator; private final boolean orderedInput; private final boolean multiFileSample; @@ -59,19 +59,19 @@ public SampleIndexEntryPutBuilder(int sampleId, String chromosome, int position, this.schema = schema; fileIndexSchema = this.schema.getFileIndex(); fileDataSchema = this.schema.getFileData(); - comparator = new SampleVariantIndexEntry.SampleVariantIndexEntryComparator(schema); + comparator = new SampleIndexVariant.SampleIndexVariantComparator(schema); } public SampleIndexEntryPutBuilder(int sampleId, String chromosome, int position, SampleIndexSchema schema, - Map> map) { + Map> map) { // As there is already present data, this won't be an ordered input. 
this(sampleId, chromosome, position, schema, false, true); - for (Map.Entry> entry : map.entrySet()) { + for (Map.Entry> entry : map.entrySet()) { gts.put(entry.getKey(), new SampleIndexGtEntryBuilderTreeSet(entry.getKey(), entry.getValue())); } } - public boolean add(String gt, SampleVariantIndexEntry variantIndexEntry) { + public boolean add(String gt, SampleIndexVariant variantIndexEntry) { return get(gt).add(variantIndexEntry); } @@ -83,7 +83,7 @@ private SampleIndexGtEntryBuilder get(String gt) { ); } - public boolean containsVariant(SampleVariantIndexEntry variantIndexEntry) { + public boolean containsVariant(SampleIndexVariant variantIndexEntry) { for (Map.Entry entry : gts.entrySet()) { if (entry.getValue().containsVariant(variantIndexEntry)) { @@ -140,27 +140,27 @@ public String getGt() { return gt; } - public abstract Collection getEntries(); + public abstract Collection getEntries(); - public abstract boolean add(SampleVariantIndexEntry variantIndexEntry); + public abstract boolean add(SampleIndexVariant variantIndexEntry); - public abstract boolean containsVariant(SampleVariantIndexEntry variantIndexEntry); + public abstract boolean containsVariant(SampleIndexVariant variantIndexEntry); public abstract int containsVariants(SampleIndexGtEntryBuilder entries); public void build(Put put) { - Collection gtEntries = getEntries(); + Collection gtEntries = getEntries(); BitBuffer fileIndexBuffer = new BitBuffer(fileIndexSchema.getBitsLength() * gtEntries.size()); ByteBuffer fileDataIndexBuffer = ByteBuffer.allocate(gtEntries.stream() - .mapToInt(SampleVariantIndexEntry::getFileDataIndexBytes) + .mapToInt(SampleIndexVariant::getFileDataIndexBytes) .map(i -> i + 4) .sum()); int offset = 0; - SampleVariantIndexEntry prev = null; + SampleIndexVariant prev = null; List variants = new ArrayList<>(gtEntries.size()); - for (SampleVariantIndexEntry gtEntry : gtEntries) { + for (SampleIndexVariant gtEntry : gtEntries) { Variant variant = gtEntry.getVariant(); if (prev == null || !prev.getVariant().sameGenomicVariant(variant)) { variants.add(variant); @@ -191,35 +191,35 @@ public void build(Put put) { } private class SampleIndexGtEntryBuilderTreeSet extends SampleIndexGtEntryBuilder { - private final TreeSet entries; + private final TreeSet entries; SampleIndexGtEntryBuilderTreeSet(String gt) { super(gt); entries = new TreeSet<>(comparator); } - SampleIndexGtEntryBuilderTreeSet(String gt, TreeSet entries) { + SampleIndexGtEntryBuilderTreeSet(String gt, TreeSet entries) { super(gt); this.entries = entries; } @Override - public Collection getEntries() { + public Collection getEntries() { return entries; } @Override - public boolean add(SampleVariantIndexEntry variantIndexEntry) { + public boolean add(SampleIndexVariant variantIndexEntry) { return entries.add(variantIndexEntry); } @Override - public boolean containsVariant(SampleVariantIndexEntry variantIndexEntry) { - SampleVariantIndexEntry lower = entries.lower(variantIndexEntry); + public boolean containsVariant(SampleIndexVariant variantIndexEntry) { + SampleIndexVariant lower = entries.lower(variantIndexEntry); if (lower != null && lower.getVariant().sameGenomicVariant(variantIndexEntry.getVariant())) { return true; } - SampleVariantIndexEntry ceiling = entries.ceiling(variantIndexEntry); + SampleIndexVariant ceiling = entries.ceiling(variantIndexEntry); if (ceiling != null && ceiling.getVariant().sameGenomicVariant(variantIndexEntry.getVariant())) { return true; } @@ -229,7 +229,7 @@ public boolean containsVariant(SampleVariantIndexEntry 
variantIndexEntry) { @Override public int containsVariants(SampleIndexGtEntryBuilder other) { int c = 0; - for (SampleVariantIndexEntry entry : other.getEntries()) { + for (SampleIndexVariant entry : other.getEntries()) { if (containsVariant(entry)) { c++; } @@ -239,8 +239,8 @@ public int containsVariants(SampleIndexGtEntryBuilder other) { } private class SampleIndexGtEntryBuilderAssumeOrdered extends SampleIndexGtEntryBuilder { - protected final ArrayDeque entries; - protected SampleVariantIndexEntry lastEntry; + protected final ArrayDeque entries; + protected SampleIndexVariant lastEntry; SampleIndexGtEntryBuilderAssumeOrdered(String gt) { super(gt); @@ -248,12 +248,12 @@ private class SampleIndexGtEntryBuilderAssumeOrdered extends SampleIndexGtEntryB } @Override - public Collection getEntries() { + public Collection getEntries() { return entries; } @Override - public boolean add(SampleVariantIndexEntry variantIndexEntry) { + public boolean add(SampleIndexVariant variantIndexEntry) { if (lastEntry != null && comparator.compare(lastEntry, variantIndexEntry) >= 0) { // Small out-of-order is expected in duplicated variants. // The order regarding the comparator will depend not only on the "variant", but @@ -267,7 +267,7 @@ public boolean add(SampleVariantIndexEntry variantIndexEntry) { } // Insert ordered. Take out values into a Deque to find the position where the entry // should be placed. - ArrayDeque removedEntries = new ArrayDeque<>(1); + ArrayDeque removedEntries = new ArrayDeque<>(1); do { // Add first to preserve order removedEntries.addFirst(entries.removeLast()); @@ -282,8 +282,8 @@ public boolean add(SampleVariantIndexEntry variantIndexEntry) { } @Override - public boolean containsVariant(SampleVariantIndexEntry variantIndexEntry) { - for (SampleVariantIndexEntry entry : entries) { + public boolean containsVariant(SampleIndexVariant variantIndexEntry) { + for (SampleIndexVariant entry : entries) { if (entry.getVariant().sameGenomicVariant(variantIndexEntry.getVariant())) { return true; } @@ -293,7 +293,7 @@ public boolean containsVariant(SampleVariantIndexEntry variantIndexEntry) { @Override public int containsVariants(SampleIndexGtEntryBuilder other) { - TreeSet tree = new TreeSet<>(comparator); + TreeSet tree = new TreeSet<>(comparator); tree.addAll(this.entries); return new SampleIndexGtEntryBuilderTreeSet(getGt(), tree).containsVariants(other); } @@ -302,7 +302,7 @@ public int containsVariants(SampleIndexGtEntryBuilder other) { private class SampleIndexGtEntryBuilderWithPartialBuilds extends SampleIndexGtEntryBuilderAssumeOrdered { private final int lowerThreshold; private final int upperThreshold; - private SampleVariantIndexEntry prev = null; + private SampleIndexVariant prev = null; // Variants is a shared object. No problem for the GC. private final ArrayList variants = new ArrayList<>(0); // This is the real issue. This might produce the "too many objects" problem. Need to run "partial builds" from time to time. 
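The "too many objects" comment above motivates the partial-build variant of the builder: entries accumulate in a deque and are flushed in batches once an upper threshold is reached, so memory stays bounded even for very large genotype entries. A minimal sketch of that flushing pattern, assuming hypothetical lowerThreshold/upperThreshold values and a flushEntry() sink (the real partialBuild serializes entries into the BitBuffer/ByteBuffer columns of the HBase Put):

    import java.util.ArrayDeque;

    // Illustrative only: buffers items and, when the upper threshold is reached,
    // drains them down to the lower threshold; partialBuild(true) drains everything.
    class PartialBuildBuffer<T> {
        private final ArrayDeque<T> entries = new ArrayDeque<>();
        private final int lowerThreshold;
        private final int upperThreshold;

        PartialBuildBuffer(int lowerThreshold, int upperThreshold) {
            this.lowerThreshold = lowerThreshold;
            this.upperThreshold = upperThreshold;
        }

        void add(T entry) {
            entries.addLast(entry);
            if (entries.size() >= upperThreshold) {
                partialBuild(false);
            }
        }

        void partialBuild(boolean flush) {
            // Keep the newest lowerThreshold entries on a partial build so slightly
            // out-of-order duplicates near the tail can still be merged; drain all on flush.
            int keep = flush ? 0 : lowerThreshold;
            while (entries.size() > keep) {
                flushEntry(entries.removeFirst());
            }
        }

        void flushEntry(T entry) {
            // Hypothetical sink; the real builder appends to the serialized column buffers here.
        }
    }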
@@ -320,7 +320,7 @@ private class SampleIndexGtEntryBuilderWithPartialBuilds extends SampleIndexGtEn } @Override - public boolean add(SampleVariantIndexEntry variantIndexEntry) { + public boolean add(SampleIndexVariant variantIndexEntry) { boolean add = super.add(variantIndexEntry); if (entries.size() >= upperThreshold) { partialBuild(false); @@ -330,7 +330,7 @@ public boolean add(SampleVariantIndexEntry variantIndexEntry) { } @Override - public boolean containsVariant(SampleVariantIndexEntry variantIndexEntry) { + public boolean containsVariant(SampleIndexVariant variantIndexEntry) { return containsVariant(variantIndexEntry.getVariant()); } @@ -340,7 +340,7 @@ public boolean containsVariant(Variant variant) { return true; } } - for (SampleVariantIndexEntry entry : entries) { + for (SampleIndexVariant entry : entries) { if (entry.getVariant().sameGenomicVariant(variant)) { return true; } @@ -353,7 +353,7 @@ public int containsVariants(SampleIndexGtEntryBuilder other) { // Build a temporary TreeSet for fast searching. TreeSet set = new TreeSet<>(INTRA_CHROMOSOME_VARIANT_COMPARATOR); set.addAll(variants); - for (SampleVariantIndexEntry entry : entries) { + for (SampleIndexVariant entry : entries) { set.add(entry.getVariant()); } @@ -365,7 +365,7 @@ public int containsVariants(SampleIndexGtEntryBuilder other) { } } } - for (SampleVariantIndexEntry otherEntry : other.getEntries()) { + for (SampleIndexVariant otherEntry : other.getEntries()) { if (set.contains(otherEntry.getVariant())) { c++; } @@ -381,7 +381,7 @@ private void partialBuild(boolean flush) { variants.ensureCapacity(variants.size() + entries.size()); int processedEntries = 0; while (!entries.isEmpty()) { - SampleVariantIndexEntry gtEntry = entries.removeFirst(); + SampleIndexVariant gtEntry = entries.removeFirst(); Variant variant = gtEntry.getVariant(); // This if-statement won't be executed in "flush==true" if (processedEntries >= entriesToProcess) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java index 2fc267b7ff8..994b26c86b0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java @@ -40,7 +40,7 @@ import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.*; import static org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantSqlQueryParser.DEFAULT_LOADED_GENOTYPES; -import static org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter.*; +import static org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter.*; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema.INTRA_CHROMOSOME_VARIANT_COMPARATOR; /** diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index a0e4ff5905f..5f9c4088484 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -25,7 +25,7 @@ * {@link SampleIndexEntry}: HBase row. Contains all the information from a sample in a specific region. * {@link SampleIndexEntry.SampleIndexGtEntry}: HBase columns grouped by genotype. * {@link SampleIndexEntryIterator}: Iterator over the variants of a {@link SampleIndexEntry} - * {@link SampleVariantIndexEntry}: Logical view over an entry for a specific variant and corresponding keys + * {@link SampleIndexVariant}: Logical view over an entry for a specific variant and corresponding keys *

* - Row : {SAMPLE_ID}_{CHROMOSOME}_{BATCH_START} * - Variants columns: {GT} -> [{variant1}, {variant2}, {variant3}, ...] diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariant.java similarity index 83% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariant.java index f346cf69180..9131c60e044 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntry.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariant.java @@ -5,7 +5,7 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAvro; import org.opencb.opencga.storage.core.io.bit.BitBuffer; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotation; import java.nio.ByteBuffer; import java.util.Collections; @@ -15,24 +15,24 @@ import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema.INTRA_CHROMOSOME_VARIANT_COMPARATOR; -public class SampleVariantIndexEntry { +public class SampleIndexVariant { private final Variant variant; private final String genotype; private final List filesIndex; private final List fileData; - private final AnnotationIndexEntry annotationIndexEntry; + private final SampleIndexVariantAnnotation annotationIndex; private final Integer meCode; private final Byte parentsCode; - public SampleVariantIndexEntry(Variant variant, BitBuffer fileIndex, ByteBuffer fileData) { + public SampleIndexVariant(Variant variant, BitBuffer fileIndex, ByteBuffer fileData) { this(variant, Collections.singletonList(fileIndex), fileData == null ? 
Collections.emptyList() : Collections.singletonList(fileData), null, null, null, null); } - public SampleVariantIndexEntry(Variant variant, List filesIndex, List fileData, String genotype, - AnnotationIndexEntry annotationIndexEntry, Byte parentsCode, Integer meCode) { + public SampleIndexVariant(Variant variant, List filesIndex, List fileData, String genotype, + SampleIndexVariantAnnotation annotationIndex, Byte parentsCode, Integer meCode) { if (CollectionUtils.isEmpty(variant.getImpl().getStudies())) { this.variant = variant; } else { @@ -53,7 +53,7 @@ public SampleVariantIndexEntry(Variant variant, List filesIndex, Lis this.filesIndex = filesIndex; this.fileData = fileData; this.genotype = genotype; - this.annotationIndexEntry = annotationIndexEntry; + this.annotationIndex = annotationIndex; this.meCode = meCode; this.parentsCode = parentsCode; } @@ -91,8 +91,8 @@ public Byte getParentsCode() { return parentsCode; } - public AnnotationIndexEntry getAnnotationIndexEntry() { - return annotationIndexEntry; + public SampleIndexVariantAnnotation getAnnotationIndexEntry() { + return annotationIndex; } @Override @@ -103,7 +103,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - SampleVariantIndexEntry that = (SampleVariantIndexEntry) o; + SampleIndexVariant that = (SampleIndexVariant) o; return filesIndex.equals(that.filesIndex) && Objects.equals(variant, that.variant); } @@ -143,22 +143,22 @@ public String toString(SampleIndexSchema schema, String separator) { sb.append(separator).append("parents: ") .append(this.parentsCode); - if (annotationIndexEntry != null) { - annotationIndexEntry.toString(schema, separator, sb); + if (annotationIndex != null) { + annotationIndex.toString(schema, separator, sb); } return sb.toString(); } - public static class SampleVariantIndexEntryComparator implements Comparator { + public static class SampleIndexVariantComparator implements Comparator { private final SampleIndexSchema schema; - public SampleVariantIndexEntryComparator(SampleIndexSchema schema) { + public SampleIndexVariantComparator(SampleIndexSchema schema) { this.schema = schema; } @Override - public int compare(SampleVariantIndexEntry o1, SampleVariantIndexEntry o2) { + public int compare(SampleIndexVariant o1, SampleIndexVariant o2) { int compare = INTRA_CHROMOSOME_VARIANT_COMPARATOR.compare(o1.variant, o2.variant); if (compare != 0) { return compare; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java index 1b26888615e..4e61f5400aa 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverter.java @@ -8,7 +8,7 @@ import org.opencb.opencga.storage.core.io.bit.BitBuffer; import org.opencb.opencga.storage.core.io.bit.BitInputStream; import org.opencb.opencga.storage.hadoop.variant.adaptors.phoenix.VariantPhoenixKeyFactory; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexEntry; +import 
org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotation; import org.opencb.opencga.storage.hadoop.variant.index.family.MendelianErrorSampleIndexEntryIterator; import java.io.ByteArrayOutputStream; @@ -217,14 +217,14 @@ private abstract static class SampleIndexGtEntryIterator implements SampleIndexE private int fileIndexIdx; // Index over file index array. Index of last visited fileIndex // Reuse the annotation index entry. Avoid create a new instance for each variant. - private final AnnotationIndexEntry annotationIndexEntry; + private final SampleIndexVariantAnnotation annotationIndex; private int annotationIndexEntryIdx; private BitInputStream clinicalIndex; SampleIndexGtEntryIterator(SampleIndexSchema schema) { nonIntergenicCount = 0; clinicalCount = 0; - annotationIndexEntry = AnnotationIndexEntry.empty(schema); + annotationIndex = SampleIndexVariantAnnotation.empty(schema); annotationIndexEntryIdx = -1; fileIndexIdx = 0; fileIndexCount = 0; @@ -319,9 +319,9 @@ public byte nextParentsIndexEntry() { return gtEntry.getParentsIndex(nextIndex()); } - public AnnotationIndexEntry nextAnnotationIndexEntry() { + public SampleIndexVariantAnnotation nextAnnotationIndexEntry() { if (annotationIndexEntryIdx == nextIndex()) { - return annotationIndexEntry; + return annotationIndex; } if (gtEntry.getAnnotationIndex() == null && popFreq == null) { @@ -329,35 +329,35 @@ public AnnotationIndexEntry nextAnnotationIndexEntry() { } int idx = nextIndex(); - annotationIndexEntry.clear(); + annotationIndex.clear(); if (gtEntry.getAnnotationIndex() != null) { - annotationIndexEntry.setSummaryIndex(gtEntry.getAnnotationIndex(idx)); - boolean nonIntergenic = AbstractSampleIndexEntryFilter.isNonIntergenic(annotationIndexEntry.getSummaryIndex()); - annotationIndexEntry.setIntergenic(!nonIntergenic); + annotationIndex.setSummaryIndex(gtEntry.getAnnotationIndex(idx)); + boolean nonIntergenic = AbstractSampleIndexEntryFilter.isNonIntergenic(annotationIndex.getSummaryIndex()); + annotationIndex.setIntergenic(!nonIntergenic); if (nonIntergenic) { int nextNonIntergenic = nextNonIntergenicIndex(); if (ctIndex != null) { - annotationIndexEntry.setCtIndex(schema.getCtIndex().readFieldValue(ctIndex, nextNonIntergenic)); + annotationIndex.setCtIndex(schema.getCtIndex().readFieldValue(ctIndex, nextNonIntergenic)); } if (btIndex != null) { - annotationIndexEntry.setBtIndex(schema.getBiotypeIndex().readFieldValue(btIndex, nextNonIntergenic)); + annotationIndex.setBtIndex(schema.getBiotypeIndex().readFieldValue(btIndex, nextNonIntergenic)); } if (tfIndex != null) { - annotationIndexEntry.setTfIndex(schema.getTranscriptFlagIndexSchema().readFieldValue(tfIndex, nextNonIntergenic)); + annotationIndex.setTfIndex(schema.getTranscriptFlagIndexSchema().readFieldValue(tfIndex, nextNonIntergenic)); } if (ctBtTfIndex != null - && annotationIndexEntry.getCtIndex() != 0 - && annotationIndexEntry.getBtIndex() != 0 - && annotationIndexEntry.getTfIndex() != 0) { + && annotationIndex.getCtIndex() != 0 + && annotationIndex.getBtIndex() != 0 + && annotationIndex.getTfIndex() != 0) { schema.getCtBtTfIndex().getField().read( ctBtTfIndex, - annotationIndexEntry.getCtIndex(), - annotationIndexEntry.getBtIndex(), - annotationIndexEntry.getTfIndex(), - annotationIndexEntry.getCtBtTfCombination()); + annotationIndex.getCtIndex(), + annotationIndex.getBtIndex(), + annotationIndex.getTfIndex(), + annotationIndex.getCtBtTfCombination()); } } } @@ -365,21 +365,21 @@ public AnnotationIndexEntry nextAnnotationIndexEntry() { if 
(popFreq != null) { // TODO: Reuse BitBuffer BitBuffer popFreqIndex = popFreq.readBitBuffer(schema.getPopFreqIndex().getBitsLength()); - annotationIndexEntry.setPopFreqIndex(popFreqIndex); + annotationIndex.setPopFreqIndex(popFreqIndex); } if (gtEntry.getClinicalIndex() != null) { - boolean clinical = AbstractSampleIndexEntryFilter.isClinical(annotationIndexEntry.getSummaryIndex()); - annotationIndexEntry.setHasClinical(clinical); + boolean clinical = AbstractSampleIndexEntryFilter.isClinical(annotationIndex.getSummaryIndex()); + annotationIndex.setHasClinical(clinical); if (clinical) { int nextClinical = nextClinicalIndex(); // TODO: Reuse BitBuffer - annotationIndexEntry.setClinicalIndex(schema.getClinicalIndexSchema().readDocument(clinicalIndex, nextClinical)); + annotationIndex.setClinicalIndex(schema.getClinicalIndexSchema().readDocument(clinicalIndex, nextClinical)); } } annotationIndexEntryIdx = idx; - return annotationIndexEntry; + return annotationIndex; } @Override @@ -507,7 +507,7 @@ public byte nextParentsIndexEntry() { } @Override - public AnnotationIndexEntry nextAnnotationIndexEntry() { + public SampleIndexVariantAnnotation nextAnnotationIndexEntry() { throw new NoSuchElementException("Empty iterator"); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantConverter.java similarity index 94% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantConverter.java index d7200fcc234..895b3f3c859 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantConverter.java @@ -18,17 +18,17 @@ import java.util.function.Function; import java.util.function.IntFunction; -public class SampleVariantIndexEntryConverter { +public class SampleIndexVariantConverter { private final FileIndexSchema fileIndex; private final FileDataSchema fileDataSchema; - public SampleVariantIndexEntryConverter(SampleIndexSchema configuration) { + public SampleIndexVariantConverter(SampleIndexSchema configuration) { fileIndex = configuration.getFileIndex(); fileDataSchema = configuration.getFileData(); } - public SampleVariantIndexEntry createSampleVariantIndexEntry(int sampleIdx, int filePosition, Variant variant) { + public SampleIndexVariant createSampleIndexVariant(int sampleIdx, int filePosition, Variant variant) { // Expecting only one study and only one file StudyEntry study = variant.getStudies().get(0); FileEntry file = study.getFiles().get(0); @@ -38,17 +38,17 @@ public SampleVariantIndexEntry createSampleVariantIndexEntry(int sampleIdx, int ByteBuffer fileDataIndexValue = createFileDataIndexValue(variant, filePosition, file.getCall(), study.getSecondaryAlternates()); - return new SampleVariantIndexEntry(variant, fileIndexValue, 
fileDataIndexValue); + return new SampleIndexVariant(variant, fileIndexValue, fileDataIndexValue); } - public SampleVariantIndexEntry createSampleVariantIndexEntry( + public SampleIndexVariant createSampleIndexVariant( int filePosition, Variant variant, OriginalCall call, List alts, Function fileAttributes, Function sampleData) { BitBuffer fileIndexValue = createFileIndexValue(variant.getType(), filePosition, fileAttributes, sampleData); ByteBuffer fileDataIndexValue = createFileDataIndexValue(variant, filePosition, call, alts); - return new SampleVariantIndexEntry(variant, fileIndexValue, fileDataIndexValue); + return new SampleIndexVariant(variant, fileIndexValue, fileDataIndexValue); } /** diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java index d52b17d3c97..7bb7fe0e10e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java @@ -463,7 +463,7 @@ public static void printSampleIndexTable2(VariantHadoopDBAdaptor dbAdaptor, Path out.println(""); out.println("SAMPLE: " + sampleName + " (id=" + sampleId + ")"); while (it.hasNext()) { - SampleVariantIndexEntry entry = it.next(); + SampleIndexVariant entry = it.next(); out.println("_______________________"); out.println(entry.toString(schema)); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexConverterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverterTest.java similarity index 96% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexConverterTest.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverterTest.java index 14b8ef87c73..d240fb67007 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/AnnotationIndexConverterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverterTest.java @@ -19,7 +19,7 @@ import java.util.List; import static org.junit.Assert.assertEquals; -import static org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter.*; +import static org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter.*; /** * Created on 17/04/19. 
@@ -27,9 +27,9 @@ * @author Jacobo Coll <jacobo167@gmail.com> */ @Category(ShortTests.class) -public class AnnotationIndexConverterTest { +public class SampleIndexVariantAnnotationConverterTest { - private AnnotationIndexConverter converter; + private SampleIndexVariantAnnotationConverter converter; byte b; private SampleIndexSchema schema; @@ -49,7 +49,7 @@ public void setUp() throws Exception { populations.stream().map(SampleIndexConfiguration.Population::new).forEach(configuration::addPopulation); schema = new SampleIndexSchema(configuration, StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION); - converter = new AnnotationIndexConverter(schema); + converter = new SampleIndexVariantAnnotationConverter(schema); } // @After @@ -178,7 +178,7 @@ public void testDuplicatedPopulations() { List populations = Arrays.asList("1000G:ALL", "GNOMAD_GENOMES:ALL", "1000G:ALL"); SampleIndexConfiguration configuration = new SampleIndexConfiguration(); populations.stream().map(SampleIndexConfiguration.Population::new).forEach(configuration::addPopulation); - new AnnotationIndexConverter(new SampleIndexSchema(configuration, StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION)); + new SampleIndexVariantAnnotationConverter(new SampleIndexSchema(configuration, StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION)); } @Test diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexConverterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexConverterTest.java index f719a97b078..e78a7aa4a6b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexConverterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/family/MendelianErrorSampleIndexConverterTest.java @@ -4,7 +4,7 @@ import org.junit.experimental.categories.Category; import org.opencb.biodata.models.variant.Variant; import org.opencb.opencga.core.testclassification.duration.ShortTests; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexEntry; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; @@ -59,7 +59,7 @@ private void checkIterator(List variants01, List variants11, S byte[] annot = new byte[numVariants]; for (int i = 0; i < numVariants; i++) { if (i % 3 != 0) { - annot[i] = AnnotationIndexConverter.INTERGENIC_MASK; + annot[i] = SampleIndexVariantAnnotationConverter.INTERGENIC_MASK; } } entry.getGtEntry("0/1").setAnnotationIndex(annot); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java index a631c07e099..11d3ac453e9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java @@ -19,8 +19,8 @@ import org.opencb.opencga.storage.core.variant.query.OpValue; import org.opencb.opencga.storage.core.variant.query.Values; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexPutBuilder; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationPutBuilder; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.RangeIndexFieldFilter; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleAnnotationIndexQuery; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleFileIndexQuery; @@ -33,7 +33,7 @@ import java.util.Map; import java.util.stream.Collectors; -import static org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverterTest.*; +import static org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverterTest.*; @Category(ShortTests.class) public class SampleIndexEntryFilterTest { @@ -123,9 +123,9 @@ public void testCtBtCombinationFilter() { } private SampleIndexEntry getSampleIndexEntry1() { - AnnotationIndexConverter converter = new AnnotationIndexConverter(schema); + SampleIndexVariantAnnotationConverter converter = new SampleIndexVariantAnnotationConverter(schema); //{0.001, 0.005, 0.01}; - Map map = new AnnotationIndexPutBuilder(schema) + Map map = new SampleIndexVariantAnnotationPutBuilder(schema) .add(converter.convert(annot( pf("s1", "ALL", 0.0), pf("s2", "ALL", 0.0), @@ -199,9 +199,9 @@ private SampleIndexEntry getSampleIndexEntry1() { } private SampleIndexEntry getSampleIndexEntry2() { - AnnotationIndexConverter converter = new AnnotationIndexConverter(schema); + SampleIndexVariantAnnotationConverter converter = new SampleIndexVariantAnnotationConverter(schema); - Map map = new AnnotationIndexPutBuilder(schema) + Map map = new SampleIndexVariantAnnotationPutBuilder(schema) .add(converter.convert(annot( ct("missense_variant", "protein_coding"), ct("start_lost", "nonsense_mediated_decay"), diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java index 1f64db0132b..816588fd4a7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilderTest.java @@ -37,27 +37,27 @@ public void testContains() { assertFalse(builder.containsVariant(newVariantIndexEntry("1:500:A:C", (short) 30))); } - protected SampleVariantIndexEntry newVariantIndexEntry(String s, short i) { + protected SampleIndexVariant newVariantIndexEntry(String s, short i) { byte[] v = new byte[2]; Bytes.putShort(v, 0, i); - return 
new SampleVariantIndexEntry(new Variant(s), new BitBuffer(v), null); + return new SampleIndexVariant(new Variant(s), new BitBuffer(v), null); } @Test public void testBuild() { SampleIndexSchema schema = SampleIndexSchema.defaultSampleIndexSchema(); - SampleVariantIndexEntryConverter c = new SampleVariantIndexEntryConverter(schema); + SampleIndexVariantConverter c = new SampleIndexVariantConverter(schema); SampleIndexEntryPutBuilder builder = new SampleIndexEntryPutBuilder(1, "1", 10, schema, false, true); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:100:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 1, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - assertTrue(builder.add("1/1", new SampleVariantIndexEntry(new Variant("1:300:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - assertTrue(builder.add("1/1", new SampleVariantIndexEntry(new Variant("1:400:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - - assertFalse(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - assertTrue(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); - assertFalse(builder.add("0/1", new SampleVariantIndexEntry(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleIndexVariant(new Variant("1:100:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleIndexVariant(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 1, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleIndexVariant(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("1/1", new SampleIndexVariant(new Variant("1:300:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("1/1", new SampleIndexVariant(new Variant("1:400:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + + assertFalse(builder.add("0/1", new SampleIndexVariant(new Variant("1:200:A:C"), c.createFileIndexValue(SNV, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertTrue(builder.add("0/1", new SampleIndexVariant(new Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); + assertFalse(builder.add("0/1", new SampleIndexVariant(new 
Variant("1:200:A:C"), c.createFileIndexValue(INDEL, 0, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList()), null))); Put build = builder.build(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 82a1e43a53b..e116f29a9be 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -28,7 +28,7 @@ import org.opencb.opencga.storage.core.variant.query.Values; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.hadoop.variant.HadoopVariantQueryParser; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter; import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.RangeIndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter; @@ -46,7 +46,7 @@ import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.*; import static org.opencb.opencga.storage.hadoop.variant.index.IndexUtils.EMPTY_MASK; -import static org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter.*; +import static org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter.*; import static org.opencb.opencga.storage.hadoop.variant.index.core.RangeIndexField.DELTA; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser.buildLocusQueries; import static org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser.validSampleIndexQuery; @@ -1675,7 +1675,7 @@ public void testCoveredQuery_popFreq() { SampleAnnotationIndexQuery indexQuery; // Query fully covered by summary index. - query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), String.join(OR, new ArrayList<>(AnnotationIndexConverter.POP_FREQ_ANY_001_FILTERS))); + query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), String.join(OR, new ArrayList<>(SampleIndexVariantAnnotationConverter.POP_FREQ_ANY_001_FILTERS))); indexQuery = parseAnnotationIndexQuery(query, true); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndexMask() & POP_FREQ_ANY_001_MASK); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndex() & POP_FREQ_ANY_001_MASK); @@ -1708,7 +1708,7 @@ public void testCoveredQuery_popFreq() { assertEquals(Collections.singleton(ANNOT_POPULATION_ALTERNATE_FREQUENCY), validParams(query, true)); // Summary index query plus a new filter. 
Use only popFreqIndex - query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), String.join(OR, new ArrayList<>(AnnotationIndexConverter.POP_FREQ_ANY_001_FILTERS)) + OR + "s1:ALL<0.005"); + query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), String.join(OR, new ArrayList<>(SampleIndexVariantAnnotationConverter.POP_FREQ_ANY_001_FILTERS)) + OR + "s1:ALL<0.005"); indexQuery = parseAnnotationIndexQuery(query, true); assertEquals(EMPTY_MASK, indexQuery.getAnnotationIndexMask() & POP_FREQ_ANY_001_MASK); assertEquals(EMPTY_MASK, indexQuery.getAnnotationIndex() & POP_FREQ_ANY_001_MASK); @@ -1717,7 +1717,7 @@ public void testCoveredQuery_popFreq() { assertEquals(Collections.emptySet(), validParams(query, true)); // Summary index query with AND instead of OR filter. Use both, summary and popFreqIndex - query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), String.join(AND, new ArrayList<>(AnnotationIndexConverter.POP_FREQ_ANY_001_FILTERS))); + query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), String.join(AND, new ArrayList<>(SampleIndexVariantAnnotationConverter.POP_FREQ_ANY_001_FILTERS))); indexQuery = parseAnnotationIndexQuery(query, true); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndexMask() & POP_FREQ_ANY_001_MASK); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndex() & POP_FREQ_ANY_001_MASK); @@ -1727,7 +1727,7 @@ public void testCoveredQuery_popFreq() { // Summary index query with AND instead of OR filter plus a new filter. Use both, summary and popFreqIndex. Leave extra filter in query query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), - String.join(AND, new ArrayList<>(AnnotationIndexConverter.POP_FREQ_ANY_001_FILTERS)) + AND + "s1:ALL<0.065132"); + String.join(AND, new ArrayList<>(SampleIndexVariantAnnotationConverter.POP_FREQ_ANY_001_FILTERS)) + AND + "s1:ALL<0.065132"); indexQuery = parseAnnotationIndexQuery(query, true); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndexMask() & POP_FREQ_ANY_001_MASK); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndex() & POP_FREQ_ANY_001_MASK); @@ -1737,7 +1737,7 @@ public void testCoveredQuery_popFreq() { // Summary index query with AND instead of OR filter plus a new filter. Use both, summary and popFreqIndex. 
Clear covered query query = new Query().append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), - String.join(AND, new ArrayList<>(AnnotationIndexConverter.POP_FREQ_ANY_001_FILTERS)) + AND + "s1:ALL<0.005"); + String.join(AND, new ArrayList<>(SampleIndexVariantAnnotationConverter.POP_FREQ_ANY_001_FILTERS)) + AND + "s1:ALL<0.005"); indexQuery = parseAnnotationIndexQuery(query, true); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndexMask() & POP_FREQ_ANY_001_MASK); assertEquals(POP_FREQ_ANY_001_MASK, indexQuery.getAnnotationIndex() & POP_FREQ_ANY_001_MASK); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverterTest.java index 6b135faadc2..7946153ca5f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantBiConverterTest.java @@ -8,7 +8,7 @@ import org.junit.experimental.categories.Category; import org.opencb.biodata.models.variant.Variant; import org.opencb.opencga.core.testclassification.duration.ShortTests; -import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; +import org.opencb.opencga.storage.hadoop.variant.index.annotation.SampleIndexVariantAnnotationConverter; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -182,7 +182,7 @@ private void checkIterator(int numVariants, List variants, SampleIndexE byte[] annot = new byte[numVariants]; for (int i = 0; i < numVariants; i++) { if (i % 3 != 0) { - annot[i] = AnnotationIndexConverter.INTERGENIC_MASK; + annot[i] = SampleIndexVariantAnnotationConverter.INTERGENIC_MASK; } } entry.setAnnotationIndex(annot); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantConverterTest.java similarity index 95% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantConverterTest.java index 5199fbc4f45..28426c53eb8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantConverterTest.java @@ -16,14 +16,14 @@ import static org.junit.Assert.assertEquals; @Category(ShortTests.class) -public class SampleVariantIndexEntryConverterTest { +public class SampleIndexVariantConverterTest { - private SampleVariantIndexEntryConverter converter; + 
private SampleIndexVariantConverter converter; private FileIndexSchema fileIndex; @Before public void setUp() throws Exception { - converter = new SampleVariantIndexEntryConverter(SampleIndexSchema.defaultSampleIndexSchema()); + converter = new SampleIndexVariantConverter(SampleIndexSchema.defaultSampleIndexSchema()); fileIndex = (SampleIndexSchema.defaultSampleIndexSchema()).getFileIndex(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantTest.java similarity index 72% rename from opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java rename to opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantTest.java index 7b5165ab5c6..4142450ae56 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexVariantTest.java @@ -15,7 +15,7 @@ import java.util.List; @Category(ShortTests.class) -public class SampleVariantIndexEntryTest { +public class SampleIndexVariantTest { private FileIndexSchema fileIndex; @@ -27,7 +27,7 @@ public void setUp() throws Exception { @Test public void testComparator() { - List expected = Arrays.asList( + List expected = Arrays.asList( newVariantIndexEntry("1:100:A:C", 0), newVariantIndexEntry("1:200:A:C", ((1 << 4)), true), newVariantIndexEntry("1:200:A:C", (2 << 4)), @@ -36,10 +36,10 @@ public void testComparator() { ); for (int i = 0; i < 10; i++) { - ArrayList actual = new ArrayList<>(expected); + ArrayList actual = new ArrayList<>(expected); Collections.shuffle(actual); - actual.sort(new SampleVariantIndexEntry.SampleVariantIndexEntryComparator(SampleIndexSchema.defaultSampleIndexSchema())); + actual.sort(new SampleIndexVariant.SampleIndexVariantComparator(SampleIndexSchema.defaultSampleIndexSchema())); Assert.assertEquals(expected, actual); } @@ -47,17 +47,17 @@ public void testComparator() { } - protected SampleVariantIndexEntry newVariantIndexEntry(String s, int i) { + protected SampleIndexVariant newVariantIndexEntry(String s, int i) { return newVariantIndexEntry(s, i, false); } - protected SampleVariantIndexEntry newVariantIndexEntry(String s, int i, boolean multiFileIndex) { + protected SampleIndexVariant newVariantIndexEntry(String s, int i, boolean multiFileIndex) { byte[] v = new byte[4]; Bytes.putInt(v, 0, i); BitBuffer fileIndex = new BitBuffer(v); if (multiFileIndex) { this.fileIndex.setMultiFile(fileIndex, 0); } - return new SampleVariantIndexEntry(new Variant(s), fileIndex, null); + return new SampleIndexVariant(new Variant(s), fileIndex, null); } } \ No newline at end of file From ec84b74e6130e8b78264aeb7b2e8294f1e2437c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 18 Oct 2024 17:11:55 +0100 Subject: [PATCH 15/19] storage: Fix compilation issue. 
#TASK-6765 --- .../storage/hadoop/variant/index/sample/FileDataSchema.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java index 43dc9c289ef..9b9d15a30b8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataSchema.java @@ -100,7 +100,8 @@ public Pair to(Pair pair) { alternate += call.getVariantId().substring(alternatesIdx); } String alternateAndExtras = VariantPhoenixKeyFactory - .buildSymbolicAlternate(reference, alternate, originalVariant.getEnd(), originalVariant.getSv()); + .buildSymbolicAlternate(reference, alternate, + originalVariant.getStart(), originalVariant.getEnd(), originalVariant.getSv()); int relativeStart = originalVariant.getStart() - variant.getStart(); From d7f17b1cfc7d3356e839c4853e0b6e1ece7f678c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 18 Oct 2024 17:18:22 +0100 Subject: [PATCH 16/19] storage: Rename "canUseThisExecutor" with "acceptsQuery" #TASK-6765 --- .../storage/core/variant/VariantStorageEngine.java | 2 +- .../executors/BreakendVariantQueryExecutor.java | 2 +- .../executors/CompoundHeterozygousQueryExecutor.java | 2 +- .../executors/DBAdaptorVariantQueryExecutor.java | 3 +-- .../query/executors/NoOpVariantQueryExecutor.java | 3 +-- .../query/executors/VariantQueryExecutor.java | 12 ++++++------ .../SamplesSearchIndexVariantQueryExecutor.java | 3 ++- .../search/SearchIndexVariantQueryExecutor.java | 3 ++- .../query/executors/VariantQueryExecutorTest.java | 8 ++++---- .../HBaseColumnIntersectVariantQueryExecutor.java | 3 ++- .../SampleIndexMendelianErrorQueryExecutor.java | 5 ++--- .../index/SampleIndexOnlyVariantQueryExecutor.java | 3 ++- .../index/SampleIndexVariantQueryExecutor.java | 3 ++- 13 files changed, 27 insertions(+), 25 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 4a52bdedaf0..d861c5ec95f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1357,7 +1357,7 @@ public VariantQueryExecutor getVariantQueryExecutor(Query query, QueryOptions op public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQuery) { try { for (VariantQueryExecutor executor : getVariantQueryExecutors()) { - if (executor.canUseThisExecutor(variantQuery)) { + if (executor.acceptsQuery(variantQuery)) { logger.info("Using VariantQueryExecutor : " + executor.getClass().getName()); logger.info(" Query : " + VariantQueryUtils.printQuery(variantQuery.getInputQuery())); logger.info(" Options : " + variantQuery.getInputOptions().toJson()); diff --git 
a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java index bc40c5b6418..266399400c0 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/BreakendVariantQueryExecutor.java @@ -41,7 +41,7 @@ public BreakendVariantQueryExecutor(String storageEngineId, ObjectMap options, } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { VariantQuery query = variantQuery.getQuery(); return query.getString(VariantQueryParam.TYPE.key()).equals(VariantType.BREAKEND.name()) && VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java index 596b4a5bf87..61a15ea25db 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java @@ -61,7 +61,7 @@ public CompoundHeterozygousQueryExecutor(VariantStorageMetadataManager metadataM } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { return isValidParam(variantQuery.getQuery(), VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java index f83d938534c..eda6b070b61 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/DBAdaptorVariantQueryExecutor.java @@ -2,7 +2,6 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.datastore.core.QueryParam; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor; @@ -54,7 +53,7 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws 
StorageEngineException { for (QueryParam unsupportedParam : UNSUPPORTED_PARAMS) { if (VariantQueryUtils.isValidParam(variantQuery.getQuery(), unsupportedParam)) { logger.warn("Unsupported variant query param {} in {}", diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java index 5f2da2c2056..2f57a73a551 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/NoOpVariantQueryExecutor.java @@ -2,7 +2,6 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; -import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; @@ -35,7 +34,7 @@ public NoOpVariantQueryExecutor(VariantStorageMetadataManager metadataManager, S } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { VariantQuery query = variantQuery.getQuery(); boolean sampleQuery = false; String sample = null; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java index c08f2626afb..feb65fc1b91 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutor.java @@ -69,8 +69,8 @@ public static void setDefaultTimeout(QueryOptions queryOptions, ObjectMap config * @return True if this variant query executor is valid for the query * @throws StorageEngineException if there is an error */ - public final boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { - boolean canUseThisExecutor = canUseThisExecutor(variantQuery, variantQuery.getInputOptions()); + public final boolean acceptsQuery(ParsedVariantQuery variantQuery) throws StorageEngineException { + boolean canUseThisExecutor = canUseThisExecutor(variantQuery); if (canUseThisExecutor) { if (variantQuery.getSource().isSecondary()) { // Querying for a secondary index source. This executor can only be used if the source is the same @@ -84,12 +84,12 @@ public final boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws /** * Internal method to determine if this VariantQueryExecutor can run the given query. 
- * @param variantQuery Query to execute - * @param options Options for the query - * @return True if this variant query executor is valid for the query + * + * @param variantQuery Query to execute + * @return True if this variant query executor is valid for the query * @throws StorageEngineException if there is an error */ - protected abstract boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException; + protected abstract boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException; protected abstract Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java index cfa794cbe10..8256257f752 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SamplesSearchIndexVariantQueryExecutor.java @@ -29,8 +29,9 @@ public SamplesSearchIndexVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Varian } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { VariantQuery query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); String samplesCollection = inferSpecificSearchIndexSamplesCollection(query, options, getMetadataManager(), dbName); return samplesCollection != null && searchActiveAndAlive(samplesCollection); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java index ccaae7dbe39..99c5048cf56 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantQueryExecutor.java @@ -73,8 +73,9 @@ public SearchIndexVariantQueryExecutor setIntersectParamsThreshold(int intersect } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException { VariantQuery query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); return doQuerySearchManager(query, options) || doIntersectWithSearch(query, options); } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java index fcd1192df2c..7765ce03ef5 100644 --- 
a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/VariantQueryExecutorTest.java @@ -242,7 +242,7 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, logger.info("## Allowed VariantQueryExecutors:"); ParsedVariantQuery variantQuery = variantStorageEngine.parseQuery(query, options); for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { - if (variantQueryExecutor.canUseThisExecutor(variantQuery, options)) { + if (variantQueryExecutor.canUseThisExecutor(variantQuery)) { logger.info("## - " + variantQueryExecutor.getClass().getSimpleName()); } } @@ -250,12 +250,12 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, VariantQueryResult expected; if (expectDBAdaptorExecutor) { logger.info("## Using DBAdaptorVariantQueryExecutor for expected results"); - Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(variantQuery, options)); + Assert.assertTrue(dbQueryExecutor.canUseThisExecutor(variantQuery)); expected = dbQueryExecutor.get(variantQuery); } else { logger.info("## DBAdaptorVariantQueryExecutor can not be used for expected results"); - Assert.assertFalse(dbQueryExecutor.canUseThisExecutor(variantQuery, options)); + Assert.assertFalse(dbQueryExecutor.canUseThisExecutor(variantQuery)); expected = null; } @@ -302,7 +302,7 @@ public VariantQueryResult testQuery(Query query, QueryOptions options, } for (VariantQueryExecutor variantQueryExecutor : variantQueryExecutors) { - if (variantQueryExecutor.canUseThisExecutor(variantQuery, options)) { + if (variantQueryExecutor.canUseThisExecutor(variantQuery)) { logger.info(""); logger.info("###################"); logger.info("### Testing " + variantQueryExecutor.getClass().getSimpleName()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java index ceb52e5eeaa..680e478f7fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HBaseColumnIntersectVariantQueryExecutor.java @@ -44,8 +44,9 @@ public HBaseColumnIntersectVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Stri } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) { VariantQuery query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); if (!options.getBoolean(HBASE_COLUMN_INTERSECT, ACTIVE_BY_DEFAULT)) { // HBase column intersect not active return false; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java 
index 5063ca1fe4b..429c7d05f9d 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexMendelianErrorQueryExecutor.java @@ -9,7 +9,6 @@ import org.opencb.biodata.tools.pedigree.MendelianError; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; -import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; @@ -32,11 +31,11 @@ public SampleIndexMendelianErrorQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, } @Override - public boolean canUseThisExecutor(ParsedVariantQuery query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery query) { if (VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_MENDELIAN_ERROR) || VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_DE_NOVO) || VariantQueryUtils.isValidParam(query.getQuery(), VariantQueryUtils.SAMPLE_DE_NOVO_STRICT)) { - return super.canUseThisExecutor(query, options); + return super.canUseThisExecutor(query); } else { return false; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 34a511f0fc3..86d21f0f36c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -91,8 +91,9 @@ public SampleIndexOnlyVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, Sam } @Override - public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery variantQuery) { VariantQuery query = variantQuery.getQuery(); + QueryOptions options = variantQuery.getInputOptions(); if (variantQuery.getSource() == VariantQuerySource.SECONDARY_SAMPLE_INDEX) { if (SampleIndexQueryParser.validSampleIndexQuery(query) && isQueryCovered(query)) { return true; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java index 03e8a5fe880..67940511673 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantQueryExecutor.java @@ -58,7 +58,8 @@ public 
SampleIndexVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, SampleI } @Override - public boolean canUseThisExecutor(ParsedVariantQuery query, QueryOptions options) { + public boolean canUseThisExecutor(ParsedVariantQuery query) { + QueryOptions options = query.getInputOptions(); if (options.getBoolean(SAMPLE_INDEX_INTERSECT, true)) { return SampleIndexQueryParser.validSampleIndexQuery(query.getQuery()); } From 4f4ce7eb7146462a2288cd8db470485e95219c19 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 11 Nov 2024 10:14:49 +0100 Subject: [PATCH 17/19] Push echo to debug pull-request-approve #TASK-6765 --- .github/workflows/pull-request-approved.yml | 1 + .github/workflows/scripts/get-xetabase-branch.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index eb0e45fadce..0c148d9ac10 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -26,6 +26,7 @@ jobs: echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} + echo "xetabase_branch: ${xetabase_branch}" echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT env: ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }} diff --git a/.github/workflows/scripts/get-xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh index 90ab2c75747..40ad247ecf7 100644 --- a/.github/workflows/scripts/get-xetabase-branch.sh +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -8,7 +8,7 @@ get_xetabase_branch() { # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it if [[ $input_branch == TASK* ]]; then if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then - echo $input_branch; + echo "$input_branch"; return 0; fi fi From d6109e731205220a7faf27b13dab863b2059d11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Fri, 15 Nov 2024 11:17:09 +0000 Subject: [PATCH 18/19] storage: Fix tests. 
#TASK-6765 --- .../variant/manager/VariantStorageManager.java | 7 +++++-- .../core/metadata/VariantStorageMetadataManager.java | 11 ++--------- .../index/sample/SampleIndexQueryParserTest.java | 2 +- .../hadoop/variant/index/sample/SampleIndexTest.java | 11 +++++++---- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index 0c5d426b139..57e69ee4759 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -558,10 +558,13 @@ public OpenCGAResult configureSampleIndex(String studyStr, SampleIndexConfi String cellbaseVersion = engine.getCellBaseUtils().getVersionFromServer(); sampleIndexConfiguration.validate(cellbaseVersion); String studyFqn = getStudyFqn(studyStr, token); + int studyId; if (!engine.getMetadataManager().studyExists(studyFqn)) { - engine.getMetadataManager().createStudy(studyFqn, cellbaseVersion); + studyId = engine.getMetadataManager().createStudy(studyFqn, cellbaseVersion).getId(); + } else { + studyId = engine.getMetadataManager().getStudyId(studyFqn); } - engine.getMetadataManager().addSampleIndexConfiguration(studyFqn, sampleIndexConfiguration, true); + engine.getMetadataManager().addSampleIndexConfiguration(studyId, sampleIndexConfiguration, true); catalogManager.getStudyManager() .setVariantEngineConfigurationSampleIndex(studyStr, sampleIndexConfiguration, token); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 5a7eebb7386..c3457d5489b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -208,7 +208,7 @@ public Lock lockStudy(int studyId, long lockDuration, long timeout, String lockN // Test purposes only @Deprecated public StudyMetadata createStudy(String studyName) throws StorageEngineException { - return createStudy(studyName, "v5"); + return createStudy(studyName, ParamConstants.CELLBASE_VERSION); } public StudyMetadata createStudy(String studyName, String cellbaseVersion) throws StorageEngineException { @@ -223,14 +223,7 @@ public StudyMetadata createStudy(String studyName, String cellbaseVersion) throw } public StudyMetadata.SampleIndexConfigurationVersioned addSampleIndexConfiguration( - String study, SampleIndexConfiguration configuration, boolean staging) throws StorageEngineException { - Integer idOrNull = getStudyIdOrNull(study); - int studyId; - if (idOrNull == null) { - studyId = createStudy(study, null).getId(); - } else { - studyId = idOrNull; - } + int studyId, SampleIndexConfiguration configuration, boolean staging) throws StorageEngineException { StudyMetadata.SampleIndexConfigurationVersioned.Status status = staging ? 
StudyMetadata.SampleIndexConfigurationVersioned.Status.STAGING : StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 2bcb071b5ab..a1dc0a6bd99 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -85,7 +85,7 @@ public void setUp() throws Exception { mm = new VariantStorageMetadataManager(new DummyVariantStorageMetadataDBAdaptorFactory()); sampleIndexQueryParser = new SampleIndexQueryParser(mm); studyId = mm.createStudy("study").getId(); - int sampleIndexVersion = mm.addSampleIndexConfiguration("study", configuration, false).getVersion(); + int sampleIndexVersion = mm.addSampleIndexConfiguration(studyId, configuration, false).getVersion(); mm.addIndexedFiles(studyId, Arrays.asList(mm.registerFile(studyId, "F1", Arrays.asList("S1", "S2", "S3")))); mm.addIndexedFiles(studyId, Arrays.asList(mm.registerFile(studyId, "fam1", Arrays.asList("fam1_child", "fam1_father", "fam1_mother")))); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index d39356b2ec6..2c508095fe6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -160,7 +160,8 @@ public void load() throws Exception { configuration.getFileDataConfiguration() .setIncludeOriginalCall(null) .setIncludeSecondaryAlternates(null); - versioned = metadataManager.addSampleIndexConfiguration(STUDY_NAME_2, configuration, true); + metadataManager.createStudy(STUDY_NAME_2, engine.getCellBaseUtils().getVersionFromServer()); + versioned = metadataManager.addSampleIndexConfiguration(metadataManager.getStudyId(STUDY_NAME_2), configuration, true); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.STAGING, versioned.getStatus()); @@ -176,7 +177,8 @@ public void load() throws Exception { // Study 3 - platinum - metadataManager.addSampleIndexConfiguration(STUDY_NAME_3, SampleIndexConfiguration.defaultConfiguration() + metadataManager.createStudy(STUDY_NAME_3, engine.getCellBaseUtils().getVersionFromServer()); + metadataManager.addSampleIndexConfiguration(metadataManager.getStudyId(STUDY_NAME_3), SampleIndexConfiguration.defaultConfiguration() .addFileIndexField(new FieldConfiguration(FieldConfiguration.Source.FILE, "culprit", FieldConfiguration.Type.CATEGORICAL, "DP", "FS", "MQ", "QD").setNullable(true)), true); @@ -209,10 +211,11 @@ public void load() throws Exception { 
engine.familyIndex(STUDY_NAME_5, trios, new ObjectMap()); // Study 6, multiallelic + metadataManager.createStudy(STUDY_NAME_6, engine.getCellBaseUtils().getVersionFromServer()); SampleIndexConfiguration sampleIndexConfiguration = SampleIndexConfiguration.defaultConfiguration(); sampleIndexConfiguration.getFileIndexConfiguration().getCustomField(FieldConfiguration.Source.FILE, "FILTER") .setValues("PASS", "noPass", "noPass2"); - engine.getMetadataManager().addSampleIndexConfiguration(STUDY_NAME_6, sampleIndexConfiguration, true); + engine.getMetadataManager().addSampleIndexConfiguration(metadataManager.getStudyId(STUDY_NAME_6), sampleIndexConfiguration, true); params = new ObjectMap() .append(VariantStorageOptions.STUDY.key(), STUDY_NAME_6) @@ -230,7 +233,7 @@ public void load() throws Exception { configuration = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest().getConfiguration(); // Don't modify the configuration. - versioned = engine.getMetadataManager().addSampleIndexConfiguration(STUDY_NAME, configuration, true); + versioned = engine.getMetadataManager().addSampleIndexConfiguration(metadataManager.getStudyId(STUDY_NAME), configuration, true); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.STAGING, versioned.getStatus()); From 80de0c31c9333027537f0aea2706ece87d7eb4c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 27 Nov 2024 17:37:52 +0000 Subject: [PATCH 19/19] storage: Add missing `--source` arg. #TASK-6765 --- .../app/cli/internal/executors/VariantQueryCommandUtils.java | 2 ++ .../app/cli/internal/options/VariantCommandOptions.java | 3 +++ .../opencb/opencga/master/monitor/daemons/ExecutionDaemon.java | 1 - 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantQueryCommandUtils.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantQueryCommandUtils.java index 33ff458ec10..8de46997c0e 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantQueryCommandUtils.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantQueryCommandUtils.java @@ -22,6 +22,7 @@ import org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils; import org.opencb.opencga.app.cli.internal.options.VariantCommandOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,6 +56,7 @@ public static Query parseQuery(VariantCommandOptions.AbstractVariantQueryCommand addParam(query, VariantCatalogQueryUtils.PANEL_ROLE_IN_CANCER, queryVariantsOptions.panelRoleInCancer); addParam(query, VariantCatalogQueryUtils.PANEL_FEATURE_TYPE, queryVariantsOptions.panelFeatureType); addParam(query, VariantCatalogQueryUtils.SAVED_FILTER, queryVariantsOptions.savedFilter); + addParam(query, VariantQueryParam.SOURCE, queryVariantsOptions.source); return query; } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java index 173a56940ef..4f3d185d376 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java +++ 
b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java @@ -376,6 +376,9 @@ public class AbstractVariantQueryCommandOptions extends GeneralCliOptions.StudyO @Parameter(names = {"-p", "--project"}, description = PROJECT_DESC, arity = 1) public String project; + @Parameter(names = {"--source"}, description = SOURCE_DESCR, arity = 1) + public String source; + @Parameter(names = {"--family"}, description = FAMILY_DESC, arity = 1) public String family; diff --git a/opencga-master/src/main/java/org/opencb/opencga/master/monitor/daemons/ExecutionDaemon.java b/opencga-master/src/main/java/org/opencb/opencga/master/monitor/daemons/ExecutionDaemon.java index 01c02bde4ad..1c0b649d7a3 100644 --- a/opencga-master/src/main/java/org/opencb/opencga/master/monitor/daemons/ExecutionDaemon.java +++ b/opencga-master/src/main/java/org/opencb/opencga/master/monitor/daemons/ExecutionDaemon.java @@ -195,7 +195,6 @@ public class ExecutionDaemon extends MonitorParentDaemon implements Closeable { put(PicardWrapperAnalysis.ID, "alignment " + PicardWrapperAnalysis.ID + "-run"); put(VariantIndexOperationTool.ID, "variant index-run"); - put(VariantExportTool.ID, "variant export-run"); put(VariantStatsAnalysis.ID, "variant stats-run"); put("variant-stats-export", "variant stats-export-run"); put(SampleVariantStatsAnalysis.ID, "variant sample-stats-run");
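
Note on the canUseThisExecutor refactor in the patches above: executors no longer receive QueryOptions as a second argument; an executor that needs the options now recovers them from the parsed query via getInputOptions(). The sketch below illustrates that contract only. It is not part of the patch series: ExampleQueryExecutor, its package-less form and the "example.flag" key are hypothetical; only canUseThisExecutor(ParsedVariantQuery), ParsedVariantQuery.getInputOptions() and QueryOptions.getBoolean(String, boolean) mirror calls visible in the diffs, and the ParsedVariantQuery import path is assumed from the surrounding classes.

    import org.opencb.commons.datastore.core.QueryOptions;
    import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
    import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery;

    // Hypothetical executor, for illustration only; real executors extend VariantQueryExecutor.
    public class ExampleQueryExecutor {

        // Options are no longer a separate parameter; they travel inside the parsed query
        // and are read on demand, as in SampleIndexVariantQueryExecutor above.
        protected boolean canUseThisExecutor(ParsedVariantQuery variantQuery) throws StorageEngineException {
            QueryOptions options = variantQuery.getInputOptions();
            // "example.flag" is a placeholder option key, not a real OpenCGA setting.
            return options.getBoolean("example.flag", true);
        }
    }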