From e5ad4658eb051a717f6a33e7751a34bd3f810aee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 20 Mar 2024 11:16:59 +0000 Subject: [PATCH 01/14] storage: Add VariantAnnotatorExtension scaffolding. #TASK-5318 --- .../core/variant/VariantStorageOptions.java | 3 + .../DefaultVariantAnnotationManager.java | 9 +++ .../VariantAnnotatorExtensionTask.java | 58 +++++++++++++++++++ .../VariantAnnotatorExtensionsFactory.java | 53 +++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index caefbb5260e..6488e35bf8a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -80,6 +80,9 @@ public enum VariantStorageOptions implements ConfigurationOption { ANNOTATOR_CELLBASE_VARIANT_LENGTH_THRESHOLD("annotator.cellbase.variantLengthThreshold", Integer.MAX_VALUE), ANNOTATOR_CELLBASE_IMPRECISE_VARIANTS("annotator.cellbase.impreciseVariants", true), ANNOTATOR_CELLBASE_STAR_ALTERNATE("annotator.cellbase.starAlternate", false), + ANNOTATOR_EXTENSION_PREFIX("annotator.extension."), + ANNOTATOR_EXTENSION_LIST("annotator.extension.list"), + ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"), INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java index 78229d47ef6..16d3b071279 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java @@ -52,6 +52,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionsFactory; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter; import org.opencb.opencga.storage.core.variant.io.db.VariantDBReader; @@ -265,6 +267,13 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap return variantAnnotationList; }; + List extensions = new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(params); + for (VariantAnnotatorExtensionTask extension : extensions) { + extension.setup(outDir); + extension.checkAvailable(); + annotationTask = annotationTask.then(extension); + } + final DataWriter variantAnnotationDataWriter; if (avro) { //FIXME diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..7e044a901ce --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java @@ -0,0 +1,58 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.run.Task; + +import java.net.URI; +import java.util.List; + +public interface VariantAnnotatorExtensionTask extends Task { + + /** + * Set up the annotator extension. + * This method will be called before any other method. It might generate extra files or data needed for the annotation. + * + * @param output Output directory where the annotator extension should write the files + * @return List of URIs of generated files (if any) + */ + List setup(URI output); + + /** + * Check if the annotator extension is available for the given options. + * @throws IllegalArgumentException if the annotator extension is not available + */ + void checkAvailable() throws IllegalArgumentException; + + /** + * Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available. + * @return true if the annotator extension is available + */ + default boolean isAvailable() { + try { + checkAvailable(); + return true; + } catch (IllegalArgumentException e) { + return false; + } + } + + @Override + default void pre() throws Exception { + Task.super.pre(); + checkAvailable(); + } + + /** + * Get the options for the annotator extension. + * @return Options for the annotator extension + */ + ObjectMap getOptions(); + + /** + * Get the metadata for the annotator extension. + * @return Metadata for the annotator extension + */ + ObjectMap getMetadata(); + +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java new file mode 100644 index 00000000000..57626a34b43 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java @@ -0,0 +1,53 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; + +import java.lang.reflect.InvocationTargetException; +import java.util.LinkedList; +import java.util.List; + +public class VariantAnnotatorExtensionsFactory { + + public List getVariantAnnotatorExtensions(ObjectMap options) { + + List tasks = new LinkedList<>(); + for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { + VariantAnnotatorExtensionTask task = null; + switch (extensionId) { +// case CosmicVariantAnnotatorExtensionTask.ID: +// task = new CosmicVariantAnnotatorExtensionTask(options); +// break; + default: + String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); + if (extensionClass != null) { + task = getVariantAnnotatorExtension(extensionClass, options); + } else { + throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'"); + } + } + + if (task == null) { + throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'"); + } + + tasks.add(task); + } + return tasks; + } + + private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) { + try { + Class clazz = Class.forName(className); + return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); + } catch (ClassNotFoundException + | NoSuchMethodException + | InstantiationException + | IllegalAccessException + | InvocationTargetException e) { + throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); + } + } + + +} From abbc66a4d5f1808ad272bb8887ed963711c70148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 21 Mar 2024 17:55:10 +0100 Subject: [PATCH 02/14] core: implement the CosmicVariantAnnotatorExtensionTask, #TASK-5902, #TASK-5318 --- .../DefaultVariantAnnotationManager.java | 2 +- .../VariantAnnotatorExtensionTask.java | 3 +- .../cosmic/CosmicExtensionTaskCallback.java | 82 ++++++++ .../CosmicVariantAnnotatorExtensionTask.java | 194 ++++++++++++++++++ ...smicVariantAnnotatorExtensionTaskTest.java | 120 +++++++++++ .../custom_annotation/cosmic.small.tsv.gz | Bin 0 -> 8342 bytes 6 files changed, 399 insertions(+), 2 deletions(-) create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java create mode 100644 opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java create mode 100644 opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java index 16d3b071279..f8e7f4c4b90 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java @@ -295,7 +295,7 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap ParallelTaskRunner parallelTaskRunner = new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config); parallelTaskRunner.run(); - } catch (ExecutionException e) { + } catch (Exception e) { throw new VariantAnnotatorException("Error creating annotations", e); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java index 7e044a901ce..03255409123 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java @@ -15,8 +15,9 @@ public interface VariantAnnotatorExtensionTask extends Task setup(URI output); + List setup(URI output) throws Exception; /** * Check if the annotator extension is available for the given options. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java new file mode 100644 index 00000000000..c343e690dde --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java @@ -0,0 +1,82 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.collections4.CollectionUtils; +import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; +import org.opencb.biodata.models.sequence.SequenceLocation; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; +import org.opencb.biodata.tools.variant.VariantNormalizer; +import org.opencb.opencga.core.common.JacksonUtils; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class CosmicExtensionTaskCallback implements CosmicParserCallback { + + private RocksDB rdb; + private VariantNormalizer variantNormalizer; + private ObjectMapper defaultObjectMapper; + + private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class); + + private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|()|()|()|()|()"; + + public CosmicExtensionTaskCallback(RocksDB rdb) { + this.rdb = rdb; + this.variantNormalizer = new VariantNormalizer(); + this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); + } + + @Override + public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List evidenceEntries) { + // Add evidence entries in the RocksDB + // More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed + List normalisedVariantStringList; + try { + normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(), + sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate()); + if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) { + for (String normalisedVariantString : normalisedVariantStringList) { + rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries)); + } + return true; + } + return false; + } catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) { + e.printStackTrace(); + return false; + } + } + + protected List getNormalisedVariantString(String chromosome, int start, String reference, String alternate) + throws NonStandardCompliantSampleField { + Variant variant = new Variant(chromosome, start, reference, alternate); + return getNormalisedVariantString(variant); + } + + protected List getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField { + // Checks no weird characters are part of the reference & alternate alleles + if (isValid(variant)) { + List normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true); + return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList()); + } else { + logger.warn("Variant {} is not valid: skipping it!", variant); + } + + return Collections.emptyList(); + } + + protected boolean isValid(Variant variant) { + return (variant.getReference().matches(VARIANT_STRING_PATTERN) + && (variant.getAlternate().matches(VARIANT_STRING_PATTERN) + && !variant.getAlternate().equals(variant.getReference()))); + } +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..60dcaeaccf1 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -0,0 +1,194 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectReader; +import org.opencb.biodata.formats.variant.cosmic.CosmicParser; +import org.opencb.biodata.models.common.DataVersion; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; + +public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExtensionTask { + + private ObjectMap options; + + private Path cosmicFolder; + + private ObjectReader objectReader; + + private RocksDB rdb = null; + private Options dbOption = null; + private Path dbLocation = null; + + public static final String COSMIC_ANNOTATOR_INDEX_NAME = "cosmicAnnotatorIndex"; + public static final String COSMIC_VERSION_FILENAME = "cosmicVersion.json"; + + private static Logger logger = LoggerFactory.getLogger(CosmicVariantAnnotatorExtensionTask.class); + + public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { + this.options = options; + this.objectReader = JacksonUtils.getDefaultObjectMapper().readerFor(new TypeReference>() {}); + } + + @Override + public List setup(URI output) throws Exception { + // Check input path + cosmicFolder = Paths.get(output.getPath()); + if (cosmicFolder == null || !Files.exists(cosmicFolder)) { + throw new IllegalArgumentException("Path " + output + " does not exist"); + } + if (!cosmicFolder.toFile().isDirectory()) { + throw new IllegalArgumentException("Path " + output + " must be a directory with two files: the raw COSMIC file and the" + + " metadata file 'cosmicVersion.json'"); + } + + // Clean and init RocksDB + dbLocation = cosmicFolder.toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); + if (Files.exists(dbLocation)) { + // Skipping setup but init RocksDB + logger.info("Skipping setup, it was already done"); + initRockDB(false); + } else { + logger.info("Setup and populate RocksDB"); + File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); + if (!versionFile.exists()) { + throw new IllegalArgumentException("Path " + output + " does not contain the COSMIC metadata file: " + + COSMIC_VERSION_FILENAME); + } + DataVersion dataVersion; + try { + dataVersion = JacksonUtils.getDefaultObjectMapper().readValue(versionFile, DataVersion.class); + } catch (IOException e) { + throw new IllegalArgumentException("Error parsing the metadata file " + versionFile.getAbsolutePath(), e); + } + String cosmicFilename; + try { + cosmicFilename = dataVersion.getFiles().get(0); + } catch (Exception e) { + throw new IllegalArgumentException("Error getting the COSMIC file from the metadata file " + + versionFile.getAbsolutePath(), e); + } + File cosmicFile = cosmicFolder.resolve(cosmicFilename).toFile(); + if (!cosmicFile.exists()) { + throw new IllegalArgumentException("COSMIC file " + cosmicFile.getAbsolutePath() + " does not exist"); + } + + // Init RocksDB + initRockDB(true); + + // Call COSMIC parser + try { + CosmicExtensionTaskCallback callback = new CosmicExtensionTaskCallback(rdb); + CosmicParser.parse(cosmicFile.toPath(), dataVersion.getVersion(), dataVersion.getName(), dataVersion.getAssembly(), + callback); + } catch (IOException e) { + throw new ToolException(e); + } + } + return Collections.singletonList(dbLocation.toUri()); + } + + @Override + public void checkAvailable() throws IllegalArgumentException { + if (!isAvailable()) { + throw new IllegalArgumentException("COSMIC annotator extension is not available"); + } + } + + @Override + public boolean isAvailable() { + return (dbLocation != null && Files.exists(dbLocation)); + } + + @Override + public ObjectMap getOptions() { + return options; + } + + @Override + public ObjectMap getMetadata() { + File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); + if (!versionFile.exists()) { + throw new IllegalArgumentException("Metadata file " + versionFile + " does not exist"); + } + try { + return JacksonUtils.getDefaultObjectMapper().readValue(versionFile, ObjectMap.class); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + @Override + public List apply(List list) throws Exception { + for (VariantAnnotation variantAnnotation : list) { + Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), + variantAnnotation.getAlternate()); + byte[] key = variant.toString().getBytes(); + byte[] dbContent = rdb.get(key); + if (dbContent != null) { + List evidenceEntryList = objectReader.readValue(dbContent); + if (variantAnnotation.getTraitAssociation() == null) { + variantAnnotation.setTraitAssociation(evidenceEntryList); + } else { + variantAnnotation.getTraitAssociation().addAll(evidenceEntryList); + } + } + } + return list; + } + + @Override + public void post() throws Exception { + closeRocksDB(); + } + + + private void closeRocksDB() { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + } + + private void initRockDB(boolean forceCreate) throws ToolException { + boolean indexingNeeded = forceCreate || !Files.exists(dbLocation); + // a static method that loads the RocksDB C++ library. + RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. + dbOption = new Options().setCreateIfMissing(true); + + rdb = null; + try { + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + rdb = RocksDB.open(dbOption, dbLocation.toAbsolutePath().toString()); + } else { + rdb = RocksDB.openReadOnly(dbOption, dbLocation.toAbsolutePath().toString()); + } + } catch (RocksDBException e) { + // Do some error handling + throw new ToolException("", e); + } + } +} diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java new file mode 100644 index 00000000000..aced5f62950 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -0,0 +1,120 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.Assert; +import org.junit.Test; +import org.opencb.biodata.models.common.DataVersion; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.common.TimeUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class CosmicVariantAnnotatorExtensionTaskTest { + + private final String COSMIC_VERSION = "v95"; + + @Test + public void testSetupCosmicVariantAnnotatorExtensionTask() throws Exception { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + + Assert.assertEquals(false, task.isAvailable()); + + // Setup COSMIC directory + Path cosmicPath = initCosmicPath(); + + // Set-up COSMIC variant annotator extension task, once + task.setup(cosmicPath.toUri()); + + // Set-up COSMIC variant annotator extension task, twice + task.setup(cosmicPath.toUri()); + + ObjectMap metadata = task.getMetadata(); + Assert.assertEquals(COSMIC_VERSION, metadata.get("version")); + + Assert.assertEquals(true, task.isAvailable()); + } + + @Test + public void testSCosmicVariantAnnotatorExtensionTask() { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + Assert.assertEquals(false, task.isAvailable()); + } + + @Test + public void testAnnotationCosmicVariantAnnotatorExtensionTask() throws Exception { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + + Assert.assertEquals(false, task.isAvailable()); + + // Setup COSMIC directory + Path cosmicPath = initCosmicPath(); + + // Set-up COSMIC variant annotator extension task, once + task.setup(cosmicPath.toUri()); + + List inputVariantAnnotations = new ArrayList<>(); + VariantAnnotation variantAnnotation1 = new VariantAnnotation(); + variantAnnotation1.setChromosome("12"); + variantAnnotation1.setStart(124402657); + variantAnnotation1.setEnd(124402657); + variantAnnotation1.setReference("G"); + variantAnnotation1.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation1); + VariantAnnotation variantAnnotation2 = new VariantAnnotation(); + variantAnnotation2.setChromosome("22"); + variantAnnotation2.setStart(124402657); + variantAnnotation2.setEnd(124402657); + variantAnnotation2.setReference("G"); + variantAnnotation2.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation2); + + List outputVariantAnnotations = task.apply(inputVariantAnnotations); + task.post(); + + Assert.assertEquals(inputVariantAnnotations.size(), outputVariantAnnotations.size()); + + // Checking variantAnnotation1 + Assert.assertEquals(1, outputVariantAnnotations.get(0).getTraitAssociation().size()); + Assert.assertEquals("COSV62300079", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getId()); + Assert.assertEquals("liver", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getPrimarySite()); + Assert.assertEquals("hepatocellular carcinoma", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getHistologySubtype()); + Assert.assertEquals("PMID:323", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getBibliography().get(0)); + + // Checking variantAnnotation2 + Assert.assertTrue(CollectionUtils.isEmpty(outputVariantAnnotations.get(1).getTraitAssociation())); + } + + private Path initCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(getClass().getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); + DataVersion cosmicDataVersion = new DataVersion("variant", "cosmic", COSMIC_VERSION, "20231212", + "hsapiens", "GRCh38", Collections.singletonList(cosmicFile.getFileName().toString()), + Collections.singletonList("http://cosmic.org"), null); + JacksonUtils.getDefaultObjectMapper().writeValue(cosmicPath.resolve(CosmicVariantAnnotatorExtensionTask.COSMIC_VERSION_FILENAME).toFile(), cosmicDataVersion); + Files.copy(cosmicFile, cosmicPath.resolve(cosmicDataVersion.getFiles().get(0))); + + return cosmicPath; + } + + private Path getTempPath() { + return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); + } +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz b/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..153129875feedfd109939484ff008b3f3f7a30b6 GIT binary patch literal 8342 zcmV;HAZgzpiwFom4EtpO17mMb;~t>MM^Y>}>>w$IV!=yrH^brFq|<;x=cqpOSQ z#(jya?QXf=Wzk(WUnbe>*=D{?qcOakZFb+buP^Bt-T#v9f60q~`8?lj*NgSjYjl~f zD!=3KJ=X9ZNBP&wH2&xO0iH3-(sUJ#KHYx4xf)JyzDx#_tIxMn*v|E*(O~%N-dlU? z-8R{xRzMTw7lT2$zDVJ>&tGs|zWK-N(;5`;8eM?qjc+-_vRNuK%f3H2ggNN(oWYP{Hu48_k8#Rz?&aC#W_enw<+R_3)=j^m2Yj zrBT<>c=v6yoo{zeNAcOse6vYcn>30dZX(Klq)c$ZsT6UsiemV42TJ}7Msqcc&aD-i zMPHL_o~*WK>&LU*>bKSUpQ~JXoKQohR8h3qWlw4L8b8keCw)MBpA)p4zDOBg@ZbU13z87E?=J4^M~mc)Ur!Ubr;`QrR$ePvROv% zHJQ%R#bUaAO&29PuN7z*po-8~Dy?9~L8C0w6rlm+Mn4S!ntLjl{*O8|v-QJ#^#slv zy#6+NPPcmy2~9{`tQth~mL43Cz$RNTv~W40wNic=&Y1gAUfc2!8rSL%1IGQQN!Y5b>ONrN37eCh`>=s zk6F5T{`bdh{YQ!qy8S$kUVQwGq-_Re^8zA{7R@Io>>~%oiv}?D@mIs4R1?I)GO7>@ zB{^ULF(K>pPr1sOHIga(;+9gut+HO_@S;KG#;_k4V_-z+qW&}b;?+~Lb~vq?*tl5j zTRWkgt1caL#=CSJH7cdLOVXr=|_aj`n2F#@9gdwc_LR(Dml!ZAA$dRngIkemz8AuVxy1(f1AB$pq+A$9JS z9fcvmExQ8(?o0^pmdA|Bztr9G?RwSZmKhguu{u;e13W*J$$)4RILM9e*vnsW#|**g zQ&Nt1hCTyf+H7FWz(cXlg zFyUU@$q}$kJUI}SJ5J8?I$I(bSvpH!w(IP)b7YLJqyoyP0!C?v4`gLN6##q;{rhS7 zC@lj%xXM!jYh_o+ed_^ofEYT-UYY?3l9zy>Ed5de7ds@0fsd;g?8YUl%+pK=A|DYu zz$W2QW$Z8>2Z{6(aFB3frtqR4QefVjiAO|Jdt!11XE0>Pu#z5;V`0P#s93A^@$oH* z*rB{74k*jCkj4MNpf@ZRG#*g-4;Rw4;Q}ZkRYnV|qIC7JUZ&e@zFRh(mg3v!*4lBg z6i$N6$GB4X+YuJaTu4`t4|w-AVbt2UrDL2kY2sp4BdC!1>28E#G}phep6z!5n~SznOFM3a5^!vMflna?!L$P&HuOQCo3|Yr!IUXUP)m zcK&$iy#&PSxL6$xDR0p|L=`7anmulSAuHR)k)80RyFx+WogOIsVAm zek5#seakTAV}=SzFpts1*!PxX|GLY5n{T&e{<3*lXUPNnBJ4xeOS2$Zg(`b40E~m8 zSL2ehylny0=rm#mdS)^%RtKq!QKGB10)}EF5bH9`)M_hjtqT;eg}UYYC^19H{(oWy zo|(a`KVl5S1IsaE3^21Qbeirt<0mCIf)nj&fnOaWW{K`hKKnq1!HDs;R444BtT>`z z{QzUYpdn0@e=B1xsLE9Wmb=+{lPsb~0B?~_pVHNQ3;Dm}aMORqhB3sehjg1}%lRtV zrvHXez>m(KR-lXNGF>E1Wl8`@K|l%(DT@!bAq!s1QVJF{8S!6@u`ca3xPGH^3l$DJ^WqqLyRQMgr z1*YNyLpev_DF=!Qgkuy!mKI`sT5?L132iA{g>nISp%1MVK$t>_TIy}N;1zBSqV%hM z`m-a-zwr!F^JknPp~4yBz+ghx1iUvwqnI-@cZP_yS0_QkC05Ie)uFTFxWTudA&^@% zcd79Bpm>Y4ne&BRl$;OcdPhg#&nO@0vV35Q@`0~Xqcd>v2WY<-wqkO~y?txTuMT0s zp#Aa11BgOS%-7};Xug{&A1C%56*uhGNWsc(jSAfw^D*5T?5|h4HNJzOq%aN}MMI}% z1QsU0o_1b16ErSXwGKOY%&rlXqrIEvfHPXWIsw6{G@=srQ7!6VWwd8M(#p_}L?&fi z8|F>@NXe-v`;m}sq0Ti?Tw-UJsu;2_;bVPkI}&`e@-+ars^vCB+ODwPv$I=~tl(Mh z={>Xlv7mX*Z+*^h*azkO4494fZAiv&I4F=Q3keM7Dcl(EbS^37q@7IFGfFt#JT6v; zPDg{U`=A2A<$Qy(wUMZN3zZbb_GP@6I9>qkQ^C1_Pz|I;1^3cm&>ecWmPkTFB~n8Y z1%WS&?kLIfW;-3#OuK5=gh}NGL)_|cFvLpR#fcy`VxA?UymYTRdh4`WO7sux*{vE5 zA({!VLlJbS+jZ!T+|@-^=`LGi9Gzug4J|=5C{_f;YH$}Jio#x*%MNh9!Z&-lChbvl zKRW<<<6?D~9T=rL3%qq*x6>JvsU>9QqF1s35T;+B_f}z=Z4LSrr4^;*lr-CYJzcFI z^62@-M(=*6O+%)wgj8Ypf#I3JsV|XJ9R%()kid>YICZOrMlblfPLLo&02kX{g*UW) zfQ_~?oqRLZ5jH{l`Je9{36#ns0XQO7L;^SGsq?h@4ycfBZ@$;(2svdmE>=xv$8`Fs zH$(zAXA82mwcT|sopuuSi^-rOqo?;2vlfnnmvD?M9I1{B`e8T2?5!o5h3{H9hoyVV7`jC9EzL=0k3M!0YKE2;$gm{ zN`3}1_MIVei#4^ zH1KJkjk#5gj+#z3`qnZAkbRH~Kxou{E`6bWCs5xZl9{33W^) zNC~NN9(4gJV5^V^K-LXGMRS)2VACFzJpr$CtVH@FDU$&q@G>a6!v zD+|_gWzLx&7C{O3N~S@tKbHHTh)gcps24KQXq-%>S}6_81UL}I(cESd^oE=aun^V? z-&mlGNea2Y!vdm!a9GeV`+P4nyEzdHY2#vbhy`q+EBpeXl40)8LGgp=6} zh~G>F5a|Zbdma@bVg}(dltv*1V|Y{qviu(TF`f&;z_q)A8%Tz?v=9r(YAMblP%cOe zhEOm5RCTW*Y{bQ?=7hQ@a*kEkfSNUHwIWubS$sN@%(rXy?t&w$On_usVHBp#!s7h7 z>kd4p2_{~%--7B+Df~+!tjVto8I4V9uUCOJ1ndsdA0*bP+SCfEBEnp(YHDS&^EL{{ zVINkZlOcjG(=D)rWCO%qvu4#+Bnz7L0t|a;_WH}OhiX*M7W`HOHc5zN!x{neK^YW^p)0w)GHC{8bvdk*7QSR~%F(@U){om_l6(tW+oliG#Lez?;4o9$Ad6e1Tt#kuEkw5dv_8Xmhxcp zxY~yUG||at&@?z+X6W|evJn&l5Q9$U#i*+t|GJkuB3jG5!b;nW=N2h0kbIozFs`(Z?Wgo;FD~4Ull`GOcyPsRh zLm}|-{N50E)7b`ZQ+z-t3iS*Kkw5R`oN1Hwrp@4?NJAe*6&#J;->ySJkYXo3N9D z<)Co=5OT5jeiDxfBw5s{AU%BCi;-GIhByRdnFfkS7Z30)*6zul2 z$PDRgoUe{wjyM{OS)=l?k_pM|X4R)X$dS9GJpw{0R<2d|=zdj% zs%hh;Lyh<5wKwhjD$v_an;H?QCZt1L75?AFS%Xs)n5079#E%W`PJV%H{*CX zx%bHyCqCH%cG8t>d3_%wTIupz_PSmri^DpN%x~`i)8$xoMnn{$u4$Fu*!{|gJp~Y; zdPIk#fU6oH+H@X?GS_Nr7Ve>dGPR%(1TP6F$kGply9>b};|)a|yy}KU91=!6w;61q zy@0dX0ZDzF&*sTux_Qac1o?YIO-Jc4%xQ_%EQAD$%_}e{#-#72|Cb>4*!^~&h-ngv zJZ^O`nnpW69qD+HXK$Xfgk-c>frOK`8gkf2k*A?(qh3+r&Q2j6pnNK)V?Mp7Q)CjZ zldF9vm-F78Tuf69Gd@ByqPe>V>dpk5zoN!Qr~Yr|8&|EhHdU*&bvCF@>()Ym=_s-7 z305t&$!Q@IHmG`}V#6UN!jA+}9C!jsDU5j&r35X84|^eaM}viKiPe~1i5N#Bd~K(d zQeycmE>`v46a)V!Y({T+QC!;ut4oyLYbjsBoIjl~{AXzbZMFd6}25}h&}B>&l5e+M{@O*jRr zf`g^Ss%gN9b~aXzm1&ym!jtR;C2Xq$gng831w%Nv zr|Kj(c8--`Mgl(>C_6c^GEegSA65XA`YNsK--x6A0GUrQO4}kCyVR z1h1OL#j1YEo{DbQ4Sw!+Z$Q4Kg;$ZKpc?qRepKUjczHLyxVjr*OO$a+6vN3qE%8d| zEM0B01ZP{b-%)>?*UdIv0`o}bE4T7$d|J#=r)!{xYoH|AdiJ`7H>RTuY=3%nbrmJ6 z1m`ce^I6UKCHFo^XrY*qp;lnsGzT@-gy90k`LX#W?}Lc@sLyJ{;=E{DDE=Va>;Y9G!JuB`SEZYys(=a68lI_nQ;&Pxne5CN9MnzF< zK{loVHAc@72z|7(KhG zzYs}Flg0PZWWm?I#bkB0b0-MPo2A4R%VRKNb)-%UR_9IDt?EMo^w=ev9wu9gRq#Q4 zbiZ2FC4NAYGg)f9$pXS{d}E2(?9{Q2vM1CU_&ArcOa(R1jaTS=jD-TrsjGKyjs*f+ zT&(H|i_o3r*}A#Xj=_~&kaBVbp4U&IqrcvM8cxCGF-tTOFa}f`FBg5AFXjMh=(Ig0 zi^aD^^6*fYYK2lPlSSw793>Lms>mm{AcZ$z%7NZd*XRM}=mXr(j2)er&dX1X#Ko%N zA~=3DL09oZDsJq-$HjrvvbpGS_ne||Y4hMk*g>TZh1J16koX(Mr2O06G#2|ejg!?n zOR>1oQje-R56DEZvPknwtBhl_(R&Z+QgXba5N4#MJaK>#C;sDNm46}}f7b>47Uho? zuP9bq#VH71DaA_p4rYy?3P*%3hUBmSvCyKs3x#8H*ER&m1_|yK%h82Nn41gP7 zoMios<%2H&+upVH#&KKWXaAKH6Yx&X<7VT?Mr1>F)=1w3!Byg-swCJkTJ+cVoWny7 zXLdL{!$wtxWaMRD;VdOE0MjMoPq(BXds& zw=$M36?=_1I(AuvT&~|bqXrkuKoGMU(B{bu0d0`4v4&#z&f#O?EjGNvzl+hlRT+l+ zhrfNEE1+}eVUyhg4{ypSkn_QTV()j0F+#9ujp*`ddcJK~UUb=GDrcI=smHCOI7PhL zh(lX|6&qNhF{s#oY0p+7*4yB^h(5&tL!dTCd7!g=DXmmwmE@hJGKSk z?^j9=QJhL%jAbq=PHfnHTN;rAO;IDNtPx#=-HOE#ze>3(=!(8sUUA7QKBfv0X9Zm= zTJiAl5hbhyU3nUX|40u>L(dRy1tq>P6+$hHx<9jpQMsapk%4Iqj^2N-oKQPj|3f(4 z&5|1Cq}N>@O`Z|&7oq9a<-4O!Fw24iQ1RD(gUc#gU&tTp5z=3tKHJ(P3?Fo7jfPQ6Z z^nBYjmdK%)HIOEr&Cpzm@|^)N^CioAX>UMf7m&`}{EPW0>b~}U!r=giYYQ?hEU@bG z==cGX%UWYoaqk$Eg9M%1n>3rU5nG=FxBcb!M!R8eyiI!(0q1&d3wxuST?I;yIY4c@ zJUX^Xm02vYXxx;GyL%L|(jL$A7!Yevv^KT!%j(}r@M|9wq@J)lm z0vh<`Jv{BmA~;2d0O=WP9!4vGfwe%Q5LoRc?5lZ&f&B(7*{s4SXHh`=fq>Upz&60( zFM1v10NG*kbr1xwC@~e!=>Nj4MvOxUcH8CAvC6=%#HT~fCfO?BDQOBNA%JVdmwkaz zF|5K#Ij_8@#*O z)g~Spkf;Wj$buK3e5P-tP$HzK0|wfhHyXQcf`#?3rlmgIKHmKCfCmP{5fvAMjyzU{ zW>7dAfh(QRI^$-M$V-@Tf?AdHXqsUSi!6jxzO;;n;uoN_qHqP4mb1PQ(eD$%dnfUvCB_ z^MO}V$BH^@*H_dLAB&32X|LcUT14@Qm}7!K9F!EX$n&|E-_?LgCPy?l<08Qnj#iMc zoLb3wd+@WDHC8E>I-gXMs12FqhnM@e*Ggfth{JYJc!OjBPqf~GH|=fTPV}LjO;iR# zE0gl{?Nqk`{O>Sh&L8=G+Ugjg!%7of7?hlVPn++ z?2tz0ue3w-kUPV@!dLLsZLA+ieXXZJUs z9@2QUmBTN7za7P7?;q~3|8e{A2JYtH|HJJfxncr2x!H8>;hp@Hf`y#e4T0 z_WD$Va&n)+KkV{orY>180uCX^fdt!i6T=>;E)ejt;x?pd6s|N#b_T)T&sW%>M?VSL zOp8ddhlHu;K${V=v{82O;RGYGgEik%P}Z6RyV!?y;Ux@p2GiwHYA4igG11w=YK|gR z!bwT;G5{M)qp7>!ODVYN0n--P^Z*9L786a|pb9W)X8uB8W&)G8e?c4W6qvNNeYDDG zG_64P%Pee-VA6wRLud}a{y$yHFf3*8c_}eP-$|r)hxa-x69vO78=}V>M@Ps8{zRQ? z!(xtQOUoI*e=aY8f0K9ttfnlcw_wRU4KIMxD$@%39DNf6(G4$1OvFF;ez!8jgT}nc z?uD6YV~|*q(oPCcNvWa3;cFV1CsH~b#vGcc-Y;Lil1f@e%9YHMW}XZw7oJj=Ojq0z zjzLI)Pqv5u9;o!^Qfck$b@dZZ8f+Y9LZ#6RSln_=&{onkkgbLdnx;^&JA*{KCaCEz zzZYH>dlAxJNTQ?#BP-=Jt=Y)A31++I;53eoy4?I?JZ>v)v8nI}rYRKo(N3GT5UBS- zWYA*b*GACdaAwBet0LpC7{?xqeG^r9Yw{U8cQohQNoSVy1Kecs$`6-5zjhF7`Wi+agHJ z+rzW-JhTj+9%ZTkLPzI}ace&QK5l3m`DP${ zmX5FWOo=`}Q`)6xib$4}&b^(6`(IjSsULe`1Xn1G$IVg+%%1*5hn3uoz$fkE<_tB g$MExKrGHZL)N}er{F(mwn(3ea0!i$rfjpN002~7OLjV8( literal 0 HcmV?d00001 From 0f835389388381413aa65148ddde8cec33c337b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 09:41:05 +0200 Subject: [PATCH 03/14] storage-core: configure normalization and use logger instead of printStackTrace, #TASK-5902, #TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java --- .../extensions/cosmic/CosmicExtensionTaskCallback.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java index c343e690dde..054ffaf4915 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; import org.opencb.biodata.models.sequence.SequenceLocation; import org.opencb.biodata.models.variant.Variant; @@ -31,7 +32,10 @@ public class CosmicExtensionTaskCallback implements CosmicParserCallback { public CosmicExtensionTaskCallback(RocksDB rdb) { this.rdb = rdb; - this.variantNormalizer = new VariantNormalizer(); + this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig() + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(false)); this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); } @@ -51,7 +55,7 @@ public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List Date: Wed, 5 Jun 2024 09:44:35 +0200 Subject: [PATCH 04/14] storage-core: add options to VariantStorageOptions and update the COSMIC annotator extension, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java --- .../core/variant/VariantStorageOptions.java | 1 + .../VariantAnnotatorExtensionsFactory.java | 15 ++-- .../CosmicVariantAnnotatorExtensionTask.java | 73 ++++++------------- 3 files changed, 32 insertions(+), 57 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index 6488e35bf8a..42d4851cf2a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -83,6 +83,7 @@ public enum VariantStorageOptions implements ConfigurationOption { ANNOTATOR_EXTENSION_PREFIX("annotator.extension."), ANNOTATOR_EXTENSION_LIST("annotator.extension.list"), ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"), + ANNOTATOR_EXTENSION_COSMIC_VERSION("annotator.extension.cosmic.version"), INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java index 57626a34b43..f20dadda289 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java @@ -2,6 +2,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.lang.reflect.InvocationTargetException; import java.util.LinkedList; @@ -15,9 +16,9 @@ public List getVariantAnnotatorExtensions(ObjectM for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { VariantAnnotatorExtensionTask task = null; switch (extensionId) { -// case CosmicVariantAnnotatorExtensionTask.ID: -// task = new CosmicVariantAnnotatorExtensionTask(options); -// break; + case CosmicVariantAnnotatorExtensionTask.ID: + task = new CosmicVariantAnnotatorExtensionTask(options); + break; default: String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); if (extensionClass != null) { @@ -41,10 +42,10 @@ private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String classN Class clazz = Class.forName(className); return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); } catch (ClassNotFoundException - | NoSuchMethodException - | InstantiationException - | IllegalAccessException - | InvocationTargetException e) { + | NoSuchMethodException + | InstantiationException + | IllegalAccessException + | InvocationTargetException e) { throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 60dcaeaccf1..49616124772 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -2,14 +2,15 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.variant.cosmic.CosmicParser; -import org.opencb.biodata.models.common.DataVersion; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; import org.rocksdb.Options; import org.rocksdb.RocksDB; @@ -17,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; import java.io.IOException; import java.net.URI; import java.nio.file.Files; @@ -28,9 +28,12 @@ public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExtensionTask { + public static final String ID = "cosmic"; + private ObjectMap options; - private Path cosmicFolder; + private String cosmicVersion; + private String assembly; private ObjectReader objectReader; @@ -50,46 +53,28 @@ public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { @Override public List setup(URI output) throws Exception { - // Check input path - cosmicFolder = Paths.get(output.getPath()); - if (cosmicFolder == null || !Files.exists(cosmicFolder)) { - throw new IllegalArgumentException("Path " + output + " does not exist"); + // Sanity check + Path cosmicFile = Paths.get(options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key())); + if (!Files.exists(cosmicFile)) { + throw new IllegalArgumentException("COSMIC file " + cosmicFile + " does not exist"); + } + cosmicVersion = (String) options.getOrDefault(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), null); + if (StringUtils.isEmpty(cosmicVersion)) { + throw new IllegalArgumentException("Missing COSMIC version"); } - if (!cosmicFolder.toFile().isDirectory()) { - throw new IllegalArgumentException("Path " + output + " must be a directory with two files: the raw COSMIC file and the" - + " metadata file 'cosmicVersion.json'"); + assembly = (String) options.getOrDefault(VariantStorageOptions.ASSEMBLY.key(), null); + if (StringUtils.isEmpty(assembly)) { + throw new IllegalArgumentException("Missing assembly"); } // Clean and init RocksDB - dbLocation = cosmicFolder.toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); + dbLocation = Paths.get(output.getPath()).toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); if (Files.exists(dbLocation)) { // Skipping setup but init RocksDB logger.info("Skipping setup, it was already done"); initRockDB(false); } else { logger.info("Setup and populate RocksDB"); - File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); - if (!versionFile.exists()) { - throw new IllegalArgumentException("Path " + output + " does not contain the COSMIC metadata file: " - + COSMIC_VERSION_FILENAME); - } - DataVersion dataVersion; - try { - dataVersion = JacksonUtils.getDefaultObjectMapper().readValue(versionFile, DataVersion.class); - } catch (IOException e) { - throw new IllegalArgumentException("Error parsing the metadata file " + versionFile.getAbsolutePath(), e); - } - String cosmicFilename; - try { - cosmicFilename = dataVersion.getFiles().get(0); - } catch (Exception e) { - throw new IllegalArgumentException("Error getting the COSMIC file from the metadata file " - + versionFile.getAbsolutePath(), e); - } - File cosmicFile = cosmicFolder.resolve(cosmicFilename).toFile(); - if (!cosmicFile.exists()) { - throw new IllegalArgumentException("COSMIC file " + cosmicFile.getAbsolutePath() + " does not exist"); - } // Init RocksDB initRockDB(true); @@ -97,8 +82,7 @@ public List setup(URI output) throws Exception { // Call COSMIC parser try { CosmicExtensionTaskCallback callback = new CosmicExtensionTaskCallback(rdb); - CosmicParser.parse(cosmicFile.toPath(), dataVersion.getVersion(), dataVersion.getName(), dataVersion.getAssembly(), - callback); + CosmicParser.parse(cosmicFile, cosmicVersion, ID, assembly, callback); } catch (IOException e) { throw new ToolException(e); } @@ -108,16 +92,11 @@ public List setup(URI output) throws Exception { @Override public void checkAvailable() throws IllegalArgumentException { - if (!isAvailable()) { + if (dbLocation == null || !Files.exists(dbLocation)) { throw new IllegalArgumentException("COSMIC annotator extension is not available"); } } - @Override - public boolean isAvailable() { - return (dbLocation != null && Files.exists(dbLocation)); - } - @Override public ObjectMap getOptions() { return options; @@ -125,15 +104,9 @@ public ObjectMap getOptions() { @Override public ObjectMap getMetadata() { - File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); - if (!versionFile.exists()) { - throw new IllegalArgumentException("Metadata file " + versionFile + " does not exist"); - } - try { - return JacksonUtils.getDefaultObjectMapper().readValue(versionFile, ObjectMap.class); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } + return new ObjectMap("data", ID) + .append("version", cosmicVersion) + .append("assembly", assembly); } @Override From 79ac8874d293f1f168c04e45055c5c3832c6472e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 09:46:34 +0200 Subject: [PATCH 05/14] test: update COSMIC annotator extension JUnit tests, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java --- ...smicVariantAnnotatorExtensionTaskTest.java | 62 ++++++++++++++----- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index aced5f62950..f1fbaaf411f 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -11,6 +11,7 @@ import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.io.IOException; @@ -23,26 +24,40 @@ public class CosmicVariantAnnotatorExtensionTaskTest { + private final String ASSEMBLY ="GRCh38"; private final String COSMIC_VERSION = "v95"; @Test public void testSetupCosmicVariantAnnotatorExtensionTask() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + ObjectMap options = new ObjectMap(); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); Assert.assertEquals(false, task.isAvailable()); - // Setup COSMIC directory - Path cosmicPath = initCosmicPath(); - // Set-up COSMIC variant annotator extension task, once - task.setup(cosmicPath.toUri()); + task.setup(outPath.toUri()); // Set-up COSMIC variant annotator extension task, twice - task.setup(cosmicPath.toUri()); + task.setup(outPath.toUri()); ObjectMap metadata = task.getMetadata(); Assert.assertEquals(COSMIC_VERSION, metadata.get("version")); + Assert.assertEquals(CosmicVariantAnnotatorExtensionTask.ID, metadata.get("data")); + Assert.assertEquals(ASSEMBLY, metadata.get("assembly")); Assert.assertEquals(true, task.isAvailable()); } @@ -55,17 +70,29 @@ public void testSCosmicVariantAnnotatorExtensionTask() { } @Test - public void testAnnotationCosmicVariantAnnotatorExtensionTask() throws Exception { + public void testAnnotationCosmicVariantAnnotatorExtensionTaskUsingFactory() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + ObjectMap options = new ObjectMap(); - CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID); - Assert.assertEquals(false, task.isAvailable()); + CosmicVariantAnnotatorExtensionTask task = (CosmicVariantAnnotatorExtensionTask) new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(options).get(0); - // Setup COSMIC directory - Path cosmicPath = initCosmicPath(); + Assert.assertEquals(false, task.isAvailable()); // Set-up COSMIC variant annotator extension task, once - task.setup(cosmicPath.toUri()); + task.setup(outPath.toUri()); List inputVariantAnnotations = new ArrayList<>(); VariantAnnotation variantAnnotation1 = new VariantAnnotation(); @@ -105,13 +132,14 @@ private Path initCosmicPath() throws IOException { throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); } Path cosmicFile = Paths.get(getClass().getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); - DataVersion cosmicDataVersion = new DataVersion("variant", "cosmic", COSMIC_VERSION, "20231212", - "hsapiens", "GRCh38", Collections.singletonList(cosmicFile.getFileName().toString()), - Collections.singletonList("http://cosmic.org"), null); - JacksonUtils.getDefaultObjectMapper().writeValue(cosmicPath.resolve(CosmicVariantAnnotatorExtensionTask.COSMIC_VERSION_FILENAME).toFile(), cosmicDataVersion); - Files.copy(cosmicFile, cosmicPath.resolve(cosmicDataVersion.getFiles().get(0))); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } - return cosmicPath; + return targetPath; } private Path getTempPath() { From 41c155ca5034cbf2bb8f72689436f83442239073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 12:40:58 +0200 Subject: [PATCH 06/14] storage-core: check valid variants in COSMIC annotator extension, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java --- .../cosmic/CosmicVariantAnnotatorExtensionTask.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 49616124772..49a2a5f871e 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -112,8 +112,14 @@ public ObjectMap getMetadata() { @Override public List apply(List list) throws Exception { for (VariantAnnotation variantAnnotation : list) { - Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), - variantAnnotation.getAlternate()); + Variant variant; + try { + variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), + variantAnnotation.getAlternate()); + } catch (Exception e) { + logger.warn("Skipping variant: " + e.getMessage()); + continue; + } byte[] key = variant.toString().getBytes(); byte[] dbContent = rdb.get(key); if (dbContent != null) { From ca7458e56e75911c811e0f717dbfbbeaf061358a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 12:42:52 +0200 Subject: [PATCH 07/14] test: add VariantAnnotationManager JUnit tests to check the COSMIC annotator extension, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java new file: opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz --- .../core/variant/VariantStorageBaseTest.java | 16 ++++ .../VariantAnnotationManagerTest.java | 71 ++++++++++++++++++ ...smicVariantAnnotatorExtensionTaskTest.java | 6 +- ...riant-test-file-annotator-extension.vcf.gz | Bin 0 -> 6726 bytes 4 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java index fe5313bdc80..33229775f45 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java @@ -97,6 +97,15 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari "22:16616084:G:A" ))); + public static final Set COSMIC_VARIANTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + "1:169607124:G:T", + "1:169611640:T:G", + "1:169617058:A:T", + "1:169617158:C:A", + "12:124372173:T:A", + "12:124336867:G:A" + ))); + public static final String VCF_TEST_FILE_NAME = "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"; protected static URI inputUri; @@ -106,6 +115,9 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari public static final String VCF_CORRUPTED_FILE_NAME = "variant-test-file-corrupted.vcf"; protected static URI corruptedInputUri; + public static final String ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME = "variant-test-file-annotator-extension.vcf.gz"; + protected static URI annotatorExtensionInputUri; + protected static URI outputUri; protected VariantStorageEngine variantStorageEngine; protected VariantStorageMetadataManager metadataManager; @@ -134,16 +146,20 @@ public static void _beforeClass() throws Exception { Path inputPath = rootDir.resolve(VCF_TEST_FILE_NAME); Path smallInputPath = rootDir.resolve(SMALL_VCF_TEST_FILE_NAME); Path corruptedInputPath = rootDir.resolve(VCF_CORRUPTED_FILE_NAME); + Path annotatorExtensionInputPath = rootDir.resolve(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_TEST_FILE_NAME), inputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(SMALL_VCF_TEST_FILE_NAME), smallInputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_CORRUPTED_FILE_NAME), corruptedInputPath, StandardCopyOption.REPLACE_EXISTING); + Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME), + annotatorExtensionInputPath, StandardCopyOption.REPLACE_EXISTING); inputUri = inputPath.toUri(); smallInputUri = smallInputPath.toUri(); corruptedInputUri = corruptedInputPath.toUri(); + annotatorExtensionInputUri = annotatorExtensionInputPath.toUri(); outputUri = rootDir.toUri(); // logger.info("count: " + count.getAndIncrement()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index dc2721882fa..0092e2889da 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -4,7 +4,9 @@ import org.apache.commons.lang.StringUtils; import org.junit.Assume; import org.junit.Test; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -17,10 +19,13 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantMatchers; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotatorFactory; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.CosmicVariantAnnotatorExtensionTaskTest; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.io.File; import java.net.URI; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; @@ -237,6 +242,72 @@ public void testMultiAnnotations() throws Exception { } + @Test + public void testCosmicAnnotatorExtensionWithCosmicAnnotation() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, COSMIC_VARIANTS.size()); + } + + @Test + public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, 0); + } + + public void checkCosmicVariants(DataResult annotationDataResult, int expected) { + int cosmicCount = 0; + for (VariantAnnotation va : annotationDataResult.getResults()) { + String variantId = va.getChromosome() + ":" + va.getStart() + ":" + va.getReference() + ":" + va.getAlternate(); + if (COSMIC_VARIANTS.contains(variantId)) { + if (va.getTraitAssociation() != null) { + for (EvidenceEntry entry : va.getTraitAssociation()) { + if (CosmicVariantAnnotatorExtensionTask.ID.equals(entry.getSource().getName())) { + cosmicCount++; + break; + } + } + } + } + } + assertEquals(expected, cosmicCount); + } + public void testQueries(VariantStorageEngine variantStorageEngine) throws StorageEngineException { long count = variantStorageEngine.count(new Query()).first(); long partialCount = 0; diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index f1fbaaf411f..5070e4922d7 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -126,12 +126,12 @@ public void testAnnotationCosmicVariantAnnotatorExtensionTaskUsingFactory() thro Assert.assertTrue(CollectionUtils.isEmpty(outputVariantAnnotations.get(1).getTraitAssociation())); } - private Path initCosmicPath() throws IOException { + public static Path initCosmicPath() throws IOException { Path cosmicPath = getTempPath(); if (!cosmicPath.toFile().mkdirs()) { throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); } - Path cosmicFile = Paths.get(getClass().getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); Files.copy(cosmicFile, targetPath); @@ -142,7 +142,7 @@ private Path initCosmicPath() throws IOException { return targetPath; } - private Path getTempPath() { + public static Path getTempPath() { return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa19d5b2e4db3ff662592f94257e36b0a27b8ede GIT binary patch literal 6726 zcmV-M8oA{kiwFoFJz!=419o9@X<=@3Ep%mbbS-9SY-KHBZfeJ_6KEJC|e){>3FW&l;*KeQx_>aH+$GhL2!_&9_`}cRx z-+cb<{jdLZ{-M|Je*O4a2d^gou%G<8PX6x0$6r2A&;9Dd`@g>X@czF)e*E*N%kR8; za{*%FJ2vO#zx@2uABR8v^k(?})$sGT-wiK*csu;-zrFZjsEaUs|KoQ*4fVsrk1wFa z5b8hU{-03)ndJ!#Adz!`;oYZyVw6Iq3@?Y@zxwuvKMY}~Gi?ioZ-0FC-2PzEKlu9X zvsZ7Pz5Zbc{}-Nxok)20P1r?vs(%N3`c2uzIF5zjW1-ks(lOv&f#yW;v0TSWvaujf zzezhp0~;mekcQtr0ZWWjvP?Z z@M3t~fz4K6Wo*a{B-yZW;fSGo*-m#=|1a)xc`!E7;}>j8nJ`IU(Pe9(+e0{h<(7r( zn5Ez%=wb;gxEPDxlhR(elhpw~)-gW)hIi@gr{IprkG1{@I9`Xvc@=YoNA)@&XU-`= zRDjF^lI-%nebCNcEl}623QkZ$41OiJKuk%oz3AB4+t9%-<^96m1TRq%06TdGGiNHT zD?7UtK%>+uRce8w;AC6K7ncTH6mqnyBc=7tCpchI5l4zIl>{*)ZcCKL5r=1Z`cn@3 z*m*HIV-8NU`LYvfo;vHnis2G-UMbtob3^Q3e&jWLhFZq7qV6 zXg6GmgS?3O1On`j9M>Z^{=GTx^0zwtiG&VNg>ZI&NP)1#ZY|bx<{Fwp&m1OG`q8n) zn2X(qSz&htfOjxnuohZ@}QCfSu?ZKmrC4=uP;mvbr3# zLgjUjMfn>xM9}L{X2jIbT0hBDaCJEN=&M}Pzh zP_0OeSZsrtRMeH+!gsY0x;!ISn;IL1DqoLJ?c$FLO@btT){p$cY9O?ljBOZN&r zNG4V|y2+y?fF#~^@E)pV7rZ zsnt`W@2zSxRZDC2b3h9%9K{vN~=C<=8(xCcX zZ>hM#;s{cdICZk6lOeqq`DkO&rBFHWk>wLt5=E&BP3dOg1<&=h-p=!qzh2jAwgj_o zvg0?$Q%6``VL)iEx^0hLV{1zeYW;&aV?wlS!=a&EnP_miReq(j*GCw$ey%Kvja9DIV zqdRga6vjZ^X7$SC*oX-z4!sqIE+q~j_FApIMo~i&D>{sn&>285v%m93tnMvwY|1u? za`mnU$O{hG)fcY4tqkUZ7}W%arp34|bpkbLMUPx|4_(TEo5s$TaWDhNsv)u)Iz*;m zZ_}^aTn9;NTC*QHG?b;it1G7vEtoi&ViET+=}j#qNwoOujohT#%kj+4j&eG7owRYx zhLO>Q(fFp@6M9&w;|A9zlOr_o;#}(B)Ub; zC-8FS6rB~LYK+1$TSVe!ADPEw+#7L`&{QcT%Khti@<~HjB$;uYoq3|lX68F{C*(9& zE$D%&XUvySXfj-OIq!7LvF-EUDnjKZ;67{+L@K=rJ72)+G3P(q?HF?>C6L?*@8Hvd zIJpkheu&q*2xD^RL_*fpH=poen=4LLiV9}6(Kd*km}_s&qCM&j#U{+DurFWCoP(lU zU2$~j6bC03VfTZl*H(p0Gxv1M1PTQgnMp88O$E@U!&5KFoiDTVf zsFg3R!plW%+Jt*XAQCzm!RnD%8B|@6xW2G$rRK8r9l1V7fD8pn;J`K4DTdO_!`d>T zSQG?iRSY!{7NVMD3qp9iP>NXMWY#_P2(jvjkmbHW)ah02bH6viB~bl^BLNl5X1gH- zK+6e|lVfx9j5x$_8b!e|isBF%vDB<0H6IBV;$+*!C={F-V#p=e3I*k&+{~Ql4f6LK zy*MaW7x8rB6ivUjsOOfMY;j-O|7tP%%Ob#+Py)69b4AY>JgDiW)c8z{P|id%|-8sE);3 zQp8Sen#*If&p@ohWEt5D0ld{OM|1I#rR_R6RTp{` zfp3g2+VARzIfk)>qxvhxg05ymNcUS2dL*#H4IkBzd+NquP#9wcDaqL8sKOUh-c)~*28^txMI|dmLMjVs3s+Vlz{NdQ*+*wLnR&dSB-9!Dd^>G7H2Wn-nOBY zSzch^qO!f1oV0LOhId@-^UIqD45&g6s%4871ZsL4w~L16L3w+~H5C;iBx%-FYzs}dk)dYY zXI7cqqy+c;eA;n2q;mE|rIanWb0=2o2creKrS0lj`Mx$+4xp4cW*aGyXFYtk<)obC z>sy1q)IbZ>a!d)E8TV(Yxgf`Etb#Y1 zj#tyBF4Aa&SA2!WoNZmb407kz))ZzV!;|DQ&X1kVyG#YHrscSTrG)GuN7b^-(k3Wd z##B`H3ab=>?5Qcby{WgDcl9hm#TG*(3Ds$V8Mt{bdR_lLBtWq#D_N8c139Y0j`cp6Nroz)%oL`I5)o}s#3dQYqC zNU;2#kd;8WX`9A^fo84yb`d>?N5wL>r6%JSuWfiFhMB`FSw)K%AM1gWVChkP!P?dR z$p()&sO9OM{9UqFSg%qJ)1`Jk_dWX4vv|6wG5KS)7fR!VPgkM)%pD#a@bBvc=iC8ccHcBHj% z3XPF#$V~ZDWa}8XWjcO)-7Ia^I-$5zHb#M1T203`)vY+Fy+cHCbhxWkac|jHoG;q~ zo-@gC-;`pyY-{|c;Zy1axuoFhhinZYn<14c8Lz%`N5qW zV%Z`W?qn2DqxP}5@(i41fY$zF7!q(HVQgnfe^z|bB!yL zknh5GJ*_4FMu0`P)kCAfQqC9T-@{xN_u-QPiyCUt6CF*lNxZKx zpNBge`AVbd4vlY11AI*4~_N4Sdgs}j;mBV$;=gZ9RkdSTi-Nk9?v%D;*`6?-Wb zSicuTr&rX_seUNc1Y)m3{}QpsPH-h4#^QMv2`o1rHqO>>gR_uXfg0{6Z|#0xx@DS7 z*bmOf%fvejVQi#2?@>N}(-2y#2t9CKouq;|V{8^DZ7S`e3Y3(+RRoxV4XM|!GG)vh zeO6tY8;5L#Ka#^R_D!dUKg4j%hMwr}cKcPY=ma8pP@YW`KfFW9b+n={qtU^sgi}~{ zX)&o84o%BvCH9G@&d4S_wG#=c_M1%$)N{MGI29T4n3JaqhtjN?TH7_T2+kV1c{O)Z z#;K?8#xuwJAk)OI!Y;{KLHUjIPSfIPN7(@uQB77u$)yMjx>5V~?TdjB7jA7iDJ9~I zL~M6D?0?qz(8RP!XNF?bMNC74+Coh&AbTm|zF0vB^Ra?fYM0E`92~!C;7E#!WbZhO zAf97k$1jDruz1;aqyD~e--sc0kT*m@Uw!?Yb~1wq#9( zljqVZ7r^d^Q`V=QicK#=I`mDk=8=!3OX&zMhke1mYKeuF+b~Zc4LMMO=y%}!n=LqBhlj?ehrmQZ8c9VlmFKQkLRB+_+*)OvYo9TC(rlsZ~ zTT26|(H+apUr6U!dEr{pHxI?gdVP&VP1kJC8$77(hd3L?F?iQ!|Hu)Z*fWP%BV!19 zZ*w!1Y<0Izd-0-5IAHIBlavcv2cg>t$;Eqn7jN27+S?LpSCsR4S43;}n-w5Zs;$YC zY{n$LyW8srZLXdwhEScI`fsxEy4LDWu{OA*i$$qMj+s+!BXmz}yIE{f=HfTUr)68; zbbtx$EjJkZ90A-H`D{Hp*ATilO{?&7;mlsekeID%b@l!gol)a5NC10j!(^Uy6S}!U z367lzs+^)Tw;Uc`uq8|$7DFi@-HA|N0vlL*LdxzFaBPtVpcoRY1g>cbmq*F;)Uym4}_D_%a@V@hh(8LPZ zm@V13HgI7C^mjJys-cDXr0g=Q;@c^E`)^i+qNs8i=}F=*P}$QN6mxJ{l~^9Z$$jr@ z-wLkFTCCvgjm=s;lc?LT^JTzgNa7`UQ9#XEXuq7kVIbSVDJ!lu0!B>A;>)vFwkO=% zu=AE1#V--pxk<}j1;7m*0}z>Oy|#n`3GLTOCpdPHTlW<-t!N9nI={h4$FU%pGMB4*nIc44JFPn{WCofQVnG%n|D8+hLqV_joJFfVcWE|NY-(v(5rY$X zNuYY}(ZoO%T#ks0^>Jpa?YS=#g=9H+Jyn=G`!e(Kla8(k5RNaaMNHwor|55F#Jx*r z9(VApPt3NlxR3~Sr54KghQ_(SFylUWK^NIDSM!?HTZS;l5@bwHzNdRv5;_RCC59wCY%TEiJPUa5cZ zmE7_g*AhC3f;DNILeiz{e`ICt`W{1P&E3XVR@C4q?YyWdn>pPB?|NEe^^^dp1n=7JGow6t_nQ8w<-z6?`=BV+dVvpAqaYZGEsvq-`d&oE(@r~J*C(PgGJEX zfQ)LsEaa(d7iCdw43Vh%MU_vL&|5vTIuBKB5g?|Vt8axV#0C4Jvj(JRT7{Q0px@YH z>b7ZOY?;Pj*suf=sYbVol1%;Rdi;?MS*IEm#ETc|vp$F^FNnI^kJedB@E$#Z))Eix z857R|wU@)2O6<&OH}(UXScGHowMWq|9&e~maLB~N6PMpKd_)P-*y}x$HOyV*THNd} zYwxXEJgR2X*i1ELXu4Cj>`-Rlti4h64!OCcg%peJ%0LyFIqsB^c#wcIsD{1O)58Z| zIk~O%#n}lE3-kP-M%W`n3uCCw?JuhsiI1UQ8n5hRG;r3o3O#xj5v@PzV(F3E1odXt zZus?8S}5aKn(;Co<|!BP|63cu6SuzU2ure-A8uG!x7W!km0?n;s)24Ubi zc}vO>i&)Nc$QS1SFNs<>jy(lJRJ+Pzu2x+p%i&K{eAGBp4k|P=^b7(XvWTZ%}ioJ>&ftys#0GS~%WG%Zpq4YT>&?%-DHUJf--SmJJL4u-$pVgvp>|oS$|MHeN3^*msXCg90b3To zT2w+Uc(ywS%GqU Date: Thu, 6 Jun 2024 11:40:15 +0200 Subject: [PATCH 08/14] test: add JUnit tests to check COSMIC file format, missing COSMIC file, version and assembly; and assembly match, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java --- .../CosmicVariantAnnotatorExtensionTask.java | 5 +- .../VariantAnnotationManagerTest.java | 119 ++++++++++++++++++ ...smicVariantAnnotatorExtensionTaskTest.java | 16 +++ 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 49a2a5f871e..33246dc9658 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -8,6 +8,7 @@ import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.utils.FileUtils; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; @@ -55,9 +56,7 @@ public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { public List setup(URI output) throws Exception { // Sanity check Path cosmicFile = Paths.get(options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key())); - if (!Files.exists(cosmicFile)) { - throw new IllegalArgumentException("COSMIC file " + cosmicFile + " does not exist"); - } + FileUtils.checkFile(cosmicFile); cosmicVersion = (String) options.getOrDefault(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), null); if (StringUtils.isEmpty(cosmicVersion)) { throw new IllegalArgumentException("Missing COSMIC version"); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index 0092e2889da..0497d3bc96b 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -290,6 +290,125 @@ public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Excepti checkCosmicVariants(annotationDataResult, 0); } + @Test + public void testCosmicAnnotatorExtensionMissingInvalidCosmicFile() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initInvalidCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicFile() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95"); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicVersion() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMismatchAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh37") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + public void checkCosmicVariants(DataResult annotationDataResult, int expected) { int cosmicCount = 0; for (VariantAnnotation va : annotationDataResult.getResults()) { diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index 5070e4922d7..04a9f4b2e36 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -142,6 +142,22 @@ public static Path initCosmicPath() throws IOException { return targetPath; } + public static Path initInvalidCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/myannot.vcf").getPath()); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } + + return targetPath; + } + public static Path getTempPath() { return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); } From a628d20bcaae7150baabc1c77e7d9245c416f046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 11 Jun 2024 16:50:25 +0200 Subject: [PATCH 09/14] test: rename JUnit test, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java --- .../core/variant/annotation/VariantAnnotationManagerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index 0497d3bc96b..68461f5d9c2 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -291,7 +291,7 @@ public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Excepti } @Test - public void testCosmicAnnotatorExtensionMissingInvalidCosmicFile() throws Exception { + public void testCosmicAnnotatorExtensionInvalidCosmicFile() throws Exception { // Setup COSMIC directory Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initInvalidCosmicPath(); System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); From 7f47c71f47b6d666c50f95d1e417a7932884fe37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 11 Jun 2024 16:52:23 +0200 Subject: [PATCH 10/14] test: set JUnit test as @Category(ShortTests.class), #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java --- .../extensions/CosmicVariantAnnotatorExtensionTaskTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index 04a9f4b2e36..6f1a1f2825c 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -5,12 +5,14 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.Assert; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.opencb.biodata.models.common.DataVersion; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; @@ -22,6 +24,7 @@ import java.util.Collections; import java.util.List; +@Category(ShortTests.class) public class CosmicVariantAnnotatorExtensionTaskTest { private final String ASSEMBLY ="GRCh38"; From 1dcb395bdac7fc803057814acc5d0a3356dd2828 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 11 Jun 2024 16:54:04 +0200 Subject: [PATCH 11/14] storage-core: fix sonnar issues, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java --- .../CosmicVariantAnnotatorExtensionTask.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 33246dc9658..421fb5fa87a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -43,7 +43,6 @@ public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExte private Path dbLocation = null; public static final String COSMIC_ANNOTATOR_INDEX_NAME = "cosmicAnnotatorIndex"; - public static final String COSMIC_VERSION_FILENAME = "cosmicVersion.json"; private static Logger logger = LoggerFactory.getLogger(CosmicVariantAnnotatorExtensionTask.class); @@ -111,14 +110,8 @@ public ObjectMap getMetadata() { @Override public List apply(List list) throws Exception { for (VariantAnnotation variantAnnotation : list) { - Variant variant; - try { - variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), - variantAnnotation.getAlternate()); - } catch (Exception e) { - logger.warn("Skipping variant: " + e.getMessage()); - continue; - } + Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getEnd(), + variantAnnotation.getReference(), variantAnnotation.getAlternate()); byte[] key = variant.toString().getBytes(); byte[] dbContent = rdb.get(key); if (dbContent != null) { @@ -165,8 +158,7 @@ private void initRockDB(boolean forceCreate) throws ToolException { rdb = RocksDB.openReadOnly(dbOption, dbLocation.toAbsolutePath().toString()); } } catch (RocksDBException e) { - // Do some error handling - throw new ToolException("", e); + throw new ToolException("Error initializing RocksDB", e); } } } From a07cc13b327648bcb22e019c1a0ab8b2021bd7fb Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 17 Sep 2024 16:30:47 +0200 Subject: [PATCH 12/14] core: set defaul password expiration of 90 days, #TASK-6871 --- opencga-core/src/main/resources/configuration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-core/src/main/resources/configuration.yml b/opencga-core/src/main/resources/configuration.yml index 0b2b007817e..7d8c91c3409 100644 --- a/opencga-core/src/main/resources/configuration.yml +++ b/opencga-core/src/main/resources/configuration.yml @@ -9,7 +9,7 @@ jobDir: ${OPENCGA.USER.WORKSPACE}/jobs # Maximum number of login attempts before banning a user account account: maxLoginAttempts: ${OPENCGA.ACCOUNT.MAX_LOGIN_ATTEMPTS} - passwordExpirationDays: 0 + passwordExpirationDays: 90 panel: host: "http://resources.opencb.org/opencb/opencga/disease-panels" From 0e5515c8ae4c37e8b4ab7439f456eb97e387ef72 Mon Sep 17 00:00:00 2001 From: pfurio Date: Thu, 19 Sep 2024 14:56:03 +0200 Subject: [PATCH 13/14] server: avoid NPE when body of POST WS is null, #TASK-4073 --- .../server/rest/operations/VariantOperationWebService.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java index 14fb051c85c..4b9613b9268 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/operations/VariantOperationWebService.java @@ -593,7 +593,12 @@ public Response submitOperationToProject(String toolId, String project, ToolPara public Response submitOperation(String toolId, String project, String study, ToolParams params, String jobName, String jobDescription, String jobDependsOn, String jobTags, String jobScheduledStartTime, String jobPriority, Boolean dryRun) { try { - Map paramsMap = params.toParams(); + Map paramsMap; + if (params != null) { + paramsMap = params.toParams(); + } else { + paramsMap = new HashMap<>(); + } if (StringUtils.isNotEmpty(study)) { paramsMap.put(ParamConstants.STUDY_PARAM, study); } From 9002818b3b035096e78a0b3da4da56dd55132301 Mon Sep 17 00:00:00 2001 From: pfurio Date: Thu, 19 Sep 2024 17:44:42 +0200 Subject: [PATCH 14/14] server: avoid npe for empty body in post calls, #TASK-4073 --- .../org/opencb/opencga/server/rest/OpenCGAWSServer.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index 3dae031c245..2df5a8d71f4 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -882,7 +882,12 @@ protected DataResult submitJobRaw(String toolId, String project, String stu String jobDescription, String jobDependsOnStr, String jobTagsStr, String jobScheduledStartTime, String jobPriority, Boolean dryRun) throws CatalogException { - Map paramsMap = bodyParams.toParams(); + Map paramsMap; + if (bodyParams != null) { + paramsMap = bodyParams.toParams(); + } else { + paramsMap = new HashMap<>(); + } if (StringUtils.isNotEmpty(study)) { paramsMap.putIfAbsent(ParamConstants.STUDY_PARAM, study); }