-
Notifications
You must be signed in to change notification settings - Fork 98
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'release-3.x.x' into TASK-6981
- Loading branch information
Showing
14 changed files
with
763 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
.../storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; | ||
|
||
import org.opencb.biodata.models.variant.avro.VariantAnnotation; | ||
import org.opencb.commons.datastore.core.ObjectMap; | ||
import org.opencb.commons.run.Task; | ||
|
||
import java.net.URI; | ||
import java.util.List; | ||
|
||
public interface VariantAnnotatorExtensionTask extends Task<VariantAnnotation, VariantAnnotation> { | ||
|
||
/** | ||
* Set up the annotator extension. | ||
* This method will be called before any other method. It might generate extra files or data needed for the annotation. | ||
* | ||
* @param output Output directory where the annotator extension should write the files | ||
* @return List of URIs of generated files (if any) | ||
* @throws Exception if the annotator extension set up fails | ||
*/ | ||
List<URI> setup(URI output) throws Exception; | ||
|
||
/** | ||
* Check if the annotator extension is available for the given options. | ||
* @throws IllegalArgumentException if the annotator extension is not available | ||
*/ | ||
void checkAvailable() throws IllegalArgumentException; | ||
|
||
/** | ||
* Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available. | ||
* @return true if the annotator extension is available | ||
*/ | ||
default boolean isAvailable() { | ||
try { | ||
checkAvailable(); | ||
return true; | ||
} catch (IllegalArgumentException e) { | ||
return false; | ||
} | ||
} | ||
|
||
@Override | ||
default void pre() throws Exception { | ||
Task.super.pre(); | ||
checkAvailable(); | ||
} | ||
|
||
/** | ||
* Get the options for the annotator extension. | ||
* @return Options for the annotator extension | ||
*/ | ||
ObjectMap getOptions(); | ||
|
||
/** | ||
* Get the metadata for the annotator extension. | ||
* @return Metadata for the annotator extension | ||
*/ | ||
ObjectMap getMetadata(); | ||
|
||
} |
54 changes: 54 additions & 0 deletions
54
...rage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; | ||
|
||
import org.opencb.commons.datastore.core.ObjectMap; | ||
import org.opencb.opencga.storage.core.variant.VariantStorageOptions; | ||
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; | ||
|
||
import java.lang.reflect.InvocationTargetException; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
|
||
public class VariantAnnotatorExtensionsFactory { | ||
|
||
public List<VariantAnnotatorExtensionTask> getVariantAnnotatorExtensions(ObjectMap options) { | ||
|
||
List<VariantAnnotatorExtensionTask> tasks = new LinkedList<>(); | ||
for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { | ||
VariantAnnotatorExtensionTask task = null; | ||
switch (extensionId) { | ||
case CosmicVariantAnnotatorExtensionTask.ID: | ||
task = new CosmicVariantAnnotatorExtensionTask(options); | ||
break; | ||
default: | ||
String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); | ||
if (extensionClass != null) { | ||
task = getVariantAnnotatorExtension(extensionClass, options); | ||
} else { | ||
throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'"); | ||
} | ||
} | ||
|
||
if (task == null) { | ||
throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'"); | ||
} | ||
|
||
tasks.add(task); | ||
} | ||
return tasks; | ||
} | ||
|
||
private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) { | ||
try { | ||
Class<?> clazz = Class.forName(className); | ||
return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); | ||
} catch (ClassNotFoundException | ||
| NoSuchMethodException | ||
| InstantiationException | ||
| IllegalAccessException | ||
| InvocationTargetException e) { | ||
throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); | ||
} | ||
} | ||
|
||
|
||
} |
86 changes: 86 additions & 0 deletions
86
...age/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; | ||
|
||
import com.fasterxml.jackson.core.JsonProcessingException; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import org.apache.commons.collections4.CollectionUtils; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; | ||
import org.opencb.biodata.models.sequence.SequenceLocation; | ||
import org.opencb.biodata.models.variant.Variant; | ||
import org.opencb.biodata.models.variant.avro.EvidenceEntry; | ||
import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; | ||
import org.opencb.biodata.tools.variant.VariantNormalizer; | ||
import org.opencb.opencga.core.common.JacksonUtils; | ||
import org.rocksdb.RocksDB; | ||
import org.rocksdb.RocksDBException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.stream.Collectors; | ||
|
||
public class CosmicExtensionTaskCallback implements CosmicParserCallback { | ||
|
||
private RocksDB rdb; | ||
private VariantNormalizer variantNormalizer; | ||
private ObjectMapper defaultObjectMapper; | ||
|
||
private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class); | ||
|
||
private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|(<CNV[0-9]+>)|(<DUP>)|(<DEL>)|(<INS>)|(<INV>)"; | ||
|
||
public CosmicExtensionTaskCallback(RocksDB rdb) { | ||
this.rdb = rdb; | ||
this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig() | ||
.setReuseVariants(true) | ||
.setNormalizeAlleles(true) | ||
.setDecomposeMNVs(false)); | ||
this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); | ||
} | ||
|
||
@Override | ||
public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List<EvidenceEntry> evidenceEntries) { | ||
// Add evidence entries in the RocksDB | ||
// More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed | ||
List<String> normalisedVariantStringList; | ||
try { | ||
normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(), | ||
sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate()); | ||
if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) { | ||
for (String normalisedVariantString : normalisedVariantStringList) { | ||
rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries)); | ||
} | ||
return true; | ||
} | ||
return false; | ||
} catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) { | ||
logger.warn(StringUtils.join(e.getStackTrace(), "\n")); | ||
return false; | ||
} | ||
} | ||
|
||
protected List<String> getNormalisedVariantString(String chromosome, int start, String reference, String alternate) | ||
throws NonStandardCompliantSampleField { | ||
Variant variant = new Variant(chromosome, start, reference, alternate); | ||
return getNormalisedVariantString(variant); | ||
} | ||
|
||
protected List<String> getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField { | ||
// Checks no weird characters are part of the reference & alternate alleles | ||
if (isValid(variant)) { | ||
List<Variant> normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true); | ||
return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList()); | ||
} else { | ||
logger.warn("Variant {} is not valid: skipping it!", variant); | ||
} | ||
|
||
return Collections.emptyList(); | ||
} | ||
|
||
protected boolean isValid(Variant variant) { | ||
return (variant.getReference().matches(VARIANT_STRING_PATTERN) | ||
&& (variant.getAlternate().matches(VARIANT_STRING_PATTERN) | ||
&& !variant.getAlternate().equals(variant.getReference()))); | ||
} | ||
} |
Oops, something went wrong.