Skip to content

Commit

Permalink
storage: Ensure SampleIndexConfiguration always exists. Add migration…
Browse files Browse the repository at this point in the history
…. #TASK-6765
  • Loading branch information
j-coll committed Sep 11, 2024
1 parent 89214a8 commit a4b75fa
Show file tree
Hide file tree
Showing 33 changed files with 258 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -541,9 +541,12 @@ public OpenCGAResult<Job> configureSampleIndex(String studyStr, SampleIndexConfi
boolean skipRebuild, String token)
throws CatalogException, StorageEngineException {
return secureOperation("configure", studyStr, new ObjectMap(), token, engine -> {
String version = engine.getCellBaseUtils().getCellBaseClient().getClientConfiguration().getVersion();
sampleIndexConfiguration.validate(version);
String cellbaseVersion = engine.getCellBaseUtils().getVersionFromServer();
sampleIndexConfiguration.validate(cellbaseVersion);
String studyFqn = getStudyFqn(studyStr, token);
if (!engine.getMetadataManager().studyExists(studyFqn)) {
engine.getMetadataManager().createStudy(studyFqn, cellbaseVersion);
}
engine.getMetadataManager().addSampleIndexConfiguration(studyFqn, sampleIndexConfiguration, true);

catalogManager.getStudyManager()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ private RestResponse<Job> runExport() throws Exception {
putNestedIfNotEmpty(beanParams, "proteinKeyword", commandOptions.proteinKeyword, true);
putNestedIfNotEmpty(beanParams, "drug", commandOptions.drug, true);
putNestedIfNotEmpty(beanParams, "customAnnotation", commandOptions.customAnnotation, true);
putNestedIfNotEmpty(beanParams, "source", commandOptions.source, true);
putNestedIfNotEmpty(beanParams, "unknownGenotype", commandOptions.unknownGenotype, true);
putNestedIfNotNull(beanParams, "sampleMetadata", commandOptions.sampleMetadata, true);
putNestedIfNotNull(beanParams, "sort", commandOptions.sort, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1955,7 +1955,7 @@ public class QueryVariantCommandOptions {
@Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0)
public boolean panelIntersection = false;

@Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.", required = false, arity = 1)
@Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.", required = false, arity = 1)
public String source;

@Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,9 @@ public class RunExportCommandOptions {
@Parameter(names = {"--custom-annotation"}, description = "Custom annotation: {key}[<|>|<=|>=]{number} or {key}[~=|=]{text}", required = false, arity = 1)
public String customAnnotation;

@Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.", required = false, arity = 1)
public String source;

@Parameter(names = {"--unknown-genotype"}, description = "Returned genotype for unknown genotypes. Common values: [0/0, 0|0, ./.]", required = false, arity = 1)
public String unknownGenotype;

Expand Down Expand Up @@ -1959,7 +1962,7 @@ public class QueryCommandOptions {
@Parameter(names = {"--panel-intersection"}, description = "Intersect panel genes and regions with given genes and regions from que input query. This will prevent returning variants from regions out of the panel.", required = false, help = true, arity = 0)
public boolean panelIntersection = false;

@Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.", required = false, arity = 1)
@Parameter(names = {"--source"}, description = "Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.", required = false, arity = 1)
public String source;

@Parameter(names = {"--trait"}, description = "List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...", required = false, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ protected final VariantStorageEngine getVariantStorageEngineByProject(String pro
* @return List of projects
* @throws Exception on error
*/
protected final List<String> getVariantStorageProjects(String organizationId) throws Exception {
protected final List<String> getVariantStorageProjects() throws Exception {
Set<String> projects = new LinkedHashSet<>();

for (String studyFqn : getVariantStorageStudies(organizationId)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public class AddAllelesColumnToPhoenix extends StorageMigrationTool {

@Override
protected void run() throws Exception {
for (String project : getVariantStorageProjects(organizationId)) {
for (String project : getVariantStorageProjects()) {
VariantStorageEngine engine = getVariantStorageEngineByProject(project);
if (engine.getStorageEngineId().equals("hadoop")) {
logger.info("Adding missing columns (if any) for project " + project);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public class DetectIllegalConcurrentFileLoadingsMigration extends StorageMigrati

@Override
protected void run() throws Exception {
for (String project : getVariantStorageProjects(organizationId)) {
for (String project : getVariantStorageProjects()) {
VariantStorageEngine engine = getVariantStorageEngineByProject(project);
if (!engine.getStorageEngineId().equals("hadoop")) {
continue;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package org.opencb.opencga.app.migrations.v3.v3_4_0.storage;

import org.opencb.opencga.app.migrations.StorageMigrationTool;
import org.opencb.opencga.catalog.migration.Migration;
import org.opencb.opencga.core.config.storage.SampleIndexConfiguration;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.variant.VariantStorageEngine;

import java.time.Instant;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Storage migration that guarantees every study registered in the variant storage metadata
 * has at least one {@link StudyMetadata.SampleIndexConfigurationVersioned} entry.
 *
 * <p>Studies whose list of sample index configurations is {@code null} or empty receive a single
 * {@code ACTIVE} entry built by {@link #preFileDataConfiguration()}, and the modified
 * {@link StudyMetadata} is written back through the metadata manager. Studies that already have
 * at least one configuration are left untouched, so the migration can be re-run safely.
 */
@Migration(id = "ensure_sample_index_configuration_is_defined",
        description = "Ensure that the SampleIndexConfiguration object is correctly defined. #TASK-6765", version = "3.4.0",
        language = Migration.MigrationLanguage.JAVA,
        domain = Migration.MigrationDomain.STORAGE,
        patch = 1,
        date = 20240910)
public class EnsureSampleIndexConfigurationIsAlwaysDefined extends StorageMigrationTool {

    /**
     * Visits every variant storage project and, for each one whose storage metadata exists,
     * makes sure each of its studies has a sample index configuration.
     *
     * @throws Exception on any error while reading or updating the storage metadata
     */
    @Override
    protected void run() throws Exception {
        for (String variantStorageProject : getVariantStorageProjects()) {
            VariantStorageEngine engine = getVariantStorageEngineByProject(variantStorageProject);
            if (!engine.getMetadataManager().exists()) {
                // Nothing has been stored for this project yet, so there is no study metadata to fix.
                continue;
            }
            for (Integer studyId : engine.getMetadataManager().getStudyIds()) {
                ensureSampleIndexConfiguration(engine, studyId);
            }
        }
    }

    /**
     * Adds and persists the default sample index configuration for one study, if it has none.
     *
     * @param engine  storage engine owning the study metadata
     * @param studyId internal id of the study to check
     * @throws Exception on any error while reading or updating the study metadata
     */
    private void ensureSampleIndexConfiguration(VariantStorageEngine engine, Integer studyId) throws Exception {
        StudyMetadata studyMetadata = engine.getMetadataManager().getStudyMetadata(studyId);
        List<StudyMetadata.SampleIndexConfigurationVersioned> configurations = studyMetadata.getSampleIndexConfigurations();
        if (configurations != null && !configurations.isEmpty()) {
            return;
        }

        logger.info("Creating default SampleIndexConfiguration for study '" + studyMetadata.getName() + "' (" + studyId + ")");
        configurations = new ArrayList<>(1);
        configurations.add(new StudyMetadata.SampleIndexConfigurationVersioned(
                preFileDataConfiguration(),
                StudyMetadata.DEFAULT_SAMPLE_INDEX_VERSION,
                Date.from(Instant.now()), StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE));
        studyMetadata.setSampleIndexConfigurations(configurations);

        // The setter only changes the in-memory object; without writing it back the migration
        // would log the creation but leave the stored study metadata unchanged.
        // NOTE(review): confirm unsecureUpdateStudyMetadata is the preferred write path here
        // (vs. the locking updateStudyMetadata variant) for a migration run.
        engine.getMetadataManager().unsecureUpdateStudyMetadata(studyMetadata);
    }

    /**
     * Builds the configuration assigned to studies that had none: the default configuration
     * obtained from {@code SampleIndexConfiguration.defaultConfiguration(false)} with the
     * "original call" and "secondary alternates" file data fields switched off.
     *
     * @return a new {@link SampleIndexConfiguration} instance
     */
    public static SampleIndexConfiguration preFileDataConfiguration() {
        // If missing, it was assuming cellbase v5
        SampleIndexConfiguration sampleIndexConfiguration = SampleIndexConfiguration.defaultConfiguration(false);
        sampleIndexConfiguration.getFileDataConfiguration().setIncludeOriginalCall(false);
        sampleIndexConfiguration.getFileDataConfiguration().setIncludeSecondaryAlternates(false);
        return sampleIndexConfiguration;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.datastore.core.result.Error;
import org.opencb.opencga.catalog.auth.authorization.AuthorizationManager;
import org.opencb.opencga.catalog.db.DBAdaptorFactory;
import org.opencb.opencga.catalog.db.api.AuditDBAdaptor;
Expand Down Expand Up @@ -183,6 +184,18 @@ public void audit(String organizationId, String userId, Enums.Action action, Enu
audit(organizationId, userId, action, resource, resourceId, resourceUuid, studyId, studyUuid, params, status, new ObjectMap());
}

/**
 * Records a failed operation in the audit log with an {@code ERROR} status.
 *
 * <p>When the failure is a {@link CatalogException} its own error object is reused; for any
 * other exception an error with code {@code 0}, an empty name and the exception message is built.
 * The record is then delegated to the full {@code audit(...)} overload with empty attributes.
 *
 * @param organizationId organization the audited action belongs to
 * @param userId         user that triggered the action
 * @param action         audited action
 * @param resource       type of resource the action was applied to
 * @param resourceId     id of the affected resource
 * @param resourceUuid   uuid of the affected resource
 * @param studyId        id of the study involved
 * @param studyUuid      uuid of the study involved
 * @param params         parameters the action was invoked with
 * @param e              exception that made the action fail
 */
public void auditError(String organizationId, String userId, Enums.Action action, Enums.Resource resource, String resourceId,
        String resourceUuid, String studyId, String studyUuid, ObjectMap params, Exception e) {
    Error failure = e instanceof CatalogException
            ? ((CatalogException) e).getError()
            : new Error(0, "", e.getMessage());
    audit(organizationId, userId, action, resource, resourceId, resourceUuid, studyId, studyUuid, params,
            new AuditRecord.Status(AuditRecord.Status.Result.ERROR, failure), new ObjectMap());
}

public void audit(String organizationId, String userId, Enums.Action action, Enums.Resource resource, String resourceId,
String resourceUuid, String studyId, String studyUuid, ObjectMap params, AuditRecord.Status status,
ObjectMap attributes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.opencb.opencga.catalog.utils.ParamUtils;
import org.opencb.opencga.catalog.utils.UuidUtils;
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.cellbase.CellBaseValidator;
import org.opencb.opencga.core.common.JacksonUtils;
import org.opencb.opencga.core.common.TimeUtils;
import org.opencb.opencga.core.config.Configuration;
Expand Down Expand Up @@ -406,7 +407,6 @@ public OpenCGAResult<Study> create(String projectStr, Study study, QueryOptions
String organizationId = catalogFqn.getOrganizationId();
String userId = tokenPayload.getUserId(organizationId);
Project project = catalogManager.getProjectManager().resolveId(catalogFqn, null, tokenPayload).first();

ObjectMap auditParams = new ObjectMap()
.append("projectId", projectStr)
.append("study", study)
Expand All @@ -417,7 +417,16 @@ public OpenCGAResult<Study> create(String projectStr, Study study, QueryOptions
options = ParamUtils.defaultObject(options, QueryOptions::new);

authorizationManager.checkIsAtLeastOrganizationOwnerOrAdmin(organizationId, userId);

String cellbaseVersion;
if (project.getCellbase() == null || StringUtils.isEmpty(project.getCellbase().getUrl())) {
CellBaseValidator cellBaseValidator = new CellBaseValidator(
project.getCellbase(),
project.getOrganism().getScientificName(),
project.getOrganism().getAssembly());
cellbaseVersion = cellBaseValidator.getVersionFromServer();
} else {
cellbaseVersion = null;
}
long projectUid = project.getUid();

// Initialise fields
Expand All @@ -427,7 +436,7 @@ public OpenCGAResult<Study> create(String projectStr, Study study, QueryOptions
study.setType(ParamUtils.defaultObject(study.getType(), StudyType::init));
study.setSources(ParamUtils.defaultObject(study.getSources(), Collections::emptyList));
study.setDescription(ParamUtils.defaultString(study.getDescription(), ""));
study.setInternal(StudyInternal.init());
study.setInternal(StudyInternal.init(cellbaseVersion));
study.setStatus(ParamUtils.defaultObject(study.getStatus(), Status::new));
study.setCreationDate(ParamUtils.checkDateOrGetCurrentDate(study.getCreationDate(),
StudyDBAdaptor.QueryParams.CREATION_DATE.key()));
Expand Down Expand Up @@ -496,10 +505,14 @@ public OpenCGAResult<Study> create(String projectStr, Study study, QueryOptions
result.setResults(Arrays.asList(study));
}
return result;
} catch (CatalogException e) {
auditManager.auditCreate(organizationId, userId, Enums.Resource.STUDY, study.getId(), "", study.getId(), "", auditParams,
new AuditRecord.Status(AuditRecord.Status.Result.ERROR, e.getError()));
throw e;
} catch (Exception e) {
auditManager.auditError(organizationId, userId, Enums.Action.CREATE, Enums.Resource.STUDY, study.getId(),
"", study.getId(), "", auditParams, e);
if (e instanceof CatalogException) {
throw (CatalogException) e;
} else {
throw new CatalogException("Error creating study '" + study.getId() + "'", e);
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion opencga-client/src/main/R/R/Clinical-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ setMethod("clinicalClient", "OpencgaR", function(OpencgaR, annotationSet, clinic
#' @param panelRoleInCancer Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ].
#' @param panelFeatureType Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ].
#' @param panelIntersection Intersect panel genes and regions with given genes and regions from the input query. This will prevent returning variants from regions out of the panel.
#' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.
#' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.
#' @param trait List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...
queryVariant=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL,
subcategory="clinical/variant", subcategoryId=NULL, action="query", params=params, httpMethod="GET",
Expand Down
2 changes: 1 addition & 1 deletion opencga-client/src/main/R/R/Variant-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ setMethod("variantClient", "OpencgaR", function(OpencgaR, endpointName, params=N
#' @param panelRoleInCancer Filter genes from specific panels that match certain role in cancer. Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ].
#' @param panelFeatureType Filter elements from specific panels by type. Accepted values : [ gene, region, str, variant ].
#' @param panelIntersection Intersect panel genes and regions with given genes and regions from the input query. This will prevent returning variants from regions out of the panel.
#' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default), 'secondary_annotation_index' and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. Some data might be missing or be partial.
#' @param source Select the variant data source from where to fetch the data. Accepted values are 'variant_index' (default) and 'secondary_sample_index'. When selecting a secondary_index, the data will be retrieved exclusively from that secondary index, and the 'include/exclude' parameters will be ignored. If the given query can not be fully resolved using the secondary index, an exception will be raised. As the returned variants will only contain data from the secondary_index, some data might be missing or be partial.
#' @param trait List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,...
query=fetchOpenCGA(object=OpencgaR, category="analysis", categoryId=NULL, subcategory="variant",
subcategoryId=NULL, action="query", params=params, httpMethod="GET", as.queryParam=NULL, ...),
Expand Down
Loading

0 comments on commit a4b75fa

Please sign in to comment.