diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java
new file mode 100644
index 00000000..59fe903d
--- /dev/null
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java
@@ -0,0 +1,13 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary;
+
+import java.util.Map;
+
+/**
+ * One concept as returned by {@link DictionaryService#getConcepts}.
+ *
+ * @param conceptPath concept path of the entry; PfbWriter keys its lookup on this value
+ * @param name        name of the concept
+ * @param meta        metadata entries of the concept; may be {@code null} (PfbWriter checks for it)
+ */
+public record Concept(String conceptPath, String name, Map<String, String> meta) {
+}
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java
new file mode 100644
index 00000000..585c3deb
--- /dev/null
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java
@@ -0,0 +1,48 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary;
+
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.core.ParameterizedTypeReference;
+import org.springframework.http.HttpEntity;
+import org.springframework.http.HttpMethod;
+import org.springframework.stereotype.Component;
+import org.springframework.web.client.RestTemplate;
+
+import java.util.List;
+
+/**
+ * Client for the dictionary service located at {@code dictionary.host}.
+ * The bean is conditional on that property, so injection points must tolerate its absence.
+ */
+@Component
+@ConditionalOnProperty("dictionary.host")
+public class DictionaryService {
+
+    public static final ParameterizedTypeReference<List<Concept>> CONCEPT_LIST_TYPE_REFERENCE = new ParameterizedTypeReference<>() {
+    };
+    private final String dictionaryHost;
+    private final RestTemplate restTemplate;
+
+    /**
+     * @param dictionaryHostTemplate dictionary URL in which {@code ___TARGET_STACK___} is a placeholder
+     * @param targetStack            value substituted for the placeholder
+     */
+    public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTemplate, @Value("${TARGET_STACK}") String targetStack) {
+        this.dictionaryHost = dictionaryHostTemplate.replace("___TARGET_STACK___", targetStack);
+        this.restTemplate = new RestTemplate();
+    }
+
+    /**
+     * POSTs the given concept paths to the dictionary host and returns the concepts from the response.
+     *
+     * @param conceptPaths concept paths to look up
+     * @return the concepts in the response body; an empty list when the response has no body, never {@code null}
+     */
+    public List<Concept> getConcepts(List<String> conceptPaths) {
+        List<Concept> concepts = restTemplate
+            .exchange(dictionaryHost, HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE)
+            .getBody();
+        // getBody() is null for an empty response; callers stream the result, so hand back an empty list instead.
+        return concepts == null ? List.of() : concepts;
+    }
+}
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java
index eb58da8d..f0f4056a 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java
@@ -1,5 +1,7 @@
 package edu.harvard.hms.dbmi.avillach.hpds.processing.io;
 
+import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.Concept;
+import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
 import org.apache.avro.Schema;
 import org.apache.avro.SchemaBuilder;
 import org.apache.avro.file.CodecFactory;
@@ -16,6 +18,7 @@
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.util.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
 public class PfbWriter implements ResultWriter {
@@ -24,6 +27,8 @@ public class PfbWriter implements ResultWriter {
     public static final String DRS_URL_TABLE_PREFIX = "drs-url-";
     private Logger log = LoggerFactory.getLogger(PfbWriter.class);
 
+    private final DictionaryService dictionaryService;
+
     private final Schema metadataSchema;
 
     private final Schema nodeSchema;
@@ -43,9 +48,10 @@ public class PfbWriter implements ResultWriter {
 
     private static final Set<String> SINGULAR_FIELDS = Set.of("patient_id");
 
-    public PfbWriter(File tempFile, String queryId) {
+    public PfbWriter(File tempFile, String queryId, DictionaryService dictionaryService) {
         this.file = tempFile;
         this.queryId = queryId;
+        this.dictionaryService = dictionaryService;
         this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId);
         this.drsUrlTableName = formatFieldName(DRS_URL_TABLE_PREFIX + queryId);
         entityFieldAssembler = SchemaBuilder.record("entity")
@@ -118,13 +124,29 @@ public void writeHeader(String[] data) {
     }
 
     private void writeDrsUris() {
+        // dictionaryService is null when no dictionary is configured: QueryService injects it with
+        // required = false, so fall back to an empty lookup instead of throwing a NullPointerException.
+        Map<String, Concept> conceptMap = Collections.emptyMap();
+        if (dictionaryService != null) {
+            conceptMap = dictionaryService.getConcepts(fields).stream()
+                // keep the first entry if a concept path is returned twice; two-argument toMap would throw
+                .collect(Collectors.toMap(Concept::conceptPath, Function.identity(), (first, duplicate) -> first));
+        }
         GenericRecord entityRecord = new GenericData.Record(entitySchema);
 
         for (String field : fields) {
             GenericRecord drsUriData = new GenericData.Record(drsUriSchema);
             drsUriData.put("concept_path", field);
-            // todo: lookup DRS URIs
-            drsUriData.put("drs_uri", List.of("https://a-drs-uri.com/"));
+
+            Concept concept = conceptMap.get(field);
+            List<String> drsUris = List.of();
+            if (concept != null) {
+                Map<String, String> meta = concept.meta();
+                if (meta != null) {
+                    drsUris = meta.values().stream().toList();
+                }
+            }
+            drsUriData.put("drs_uri", drsUris);
 
             entityRecord.put("object", drsUriData);
             entityRecord.put("name", drsUrlTableName);
diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java
index d1819741..039f93bb 100644
--- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java
+++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java
@@ -1,7 +1,9 @@
 package edu.harvard.hms.dbmi.avillach.hpds.processing.io;
 
+import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
 import org.mockito.junit.jupiter.MockitoExtension;
 
 import java.io.File;
@@ -12,11 +14,15 @@
 
 import static org.junit.jupiter.api.Assertions.*;
 
+@ExtendWith(MockitoExtension.class)
 public class PfbWriterTest {
 
+    @Mock
+    private DictionaryService dictionaryService;
+
     @Test
     public void writeValidPFB() {
-        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
+        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
         pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"});
 
         List<List<String>> nullableList = new ArrayList<>();
@@ -39,21 +45,21 @@ public void writeValidPFB() {
 
     @Test
     public void formatFieldName_spacesAndBackslashes_replacedWithUnderscore() {
-        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
+        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
         String formattedName = pfbWriter.formatFieldName("\\Topmed Study Accession with Subject ID\\\\");
         assertEquals("_Topmed_Study_Accession_with_Subject_ID__", formattedName);
     }
 
     @Test
     public void formatFieldName_startsWithDigit_prependUnderscore() {
-        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
+        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
         String formattedName = pfbWriter.formatFieldName("123Topmed Study Accession with Subject ID\\\\");
         assertEquals("_123Topmed_Study_Accession_with_Subject_ID__", formattedName);
     }
 
     @Test
     public void formatFieldName_randomGarbage_replaceWithUnderscore() {
-        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
+        PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
         String formattedName = pfbWriter.formatFieldName("$$$my garbage @vro var!able nam#");
         assertEquals("___my_garbage__vro_var_able_nam_", formattedName);
     }
diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
index a00a8ad0..31952b49 100644
--- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
+++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
@@ -9,6 +9,7 @@
 import java.util.stream.Collectors;
 
 import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType;
+import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
 import edu.harvard.hms.dbmi.avillach.hpds.processing.io.CsvWriter;
 import edu.harvard.hms.dbmi.avillach.hpds.processing.io.PfbWriter;
 import edu.harvard.hms.dbmi.avillach.hpds.processing.io.ResultWriter;
@@ -48,6 +49,8 @@ public class QueryService {
     private final CountProcessor countProcessor;
     private final MultiValueQueryProcessor multiValueQueryProcessor;
 
+    private final DictionaryService dictionaryService;
+
     HashMap results = new HashMap<>();
 
 
@@ -57,6 +60,7 @@ public QueryService (AbstractProcessor abstractProcessor,
             TimeseriesProcessor timeseriesProcessor,
             CountProcessor countProcessor,
             MultiValueQueryProcessor multiValueQueryProcessor,
+            @Autowired(required = false) DictionaryService dictionaryService,
             @Value("${SMALL_JOB_LIMIT}") Integer smallJobLimit,
             @Value("${SMALL_TASK_THREADS}") Integer smallTaskThreads,
             @Value("${LARGE_TASK_THREADS}") Integer largeTaskThreads) {
@@ -65,6 +69,7 @@ public QueryService (AbstractProcessor abstractProcessor,
         this.timeseriesProcessor = timeseriesProcessor;
         this.countProcessor = countProcessor;
         this.multiValueQueryProcessor = multiValueQueryProcessor;
+        this.dictionaryService = dictionaryService;
 
         SMALL_JOB_LIMIT = smallJobLimit;
         SMALL_TASK_THREADS = smallTaskThreads;
@@ -136,7 +141,7 @@ private AsyncResult initializeResult(Query query) throws IOException {
         String queryId = UUIDv5.UUIDFromString(query.toString()).toString();
         ResultWriter writer;
         if (ResultType.DATAFRAME_PFB.equals(query.getExpectedResultType())) {
-            writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId);
+            writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId, dictionaryService);
         } else {
             writer = new CsvWriter(File.createTempFile("result-" + System.nanoTime(), ".sstmp"));
         }
diff --git a/service/src/main/resources/application-bdc-auth-dev.properties b/service/src/main/resources/application-bdc-auth-dev.properties
index 7b9dce89..cce65412 100644
--- a/service/src/main/resources/application-bdc-auth-dev.properties
+++ b/service/src/main/resources/application-bdc-auth-dev.properties
@@ -7,4 +7,6 @@ HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/
 
 data-export.s3.bucket-name=pic-sure-auth-dev-data-export
 data-export.s3.region=us-east-1
-data-export.s3.signedUrl-expiry-minutes=60
\ No newline at end of file
+data-export.s3.signedUrl-expiry-minutes=60
+
+dictionary.host = https://wildfly.___TARGET_STACK___/
\ No newline at end of file
diff --git a/service/src/main/resources/application-bdc-auth-prod.properties b/service/src/main/resources/application-bdc-auth-prod.properties
index a63bc6e3..625b0c72 100644
--- a/service/src/main/resources/application-bdc-auth-prod.properties
+++ b/service/src/main/resources/application-bdc-auth-prod.properties
@@ -7,4 +7,6 @@ HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/
 
 data-export.s3.bucket-name=pic-sure-auth-prod-data-export
 data-export.s3.region=us-east-1
-data-export.s3.signedUrl-expiry-minutes=60
\ No newline at end of file
+data-export.s3.signedUrl-expiry-minutes=60
+
+dictionary.host = https://wildfly.___TARGET_STACK___/
\ No newline at end of file