Skip to content

Commit

Permalink
Initial commit for dictionary service
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 committed Nov 18, 2024
1 parent a0ad216 commit 34b71d5
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary;

import java.util.Map;

/**
 * A dictionary concept returned by the external dictionary service.
 *
 * @param conceptPath the concept path identifying this concept in HPDS
 * @param name        human-readable concept name
 * @param meta        additional metadata as key/value pairs; may be {@code null}
 *                    when the service supplies no metadata, so callers must
 *                    null-check before use. NOTE(review): downstream PFB export
 *                    treats the metadata values as DRS URIs — confirm that
 *                    assumption against the dictionary service API.
 */
public record Concept(String conceptPath, String name, Map<String, String> meta) {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpMethod;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestTemplate;

import java.util.List;

@Component
@ConditionalOnProperty("dictionary.host")
public class DictionaryService {

    /** Type token used to deserialize the service response as a {@code List<Concept>}. */
    public static final ParameterizedTypeReference<List<Concept>> CONCEPT_LIST_TYPE_REFERENCE = new ParameterizedTypeReference<>() {
    };
    private final String dictionaryHost;
    private final RestTemplate restTemplate;

    /**
     * @param dictionaryHostTemplate dictionary service URL template containing the
     *                               {@code ___TARGET_STACK___} placeholder
     *                               (e.g. {@code https://wildfly.___TARGET_STACK___/})
     * @param targetStack            stack identifier substituted into the template
     */
    public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTemplate, @Value("${TARGET_STACK}") String targetStack) {
        this.dictionaryHost = dictionaryHostTemplate.replace("___TARGET_STACK___", targetStack);
        this.restTemplate = new RestTemplate();
    }

    /**
     * Looks up concepts for the given concept paths via a POST to the dictionary service.
     *
     * @param conceptPaths concept paths to resolve; may be null or empty
     * @return the matching concepts; never {@code null} — an empty list is returned
     *         for an empty request or an empty response body
     */
    public List<Concept> getConcepts(List<String> conceptPaths) {
        // Avoid a pointless HTTP round trip when there is nothing to look up.
        if (conceptPaths == null || conceptPaths.isEmpty()) {
            return List.of();
        }
        List<Concept> concepts = restTemplate
            .exchange(dictionaryHost, HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE)
            .getBody();
        // getBody() may be null (e.g. an empty/204 response). Callers stream the
        // result immediately, so normalize to an empty list rather than return null.
        return concepts == null ? List.of() : concepts;
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.io;

import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.Concept;
import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.CodecFactory;
Expand All @@ -16,6 +18,7 @@
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;

public class PfbWriter implements ResultWriter {
Expand All @@ -24,6 +27,8 @@ public class PfbWriter implements ResultWriter {
public static final String DRS_URL_TABLE_PREFIX = "drs-url-";
private Logger log = LoggerFactory.getLogger(PfbWriter.class);

private final DictionaryService dictionaryService;

private final Schema metadataSchema;
private final Schema nodeSchema;

Expand All @@ -43,9 +48,10 @@ public class PfbWriter implements ResultWriter {

private static final Set<String> SINGULAR_FIELDS = Set.of("patient_id");

public PfbWriter(File tempFile, String queryId) {
public PfbWriter(File tempFile, String queryId, DictionaryService dictionaryService) {
this.file = tempFile;
this.queryId = queryId;
this.dictionaryService = dictionaryService;
this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId);
this.drsUrlTableName = formatFieldName(DRS_URL_TABLE_PREFIX + queryId);
entityFieldAssembler = SchemaBuilder.record("entity")
Expand Down Expand Up @@ -118,13 +124,23 @@ public void writeHeader(String[] data) {
}

private void writeDrsUris() {
Map<String, Concept> conceptMap = dictionaryService.getConcepts(fields).stream()
.collect(Collectors.toMap(Concept::conceptPath, Function.identity()));
GenericRecord entityRecord = new GenericData.Record(entitySchema);

for (String field : fields) {
GenericRecord drsUriData = new GenericData.Record(drsUriSchema);
drsUriData.put("concept_path", field);
// todo: lookup DRS URIs
drsUriData.put("drs_uri", List.of("https://a-drs-uri.com/"));

Concept concept = conceptMap.get(field);
List<String> drsUris = List.of();
if (concept != null) {
Map<String, String> meta = concept.meta();
if (meta != null) {
drsUris = meta.values().stream().toList();
}
}
drsUriData.put("drs_uri", drsUris);

entityRecord.put("object", drsUriData);
entityRecord.put("name", drsUrlTableName);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.io;

import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.File;
Expand All @@ -12,11 +14,15 @@
import static org.junit.jupiter.api.Assertions.*;


@ExtendWith(MockitoExtension.class)
public class PfbWriterTest {

@Mock
private DictionaryService dictionaryService;

@Test
public void writeValidPFB() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);

pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"});
List<List<String>> nullableList = new ArrayList<>();
Expand All @@ -39,21 +45,21 @@ public void writeValidPFB() {

@Test
public void formatFieldName_spacesAndBackslashes_replacedWithUnderscore() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
String formattedName = pfbWriter.formatFieldName("\\Topmed Study Accession with Subject ID\\\\");
assertEquals("_Topmed_Study_Accession_with_Subject_ID__", formattedName);
}

@Test
public void formatFieldName_startsWithDigit_prependUnderscore() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
String formattedName = pfbWriter.formatFieldName("123Topmed Study Accession with Subject ID\\\\");
assertEquals("_123Topmed_Study_Accession_with_Subject_ID__", formattedName);
}

@Test
public void formatFieldName_randomGarbage_replaceWithUnderscore() {
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService);
String formattedName = pfbWriter.formatFieldName("$$$my garbage @vro var!able nam#");
assertEquals("___my_garbage__vro_var_able_nam_", formattedName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.stream.Collectors;

import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType;
import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService;
import edu.harvard.hms.dbmi.avillach.hpds.processing.io.CsvWriter;
import edu.harvard.hms.dbmi.avillach.hpds.processing.io.PfbWriter;
import edu.harvard.hms.dbmi.avillach.hpds.processing.io.ResultWriter;
Expand Down Expand Up @@ -48,6 +49,8 @@ public class QueryService {
private final CountProcessor countProcessor;
private final MultiValueQueryProcessor multiValueQueryProcessor;

private final DictionaryService dictionaryService;

HashMap<String, AsyncResult> results = new HashMap<>();


Expand All @@ -57,6 +60,7 @@ public QueryService (AbstractProcessor abstractProcessor,
TimeseriesProcessor timeseriesProcessor,
CountProcessor countProcessor,
MultiValueQueryProcessor multiValueQueryProcessor,
@Autowired(required = false) DictionaryService dictionaryService,
@Value("${SMALL_JOB_LIMIT}") Integer smallJobLimit,
@Value("${SMALL_TASK_THREADS}") Integer smallTaskThreads,
@Value("${LARGE_TASK_THREADS}") Integer largeTaskThreads) {
Expand All @@ -65,6 +69,7 @@ public QueryService (AbstractProcessor abstractProcessor,
this.timeseriesProcessor = timeseriesProcessor;
this.countProcessor = countProcessor;
this.multiValueQueryProcessor = multiValueQueryProcessor;
this.dictionaryService = dictionaryService;

SMALL_JOB_LIMIT = smallJobLimit;
SMALL_TASK_THREADS = smallTaskThreads;
Expand Down Expand Up @@ -136,7 +141,7 @@ private AsyncResult initializeResult(Query query) throws IOException {
String queryId = UUIDv5.UUIDFromString(query.toString()).toString();
ResultWriter writer;
if (ResultType.DATAFRAME_PFB.equals(query.getExpectedResultType())) {
writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId);
writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId, dictionaryService);
} else {
writer = new CsvWriter(File.createTempFile("result-" + System.nanoTime(), ".sstmp"));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/

data-export.s3.bucket-name=pic-sure-auth-dev-data-export
data-export.s3.region=us-east-1
data-export.s3.signedUrl-expiry-minutes=60
data-export.s3.signedUrl-expiry-minutes=60

dictionary.host = https://wildfly.___TARGET_STACK___/
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/

data-export.s3.bucket-name=pic-sure-auth-prod-data-export
data-export.s3.region=us-east-1
data-export.s3.signedUrl-expiry-minutes=60
data-export.s3.signedUrl-expiry-minutes=60

dictionary.host = https://wildfly.___TARGET_STACK___/

0 comments on commit 34b71d5

Please sign in to comment.