Skip to content

Commit

Permalink
Merge pull request #154 from hubmapconsortium/1.7.1
Browse files Browse the repository at this point in the history
1.7.1
  • Loading branch information
yuanzhou authored Aug 21, 2020
2 parents 7a132c8 + 7ed0bbf commit 3eaf498
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 118 deletions.
2 changes: 1 addition & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
# The commons github branch to be used during image build (default to master if not set or null)
- COMMONS_BRANCH=${COMMONS_BRANCH:-master}
# Build the image with name and tag
image: search-api:1.7
image: search-api:1.7.1
hostname: search-api
container_name: search-api
volumes:
Expand Down
2 changes: 1 addition & 1 deletion src/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.5.2.3
1.5.2.4
18 changes: 6 additions & 12 deletions src/elasticsearch/addl_index_transformations/portal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,8 @@ def transform(doc, batch_id='unspecified'):
... "organ_donor_data": [
... {
... "data_type": "Nominal",
... "grouping_code": "365873007",
... "grouping_concept_preferred_term":
... "Gender finding",
... "preferred_term": "Masculine gender"
... "grouping_concept_preferred_term": "Sex",
... "preferred_term": "Male"
... }
... ]
... }
Expand All @@ -72,14 +70,10 @@ def transform(doc, batch_id='unspecified'):
'data_types': ['codex_cytokit', 'seqFish'],
'descendant_counts': {'entity_type': {'Sample or Dataset': 1}},
'descendants': [{'entity_type': 'Sample or Dataset'}],
'donor': {'mapped_metadata': {'gender': 'Masculine gender',
'sex': 'Masculine gender'},
'donor': {'mapped_metadata': {'sex': ['Male']},
'metadata': {'organ_donor_data': [{'data_type': 'Nominal',
'grouping_code': '365873007',
'grouping_concept_preferred_term': 'Gender '
'finding',
'preferred_term': 'Masculine '
'gender'}]}},
'grouping_concept_preferred_term': 'Sex',
'preferred_term': 'Male'}]}},
'entity_type': 'dataset',
'everything': ['1',
'1234',
Expand All @@ -97,7 +91,7 @@ def transform(doc, batch_id='unspecified'):
'mapped_data_access_level': 'Consortium',
'mapped_data_types': ['CODEX [Cytokit + SPRM] / seqFISH'],
'mapped_status': 'New',
'mapper_metadata': {'size': 1218},
'mapper_metadata': {'size': 1125},
'origin_sample': {'mapped_organ': 'Lymph Node', 'organ': 'LY01'},
'status': 'New'}
Expand Down
135 changes: 31 additions & 104 deletions src/elasticsearch/addl_index_transformations/portal/translate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
import re
from datetime import datetime
from collections import defaultdict

from yaml import safe_load as load_yaml

Expand Down Expand Up @@ -223,129 +224,55 @@ def _translate_donor_metadata(doc):
>>> doc
{'metadata': 'Not a dict!', 'mapped_metadata': {}}
>>> doc = {
... "metadata": {
... "organ_donor_data": [
... {
... "data_type": "Nominal",
... "grouping_code": "365873007",
... "grouping_concept_preferred_term":
... "Gender finding",
... "preferred_term": "Masculine gender",
... },
... {
... "data_type": "Numeric",
... "data_value": "58",
... "grouping_code": "424144002",
... "grouping_concept_preferred_term":
... "Current chronological age",
... "units": "months"
... },
... {
... "data_type": "Numeric",
... "data_value": "22",
... "grouping_code": "60621009",
... "grouping_concept_preferred_term":
... "Body mass index",
... "units": "kg/m^17"
... },
... {
... "data_type": "Nominal",
... "grouping_code": "415229000",
... "preferred_term": "African race",
... }
... ]
... }
... }
>>> _translate_donor_metadata(doc)
>>> len(doc['metadata']['organ_donor_data'])
4
>>> from pprint import pprint
>>> pprint(doc['mapped_metadata'])
{'age': 4.8,
'bmi': 22.0,
'gender': 'Masculine gender',
'race': 'African race',
'sex': 'Masculine gender'}
Multi-valued fields are supported:
>>> doc = {
... "metadata": {
... "organ_donor_data": [
... {
... "data_type": "Nominal",
... "preferred_term": "Male",
... "grouping_concept": "C1522384",
... "grouping_concept_preferred_term": "Sex",
... "grouping_code": "57312000",
... }
... ]
... "organ_donor_data": [{
... "preferred_term": "Diabetes",
... "grouping_concept_preferred_term": "Medical history"
... },
... {
... "preferred_term": "Cancer",
... "grouping_concept_preferred_term": "Medical history"
... }]
... }
... }
>>> _translate_donor_metadata(doc)
>>> pprint(doc['mapped_metadata'])
{'gender': 'Male', 'sex': 'Male'}
>>> doc['mapped_metadata']
{'medical_history': ['Diabetes', 'Cancer']}
Numeric fields are turned into floats, and units are concatenated into field name:
>>> doc = {
... "metadata": {
... "organ_donor_data": [{
... "preferred_term": "Diabetes",
... "grouping_code": "UNKNOWN",
... "grouping_concept_preferred_term": "Medical history ... or anything else"
... "data_type": "Numeric",
... "data_value": "87.6",
... "grouping_concept_preferred_term": "Weight",
... "units": "kg"
... }]
... }
... }
>>> _translate_donor_metadata(doc)
>>> pprint(doc['mapped_metadata'])
{'medical_history_or_anything_else': ['Diabetes']}
>>> doc['mapped_metadata']
{'weight_in_kg': [87.6]}
'''
_map(doc, 'metadata', _donor_metadata_map)


def _donor_metadata_map(metadata):
AGE = 'age'
BMI = 'bmi'
GENDER = 'gender'
SEX = 'sex'
RACE = 'race'
# The "grouping_codes" seem to be the most stable,
# by "grouping_concepts" or "grouping_terms" could also be used.
grouping_codes = {
'60621009': BMI,
'424144002': AGE,
'365873007': GENDER,
'57312000': SEX,
'415229000': RACE
}
mapped_metadata = {}
mapped_metadata = defaultdict(list)
if isinstance(metadata, dict) and 'organ_donor_data' in metadata:
for kv in metadata['organ_donor_data']:
if not kv['grouping_code'] in grouping_codes:
# NOTE: This branch shouldn't be used on a regular basis:
# Using a grouping_code makes it more robust if the
# grouping_concept_preferred_term changes.
# TODO: I see that some of the new fields are multi-valued.
# Perhaps make all donor metadata arrays for consistency?
normed = re.sub(r'\W+', '_', kv['grouping_concept_preferred_term']).lower()
if normed in mapped_metadata:
mapped_metadata[normed].append(kv['preferred_term'])
else:
mapped_metadata[normed] = [kv['preferred_term']]
continue
k = grouping_codes[kv['grouping_code']]
if k == AGE and kv['units'] == 'months':
v = round(float(kv['data_value']) / 12, 1)
else:
v = (
kv['preferred_term']
if kv['data_type'] == 'Nominal'
else float(kv['data_value'])
)
if k == SEX:
# TODO: When the UI is caught up, only use sex.
mapped_metadata[GENDER] = v
elif k == GENDER and SEX not in mapped_metadata:
# If we still have old donor metadata, we can move the UI forward
mapped_metadata[SEX] = v
mapped_metadata[k] = v
return mapped_metadata
term = kv['grouping_concept_preferred_term'] \
+ (f' in {kv["units"]}' if 'units' in kv and len(kv['units']) else '')
key = re.sub(r'\W+', '_', term).lower()
value = (
float(kv['data_value'])
if 'data_type' in kv and kv['data_type'] == 'Numeric'
else kv['preferred_term']
)
mapped_metadata[key].append(value)
return dict(mapped_metadata)

0 comments on commit 3eaf498

Please sign in to comment.