Skip to content

Commit

Permalink
Merge pull request #426 from hubmapconsortium/test-release
Browse files Browse the repository at this point in the history
v2.2.2 release
  • Loading branch information
yuanzhou authored Jan 10, 2022
2 parents 8c0fac1 + 82f58dc commit 3117d79
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 13 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,7 @@ __pycache__/

Pipfile
Pipfile.lock

#Eclipse project files
.project
.pydevproject
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.2.1
2.2.2
35 changes: 30 additions & 5 deletions src/elasticsearch/addl_index_transformations/portal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ def transform(doc, batch_id='unspecified'):
... 'metadata_path': 'No!',
... 'tissue_id': 'No!',
... 'donor_id': 'No!',
... 'cell_barcode_size': '123',
... 'should_be_int': '123',
... 'should_be_float': '123.456',
... 'keep_this_field': 'Yes!',
... 'is_boolean': '1'
... }
Expand Down Expand Up @@ -115,13 +118,18 @@ def transform(doc, batch_id='unspecified'):
'preferred_term': 'Male'}]}},
'entity_type': 'dataset',
'group_name': 'EXT - Outside HuBMAP',
'mapped_consortium': 'Outside HuBMAP',
'mapped_create_timestamp': '2019-12-04 19:58:29',
'mapped_data_access_level': 'Consortium',
'mapped_data_types': ['snRNA-seq [Salmon]'],
'mapped_external_group_name': 'Outside HuBMAP',
'mapped_metadata': {},
'mapped_status': 'New',
'metadata': {'metadata': {'is_boolean': 'TRUE', 'keep_this_field': 'Yes!'}},
'metadata': {'metadata': {'cell_barcode_size': '123',
'is_boolean': 'TRUE',
'keep_this_field': 'Yes!',
'should_be_float': 123.456,
'should_be_int': 123}},
'origin_sample': {'mapped_organ': 'Lymph Node', 'organ': 'LY'},
'rui_location': '{"ccf_annotations": '
'["http://purl.obolibrary.org/obo/UBERON_0001157"]}',
Expand Down Expand Up @@ -200,16 +208,23 @@ def _simple_clean(doc):
metadata = doc['metadata']['metadata']

bad_fields = [
'collectiontype', # Inserted by IEC.
'collectiontype', 'null', # Inserted by IEC.
'data_path', 'metadata_path', 'version', # Only meaningful at submission time.
'donor_id', 'tissue_id' # For internal use only.
]

# Ideally, we'd pull from https://github.com/hubmapconsortium/ingest-validation-tools/blob/main/docs/field-types.yaml
# here, or make the upstream TSV parsing schema-aware,
# instead of trying to guess, but I think the number of special cases will be relatively small.
not_really_a_number = ['cell_barcode_size', 'cell_barcode_offset']

# Explicitly convert items to list,
# so we can remove keys from the metadata dict:
for k, v in list(metadata.items()):
if k in bad_fields or k.startswith('_'):
del metadata[k]
continue

# Normalize booleans to all-caps, the Excel default.
# (There is no guarantee that boolean fields will be prefixed this way,
# but at the moment it is the case.)
Expand All @@ -218,9 +233,19 @@ def _simple_clean(doc):
metadata[k] = 'FALSE'
if v in ['1', 'true', 'True']:
metadata[k] = 'TRUE'
# Other conversions are handled by ES numeric detection.
# See: portal/config.yaml
# https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic-field-mapping.html
continue

if k not in not_really_a_number:
try:
as_number = int(v)
except ValueError:
try:
as_number = float(v)
except ValueError:
as_number = None
if as_number is not None:
metadata[k] = as_number


# TODO: Reenable this when we have time, and can make sure we don't need these fields.
#
Expand Down
19 changes: 13 additions & 6 deletions src/elasticsearch/addl_index_transformations/portal/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,19 +100,26 @@ def _access_level_map(access_level):

def _translate_external_consortium(doc):
'''
>>> doc = {}
>>> _translate_external_consortium(doc); doc
{'mapped_consortium': 'HuBMAP'}
>>> doc = {'group_name': 'Inside HuBMAP'}
>>> _translate_external_consortium(doc); doc
{'group_name': 'Inside HuBMAP'}
{'group_name': 'Inside HuBMAP', 'mapped_consortium': 'HuBMAP'}
>>> doc = {'group_name': 'EXT - Outside HuBMAP'}
>>> _translate_external_consortium(doc); doc
{'group_name': 'EXT - Outside HuBMAP', 'mapped_external_group_name': 'Outside HuBMAP'}
{'group_name': 'EXT - Outside HuBMAP', 'mapped_external_group_name': 'Outside HuBMAP', 'mapped_consortium': 'Outside HuBMAP'}
'''
group_name = doc.get('group_name')
if group_name is None:
return
if 'EXT' in group_name:
doc['mapped_external_group_name'] = group_name.replace('EXT - ', '')
if group_name is not None and 'EXT' in group_name:
mapped_consortium = group_name.replace('EXT - ', '')
doc['mapped_external_group_name'] = mapped_consortium
else:
mapped_consortium = 'HuBMAP'
doc['mapped_consortium'] = mapped_consortium


# Timestamp:
Expand Down
1 change: 0 additions & 1 deletion src/search-schema/data/definitions/enums/assay_types.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ PAS_pyramid:
bulk-RNA:
description: Bulk RNA-seq
alt-names: ['bulk RNA']
alt-names: []
primary: true
contains-pii: true
vitessce-hints: []
Expand Down
4 changes: 4 additions & 0 deletions src/search-schema/data/definitions/enums/organ_types.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ LK:
LL:
description: Lung (Left)
iri: http://purl.obolibrary.org/obo/UBERON_0002168
LN:
description: Knee (Left)
LV:
description: Liver
LY:
Expand All @@ -65,6 +67,8 @@ RK:
RL:
description: Lung (Right)
iri: http://purl.obolibrary.org/obo/UBERON_0002167
RN:
description: Knee (Right)
SI:
description: Small Intestine
SK:
Expand Down

0 comments on commit 3117d79

Please sign in to comment.