From 1588ae2f52cb67ee44d82a5487486241f89cd99e Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Wed, 21 Aug 2024 15:09:30 -0400 Subject: [PATCH 1/6] Include immediate ancestor and descendant IDs in index --- src/hubmap_translator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/hubmap_translator.py b/src/hubmap_translator.py index 37445dd2..7d48fafb 100644 --- a/src/hubmap_translator.py +++ b/src/hubmap_translator.py @@ -1355,6 +1355,9 @@ def generate_doc(self, entity, return_type): entity['immediate_ancestors'] = immediate_ancestors entity['immediate_descendants'] = immediate_descendants + entity['immediate_ancestor_ids'] = immediate_ancestor_ids + entity['immediate_descendant_ids'] = immediate_descendant_ids + # The `sample_category` is "organ" and the `organ` code is set at the same time if entity['entity_type'] in ['Sample', 'Dataset', 'Publication']: # Add new properties From 635fcef67459d1ea3ccc2aee5e236325e01134c1 Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Thu, 22 Aug 2024 12:53:59 -0400 Subject: [PATCH 2/6] throw in the `is_spatial` logic --- src/hubmap_translator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/hubmap_translator.py b/src/hubmap_translator.py index 7d48fafb..15619762 100644 --- a/src/hubmap_translator.py +++ b/src/hubmap_translator.py @@ -1358,6 +1358,11 @@ def generate_doc(self, entity, return_type): entity['immediate_ancestor_ids'] = immediate_ancestor_ids entity['immediate_descendant_ids'] = immediate_descendant_ids + if (entity['entity_type'] == 'Sample'): + entity['is_spatial'] = entity.get('rui_location', None) is not None + if (entity['entity_type'] == 'Dataset'): + entity['is_spatial'] = any([a.get('rui_location', None) is not None for a in ancestors]) + # The `sample_category` is "organ" and the `organ` code is set at the same time if entity['entity_type'] in ['Sample', 'Dataset', 'Publication']: # Add new properties From b40271582ff54c48073f691e39de14ca9add2a08 Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Fri, 23 Aug 2024 11:04:37 -0400 Subject: [PATCH 3/6] move spatial info addition to portal transformations --- .../portal/translate.py | 30 +++++++++++++++++++ src/hubmap_translator.py | 5 ---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/translate.py b/src/hubmap_translation/addl_index_transformations/portal/translate.py index e1f67328..267981b3 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/translate.py +++ b/src/hubmap_translation/addl_index_transformations/portal/translate.py @@ -23,6 +23,7 @@ def translate(doc, organ_map): _translate_timestamp(doc) _translate_access_level(doc) _translate_external_consortium(doc) + _add_spatial_info(doc) def _map(doc, key, map): @@ -301,3 +302,32 @@ def _add_origin_samples_unique_mapped_organs(doc): if doc['entity_type'] in ['Sample', 'Dataset'] and 'origin_samples' in doc: doc['origin_samples_unique_mapped_organs'] = _get_unique_mapped_organs( doc['origin_samples']) + +def _add_spatial_info(doc): + ''' + Add a boolean field "is_spatial" to the document based on the entity type and the presence of an rui_location field. + + For samples, the is_spatial field is set to True if the rui_location field is present. + >>> doc = {'entity_type': 'Sample', 'rui_location': 'https://example.com'} + >>> _add_spatial_info(doc) + >>> doc['is_spatial'] + True + + For datasets, the is_spatial field is set to True if any ancestor has an rui_location field. + The rui_location field is also copied from the nearest ancestor with an rui_location field. + >>> doc = {'entity_type': 'Dataset', 'ancestors': [{'rui_location': 'https://example.com'}, {'rui_location': 'https://example2.com'}]} + >>> _add_spatial_info(doc) + >>> doc['is_spatial'] + True + >>> doc['rui_location'] + 'https://example2.com' + ''' + if (doc['entity_type'] == 'Sample'): + doc['is_spatial'] = doc.get('rui_location', None) is not None + if (doc['entity_type'] == 'Dataset'): + ancestors = doc.get('ancestors', []) + # Find the nearest ancestor with an rui_location - the last one in the list with an rui_location field. + nearest_rui_location_ancestor = ancestors[::-1].find(lambda x: x.get('rui_location', None) is not None) + if nearest_rui_location_ancestor is not None: + doc['is_spatial'] = nearest_rui_location_ancestor is not None + doc['rui_location'] = nearest_rui_location_ancestor['rui_location'] \ No newline at end of file diff --git a/src/hubmap_translator.py b/src/hubmap_translator.py index 15619762..7d48fafb 100644 --- a/src/hubmap_translator.py +++ b/src/hubmap_translator.py @@ -1358,11 +1358,6 @@ def generate_doc(self, entity, return_type): entity['immediate_ancestor_ids'] = immediate_ancestor_ids entity['immediate_descendant_ids'] = immediate_descendant_ids - if (entity['entity_type'] == 'Sample'): - entity['is_spatial'] = entity.get('rui_location', None) is not None - if (entity['entity_type'] == 'Dataset'): - entity['is_spatial'] = any([a.get('rui_location', None) is not None for a in ancestors]) - # The `sample_category` is "organ" and the `organ` code is set at the same time if entity['entity_type'] in ['Sample', 'Dataset', 'Publication']: # Add new properties From 6da78790a889cfe8da67d2190419fd16cb09a348 Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Fri, 23 Aug 2024 12:02:26 -0400 Subject: [PATCH 4/6] only `flake8` the translate.py code --- .../addl_index_transformations/portal/translate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/translate.py b/src/hubmap_translation/addl_index_transformations/portal/translate.py index 267981b3..9df65629 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/translate.py +++ b/src/hubmap_translation/addl_index_transformations/portal/translate.py @@ -303,6 +303,7 @@ def _add_origin_samples_unique_mapped_organs(doc): doc['origin_samples_unique_mapped_organs'] = _get_unique_mapped_organs( doc['origin_samples']) + def _add_spatial_info(doc): ''' Add a boolean field "is_spatial" to the document based on the entity type and the presence of an rui_location field. @@ -330,4 +331,4 @@ def _add_spatial_info(doc): nearest_rui_location_ancestor = ancestors[::-1].find(lambda x: x.get('rui_location', None) is not None) if nearest_rui_location_ancestor is not None: doc['is_spatial'] = nearest_rui_location_ancestor is not None - doc['rui_location'] = nearest_rui_location_ancestor['rui_location'] \ No newline at end of file + doc['rui_location'] = nearest_rui_location_ancestor['rui_location'] From b3084cfd1fa83dd6e32454716b4613b2f7a4f8ff Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Fri, 23 Aug 2024 12:34:50 -0400 Subject: [PATCH 5/6] fix approach for nearest rui location --- .../addl_index_transformations/portal/translate.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/translate.py b/src/hubmap_translation/addl_index_transformations/portal/translate.py index 9df65629..5dc93aad 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/translate.py +++ b/src/hubmap_translation/addl_index_transformations/portal/translate.py @@ -310,17 +310,14 @@ def _add_spatial_info(doc): For samples, the is_spatial field is set to True if the rui_location field is present. >>> doc = {'entity_type': 'Sample', 'rui_location': 'https://example.com'} - >>> _add_spatial_info(doc) - >>> doc['is_spatial'] + >>> _add_spatial_info(doc); doc['is_spatial'] True For datasets, the is_spatial field is set to True if any ancestor has an rui_location field. The rui_location field is also copied from the nearest ancestor with an rui_location field. >>> doc = {'entity_type': 'Dataset', 'ancestors': [{'rui_location': 'https://example.com'}, {'rui_location': 'https://example2.com'}]} - >>> _add_spatial_info(doc) - >>> doc['is_spatial'] + >>> _add_spatial_info(doc); doc['is_spatial']; doc['rui_location'] True - >>> doc['rui_location'] 'https://example2.com' ''' if (doc['entity_type'] == 'Sample'): @@ -328,7 +325,8 @@ def _add_spatial_info(doc): if (doc['entity_type'] == 'Dataset'): ancestors = doc.get('ancestors', []) # Find the nearest ancestor with an rui_location - the last one in the list with an rui_location field. - nearest_rui_location_ancestor = ancestors[::-1].find(lambda x: x.get('rui_location', None) is not None) + nearest_rui_location_ancestor = next( + (ancestor for ancestor in reversed(ancestors) if 'rui_location' in ancestor), None) if nearest_rui_location_ancestor is not None: doc['is_spatial'] = nearest_rui_location_ancestor is not None doc['rui_location'] = nearest_rui_location_ancestor['rui_location'] From 0432e8b9fe76c180f846c8f2eda9d58bb3a37aba Mon Sep 17 00:00:00 2001 From: Nikolay Akhmetov Date: Mon, 26 Aug 2024 12:14:02 -0400 Subject: [PATCH 6/6] add `immediate_ancestor_ids` and `immediate_descendant_ids` to list of properties added at index runtime --- src/hubmap_translator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hubmap_translator.py b/src/hubmap_translator.py index ef9e7ca6..dd79dd8e 100644 --- a/src/hubmap_translator.py +++ b/src/hubmap_translator.py @@ -39,6 +39,8 @@ 'datasets', 'immediate_ancestors', 'immediate_descendants' + 'immediate_ancestor_ids', + 'immediate_descendant_ids' ] # A map keyed by entity attribute names stored in Neo4j and retrieved from entity-api, with