From 99fe6e8fce3a9ef03fc4f7a477f16d3f00ad9129 Mon Sep 17 00:00:00 2001 From: John Conroy Date: Mon, 4 Nov 2024 11:14:57 -0500 Subject: [PATCH 1/4] Add soft assaytype and get pipeline from api --- .../addl_index_transformations/portal/add_assay_details.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py index e0b3b76f..e9e3fadf 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py @@ -137,8 +137,13 @@ def add_assay_details(doc, transformation_resources): doc['raw_dataset_type'] = re.sub( "\\[(.*?)\\]", '', doc.get('dataset_type', '')).rstrip() - if pipeline := re.search("(?<=\\[)[^][]*(?=])", doc.get('dataset_type', '')): + if pipeline := assay_details.get('pipeline-shorthand'): + doc['pipeline'] = pipeline + elif pipeline := re.search("(?<=\\[)[^][]*(?=])", doc.get('dataset_type', '')): doc['pipeline'] = pipeline.group() + + if soft_assaytype := assay_details.get('assaytype'): + doc['soft_assaytype'] = soft_assaytype # Preserve the previous shape of mapped_data_types. doc['assay_display_name'] = [assay_details.get('description')] # Remove once the portal-ui has transitioned to use assay_display_name. From 0ccb3f6b6c70cc77f7fa5d751bd50094fcaeb586 Mon Sep 17 00:00:00 2001 From: John Conroy Date: Mon, 4 Nov 2024 11:15:03 -0500 Subject: [PATCH 2/4] Update tests --- .../portal/tests/test_assay_details.py | 6 +++++- .../portal/tests/test_transform.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py index 3369509e..d35524e3 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py @@ -64,7 +64,8 @@ def test_raw_dataset_type(mocker): 'entity_type': 'Dataset', 'assay_modality': 'single', 'creation_action': 'Create Dataset Activity', - 'processing': 'raw' + 'processing': 'raw', + 'soft_assaytype': 'sciRNAseq' } add_assay_details(input_raw_doc, transformation_resources) assert input_raw_doc == expected_raw_output_doc @@ -74,6 +75,7 @@ def mock_processed_soft_assay(uuid=None, headers=None): return mock_response({ "assaytype": "salmon_rnaseq_sciseq", "contains-pii": True, + "pipeline-shorthand": "Salmon", "description": "sciRNA-seq [Salmon]", "primary": False, "vitessce-hints": [ @@ -106,6 +108,7 @@ def test_processed_dataset_type(mocker): 'processing': 'processed', 'processing_type': 'hubmap', 'uuid': '22684b9011fc5aea5cb3f89670a461e8', + 'soft_assaytype': 'salmon_rnaseq_sciseq', 'vitessce-hints': [ "is_sc", "rna" @@ -233,6 +236,7 @@ def test_transform_image_pyramid(mocker): 'uuid': '69c70762689b20308bb049ac49653342', 'vitessce-hints': [], 'visualization': True, + "soft_assaytype": "PAS", 'entity_type': 'Dataset', } diff --git a/src/hubmap_translation/addl_index_transformations/portal/tests/test_transform.py b/src/hubmap_translation/addl_index_transformations/portal/tests/test_transform.py index bcffd5af..77ecdf12 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/tests/test_transform.py +++ b/src/hubmap_translation/addl_index_transformations/portal/tests/test_transform.py @@ -115,6 +115,7 @@ '["http://purl.obolibrary.org/obo/UBERON_0001157"]}', 'status': 'New', 'uuid': '69c70762689b20308bb049ac49653342', + 'soft_assaytype': 'salmon_rnaseq_10x', 'vitessce-hints': ['is_sc', 'rna'], 'visualization': True, } @@ -137,6 +138,7 @@ def raise_for_status(self): def mock_soft_assay(uuid=None, headers=None): return mock_response({'assaytype': 'salmon_rnaseq_10x', 'contains-pii': False, + 'pipeline-shorthand': 'Salmon', 'description': 'scRNA-seq (10x Genomics) [Salmon]', 'primary': False, 'vitessce-hints': ['is_sc', 'rna']}) From 8861bbade1c55722ea68067285a71c66e47913b9 Mon Sep 17 00:00:00 2001 From: John Conroy Date: Mon, 4 Nov 2024 13:22:54 -0500 Subject: [PATCH 3/4] Update tests for image pyramids --- .../portal/add_assay_details.py | 22 +++++++--- .../portal/tests/test_assay_details.py | 40 ++++++++++++++++++- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py index e9e3fadf..3b54d748 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py @@ -130,6 +130,20 @@ def _get_descendants(doc, transformation_resources): raise +def _add_pipeline(doc, assay_details): + if pipeline := assay_details.get('pipeline-shorthand'): + doc['pipeline'] = pipeline + # pipeline-shorthand is not returned for EPICs. + elif doc.get('processing_type') == 'external': + doc[pipeline] = assay_details.get('description') + # pipeline-shorthand is not returned for Image Pyramids. + elif set(['pyramid', 'is_image']).issubset(set(assay_details.get('vitessce-hints'))): + doc['pipeline'] = 'Image Pyramid' + # Fallback to get pipeline in the dataset_type's brackets. + elif pipeline := re.search("(?<=\\[)[^][]*(?=])", doc.get('dataset_type', '')): + doc['pipeline'] = pipeline.group() + + def add_assay_details(doc, transformation_resources): if 'dataset_type' in doc: assay_details = _get_assay_details(doc, transformation_resources) @@ -137,10 +151,8 @@ def add_assay_details(doc, transformation_resources): doc['raw_dataset_type'] = re.sub( "\\[(.*?)\\]", '', doc.get('dataset_type', '')).rstrip() - if pipeline := assay_details.get('pipeline-shorthand'): - doc['pipeline'] = pipeline - elif pipeline := re.search("(?<=\\[)[^][]*(?=])", doc.get('dataset_type', '')): - doc['pipeline'] = pipeline.group() + _add_dataset_categories(doc, assay_details) + _add_pipeline(doc, assay_details) if soft_assaytype := assay_details.get('assaytype'): doc['soft_assaytype'] = soft_assaytype @@ -150,8 +162,6 @@ def add_assay_details(doc, transformation_resources): doc['mapped_data_types'] = [assay_details.get('description')] doc['vitessce-hints'] = assay_details.get('vitessce-hints') - _add_dataset_categories(doc, assay_details) - error_msg = assay_details.get('error') if error_msg: _log_transformation_error(doc, error_msg) diff --git a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py index d35524e3..6e438b3d 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py @@ -206,7 +206,7 @@ def mock_image_pyramid_support(uuid=None, headers=None): }) -def test_transform_image_pyramid(mocker): +def test_transform_image_pyramid_parent(mocker): mocker.patch('requests.get', side_effect=[ # initial request to has_visualization with parent entity mock_image_pyramid_parent(), @@ -244,6 +244,44 @@ def test_transform_image_pyramid(mocker): assert image_pyramid_input_doc == image_pyramid_output_doc +def test_transform_image_pyramid_support(mocker): + mocker.patch('requests.get', side_effect=[ + mock_image_pyramid_support(), + mock_empty_descendants(), + ]) + image_pyramid_input_doc = { + 'uuid': '0bf9cb40adebcfb261dfbe9244607508', + 'dataset_type': 'Histology [Image Pyramid]', + 'entity_type': 'Dataset', + 'creation_action': 'Central Process' + } + + image_pyramid_output_doc = { + 'assay_display_name': ['Image Pyramid'], + 'assay_modality': 'single', + 'creation_action': 'Central Process', + 'dataset_type': 'Histology [Image Pyramid]', + 'mapped_data_types': ['Image Pyramid'], + "processing": "processed", + 'raw_dataset_type': 'Histology', + 'uuid': '0bf9cb40adebcfb261dfbe9244607508', + 'pipeline': 'Image Pyramid', + 'processing_type': 'hubmap', + 'vitessce-hints': [ + "is_image", + "is_support", + "pyramid", + + ], + 'visualization': False, + "soft_assaytype": "image_pyramid", + 'entity_type': 'Dataset', + } + + add_assay_details(image_pyramid_input_doc, transformation_resources) + assert image_pyramid_input_doc == image_pyramid_output_doc + + def test_hubmap_processing(): hubmap_processed_input_doc = { 'creation_action': 'Central Process', From abbad8c0e4cd204da895227a61009fe5637358c8 Mon Sep 17 00:00:00 2001 From: John Conroy Date: Mon, 4 Nov 2024 13:41:19 -0500 Subject: [PATCH 4/4] Add epic test --- .../portal/add_assay_details.py | 2 +- .../portal/tests/test_assay_details.py | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py index 3b54d748..7b2ef8ba 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/add_assay_details.py @@ -135,7 +135,7 @@ def _add_pipeline(doc, assay_details): doc['pipeline'] = pipeline # pipeline-shorthand is not returned for EPICs. elif doc.get('processing_type') == 'external': - doc[pipeline] = assay_details.get('description') + doc['pipeline'] = assay_details.get('description') # pipeline-shorthand is not returned for Image Pyramids. elif set(['pyramid', 'is_image']).issubset(set(assay_details.get('vitessce-hints'))): doc['pipeline'] = 'Image Pyramid' diff --git a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py index 6e438b3d..cdf664d8 100644 --- a/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py +++ b/src/hubmap_translation/addl_index_transformations/portal/tests/test_assay_details.py @@ -282,6 +282,58 @@ def test_transform_image_pyramid_support(mocker): assert image_pyramid_input_doc == image_pyramid_output_doc +def mock_epic(uuid=None, headers=None): + return mock_response({ + "assaytype": None, + "description": "Segmentation Mask", + "is-multi-assay": False, + "pipeline-shorthand": "", + "primary": False, + "vitessce-hints": [ + "segmentation_mask", + "is_image", + "pyramid" + ] + }) + + +def test_transform_epic(mocker): + mocker.patch('requests.get', side_effect=[ + mock_epic(), + mock_empty_descendants(), + ]) + epic_input_doc = { + 'uuid': 'abc123', + 'dataset_type': 'Segmentation Mask', + 'entity_type': 'Dataset', + 'creation_action': 'External Process' + } + + epic_output_doc = { + 'assay_display_name': ['Segmentation Mask'], + 'assay_modality': 'single', + 'creation_action': 'External Process', + 'dataset_type': 'Segmentation Mask', + 'mapped_data_types': ['Segmentation Mask'], + "processing": "processed", + 'raw_dataset_type': 'Segmentation Mask', + 'uuid': 'abc123', + 'pipeline': 'Segmentation Mask', + 'processing_type': 'external', + 'vitessce-hints': [ + "segmentation_mask", + "is_image", + "pyramid", + + ], + 'visualization': False, + 'entity_type': 'Dataset', + } + + add_assay_details(epic_input_doc, transformation_resources) + assert epic_input_doc == epic_output_doc + + def test_hubmap_processing(): hubmap_processed_input_doc = { 'creation_action': 'Central Process',