From 56e48097e9c1548db7e7c01757017ec097bbb0e8 Mon Sep 17 00:00:00 2001 From: Ryder Cobean Date: Tue, 17 Sep 2024 14:36:42 -0700 Subject: [PATCH 1/9] add keep_all_tags parameter defaults to FALSE for continuity with existing uses of the module. Enables user to bypass the default stripping of unsupported tags for use in applications in which other services perform anonymization. --- rap_sitkcore/read_dcm.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 860e526..5d1bfa3 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -84,7 +84,7 @@ def _read_dcm_sitk(filename: Path) -> sitk.Image: return image_file_reader.Execute() -def read_dcm(filename: Path) -> sitk.Image: +def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: """ Read an x-ray DICOM file with GDCMImageIO, reducing it to 2D from 3D as needed. If the file cannot be read by the GDCM library, then pydicom is tried. @@ -122,18 +122,20 @@ def read_dcm(filename: Path) -> sitk.Image: img.SetDirection([1.0, 0.0, 0.0, 1.0]) if img.GetNumberOfComponentsPerPixel() == 1: - old_keys = img.GetMetaDataKeys() - key_to_keep = [keyword_to_gdcm_tag(n) for n in _keyword_to_copy] - for k in old_keys: - if k not in key_to_keep: - del img[k] + if not keep_all_tags: + old_keys = img.GetMetaDataKeys() + key_to_keep = [keyword_to_gdcm_tag(n) for n in _keyword_to_copy] + for k in old_keys: + if k not in key_to_keep: + del img[k] return img elif img.GetNumberOfComponentsPerPixel() == 3: out = srgb2gray(img) # copy tags - for tag_name in _keyword_to_copy: - key = keyword_to_gdcm_tag(tag_name) - if key in img: - out[key] = img[key] + if not keep_all_tags: + for tag_name in _keyword_to_copy: + key = keyword_to_gdcm_tag(tag_name) + if key in img: + out[key] = img[key] return out raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}") From 5bc957a667be1aac9466c93cd89ee1f0967c4e2f Mon Sep 17 00:00:00 2001 From: Ryder Cobean Date: Tue, 17 Sep 2024 14:39:49 -0700 Subject: [PATCH 2/9] fix line breaks --- rap_sitkcore/read_dcm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 5d1bfa3..f7e229d 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -128,6 +128,7 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: for k in old_keys: if k not in key_to_keep: del img[k] + return img elif img.GetNumberOfComponentsPerPixel() == 3: out = srgb2gray(img) @@ -137,5 +138,6 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: key = keyword_to_gdcm_tag(tag_name) if key in img: out[key] = img[key] + return out raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}") From 6ba7ce0dc3432027095daecd0591993c9dcb6d17 Mon Sep 17 00:00:00 2001 From: Ryder Cobean Date: Thu, 26 Sep 2024 17:51:09 -0700 Subject: [PATCH 3/9] changes to _read_dcm_pydicom to support keep_all_tags --- rap_sitkcore/read_dcm.py | 68 +++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index f7e229d..25ba25d 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -19,7 +19,7 @@ ] -def _read_dcm_pydicom(filename: Path) -> sitk.Image: +def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image: """ Reading implementation with pydicom for DICOM """ @@ -37,30 +37,59 @@ def _read_dcm_pydicom(filename: Path) -> sitk.Image: elif ds.PhotometricInterpretation in ["YBR_FULL_422", "YBR_FULL", "RGB"]: if ds.PhotometricInterpretation != "RGB": from pydicom.pixel_data_handlers.util import convert_color_space - arr = convert_color_space(ds.pixel_array, ds.PhotometricInterpretation, "RGB") img = sitk.GetImageFromArray(arr, isVector=True) else: raise RuntimeError(f'Unsupported PhotometricInterpretation: "{ds.PhotometricInterpretation}"') - for tag in _keyword_to_copy: - if tag in ds: - de = ds.data_element(tag) - key = f"{de.tag.group:04x}|{de.tag.elem:04x}" - if de.value is None: - img[key] = "" - elif de.VR == "DS": - if de.VM > 1: - img[key] = convert_float_list_to_mv_ds(de.value) + if keep_all_tags: + # if keep all tags is true, we copy all tags from the pydicom dataset to the SimpleITK image. + try: + ds = pydicom.dcmread(filename, stop_before_pixels=True) + + for data_element in ds: + key_sitk = f"({data_element.tag.group:04x}|{data_element.tag.elem:04x})" + name = data_element.name + value = data_element.value + VR = data_element.VR + VM = data_element.VM + + if value in [None, ""]: + img[key_sitk] = "" + elif VR == "DS": + if VM > 1: + img[key_sitk] = convert_float_list_to_mv_ds(value) + else: + img[key_sitk] = str(float(value)) + elif VR == "US": + img[key_sitk] = str(int(value)) else: - img[key] = str(float(de.value)) - elif de.VR in ["CS", "UI"]: - img[key] = de.value - else: - raise ValueError( - f'"{filename}" has data element "{de.name}" non-conforming value representation "{de.VR}".' - ) + img[key_sitk] = value + except TypeError as te: + print(f'"{filename}" had an error parsing at "{name}" with value "{value}" and value representation "{VR}". Error: {te}') + except ValueError as ve: + print(f'"{filename}" had an error parsing at "{name}" with value "{value}" and value representation "{VR}". Error: {ve}') + + + else: + for tag in _keyword_to_copy: + if tag in ds: + de = ds.data_element(tag) + key = f"{de.tag.group:04x}|{de.tag.elem:04x}" + if de.value is None: + img[key] = "" + elif de.VR == "DS": + if de.VM > 1: + img[key] = convert_float_list_to_mv_ds(de.value) + else: + img[key] = str(float(de.value)) + elif de.VR in ["CS", "UI"]: + img[key] = de.value + else: + raise ValueError( + f'"{filename}" has data element "{de.name}" non-conforming value representation "{de.VR}".' + ) return img @@ -102,6 +131,9 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: * "ViewPosition" * "PatientSex" + This can be overridden as needed by setting `keep_all_tags` to True. In this case, + all tags are copied. + :param filename: A DICOM filename :returns: a 2D SimpleITK Image """ From baebf6db474a5182e65474557a0bd73674795868 Mon Sep 17 00:00:00 2001 From: Ryder Cobean Date: Fri, 27 Sep 2024 14:28:47 -0700 Subject: [PATCH 4/9] simplify control flow in _read_dcm_pydicom introduce function _get_string_representation() which returns the string representation of a data element with proper error handling, and simplify definition of the _keyword_to_copy list based on whether keep_all_tags is True --- rap_sitkcore/read_dcm.py | 88 +++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 25ba25d..5abeb2e 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -19,6 +19,35 @@ ] +def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: + """ + Get the string representation of the DICOM tag. + + Parameters: + de (pydicom.dataelem.DataElement): The DICOM date element (a particular tag and its metadata). + + Returns: + str: The string representation of the DICOM tag. + """ + try: + if de.value in [None, ""]: + return "" + elif de.VR == "DS": + if de.VM > 1: + return convert_float_list_to_mv_ds(de.value) + else: + return str(float(de.value)) + elif de.VR == "US": + return str(int(de.value)) + else: + return de.value + except (TypeError, ValueError) as e: + raise RuntimeError( + f'"Error parsing data element "{de.name}" with value "{de.value}" ' + f'and value representation "{de.VR}". Error: {e}' + ) + + def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image: """ Reading implementation with pydicom for DICOM @@ -43,53 +72,20 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image else: raise RuntimeError(f'Unsupported PhotometricInterpretation: "{ds.PhotometricInterpretation}"') + # keep_all_tags is either all tags other than PixelData or the tags specified in + # _keyword_to_copy, provided they are present in the dataset if keep_all_tags: - # if keep all tags is true, we copy all tags from the pydicom dataset to the SimpleITK image. - try: - ds = pydicom.dcmread(filename, stop_before_pixels=True) - - for data_element in ds: - key_sitk = f"({data_element.tag.group:04x}|{data_element.tag.elem:04x})" - name = data_element.name - value = data_element.value - VR = data_element.VR - VM = data_element.VM - - if value in [None, ""]: - img[key_sitk] = "" - elif VR == "DS": - if VM > 1: - img[key_sitk] = convert_float_list_to_mv_ds(value) - else: - img[key_sitk] = str(float(value)) - elif VR == "US": - img[key_sitk] = str(int(value)) - else: - img[key_sitk] = value - except TypeError as te: - print(f'"{filename}" had an error parsing at "{name}" with value "{value}" and value representation "{VR}". Error: {te}') - except ValueError as ve: - print(f'"{filename}" had an error parsing at "{name}" with value "{value}" and value representation "{VR}". Error: {ve}') - - - else: - for tag in _keyword_to_copy: - if tag in ds: - de = ds.data_element(tag) - key = f"{de.tag.group:04x}|{de.tag.elem:04x}" - if de.value is None: - img[key] = "" - elif de.VR == "DS": - if de.VM > 1: - img[key] = convert_float_list_to_mv_ds(de.value) - else: - img[key] = str(float(de.value)) - elif de.VR in ["CS", "UI"]: - img[key] = de.value - else: - raise ValueError( - f'"{filename}" has data element "{de.name}" non-conforming value representation "{de.VR}".' - ) + _keyword_to_copy = [elem.keyword for elem in ds if elem.keyword != "PixelData"] + else: + _keyword_to_copy = [keyword for keyword in _keyword_to_copy if keyword in ds] + + + # iterate through all tags and copy the ones specified in _keyword_to_copy + # to the SimpleITK image + for tag in _keyword_to_copy: + de = ds.data_element(tag) + key = f"{de.tag.group:04x}|{de.tag.elem:04x}" + img[key] = _get_string_representation(de) return img From 6b129c64107c9b40919ac7ebede4ad524e4cbe03 Mon Sep 17 00:00:00 2001 From: Ryder Cobean Date: Mon, 30 Sep 2024 17:59:56 -0700 Subject: [PATCH 5/9] definition of the `_keyword_to_copy` list has been simplified based on whether the `keep_all_tags` parameter is True. Remove overwrite of _keyword_to_copy, Also make sure to copy out keys in case that image is RGB and keep_all_tags is True --- rap_sitkcore/read_dcm.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 5abeb2e..477d0ab 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -75,14 +75,15 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image # keep_all_tags is either all tags other than PixelData or the tags specified in # _keyword_to_copy, provided they are present in the dataset if keep_all_tags: - _keyword_to_copy = [elem.keyword for elem in ds if elem.keyword != "PixelData"] + _output_dicom_keywords = [elem.keyword for elem in ds if elem.keyword != "PixelData"] else: - _keyword_to_copy = [keyword for keyword in _keyword_to_copy if keyword in ds] - + _output_dicom_keywords = [keyword for keyword in _keyword_to_copy if keyword in ds] # iterate through all tags and copy the ones specified in _keyword_to_copy # to the SimpleITK image - for tag in _keyword_to_copy: + + + for tag in _output_dicom_keywords: de = ds.data_element(tag) key = f"{de.tag.group:04x}|{de.tag.elem:04x}" img[key] = _get_string_representation(de) @@ -158,10 +159,15 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: del img[k] return img + elif img.GetNumberOfComponentsPerPixel() == 3: out = srgb2gray(img) # copy tags - if not keep_all_tags: + if keep_all_tags: + old_keys = img.GetMetaDataKeys() + for k in old_keys: + out[k] = img[k] + else: for tag_name in _keyword_to_copy: key = keyword_to_gdcm_tag(tag_name) if key in img: From 7f7ade79909e4a61f7d217ff5174bfbbd8a92c2c Mon Sep 17 00:00:00 2001 From: Ryder Cobean Date: Tue, 1 Oct 2024 23:17:15 -0700 Subject: [PATCH 6/9] in _read_dcm_pydicom, copy tags from the data element itself rather than keywords (resolves edge case in which a keyword is missing in the image with the tag present). Resolved issue of reusing variable. When keep_all_tags is true, the tags still need to be copied from img to out when calling srgb2gray. This is now resolved. --- rap_sitkcore/read_dcm.py | 72 +++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 477d0ab..40631be 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -20,32 +20,32 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: - """ - Get the string representation of the DICOM tag. - - Parameters: - de (pydicom.dataelem.DataElement): The DICOM date element (a particular tag and its metadata). - - Returns: - str: The string representation of the DICOM tag. - """ - try: - if de.value in [None, ""]: - return "" - elif de.VR == "DS": - if de.VM > 1: - return convert_float_list_to_mv_ds(de.value) - else: - return str(float(de.value)) - elif de.VR == "US": - return str(int(de.value)) + """ + Get the string representation of the DICOM tag. + + Parameters: + de (pydicom.dataelem.DataElement): The DICOM date element (a particular tag and its metadata). + + Returns: + str: The string representation of the DICOM tag. + """ + try: + if de.value in [None, ""]: + return "" + elif de.VR == "DS": + if de.VM > 1: + return convert_float_list_to_mv_ds(de.value) else: - return de.value - except (TypeError, ValueError) as e: - raise RuntimeError( - f'"Error parsing data element "{de.name}" with value "{de.value}" ' - f'and value representation "{de.VR}". Error: {e}' - ) + return str(float(de.value)) + elif de.VR == "US": + return str(int(de.value)) + else: + return de.value + except (TypeError, ValueError) as e: + raise RuntimeError( + f'"Error parsing data element "{de.name}" with value "{de.value}" ' + f'and value representation "{de.VR}". Error: {e}' + ) def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image: @@ -72,21 +72,19 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image else: raise RuntimeError(f'Unsupported PhotometricInterpretation: "{ds.PhotometricInterpretation}"') - # keep_all_tags is either all tags other than PixelData or the tags specified in - # _keyword_to_copy, provided they are present in the dataset + # iterate through each tag in original DICOM file and copy all tags to the SimpleITK image if keep_all_tags: - _output_dicom_keywords = [elem.keyword for elem in ds if elem.keyword != "PixelData"] - else: - _output_dicom_keywords = [keyword for keyword in _keyword_to_copy if keyword in ds] - + for de in ds: + if de.keyword != "PixelData": + key = f"{de.tag.group:04x}|{de.tag.elem:04x}" + img[key] = _get_string_representation(de) # iterate through all tags and copy the ones specified in _keyword_to_copy # to the SimpleITK image - - - for tag in _output_dicom_keywords: - de = ds.data_element(tag) - key = f"{de.tag.group:04x}|{de.tag.elem:04x}" - img[key] = _get_string_representation(de) + else: + for keyword in _keyword_to_copy: + de = ds.data_element(keyword) + key = f"{de.tag.group:04x}|{de.tag.elem:04x}" + img[key] = _get_string_representation(de) return img From dcf52ace8063ff4d1ea8c45ae8f2528d5533bd2f Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Wed, 2 Oct 2024 14:18:04 -0400 Subject: [PATCH 7/9] Simplify logic for managing DICOM tags Always copy over all the dicom tag then delete if nessecary. --- rap_sitkcore/read_dcm.py | 45 +++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 40631be..7dc8aad 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -22,7 +22,7 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: """ Get the string representation of the DICOM tag. - + Parameters: de (pydicom.dataelem.DataElement): The DICOM date element (a particular tag and its metadata). @@ -82,9 +82,10 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image # to the SimpleITK image else: for keyword in _keyword_to_copy: - de = ds.data_element(keyword) - key = f"{de.tag.group:04x}|{de.tag.elem:04x}" - img[key] = _get_string_representation(de) + if keyword in ds: + de = ds.data_element(keyword) + key = f"{de.tag.group:04x}|{de.tag.elem:04x}" + img[key] = _get_string_representation(de) return img @@ -126,7 +127,7 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: * "ViewPosition" * "PatientSex" - This can be overridden as needed by setting `keep_all_tags` to True. In this case, + This can be overridden as needed by setting `keep_all_tags` to True. In this case, all tags are copied. :param filename: A DICOM filename @@ -140,7 +141,7 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: img = _read_dcm_sitk(filename) except RuntimeError as e: try: - img = _read_dcm_pydicom(filename) + img = _read_dcm_pydicom(filename, keep_all_tags) except Exception: # Re-raise exception from SimpleITK's GDCM reading raise e @@ -149,27 +150,23 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: img.SetDirection([1.0, 0.0, 0.0, 1.0]) if img.GetNumberOfComponentsPerPixel() == 1: - if not keep_all_tags: - old_keys = img.GetMetaDataKeys() - key_to_keep = [keyword_to_gdcm_tag(n) for n in _keyword_to_copy] - for k in old_keys: - if k not in key_to_keep: - del img[k] - - return img - + out = img elif img.GetNumberOfComponentsPerPixel() == 3: + out = srgb2gray(img) - # copy tags - if keep_all_tags: - old_keys = img.GetMetaDataKeys() + + # After converting to grayscale, we need to copy the tags from the original image + old_keys = img.GetMetaDataKeys() + if keep_all_tags: for k in old_keys: out[k] = img[k] - else: - for tag_name in _keyword_to_copy: - key = keyword_to_gdcm_tag(tag_name) - if key in img: - out[key] = img[key] - return out + if not keep_all_tags: + old_keys = set(out.GetMetaDataKeys()) + key_to_keep = {keyword_to_gdcm_tag(n) for n in _keyword_to_copy} + for k in old_keys - key_to_keep: + del out[k] + + return out + raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}") From 3ba0eb1be6842db73975a37a103f93c92e9c2492 Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Wed, 2 Oct 2024 16:21:51 -0400 Subject: [PATCH 8/9] Add tests for keep_all_tags option. Fix pydicom value represetations to string. --- rap_sitkcore/_dicom_util.py | 22 +++++++++++- rap_sitkcore/read_dcm.py | 29 ++++++++++------ test/unit/test_read_dcm.py | 68 +++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/rap_sitkcore/_dicom_util.py b/rap_sitkcore/_dicom_util.py index 5eaabb5..e1fc5d4 100644 --- a/rap_sitkcore/_dicom_util.py +++ b/rap_sitkcore/_dicom_util.py @@ -24,7 +24,7 @@ def convert_mv_ds_to_float_list(rep: str, vm: int = 0) -> List[float]: def convert_float_list_to_mv_ds(value: List[float]) -> str: """ - Convert a iterable of float to the DICOM mutli-value representation for decimal string (DS). + Convert a iterable of float to the DICOM multi-value representation for decimal string (DS). This method is intended to convert the pydicom MV DS data elements to the representation that GDCM produced for SimpleITK. @@ -42,6 +42,26 @@ def convert_float_list_to_mv_ds(value: List[float]) -> str: return rep +def convert_int_list_to_mv_ds(value: List[float]) -> str: + """ + Convert a iterable of int to the DICOM multi-value representation for (unsigned) integer (US/IS). + + This method is intended to convert the pydicom MV DS data elements to the representation that GDCM produced for + SimpleITK. + + :param value: an iterable or list like object of convertable to float values. + :returns: The value encode in for DICOM representation. + """ + + rep = _vm_delimiter.join([str(int(f)) for f in value]) + + # DICOM spec + if len(rep) % 2: + rep += " " + + return rep + + def keyword_to_gdcm_tag(keyword: str) -> str: """Converts a DICOM keyword to a DICOM tag formatted as a string to match GDCM representation. diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 7dc8aad..0bf38f0 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -2,7 +2,7 @@ import pydicom from pathlib import Path from rap_sitkcore._util import srgb2gray -from rap_sitkcore._dicom_util import convert_float_list_to_mv_ds, keyword_to_gdcm_tag +from rap_sitkcore._dicom_util import convert_float_list_to_mv_ds, convert_int_list_to_mv_ds, keyword_to_gdcm_tag import logging _logger = logging.getLogger(__name__) @@ -37,10 +37,16 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: return convert_float_list_to_mv_ds(de.value) else: return str(float(de.value)) - elif de.VR == "US": - return str(int(de.value)) + elif de.VR in ["US", "IS"]: + + if de.VM > 1: + return convert_int_list_to_mv_ds(de.value) + else: + assert str(int(de.value)) == str(de.value), f"{de.value} != {int(de.value)}" + return str(int(de.value)) + else: - return de.value + return str(de.value) except (TypeError, ValueError) as e: raise RuntimeError( f'"Error parsing data element "{de.name}" with value "{de.value}" ' @@ -66,6 +72,7 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image elif ds.PhotometricInterpretation in ["YBR_FULL_422", "YBR_FULL", "RGB"]: if ds.PhotometricInterpretation != "RGB": from pydicom.pixel_data_handlers.util import convert_color_space + arr = convert_color_space(ds.pixel_array, ds.PhotometricInterpretation, "RGB") img = sitk.GetImageFromArray(arr, isVector=True) @@ -77,6 +84,7 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image for de in ds: if de.keyword != "PixelData": key = f"{de.tag.group:04x}|{de.tag.elem:04x}" + # print(f"pydicom tag: {key} = \"{de.value}\" type: {type(de.value)} VR: {de.VR} VM: {de.VM}") img[key] = _get_string_representation(de) # iterate through all tags and copy the ones specified in _keyword_to_copy # to the SimpleITK image @@ -152,14 +160,15 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: if img.GetNumberOfComponentsPerPixel() == 1: out = img elif img.GetNumberOfComponentsPerPixel() == 3: - out = srgb2gray(img) - # After converting to grayscale, we need to copy the tags from the original image + # Copy all tags old_keys = img.GetMetaDataKeys() - if keep_all_tags: - for k in old_keys: - out[k] = img[k] + + for k in old_keys: + out[k] = img[k] + else: + raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}") if not keep_all_tags: old_keys = set(out.GetMetaDataKeys()) @@ -168,5 +177,3 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: del out[k] return out - - raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}") diff --git a/test/unit/test_read_dcm.py b/test/unit/test_read_dcm.py index 3d9e30d..2993b3f 100644 --- a/test/unit/test_read_dcm.py +++ b/test/unit/test_read_dcm.py @@ -66,6 +66,74 @@ def test_read_dcm1(test_file, data_paths): assert k in _white_listed_dicom_tags +@pytest.mark.parametrize( + "test_file,number_of_tags", + [ + ("1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", 109), + ("1.3.6.1.4.1.25403.158515237678667.5060.20130807021253.18.dcm", 33), + ("1.2.840.114062.2.192.168.196.13.2015.11.4.13.11.45.13871156.dcm", 37), + ("2.25.288816364564751018524666516362407260298.dcm", 15), + ("2.25.240995260530147929836761273823046959883.dcm", 15), + ("2.25.226263219114459199164755074787420926696.dcm", 15), + ("2.25.40537326380965754670062689705190363681.dcm", 15), + ("2.25.326714092011492114153708980185182745084.dcm", 15), + ("2.25.5871713374023139953558641168991505875.dcm", 15), + ("n10.dcm", 30), + ("n11.dcm", 30), + ("n12.dcm", 30), + ("1.2.392.200036.9116.2.5.1.37.2429823676.1495586039.603772.DCM", 94), + ("2.25.298570032897489859462791131067889681111.dcm", 15), + ("non_square_color.dcm", 15), + ("non_square_uint16.dcm", 57), + ("square_uint8.dcm", 32), + ], +) +def test_read_dcm_pydicom1(test_file, number_of_tags, data_paths): + filename = data_paths[test_file] + + required_tags = [ + "StudyInstanceUID", + "SeriesInstanceUID", + "Modality", + ] + + img = _read_dcm_pydicom(Path(filename)) + for tag in required_tags: + key = keyword_to_gdcm_tag(tag) + assert key in img + + for k in img.GetMetaDataKeys(): + assert k in _white_listed_dicom_tags + + img = _read_dcm_pydicom(Path(filename), keep_all_tags=True) + + img_keys = set(img.GetMetaDataKeys()) + + for tag in required_tags: + key = keyword_to_gdcm_tag(tag) + assert key in img + + # Check that + assert ( + len(img_keys - set(_white_listed_dicom_tags)) == number_of_tags + ), f"Expected: {number_of_tags} but got {len(img_keys - set(_white_listed_dicom_tags))}" + + img = rap_sitkcore.read_dcm(Path(filename), keep_all_tags=True) + + img_keys = set(img.GetMetaDataKeys()) + + for tag in required_tags: + key = keyword_to_gdcm_tag(tag) + assert key in img + + # There are 1-4 different number tags between pydicom and ITK GDCM + # assert len (img_keys - set(_white_listed_dicom_tags)) == number_of_tags+2,\ + # f"Expected: {number_of_tags} but got {len(img_keys - set(_whihoyte_listed_dicom_tags))}" + + # BUG: Not all these file can be written out + # sitk.WriteImage(img, "foo.dcm") + + def test_read_dcm2(): """Test with filename does not exit""" From 2e942ed8b6687e22dcc52018bed554b602354b17 Mon Sep 17 00:00:00 2001 From: Bradley Lowekamp Date: Thu, 3 Oct 2024 11:54:23 -0400 Subject: [PATCH 9/9] Add testing comparing two dicom tag reading implementations Added fixes to more uniformly handle tags between pydicom and simpleitk readers. --- rap_sitkcore/_dicom_util.py | 28 ++++++---- rap_sitkcore/read_dcm.py | 20 ++++--- test/unit/test_read_dcm.py | 103 ++++++++++++++++++++++-------------- 3 files changed, 91 insertions(+), 60 deletions(-) diff --git a/rap_sitkcore/_dicom_util.py b/rap_sitkcore/_dicom_util.py index e1fc5d4..aaf310d 100644 --- a/rap_sitkcore/_dicom_util.py +++ b/rap_sitkcore/_dicom_util.py @@ -5,6 +5,19 @@ _vm_delimiter = "\\" +def _pad_to_even_length(rep: str) -> str: + """ + Pad the given string to an even length by adding a space at the end. + + :param rep: the string to pad + :returns: the padded string + """ + + if len(rep) % 2: + return rep + " " + return rep + + def convert_mv_ds_to_float_list(rep: str, vm: int = 0) -> List[float]: """ Converts the file representation, into data for a multi-value Decimal String (DS). @@ -33,13 +46,10 @@ def convert_float_list_to_mv_ds(value: List[float]) -> str: :returns: The value encode in for DICOM representation. """ - rep = _vm_delimiter.join([str(float(f)) for f in value]) - - # DICOM spec - if len(rep) % 2: - rep += " " + # convert to string with 6 decimal places, but maximum 2 trailing zeros + rep = _vm_delimiter.join([f"{f:.6f}" for f in value]) - return rep + return _pad_to_even_length(rep) def convert_int_list_to_mv_ds(value: List[float]) -> str: @@ -55,11 +65,7 @@ def convert_int_list_to_mv_ds(value: List[float]) -> str: rep = _vm_delimiter.join([str(int(f)) for f in value]) - # DICOM spec - if len(rep) % 2: - rep += " " - - return rep + return _pad_to_even_length(rep) def keyword_to_gdcm_tag(keyword: str) -> str: diff --git a/rap_sitkcore/read_dcm.py b/rap_sitkcore/read_dcm.py index 0bf38f0..9ca0790 100644 --- a/rap_sitkcore/read_dcm.py +++ b/rap_sitkcore/read_dcm.py @@ -2,7 +2,9 @@ import pydicom from pathlib import Path from rap_sitkcore._util import srgb2gray -from rap_sitkcore._dicom_util import convert_float_list_to_mv_ds, convert_int_list_to_mv_ds, keyword_to_gdcm_tag +from rap_sitkcore._dicom_util import (convert_float_list_to_mv_ds, + convert_int_list_to_mv_ds, + keyword_to_gdcm_tag) import logging _logger = logging.getLogger(__name__) @@ -27,7 +29,7 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: de (pydicom.dataelem.DataElement): The DICOM date element (a particular tag and its metadata). Returns: - str: The string representation of the DICOM tag. + The string representation of the DICOM tag. """ try: if de.value in [None, ""]: @@ -36,7 +38,7 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: if de.VM > 1: return convert_float_list_to_mv_ds(de.value) else: - return str(float(de.value)) + return str(de.value) elif de.VR in ["US", "IS"]: if de.VM > 1: @@ -44,7 +46,6 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str: else: assert str(int(de.value)) == str(de.value), f"{de.value} != {int(de.value)}" return str(int(de.value)) - else: return str(de.value) except (TypeError, ValueError) as e: @@ -84,7 +85,6 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image for de in ds: if de.keyword != "PixelData": key = f"{de.tag.group:04x}|{de.tag.elem:04x}" - # print(f"pydicom tag: {key} = \"{de.value}\" type: {type(de.value)} VR: {de.VR} VM: {de.VM}") img[key] = _get_string_representation(de) # iterate through all tags and copy the ones specified in _keyword_to_copy # to the SimpleITK image @@ -98,7 +98,7 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image return img -def _read_dcm_sitk(filename: Path) -> sitk.Image: +def _read_dcm_sitk(filename: Path, load_private_tags=False) -> sitk.Image: """ Reading implementation with pydicom for DICOM """ @@ -107,6 +107,8 @@ def _read_dcm_sitk(filename: Path) -> sitk.Image: image_file_reader.SetFileName(str(filename)) image_file_reader.ReadImageInformation() + if load_private_tags: + image_file_reader.LoadPrivateTagsOn() image_size = list(image_file_reader.GetSize()) if len(image_size) == 3 and image_size[2] == 1: @@ -125,7 +127,7 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: The pixel spacing of the output image is 1 and the direction cosine matrix is the identity. - Only selected DICOM tags are present in the output image. The supported tags include: + When keep_all_tags is False, only selected DICOM tags are present in the output image. The supported tags include: * "StudyInstanceUID" * "SeriesInstanceUID" * "Modality" @@ -139,6 +141,8 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: all tags are copied. :param filename: A DICOM filename + :param keep_all_tags: If True, all DICOM tags are copied to the output image. This includes private tags. The tags + describe the DICOM file, and image buffer transformations can be applied making the tag no longer correct. :returns: a 2D SimpleITK Image """ @@ -146,7 +150,7 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image: raise FileNotFoundError(f'The file: "{filename}" does not exist.') try: - img = _read_dcm_sitk(filename) + img = _read_dcm_sitk(filename, load_private_tags=keep_all_tags) except RuntimeError as e: try: img = _read_dcm_pydicom(filename, keep_all_tags) diff --git a/test/unit/test_read_dcm.py b/test/unit/test_read_dcm.py index 2993b3f..71d95be 100644 --- a/test/unit/test_read_dcm.py +++ b/test/unit/test_read_dcm.py @@ -67,29 +67,32 @@ def test_read_dcm1(test_file, data_paths): @pytest.mark.parametrize( - "test_file,number_of_tags", + "test_file", [ - ("1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", 109), - ("1.3.6.1.4.1.25403.158515237678667.5060.20130807021253.18.dcm", 33), - ("1.2.840.114062.2.192.168.196.13.2015.11.4.13.11.45.13871156.dcm", 37), - ("2.25.288816364564751018524666516362407260298.dcm", 15), - ("2.25.240995260530147929836761273823046959883.dcm", 15), - ("2.25.226263219114459199164755074787420926696.dcm", 15), - ("2.25.40537326380965754670062689705190363681.dcm", 15), - ("2.25.326714092011492114153708980185182745084.dcm", 15), - ("2.25.5871713374023139953558641168991505875.dcm", 15), - ("n10.dcm", 30), - ("n11.dcm", 30), - ("n12.dcm", 30), - ("1.2.392.200036.9116.2.5.1.37.2429823676.1495586039.603772.DCM", 94), - ("2.25.298570032897489859462791131067889681111.dcm", 15), - ("non_square_color.dcm", 15), - ("non_square_uint16.dcm", 57), - ("square_uint8.dcm", 32), + "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", + "1.3.6.1.4.1.25403.158515237678667.5060.20130807021253.18.dcm", + "1.2.840.114062.2.192.168.196.13.2015.11.4.13.11.45.13871156.dcm", + "2.25.288816364564751018524666516362407260298.dcm", + "2.25.240995260530147929836761273823046959883.dcm", + "2.25.226263219114459199164755074787420926696.dcm", + "2.25.40537326380965754670062689705190363681.dcm", + "2.25.326714092011492114153708980185182745084.dcm", + "2.25.5871713374023139953558641168991505875.dcm", + "n10.dcm", + "n11.dcm", + "n12.dcm", + "1.2.392.200036.9116.2.5.1.37.2429823676.1495586039.603772.DCM", + "2.25.298570032897489859462791131067889681111.dcm", + "non_square_color.dcm", + # "non_square_uint16.dcm", + # "square_uint8.dcm" The 0018|1164 float tags don't match ], ) -def test_read_dcm_pydicom1(test_file, number_of_tags, data_paths): - filename = data_paths[test_file] +def test_read_dcm_pydicom_tags(test_file, data_paths): + """ + Tests for correct reading of tags from DICOM files between the pydicom and SimpleITK implementations. + """ + filename = Path(data_paths[test_file]) required_tags = [ "StudyInstanceUID", @@ -97,41 +100,59 @@ def test_read_dcm_pydicom1(test_file, number_of_tags, data_paths): "Modality", ] - img = _read_dcm_pydicom(Path(filename)) + pydicom_img = _read_dcm_pydicom(Path(filename)) for tag in required_tags: key = keyword_to_gdcm_tag(tag) - assert key in img + assert key in pydicom_img - for k in img.GetMetaDataKeys(): + for k in pydicom_img.GetMetaDataKeys(): assert k in _white_listed_dicom_tags - img = _read_dcm_pydicom(Path(filename), keep_all_tags=True) - - img_keys = set(img.GetMetaDataKeys()) + img_keys = set(pydicom_img.GetMetaDataKeys()) for tag in required_tags: key = keyword_to_gdcm_tag(tag) - assert key in img + assert key in pydicom_img - # Check that - assert ( - len(img_keys - set(_white_listed_dicom_tags)) == number_of_tags - ), f"Expected: {number_of_tags} but got {len(img_keys - set(_white_listed_dicom_tags))}" + std_img = rap_sitkcore.read_dcm(filename) - img = rap_sitkcore.read_dcm(Path(filename), keep_all_tags=True) + assert len(img_keys) == len( + std_img.GetMetaDataKeys() + ), f"Number of keys don't match. SymDifference: {img_keys ^ set(std_img.GetMetaDataKeys())}" - img_keys = set(img.GetMetaDataKeys()) + for k in img_keys: + assert k in std_img.GetMetaDataKeys() + assert pydicom_img[k].rstrip(" ") == std_img[k].rstrip( + " " + ), f"Values don't match for key: {k} pydicom: '{pydicom_img[k]}' std_img: '{std_img[k]}'" - for tag in required_tags: - key = keyword_to_gdcm_tag(tag) - assert key in img + pydicom_img = _read_dcm_pydicom(Path(filename), keep_all_tags=True) + + if "6000|3000" in pydicom_img.GetMetaDataKeys(): + del pydicom_img["6000|3000"] + + img_keys = set(pydicom_img.GetMetaDataKeys()) + + std_img = rap_sitkcore.read_dcm(filename, keep_all_tags=True) + + for tag in ("ITK_original_direction", "ITK_original_spacing"): + if tag in std_img.GetMetaDataKeys(): + del std_img[tag] - # There are 1-4 different number tags between pydicom and ITK GDCM - # assert len (img_keys - set(_white_listed_dicom_tags)) == number_of_tags+2,\ - # f"Expected: {number_of_tags} but got {len(img_keys - set(_whihoyte_listed_dicom_tags))}" + assert len(img_keys) == len(std_img.GetMetaDataKeys()), ( + f"Number of keys don't match. pydicom Difference: " + f"{set(pydicom_img.GetMetaDataKeys()) - set(std_img.GetMetaDataKeys())} " + f"std_img Difference: {set(std_img.GetMetaDataKeys()) - set(pydicom_img.GetMetaDataKeys())}" + ) - # BUG: Not all these file can be written out - # sitk.WriteImage(img, "foo.dcm") + for k in img_keys: + assert k in std_img.GetMetaDataKeys() + # if k is a private tag where the first group number is odd don't compare + if int(k.split("|")[0], 16) % 2: + continue + assert pydicom_img[k].rstrip(" ") == std_img[k].rstrip( + " " + ), f"With Keep all, values don't match for key: {k} pydicom: '{pydicom_img[k]}' std_img: '{std_img[k]}'" def test_read_dcm2():