Skip to content

Commit

Permalink
Merge pull request #53 from niaid/keep_all_tags
Browse files Browse the repository at this point in the history
Add flag to read_dcm to keep all DICOM tags
  • Loading branch information
blowekamp authored Oct 8, 2024
2 parents 478972e + 2e942ed commit b43d840
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 43 deletions.
38 changes: 32 additions & 6 deletions rap_sitkcore/_dicom_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@
_vm_delimiter = "\\"


def _pad_to_even_length(rep: str) -> str:
    """
    Return ``rep`` with a single trailing space appended when its length is odd.

    A string whose length is already even (including the empty string) is returned unchanged.

    :param rep: the string to pad
    :returns: ``rep`` itself, or ``rep`` followed by one space
    """

    has_odd_length = len(rep) % 2 == 1
    return rep + " " if has_odd_length else rep


def convert_mv_ds_to_float_list(rep: str, vm: int = 0) -> List[float]:
"""
Converts the file representation, into data for a multi-value Decimal String (DS).
Expand All @@ -24,7 +37,7 @@ def convert_mv_ds_to_float_list(rep: str, vm: int = 0) -> List[float]:

def convert_float_list_to_mv_ds(value: List[float]) -> str:
"""
Convert a iterable of float to the DICOM mutli-value representation for decimal string (DS).
Convert a iterable of float to the DICOM multi-value representation for decimal string (DS).
This method is intended to convert the pydicom MV DS data elements to the representation that GDCM produced for
SimpleITK.
Expand All @@ -33,13 +46,26 @@ def convert_float_list_to_mv_ds(value: List[float]) -> str:
:returns: The values encoded in the DICOM multi-value representation.
"""

rep = _vm_delimiter.join([str(float(f)) for f in value])
# convert to string with exactly 6 decimal places (fixed-point notation)
rep = _vm_delimiter.join([f"{f:.6f}" for f in value])

# DICOM spec
if len(rep) % 2:
rep += " "
return _pad_to_even_length(rep)

return rep

def convert_int_list_to_mv_ds(value: List[int]) -> str:
    """
    Convert an iterable of integers to the DICOM multi-value string representation used for the (unsigned) integer
    value representations (US/IS).

    Every element is converted with ``int()``, the results are joined with the multi-value delimiter
    (``_vm_delimiter``), and the joined string is padded to an even length.

    This method is intended to convert the pydicom multi-value US/IS data elements to the representation that GDCM
    produces for SimpleITK.

    :param value: an iterable or list like object of values convertible to int.
    :returns: The values encoded in the DICOM multi-value string representation.
    """

    rep = _vm_delimiter.join(str(int(v)) for v in value)

    return _pad_to_even_length(rep)


def keyword_to_gdcm_tag(keyword: str) -> str:
Expand Down
118 changes: 81 additions & 37 deletions rap_sitkcore/read_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import pydicom
from pathlib import Path
from rap_sitkcore._util import srgb2gray
from rap_sitkcore._dicom_util import convert_float_list_to_mv_ds, keyword_to_gdcm_tag
from rap_sitkcore._dicom_util import (convert_float_list_to_mv_ds,
convert_int_list_to_mv_ds,
keyword_to_gdcm_tag)
import logging

_logger = logging.getLogger(__name__)
Expand All @@ -19,7 +21,41 @@
]


def _read_dcm_pydicom(filename: Path) -> sitk.Image:
def _get_string_representation(de: "pydicom.dataelem.DataElement") -> str:
    """
    Get the string representation of the value of a DICOM data element.

    Conversion rules, in order:
        * a value of ``None`` or ``""`` gives an empty string;
        * "DS" with VM > 1 is encoded by ``convert_float_list_to_mv_ds``, a single value by ``str()``;
        * "US"/"IS" with VM > 1 is encoded by ``convert_int_list_to_mv_ds``; a single value must be integral,
          i.e. ``str(int(value))`` must equal ``str(value)``;
        * every other value representation uses ``str()`` of the value.

    Parameters:
        de (pydicom.dataelem.DataElement): The DICOM data element (a particular tag and its metadata).

    Returns:
        The string representation of the data element's value.

    Raises:
        RuntimeError: if the conversion raises ``TypeError`` or ``ValueError``, or if a single US/IS value is
            not an exact integer. The original exception is chained as the cause.
    """
    try:
        value = de.value
        if value in [None, ""]:
            return ""

        if de.VR == "DS":
            if de.VM > 1:
                return convert_float_list_to_mv_ds(value)
            return str(value)

        if de.VR in ["US", "IS"]:
            if de.VM > 1:
                return convert_int_list_to_mv_ds(value)
            int_rep = str(int(value))
            # Explicit check instead of `assert`: asserts are stripped under `python -O`, and an
            # AssertionError would bypass the RuntimeError wrapping below.
            if int_rep != str(value):
                raise ValueError(f"{value} != {int(value)}")
            return int_rep

        return str(value)
    except (TypeError, ValueError) as e:
        raise RuntimeError(
            f'Error parsing data element "{de.name}" with value "{de.value}" '
            f'and value representation "{de.VR}". Error: {e}'
        ) from e


def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image:
"""
Reading implementation with pydicom for DICOM
"""
Expand All @@ -44,28 +80,25 @@ def _read_dcm_pydicom(filename: Path) -> sitk.Image:
else:
raise RuntimeError(f'Unsupported PhotometricInterpretation: "{ds.PhotometricInterpretation}"')

for tag in _keyword_to_copy:
if tag in ds:
de = ds.data_element(tag)
key = f"{de.tag.group:04x}|{de.tag.elem:04x}"
if de.value is None:
img[key] = ""
elif de.VR == "DS":
if de.VM > 1:
img[key] = convert_float_list_to_mv_ds(de.value)
else:
img[key] = str(float(de.value))
elif de.VR in ["CS", "UI"]:
img[key] = de.value
else:
raise ValueError(
f'"{filename}" has data element "{de.name}" non-conforming value representation "{de.VR}".'
)
# iterate through each tag in original DICOM file and copy all tags to the SimpleITK image
if keep_all_tags:
for de in ds:
if de.keyword != "PixelData":
key = f"{de.tag.group:04x}|{de.tag.elem:04x}"
img[key] = _get_string_representation(de)
# iterate through all tags and copy the ones specified in _keyword_to_copy
# to the SimpleITK image
else:
for keyword in _keyword_to_copy:
if keyword in ds:
de = ds.data_element(keyword)
key = f"{de.tag.group:04x}|{de.tag.elem:04x}"
img[key] = _get_string_representation(de)

return img


def _read_dcm_sitk(filename: Path) -> sitk.Image:
def _read_dcm_sitk(filename: Path, load_private_tags=False) -> sitk.Image:
"""
Reading implementation with SimpleITK (GDCM) for DICOM
"""
Expand All @@ -74,6 +107,8 @@ def _read_dcm_sitk(filename: Path) -> sitk.Image:
image_file_reader.SetFileName(str(filename))

image_file_reader.ReadImageInformation()
if load_private_tags:
image_file_reader.LoadPrivateTagsOn()

image_size = list(image_file_reader.GetSize())
if len(image_size) == 3 and image_size[2] == 1:
Expand All @@ -84,15 +119,15 @@ def _read_dcm_sitk(filename: Path) -> sitk.Image:
return image_file_reader.Execute()


def read_dcm(filename: Path) -> sitk.Image:
def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image:
"""
Read an x-ray DICOM file with GDCMImageIO, reducing it to 2D from 3D as needed.
If the file cannot be read by the GDCM library, then pydicom is tried.
Color images are converted to grayscale.
The pixel spacing of the output image is 1 and the direction cosine matrix is the identity.
Only selected DICOM tags are present in the output image. The supported tags include:
When keep_all_tags is False, only selected DICOM tags are present in the output image. The supported tags include:
* "StudyInstanceUID"
* "SeriesInstanceUID"
* "Modality"
Expand All @@ -102,18 +137,23 @@ def read_dcm(filename: Path) -> sitk.Image:
* "ViewPosition"
* "PatientSex"
This can be overridden as needed by setting `keep_all_tags` to True. In this case,
all tags are copied.
:param filename: A DICOM filename
:param keep_all_tags: If True, all DICOM tags are copied to the output image. This includes private tags. The tags
describe the DICOM file, and image buffer transformations can be applied making the tag no longer correct.
:returns: a 2D SimpleITK Image
"""

if not filename.is_file():
raise FileNotFoundError(f'The file: "{filename}" does not exist.')

try:
img = _read_dcm_sitk(filename)
img = _read_dcm_sitk(filename, load_private_tags=keep_all_tags)
except RuntimeError as e:
try:
img = _read_dcm_pydicom(filename)
img = _read_dcm_pydicom(filename, keep_all_tags)
except Exception:
# Re-raise exception from SimpleITK's GDCM reading
raise e
Expand All @@ -122,18 +162,22 @@ def read_dcm(filename: Path) -> sitk.Image:
img.SetDirection([1.0, 0.0, 0.0, 1.0])

if img.GetNumberOfComponentsPerPixel() == 1:
old_keys = img.GetMetaDataKeys()
key_to_keep = [keyword_to_gdcm_tag(n) for n in _keyword_to_copy]
for k in old_keys:
if k not in key_to_keep:
del img[k]
return img
out = img
elif img.GetNumberOfComponentsPerPixel() == 3:
out = srgb2gray(img)
# copy tags
for tag_name in _keyword_to_copy:
key = keyword_to_gdcm_tag(tag_name)
if key in img:
out[key] = img[key]
return out
raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}")

# Copy all tags
old_keys = img.GetMetaDataKeys()

for k in old_keys:
out[k] = img[k]
else:
raise RuntimeError(f"Unsupported number of components: {img.GetNumberOfComponentsPerPixel()}")

if not keep_all_tags:
old_keys = set(out.GetMetaDataKeys())
key_to_keep = {keyword_to_gdcm_tag(n) for n in _keyword_to_copy}
for k in old_keys - key_to_keep:
del out[k]

return out
89 changes: 89 additions & 0 deletions test/unit/test_read_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,95 @@ def test_read_dcm1(test_file, data_paths):
assert k in _white_listed_dicom_tags


@pytest.mark.parametrize(
    "test_file",
    [
        "1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm",
        "1.3.6.1.4.1.25403.158515237678667.5060.20130807021253.18.dcm",
        "1.2.840.114062.2.192.168.196.13.2015.11.4.13.11.45.13871156.dcm",
        "2.25.288816364564751018524666516362407260298.dcm",
        "2.25.240995260530147929836761273823046959883.dcm",
        "2.25.226263219114459199164755074787420926696.dcm",
        "2.25.40537326380965754670062689705190363681.dcm",
        "2.25.326714092011492114153708980185182745084.dcm",
        "2.25.5871713374023139953558641168991505875.dcm",
        "n10.dcm",
        "n11.dcm",
        "n12.dcm",
        "1.2.392.200036.9116.2.5.1.37.2429823676.1495586039.603772.DCM",
        "2.25.298570032897489859462791131067889681111.dcm",
        "non_square_color.dcm",
        # "non_square_uint16.dcm",
        # "square_uint8.dcm" The 0018|1164 float tags don't match
    ],
)
def test_read_dcm_pydicom_tags(test_file, data_paths):
    """
    Tests for correct reading of tags from DICOM files between the pydicom and SimpleITK implementations.

    Two scenarios are compared for every file:
      1. default reading, where only white-listed tags may be present;
      2. reading with ``keep_all_tags=True``, where all tags are compared except private ones
         (odd group number), whose values are not compared.
    Values are compared after stripping trailing spaces.
    """
    filename = Path(data_paths[test_file])

    required_tags = [
        "StudyInstanceUID",
        "SeriesInstanceUID",
        "Modality",
    ]

    # --- Scenario 1: default, only the white-listed tags are kept ---
    pydicom_img = _read_dcm_pydicom(filename)
    pydicom_keys = set(pydicom_img.GetMetaDataKeys())

    for tag in required_tags:
        key = keyword_to_gdcm_tag(tag)
        assert key in pydicom_img

    for k in pydicom_keys:
        assert k in _white_listed_dicom_tags

    std_img = rap_sitkcore.read_dcm(filename)
    std_keys = set(std_img.GetMetaDataKeys())

    assert len(pydicom_keys) == len(
        std_keys
    ), f"Number of keys don't match. SymDifference: {pydicom_keys ^ std_keys}"

    for k in pydicom_keys:
        assert k in std_keys
        assert pydicom_img[k].rstrip(" ") == std_img[k].rstrip(
            " "
        ), f"Values don't match for key: {k} pydicom: '{pydicom_img[k]}' std_img: '{std_img[k]}'"

    # --- Scenario 2: keep_all_tags=True ---
    pydicom_img = _read_dcm_pydicom(filename, keep_all_tags=True)

    # NOTE(review): 6000|3000 is dropped from the pydicom result before comparing; presumably the
    # SimpleITK reader does not report this element - confirm.
    if "6000|3000" in pydicom_img.GetMetaDataKeys():
        del pydicom_img["6000|3000"]

    pydicom_keys = set(pydicom_img.GetMetaDataKeys())

    std_img = rap_sitkcore.read_dcm(filename, keep_all_tags=True)

    # These keys are not DICOM tags, so they are removed from the SimpleITK result before comparing.
    for tag in ("ITK_original_direction", "ITK_original_spacing"):
        if tag in std_img.GetMetaDataKeys():
            del std_img[tag]

    std_keys = set(std_img.GetMetaDataKeys())

    assert len(pydicom_keys) == len(std_keys), (
        f"Number of keys don't match. pydicom Difference: "
        f"{pydicom_keys - std_keys} "
        f"std_img Difference: {std_keys - pydicom_keys}"
    )

    for k in pydicom_keys:
        assert k in std_keys
        # if k is a private tag where the first group number is odd don't compare
        if int(k.split("|")[0], 16) % 2:
            continue
        assert pydicom_img[k].rstrip(" ") == std_img[k].rstrip(
            " "
        ), f"With Keep all, values don't match for key: {k} pydicom: '{pydicom_img[k]}' std_img: '{std_img[k]}'"


def test_read_dcm2():
"""Test with a filename that does not exist"""

Expand Down

0 comments on commit b43d840

Please sign in to comment.