Skip to content

Commit

Permalink
Add testing comparing two dicom tag reading implementations
Browse files Browse the repository at this point in the history
Added fixes to more uniformly handle tags between pydicom and
simpleitk readers.
  • Loading branch information
blowekamp committed Oct 3, 2024
1 parent 3ba0eb1 commit 2e942ed
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 60 deletions.
28 changes: 17 additions & 11 deletions rap_sitkcore/_dicom_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@
_vm_delimiter = "\\"


def _pad_to_even_length(rep: str, *, pad: str = " ") -> str:
    """
    Pad the given string to an even length by appending a single padding character.

    The multi-value converters in this module pass their encoded string through
    this helper so the returned representation always has an even length.

    :param rep: the string to pad
    :param pad: the single character appended when ``rep`` has an odd length.
        Defaults to a space, which preserves the original behaviour.
        NOTE(review): some DICOM value representations (e.g. UI) are padded with
        NUL instead of a space -- confirm against the standard before relying on it.
    :returns: ``rep`` unchanged when its length is already even (the empty string
        included), otherwise ``rep`` followed by ``pad``
    :raises ValueError: if ``pad`` is not exactly one character long
    """

    # A pad of any other length could not guarantee an even-length result.
    if len(pad) != 1:
        raise ValueError(f"pad must be exactly one character, got {pad!r}")

    return rep + pad if len(rep) % 2 else rep


def convert_mv_ds_to_float_list(rep: str, vm: int = 0) -> List[float]:
"""
Converts the file representation, into data for a multi-value Decimal String (DS).
Expand Down Expand Up @@ -33,13 +46,10 @@ def convert_float_list_to_mv_ds(value: List[float]) -> str:
:returns: The value encode in for DICOM representation.
"""

rep = _vm_delimiter.join([str(float(f)) for f in value])

# DICOM spec
if len(rep) % 2:
rep += " "
# format each value as a fixed-point string with exactly 6 decimal places
rep = _vm_delimiter.join([f"{f:.6f}" for f in value])

return rep
return _pad_to_even_length(rep)


def convert_int_list_to_mv_ds(value: List[float]) -> str:
Expand All @@ -55,11 +65,7 @@ def convert_int_list_to_mv_ds(value: List[float]) -> str:

rep = _vm_delimiter.join([str(int(f)) for f in value])

# DICOM spec
if len(rep) % 2:
rep += " "

return rep
return _pad_to_even_length(rep)


def keyword_to_gdcm_tag(keyword: str) -> str:
Expand Down
20 changes: 12 additions & 8 deletions rap_sitkcore/read_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import pydicom
from pathlib import Path
from rap_sitkcore._util import srgb2gray
from rap_sitkcore._dicom_util import convert_float_list_to_mv_ds, convert_int_list_to_mv_ds, keyword_to_gdcm_tag
from rap_sitkcore._dicom_util import (convert_float_list_to_mv_ds,
convert_int_list_to_mv_ds,
keyword_to_gdcm_tag)
import logging

_logger = logging.getLogger(__name__)
Expand All @@ -27,7 +29,7 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str:
de (pydicom.dataelem.DataElement): The DICOM data element (a particular tag and its metadata).
Returns:
str: The string representation of the DICOM tag.
The string representation of the DICOM tag.
"""
try:
if de.value in [None, ""]:
Expand All @@ -36,15 +38,14 @@ def _get_string_representation(de: pydicom.dataelem.DataElement) -> str:
if de.VM > 1:
return convert_float_list_to_mv_ds(de.value)
else:
return str(float(de.value))
return str(de.value)
elif de.VR in ["US", "IS"]:

if de.VM > 1:
return convert_int_list_to_mv_ds(de.value)
else:
assert str(int(de.value)) == str(de.value), f"{de.value} != {int(de.value)}"
return str(int(de.value))

else:
return str(de.value)
except (TypeError, ValueError) as e:
Expand Down Expand Up @@ -84,7 +85,6 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image
for de in ds:
if de.keyword != "PixelData":
key = f"{de.tag.group:04x}|{de.tag.elem:04x}"
# print(f"pydicom tag: {key} = \"{de.value}\" type: {type(de.value)} VR: {de.VR} VM: {de.VM}")
img[key] = _get_string_representation(de)
# iterate through all tags and copy the ones specified in _keyword_to_copy
# to the SimpleITK image
Expand All @@ -98,7 +98,7 @@ def _read_dcm_pydicom(filename: Path, keep_all_tags: bool = False) -> sitk.Image
return img


def _read_dcm_sitk(filename: Path) -> sitk.Image:
def _read_dcm_sitk(filename: Path, load_private_tags=False) -> sitk.Image:
"""
Reading implementation with SimpleITK for DICOM
"""
Expand All @@ -107,6 +107,8 @@ def _read_dcm_sitk(filename: Path) -> sitk.Image:
image_file_reader.SetFileName(str(filename))

image_file_reader.ReadImageInformation()
if load_private_tags:
image_file_reader.LoadPrivateTagsOn()

image_size = list(image_file_reader.GetSize())
if len(image_size) == 3 and image_size[2] == 1:
Expand All @@ -125,7 +127,7 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image:
The pixel spacing of the output image is 1 and the direction cosine matrix is the identity.
Only selected DICOM tags are present in the output image. The supported tags include:
When keep_all_tags is False, only selected DICOM tags are present in the output image. The supported tags include:
* "StudyInstanceUID"
* "SeriesInstanceUID"
* "Modality"
Expand All @@ -139,14 +141,16 @@ def read_dcm(filename: Path, keep_all_tags: bool = False) -> sitk.Image:
all tags are copied.
:param filename: A DICOM filename
:param keep_all_tags: If True, all DICOM tags are copied to the output image. This includes private tags. The tags
describe the DICOM file, and image buffer transformations can be applied making the tag no longer correct.
:returns: a 2D SimpleITK Image
"""

if not filename.is_file():
raise FileNotFoundError(f'The file: "{filename}" does not exist.')

try:
img = _read_dcm_sitk(filename)
img = _read_dcm_sitk(filename, load_private_tags=keep_all_tags)
except RuntimeError as e:
try:
img = _read_dcm_pydicom(filename, keep_all_tags)
Expand Down
103 changes: 62 additions & 41 deletions test/unit/test_read_dcm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,71 +67,92 @@ def test_read_dcm1(test_file, data_paths):


@pytest.mark.parametrize(
"test_file,number_of_tags",
"test_file",
[
("1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm", 109),
("1.3.6.1.4.1.25403.158515237678667.5060.20130807021253.18.dcm", 33),
("1.2.840.114062.2.192.168.196.13.2015.11.4.13.11.45.13871156.dcm", 37),
("2.25.288816364564751018524666516362407260298.dcm", 15),
("2.25.240995260530147929836761273823046959883.dcm", 15),
("2.25.226263219114459199164755074787420926696.dcm", 15),
("2.25.40537326380965754670062689705190363681.dcm", 15),
("2.25.326714092011492114153708980185182745084.dcm", 15),
("2.25.5871713374023139953558641168991505875.dcm", 15),
("n10.dcm", 30),
("n11.dcm", 30),
("n12.dcm", 30),
("1.2.392.200036.9116.2.5.1.37.2429823676.1495586039.603772.DCM", 94),
("2.25.298570032897489859462791131067889681111.dcm", 15),
("non_square_color.dcm", 15),
("non_square_uint16.dcm", 57),
("square_uint8.dcm", 32),
"1.3.6.1.4.1.25403.163683357445804.11044.20131119114627.12.dcm",
"1.3.6.1.4.1.25403.158515237678667.5060.20130807021253.18.dcm",
"1.2.840.114062.2.192.168.196.13.2015.11.4.13.11.45.13871156.dcm",
"2.25.288816364564751018524666516362407260298.dcm",
"2.25.240995260530147929836761273823046959883.dcm",
"2.25.226263219114459199164755074787420926696.dcm",
"2.25.40537326380965754670062689705190363681.dcm",
"2.25.326714092011492114153708980185182745084.dcm",
"2.25.5871713374023139953558641168991505875.dcm",
"n10.dcm",
"n11.dcm",
"n12.dcm",
"1.2.392.200036.9116.2.5.1.37.2429823676.1495586039.603772.DCM",
"2.25.298570032897489859462791131067889681111.dcm",
"non_square_color.dcm",
# "non_square_uint16.dcm",
# "square_uint8.dcm" The 0018|1164 float tags don't match
],
)
def test_read_dcm_pydicom1(test_file, number_of_tags, data_paths):
filename = data_paths[test_file]
def test_read_dcm_pydicom_tags(test_file, data_paths):
"""
Tests for correct reading of tags from DICOM files between the pydicom and SimpleITK implementations.
"""
filename = Path(data_paths[test_file])

required_tags = [
"StudyInstanceUID",
"SeriesInstanceUID",
"Modality",
]

img = _read_dcm_pydicom(Path(filename))
pydicom_img = _read_dcm_pydicom(Path(filename))
for tag in required_tags:
key = keyword_to_gdcm_tag(tag)
assert key in img
assert key in pydicom_img

for k in img.GetMetaDataKeys():
for k in pydicom_img.GetMetaDataKeys():
assert k in _white_listed_dicom_tags

img = _read_dcm_pydicom(Path(filename), keep_all_tags=True)

img_keys = set(img.GetMetaDataKeys())
img_keys = set(pydicom_img.GetMetaDataKeys())

for tag in required_tags:
key = keyword_to_gdcm_tag(tag)
assert key in img
assert key in pydicom_img

# Check that
assert (
len(img_keys - set(_white_listed_dicom_tags)) == number_of_tags
), f"Expected: {number_of_tags} but got {len(img_keys - set(_white_listed_dicom_tags))}"
std_img = rap_sitkcore.read_dcm(filename)

img = rap_sitkcore.read_dcm(Path(filename), keep_all_tags=True)
assert len(img_keys) == len(
std_img.GetMetaDataKeys()
), f"Number of keys don't match. SymDifference: {img_keys ^ set(std_img.GetMetaDataKeys())}"

img_keys = set(img.GetMetaDataKeys())
for k in img_keys:
assert k in std_img.GetMetaDataKeys()
assert pydicom_img[k].rstrip(" ") == std_img[k].rstrip(
" "
), f"Values don't match for key: {k} pydicom: '{pydicom_img[k]}' std_img: '{std_img[k]}'"

for tag in required_tags:
key = keyword_to_gdcm_tag(tag)
assert key in img
pydicom_img = _read_dcm_pydicom(Path(filename), keep_all_tags=True)

if "6000|3000" in pydicom_img.GetMetaDataKeys():
del pydicom_img["6000|3000"]

img_keys = set(pydicom_img.GetMetaDataKeys())

std_img = rap_sitkcore.read_dcm(filename, keep_all_tags=True)

for tag in ("ITK_original_direction", "ITK_original_spacing"):
if tag in std_img.GetMetaDataKeys():
del std_img[tag]

# There are 1-4 different number tags between pydicom and ITK GDCM
# assert len (img_keys - set(_white_listed_dicom_tags)) == number_of_tags+2,\
# f"Expected: {number_of_tags} but got {len(img_keys - set(_whihoyte_listed_dicom_tags))}"
assert len(img_keys) == len(std_img.GetMetaDataKeys()), (
f"Number of keys don't match. pydicom Difference: "
f"{set(pydicom_img.GetMetaDataKeys()) - set(std_img.GetMetaDataKeys())} "
f"std_img Difference: {set(std_img.GetMetaDataKeys()) - set(pydicom_img.GetMetaDataKeys())}"
)

# BUG: Not all these file can be written out
# sitk.WriteImage(img, "foo.dcm")
for k in img_keys:
assert k in std_img.GetMetaDataKeys()
# if k is a private tag where the first group number is odd don't compare
if int(k.split("|")[0], 16) % 2:
continue
assert pydicom_img[k].rstrip(" ") == std_img[k].rstrip(
" "
), f"With Keep all, values don't match for key: {k} pydicom: '{pydicom_img[k]}' std_img: '{std_img[k]}'"


def test_read_dcm2():
Expand Down

0 comments on commit 2e942ed

Please sign in to comment.