Skip to content

Commit

Permalink
Merge pull request #22 from ArcanaFramework/dicom-read-metadata
Browse files Browse the repository at this point in the history
Implemented select_metadata for dicom series
  • Loading branch information
tclose authored Dec 8, 2023
2 parents 75aab0f + a2e16e3 commit a93cbdc
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 18 deletions.
2 changes: 1 addition & 1 deletion extras/fileformats/extras/medimage/dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def dicom_dir_generate_sample_data(dcmdir: DicomDir, dest_dir: Path, seed: ty.Un


@FileSet.generate_sample_data.register
def dicom_set_generate_sample_data(dcm_series: DicomSeries, dest_dir: Path, seed: ty.Union[int, Random] = 0, stem: ty.Optional[str] = None) -> ty.Iterable[Path]:
def dicom_series_generate_sample_data(dcm_series: DicomSeries, dest_dir: Path, seed: ty.Union[int, Random] = 0, stem: ty.Optional[str] = None) -> ty.Iterable[Path]:
rng = Random(seed)
dicom_dir = dicom_dir_generate_sample_data(dcm_series, dest_dir=mkdtemp(), seed=rng, stem=None)[0]
stem = gen_filename(rng, stem=stem)
Expand Down
70 changes: 53 additions & 17 deletions fileformats/medimage/dicom.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import typing as ty
from copy import copy
from operator import itemgetter
from collections import defaultdict
from collections import defaultdict, Counter
from pathlib import Path
from functools import cached_property
from fileformats.core import hook, FileSet
from fileformats.generic import DirectoryContaining, SetOf
from fileformats.generic import DirectoryContaining, SetOf, TypedSet
from fileformats.application import Dicom
from .base import MedicalImage

Expand Down Expand Up @@ -41,29 +40,66 @@ class DicomDir(DicomCollection, DirectoryContaining[Dicom]):
class DicomSeries(DicomCollection, SetOf[Dicom]):
@classmethod
def from_paths(
cls, fspaths: ty.Iterable[Path], common_ok: bool = False
) -> ty.Tuple[ty.Set[FileSet], ty.Set[Path]]:
cls, fspaths: ty.Iterable[Path], common_ok: bool = False,
selected_keys: ty.Optional[ty.Sequence[str]] = None
) -> ty.Tuple[ty.Set["DicomSeries"], ty.Set[Path]]:
"""Separates a list of DICOM files into separate series from the file-system
paths
Parameters
----------
fspaths : ty.Iterable[Path]
the fspaths pointing to the DICOM files
common_ok : bool, optional
included to match the signature of the overridden method, but ignored as each
dicom should belong to only one series.
selected_keys : ty.Optional[ty.Sequence[str]], optional
metadata keys to load from the DICOM files, typically used for performance
reasons, by default None (i.e. all metadata is loaded)
Returns
-------
tuple[set[DicomSeries], set[Path]]
the found dicom series objects and any unrecognised file paths
"""
dicoms, remaining = Dicom.from_paths(fspaths, common_ok=common_ok)
series_dict = defaultdict(list)
for dicom in dicoms:
series_dict[(str(dicom["StudyInstanceUID"]), str(dicom["SeriesNumber"]))].append(dicom)
dicom.select_metadata(selected_keys)
series_dict[
(str(dicom["StudyInstanceUID"]), str(dicom["SeriesNumber"]))
].append(dicom)
return set([cls(s) for s in series_dict.values()]), remaining


@FileSet.read_metadata.register
def dicom_collection_read_metadata(collection: DicomCollection) -> ty.Mapping[str, ty.Any]:
def dicom_collection_read_metadata(
collection: DicomCollection, selected_keys: ty.Optional[ty.Sequence[str]] = None
) -> ty.Mapping[str, ty.Any]:
# Collated DICOM headers across series
collated = copy(collection.contents[0].metadata)
for i, dicom in enumerate(collection.contents[1:], start=1):
collated = {}
key_repeats = Counter()
varying_keys = set()
# We use the "contents" property implementation from the base class (TypedSet or
# DirectoryContaining) instead of the overload in DicomCollection because we don't
# want the metadata to be read ahead of the `select_metadata` call below
base_class = TypedSet if isinstance(collection, DicomSeries) else DirectoryContaining
for dicom in base_class.contents.fget(collection):
dicom.select_metadata(selected_keys)
for key, val in dicom.metadata.items():
if val != collated[key]:
try:
prev_val = collated[key]
except KeyError:
collated[key] = val # Insert initial value (should only happen on first iter)
key_repeats.update([key])
else:
if key in varying_keys:
collated[key].append(val)
# Check whether the value is the same as the values in the previous
# images in the series
if (
not isinstance(collated[key], list)
or isinstance(val, list)
and not isinstance(collated[key][0], list)
):
collated[key] = [collated[key]] * i + [val]
collated[key].append(val)
elif val != prev_val:
collated[key] = [prev_val] * key_repeats[key] + [val]
varying_keys.add(key)
else:
key_repeats.update([key])
return collated

0 comments on commit a93cbdc

Please sign in to comment.