diff --git a/extras/fileformats/extras/medimage/__init__.py b/extras/fileformats/extras/medimage/__init__.py index bd5bed0..b11785f 100644 --- a/extras/fileformats/extras/medimage/__init__.py +++ b/extras/fileformats/extras/medimage/__init__.py @@ -5,3 +5,4 @@ from . import diffusion from . import nifti from . import raw +from . import base diff --git a/extras/fileformats/extras/medimage/base.py b/extras/fileformats/extras/medimage/base.py index 80ab157..e297b4c 100644 --- a/extras/fileformats/extras/medimage/base.py +++ b/extras/fileformats/extras/medimage/base.py @@ -6,7 +6,7 @@ @extra_implementation(MedicalImage.deidentify) -def dicom_deidentify( +def no_deidentification_necessary( image: MedicalImage, out_dir: ty.Optional[Path] = None, new_stem: ty.Optional[str] = None, @@ -15,8 +15,13 @@ def dicom_deidentify( """Assume that no deidentification is needed for medical images by default. We make a copy of the image in the output directory for consistency with the behavior of other deidentification formats""" + if image.contains_phi: + raise NotImplementedError( + f"{type(image)} images contain Protected Health Information (PHI) and needs a " + "specific deidentification method" + ) if out_dir is None: out_dir = Path(tempfile.mkdtemp()) out_dir.mkdir(exist_ok=True, parents=True) - image.copy(out_dir, new_stem=new_stem, mode=copy_mode) - return image + cpy = image.copy(out_dir, new_stem=new_stem, mode=copy_mode) + return cpy diff --git a/extras/fileformats/extras/medimage/dicom.py b/extras/fileformats/extras/medimage/dicom.py index 03643ab..e284374 100644 --- a/extras/fileformats/extras/medimage/dicom.py +++ b/extras/fileformats/extras/medimage/dicom.py @@ -96,7 +96,8 @@ def dicom_deidentify( out_dir = Path(tempfile.mkdtemp()) out_dir.mkdir(parents=True, exist_ok=True) dcm = dicom.load() - dcm.PatientBirthDate = "" # dcm.PatientBirthDate[:4] + "0101" + dcm.PatientBirthDate = (dcm.PatientBirthDate[:4] + "0101") if getattr(dcm, "PatientBirthDate", None) else "" + dcm.PatientName = "Anonymous^Anonymous" for field in 
FIELDS_TO_DEIDENTIFY: try: elem = dcm[field] @@ -155,7 +156,7 @@ def dicom_collection_deidentify( ("0008", "009C"), # Consulting Physician's Name ("0008", "1070"), # Operators' Name ("0010", "4000"), # Patient Comments - ("0010", "0010"), # Patient's Name + # ("0010", "0010"), # Patient's Name ("0010", "0021"), # Issuer of Patient ID ("0010", "0032"), # Patient's Birth Time ("0010", "0050"), # Patient's Insurance Plan Code SQ diff --git a/fileformats/medimage/base.py b/fileformats/medimage/base.py index 29eb413..0e8d89f 100644 --- a/fileformats/medimage/base.py +++ b/fileformats/medimage/base.py @@ -2,7 +2,6 @@ import typing as ty from pathlib import Path import logging -from typing_extensions import Self from fileformats.core import extra, FileSet, mtime_cached_property from fileformats.core.mixin import WithClassifiers from .contents import ContentsClassifier @@ -16,6 +15,10 @@ from typing import TypeAlias else: from typing_extensions import TypeAlias +if sys.version_info >= (3, 12): + from typing import Self +else: + from typing_extensions import Self if ty.TYPE_CHECKING: import numpy.typing # noqa: F401 @@ -39,6 +42,7 @@ class MedicalImage(WithClassifiers, FileSet): image_contents = () allowed_classifiers = (ContentsClassifier,) exclusive_classifiers = (ImagingModality, AnatomicalEntity, Derivative) + contains_phi: bool = True @extra def read_array(self) -> DataArrayType: diff --git a/fileformats/medimage/dicom.py b/fileformats/medimage/dicom.py index 271d359..ab50829 100644 --- a/fileformats/medimage/dicom.py +++ b/fileformats/medimage/dicom.py @@ -1,7 +1,7 @@ +import sys import typing as ty from collections import defaultdict, Counter from pathlib import Path -from typing_extensions import Self, TypeAlias from fileformats.core.decorators import mtime_cached_property from fileformats.core import extra, FileSet, extra_implementation from fileformats.core.collection import TypedCollection @@ -9,6 +9,15 @@ from fileformats.application import Dicom from .base 
import MedicalImage +if sys.version_info >= (3, 10):  # typing.TypeAlias was added in Python 3.10 + from typing import TypeAlias +else: + from typing_extensions import TypeAlias +if sys.version_info >= (3, 12): + from typing import Self +else: + from typing_extensions import Self + if ty.TYPE_CHECKING: import pydicom.tag diff --git a/fileformats/medimage/misc.py b/fileformats/medimage/misc.py index e869e1c..f7f3646 100644 --- a/fileformats/medimage/misc.py +++ b/fileformats/medimage/misc.py @@ -14,6 +14,7 @@ class AnalyzeHeader(BinaryFile): ext = ".hdr" + contains_phi = False class Analyze(WithSeparateHeader, MedicalImage, BinaryFile): diff --git a/fileformats/medimage/nifti.py b/fileformats/medimage/nifti.py index 0129ab8..81ef938 100644 --- a/fileformats/medimage/nifti.py +++ b/fileformats/medimage/nifti.py @@ -9,6 +9,7 @@ class Nifti(MedicalImage, BinaryFile): ext: str = ".nii" + contains_phi = False class WithBids(WithSideCars): diff --git a/fileformats/medimage/raw/pet/base.py b/fileformats/medimage/raw/pet/base.py index 9a0addf..407fe8c 100644 --- a/fileformats/medimage/raw/pet/base.py +++ b/fileformats/medimage/raw/pet/base.py @@ -1,9 +1,29 @@ +import sys +import typing as ty +from pathlib import Path +from fileformats.core import FileSet, extra from fileformats.generic import BinaryFile +if sys.version_info >= (3, 12): + from typing import Self +else: + from typing_extensions import Self + class PetRawData(BinaryFile): """Base class for raw PET data files""" + @extra + def deidentify( + self, + out_dir: ty.Optional[Path] = None, + new_stem: ty.Optional[str] = None, + copy_mode: FileSet.CopyMode = FileSet.CopyMode.copy, + ) -> Self: + """Returns a new copy of the data with any subject-identifying information + stripped from the data header""" + raise NotImplementedError + class PetListMode(PetRawData): "raw projection data" diff --git a/pyproject.toml b/pyproject.toml index 4cdb23d..29bf491 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "fileformats-medimage" 
description = "Classes for representing various medical imaging file formats in Python classes for use in type hinting in data workflows" readme = "README.rst" requires-python = ">=3.8" -dependencies = ["fileformats >=0.7.1"] +dependencies = ["fileformats >=0.14.1"] license = { file = "LICENSE" } authors = [{ name = "Thomas G. Close", email = "tom.g.close@gmail.com" }] maintainers = [{ name = "Thomas G. Close", email = "tom.g.close@gmail.com" }] @@ -29,7 +29,14 @@ classifiers = [ dynamic = ["version"] [project.optional-dependencies] -dev = ["black", "pre-commit", "codespell", "flake8", "flake8-pyproject"] +dev = [ + "black", + "pre-commit", + "codespell", + "flake8", + "flake8-pyproject", + "typing_extensions >= 4.6", +] test = [ "pytest >=6.2.5", "pytest-env>=0.6.2",