Skip to content

Commit

Permalink
Merged in feature/memory_efficient_stack (pull request #301)
Browse files Browse the repository at this point in the history
Feature/memory efficient DICOM stack

Approved-by: Randy Taylor
  • Loading branch information
jrkerns committed Nov 16, 2023
2 parents 086688d + 8529fdc commit 9b786b5
Show file tree
Hide file tree
Showing 11 changed files with 282 additions and 108 deletions.
2 changes: 1 addition & 1 deletion bitbucket-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ definitions:
name: Run CBCT Tests
script:
- source venv/bin/activate
- pytest tests_basic/test_cbct.py --cov=pylinac.cbct --cov-report term --junitxml=./test-reports/pytest_results.xml
- pytest tests_basic/test_cbct.py -n 2 --cov=pylinac.cbct --cov-report term --junitxml=./test-reports/pytest_results.xml
caches:
- testfiles
condition:
Expand Down
12 changes: 12 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
Changelog
=========

v 3.18.0
--------

CT
^^

* CatPhan, Quart, Cheese, and ACR phantom analyses now have a new parameter option: ``memory_efficient_mode``.
This mode will use dramatically less memory than the default implementation. This is useful for large datasets
or limited resources on the machine running the process. This does come at a ~25-80% speed penalty depending on the
size of the dataset. Larger datasets will have a larger penalty.


v 3.17.0
--------

Expand Down
202 changes: 124 additions & 78 deletions pylinac/core/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import warnings
from collections import Counter
from datetime import datetime
from functools import cached_property
from io import BufferedReader, BytesIO
from pathlib import Path
from typing import Any, BinaryIO, Iterable, Sequence, Union
Expand Down Expand Up @@ -1096,15 +1097,6 @@ def dpmm(self) -> float:
)
return 1 / (10 * self.properties["PixelHeight"])

@property
def dpmm(self) -> float:
    """Dots-per-mm of the XIM image.

    Derived from the ``PixelHeight`` entry of ``self.properties``; the stored
    value appears to be in cm, hence the factor of 10.

    Raises
    ------
    ValueError
        If the ``PixelWidth`` and ``PixelHeight`` properties differ.
    """
    pixel_width = self.properties["PixelWidth"]
    pixel_height = self.properties["PixelHeight"]
    if pixel_width != pixel_height:
        raise ValueError(
            "The XIM image does not have the same pixel height and width"
        )
    return 1 / (10 * pixel_height)

def save_as(self, file: str, format: str | None = None) -> None:
"""Save the image to a NORMAL format. PNG is highly suggested. Accepts any format supported by Pillow.
Ironically, an equivalent PNG image (w/ metadata) is ~50% smaller than an .xim image.
Expand Down Expand Up @@ -1246,10 +1238,7 @@ def save(self, filename: str | Path) -> str | Path:
@property
def z_position(self) -> float:
"""The z-position of the slice. Relevant for CT and MR images."""
try:
return self.metadata.ImagePositionPatient[-1]
except AttributeError:
return self.metadata.SliceLocation
return z_position(self.metadata)

@property
def slice_spacing(self) -> float:
Expand Down Expand Up @@ -1552,7 +1541,114 @@ def __sub__(self, other):
return ArrayImage(self.array - other.array)


class DicomImageStack:
class LazyDicomImageStack:
    """A stack of DICOM images whose pixel data is read from disk on access.

    The instance keeps only the per-file metadata (read without pixel data)
    and the matching file paths. Indexing the stack builds a new ``DicomImage``
    from the stored path each time.
    """

    # paths of the image files, in the same (sorted) order as ``metadatas``
    _image_path_keys: list[Path]
    # pixel-less datasets of the image files, sorted by z-position
    metadatas: list[pydicom.Dataset]

    def __init__(
        self,
        folder: str | Path,
        dtype: np.dtype | None = None,
        min_number: int = 39,
        check_uid: bool = True,
    ):
        """Load a folder with DICOM CT images. This variant is more memory efficient than the standard DicomImageStack.

        This is done by loading images from disk on the fly. This assumes all images remain on disk
        for the lifetime of the instance. This does not need to be true for the original implementation.
        See the documentation for DicomImageStack for full parameter descriptions.

        Parameters
        ----------
        folder : str, Path, list, tuple
            A directory that is walked recursively for files, or an explicit list/tuple of file paths.
        dtype : dtype, None, optional
            Passed to ``DicomImage`` whenever an image is loaded from the stack.
        min_number : int
            Minimum number of images that must share the most common SeriesInstanceUID.
            Only enforced when ``check_uid`` is true.
        check_uid : bool
            Whether to keep only the images that have the most common SeriesInstanceUID.

        Raises
        ------
        FileNotFoundError
            If no image files were found.
        ValueError
            If ``check_uid`` is true and fewer than ``min_number`` images share the most common UID.
        """
        self.dtype = dtype
        paths = []
        # load in images in their received order
        if isinstance(folder, (list, tuple)):
            paths = folder
        elif osp.isdir(folder):
            for pdir, _, files in os.walk(folder):
                paths.extend(osp.join(pdir, file) for file in files)
        # we only want to read the metadata once
        # so we read it here and then filter and sort
        metadatas, paths = self._get_path_metadatas(paths)

        # check that at least 1 image was loaded
        if len(paths) < 1:
            raise FileNotFoundError(
                f"No files were found in the specified location: {folder}"
            )

        # error checking
        if check_uid:
            most_common_uid = self._get_common_uid_imgs(metadatas, min_number)
            # Filter metadata and paths *together*. Filtering only the metadata
            # would shift its indices relative to ``paths`` and the sort below
            # would then pick the wrong files.
            kept = [
                (meta, path)
                for meta, path in zip(metadatas, paths)
                if meta.SeriesInstanceUID == most_common_uid
            ]
            metadatas = [meta for meta, _ in kept]
            paths = [path for _, path in kept]
        # sort according to physical order
        order = np.argsort([m.ImagePositionPatient[-1] for m in metadatas])
        self.metadatas = [metadatas[i] for i in order]
        self._image_path_keys = [paths[i] for i in order]

    @classmethod
    def from_zip(cls, zip_path: str | Path, dtype: np.dtype | None = None):
        """Load a DICOM ZIP archive.

        The archive is unpacked with ``delete=False`` so that the extracted files
        are still on disk when images are later loaded from the stack.

        Parameters
        ----------
        zip_path : str
            Path to the ZIP archive.
        dtype : dtype, None, optional
            The data type to cast the image data as. If None, will use whatever raw image format is.
        """
        with TemporaryZipDirectory(zip_path, delete=False) as tmpzip:
            obj = cls(tmpzip, dtype)
        return obj

    def _get_common_uid_imgs(
        self, metadata: list[pydicom.Dataset], min_number: int
    ) -> pydicom.DataElement:
        """Return the most common SeriesInstanceUID among the datasets.

        Raises
        ------
        ValueError
            If fewer than ``min_number`` datasets carry that UID.
        """
        uid, count = Counter(i.SeriesInstanceUID for i in metadata).most_common(1)[0]
        if count < min_number:
            raise ValueError(
                "The minimum number images from the same study were not found"
            )
        return uid

    def _get_path_metadatas(
        self, paths: list[Path]
    ) -> tuple[list[pydicom.Dataset], list[Path]]:
        """Get the metadata for the images. This also filters out non-image files.

        Returns
        -------
        tuple
            The datasets (read without pixel data) and the paths they came from;
            both lists have the same length and order.
        """
        metadata = []
        matched_paths = []
        for path in paths:
            try:
                ds = pydicom.dcmread(path, force=True, stop_before_pixels=True)
                if "Image Storage" in ds.SOPClassUID.name:
                    metadata.append(ds)
                    matched_paths.append(path)
            except (InvalidDicomError, AttributeError, MemoryError):
                # deliberate best-effort: files that cannot be read as an image are skipped
                pass
        return metadata, matched_paths

    def side_view(self, axis: int) -> np.ndarray:
        """Return the side view of the stack. E.g. if axis=0, return the maximum value along the 0th axis."""
        # iterating the stack loads every image; they are stacked along a new last axis
        return np.stack(list(self), axis=-1).max(axis=axis)

    @cached_property
    def metadata(self) -> pydicom.FileDataset:
        """The metadata of the first image; shortcut attribute. Only attributes that are common throughout the stack should be used,
        otherwise the individual image metadata should be used."""
        return self[0].metadata

    def __getitem__(self, item: int) -> DicomImage:
        """Load and return the image at position ``item`` of the sorted stack."""
        return DicomImage(self._image_path_keys[item], dtype=self.dtype)

    def __len__(self):
        """Number of images in the stack."""
        return len(self._image_path_keys)


class DicomImageStack(LazyDicomImageStack):
"""A class that loads and holds a stack of DICOM images (e.g. a CT dataset). The class can take
a folder or zip file and will read CT images. The images must all be the same size. Supports
indexing to individual images.
Expand Down Expand Up @@ -1598,31 +1694,11 @@ def __init__(
dtype : dtype, None, optional
The data type to cast the image data as. If None, will use whatever raw image format is.
"""
self.images = []
paths = []
# load in images in their received order
if isinstance(folder, (list, tuple)):
paths = folder
elif osp.isdir(folder):
for pdir, sdir, files in os.walk(folder):
for file in files:
paths.append(osp.join(pdir, file))
for path in paths:
if self.is_image_slice(path):
img = DicomImage(path, dtype=dtype, raw_pixels=raw_pixels)
self.images.append(img)

# check that at least 1 image was loaded
if len(self.images) < 1:
raise FileNotFoundError(
f"No files were found in the specified location: {folder}"
)

# error checking
if check_uid:
self.images = self._check_number_and_get_common_uid_imgs(min_number)
# sort according to physical order
self.images.sort(key=lambda x: x.metadata.ImagePositionPatient[-1])
super().__init__(folder, dtype, min_number, check_uid)
self.images = [
DicomImage(path, dtype=dtype, raw_pixels=raw_pixels)
for path in self._image_path_keys
]

@classmethod
def from_zip(cls, zip_path: str | Path, dtype: np.dtype | None = None):
Expand All @@ -1639,44 +1715,12 @@ def from_zip(cls, zip_path: str | Path, dtype: np.dtype | None = None):
obj = cls(tmpzip, dtype)
return obj

@staticmethod
def is_image_slice(file: str | Path) -> bool:
"""Test if the file is a CT Image storage DICOM file."""
try:
ds = pydicom.dcmread(file, force=True, stop_before_pixels=True)
return "Image Storage" in ds.SOPClassUID.name
except (InvalidDicomError, AttributeError, MemoryError):
return False

def _check_number_and_get_common_uid_imgs(self, min_number: int) -> list:
"""Check that all the images are from the same study."""
most_common_uid = Counter(
i.metadata.SeriesInstanceUID for i in self.images
).most_common(1)[0]
if most_common_uid[1] < min_number:
raise ValueError(
"The minimum number images from the same study were not found"
)
return [
i for i in self.images if i.metadata.SeriesInstanceUID == most_common_uid[0]
]

def plot(self, slice: int = 0) -> None:
"""Plot a slice of the DICOM dataset.
Parameters
----------
slice : int
The slice to plot.
"""
self.images[slice].plot()

def plot_3view(self):
"""Plot the stack in 3 views: axial, coronal, and sagittal."""
fig, axes = plt.subplots(1, 3)
names = ("Coronal", "Sagittal", "Axial")
for idx, (ax, name) in enumerate(zip(axes, names)):
arry = np.stack(self.images, axis=-1).max(axis=idx)
arry = self.side_view(idx)
ax.imshow(arry, cmap="gray", aspect="equal")
ax.set_title(name)
plt.show()
Expand All @@ -1685,12 +1729,6 @@ def roll(self, direction: str, amount: int):
for img in self.images:
img.roll(direction, amount)

@property
def metadata(self) -> pydicom.FileDataset:
"""The metadata of the first image; shortcut attribute. Only attributes that are common throughout the stack should be used,
otherwise the individual image metadata should be used."""
return self.images[0].metadata

def __getitem__(self, item) -> DicomImage:
return self.images[item]

Expand Down Expand Up @@ -1858,3 +1896,11 @@ def gamma_2d(
capital_gammas.append(capital_gamma)
gamma[row_idx, col_idx] = min(np.nanmin(capital_gammas), gamma_cap_value)
return np.asarray(gamma)


def z_position(metadata: pydicom.Dataset) -> float:
    """Return the z-position recorded in a dataset's metadata.

    The last element of ``ImagePositionPatient`` is used; if looking it up
    raises ``AttributeError``, ``SliceLocation`` is returned instead.
    Relevant for CT and MR images.
    """
    try:
        z = metadata.ImagePositionPatient[-1]
    except AttributeError:
        z = metadata.SliceLocation
    return z
24 changes: 21 additions & 3 deletions pylinac/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import os.path as osp
import struct
import tempfile
import zipfile
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -83,16 +84,33 @@ def retrieve_dicom_file(file: str | Path | BinaryIO) -> pydicom.FileDataset:
class TemporaryZipDirectory(TemporaryDirectory):
    """Creates a temporary directory that unpacks a ZIP archive. Shockingly useful"""

    def __init__(self, zfile: str | Path | BinaryIO, delete: bool = True):
        """
        Parameters
        ----------
        zfile : str
            String that points to a ZIP archive.
        delete : bool
            Whether to delete the temporary directory when the context manager exits.
        """
        # this is a hack. In Python 3.12, we can pass the delete argument to TemporaryDirectory directly
        self.delete = delete
        if delete:
            super().__init__()
        else:
            # don't use the super() call because it will delete the directory
            # instead, make a temporary directory ourselves and set the name
            self.name = tempfile.mkdtemp()
        # close the archive handle once extraction is done
        with zipfile.ZipFile(zfile) as archive:
            archive.extractall(path=self.name)

    def __exit__(self, exc, value, tb) -> None:
        # Always go through our own cleanup(). When ``delete`` is false the base
        # initializer never ran, so the inherited __exit__ must not be relied on:
        # NOTE(review): on Python 3.12+ it appears to read private state that
        # only the base initializer sets.
        self.cleanup()

    def cleanup(self) -> None:
        """Remove the directory, unless the instance was created with ``delete=False``."""
        if self.delete:
            super().cleanup()


def retrieve_filenames(
Expand Down
Loading

0 comments on commit 9b786b5

Please sign in to comment.