Skip to content

Commit

Permalink
Prepare_data program now handles npz files, not just npy.
Browse files: browse the repository at this point in the history
  • Loading branch information
ungi committed Jun 11, 2024
1 parent f331bc3 commit db94eaa
Show file tree
Hide file tree
Showing 3 changed files with 284 additions and 127 deletions.
21 changes: 18 additions & 3 deletions UltrasoundSegmentation/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@
input_dir = args.input_dir
data_files = []
for seg_filename in os.listdir(input_dir):
if seg_filename.endswith(".npy") and "_segmentation" in seg_filename:
if (seg_filename.endswith(".npy") or seg_filename.endswith(".npz")) and "_segmentation" in seg_filename:
data_files.append(os.path.join(input_dir, seg_filename))

print(f"Found {len(data_files)} data files.")
print(f"Found {len(data_files)} segmentation files.")

# Read config file

Expand All @@ -66,19 +66,23 @@
with open(os.path.join(args.output_dir, "prepare_data_config.yaml"), "w") as f:
yaml.dump(config, f)


# Read input files, process and filter data, and save new data to disk

for seg_filename in tqdm(data_files):
data = np.load(seg_filename)
if isinstance(data, np.lib.npyio.NpzFile):
data = data[data.files[0]]
logging.info(f"Loaded {seg_filename} with shape {data.shape} and value range {np.min(data)} - {np.max(data)}")

# Filter data. Keep only the segmented ultrasound images whose indices are stored in the matching _indices file (.npy or .npz).

indices_filename = seg_filename.replace("_segmentation", "_indices")
if os.path.exists(indices_filename):
indices = np.load(indices_filename)
if isinstance(indices, np.lib.npyio.NpzFile):
indices = indices[indices.files[0]]
logging.info(f"Loaded {indices_filename} with shape {indices.shape}")
logging.info(f"First 10 indices: {indices[:10]}")
data = data[indices, :, :, :]
logging.info(f"Filtered data to shape {data.shape}")
else:
Expand All @@ -94,8 +98,11 @@
# Save resized images to disk

output_filename = os.path.join(args.output_dir, os.path.basename(seg_filename))
if seg_filename.endswith(".npz"):
output_filename = output_filename.replace(".npz", ".npy")
np.save(output_filename, resized_data)
logging.info(f"Saved {output_filename} with shape {resized_data.shape} and value range {np.min(resized_data)} - {np.max(resized_data)}")
logging.info(f"Data type of {output_filename}: {resized_data.dtype}")

# Find matching ultrasound file and read ultrasound data

Expand All @@ -105,6 +112,8 @@
sys.exit(1)

ultrasound_data = np.load(ultrasound_filename)
if isinstance(ultrasound_data, np.lib.npyio.NpzFile):
ultrasound_data = ultrasound_data[ultrasound_data.files[0]]
logging.info(f"Loaded {ultrasound_filename} with shape {ultrasound_data.shape} and value range {np.min(ultrasound_data)} - {np.max(ultrasound_data)}")

# Keep only ultrasound images that have a corresponding segmentation image, with preceding ultrasound frames as requested in separate channels
Expand All @@ -124,6 +133,8 @@
# Save resized images to disk

output_filename = os.path.join(args.output_dir, os.path.basename(ultrasound_filename))
if ultrasound_filename.endswith(".npz"):
output_filename = output_filename.replace(".npz", ".npy")
np.save(output_filename, resized_data)
logging.info(f"Saved {output_filename} with shape {resized_data.shape} and value range {np.min(resized_data)} - {np.max(resized_data)}")

Expand All @@ -132,9 +143,13 @@
transform_filename = seg_filename.replace("_segmentation", "_transform")
if os.path.exists(transform_filename):
transform_data = np.load(transform_filename)
if isinstance(transform_data, np.lib.npyio.NpzFile):
transform_data = transform_data[transform_data.files[0]]
logging.info(f"Loaded {transform_filename} with shape {transform_data.shape}")
transform_data = transform_data[indices, :, :]
output_filename = os.path.join(args.output_dir, os.path.basename(transform_filename))
if transform_filename.endswith(".npz"):
output_filename = output_filename.replace(".npz", ".npy")
np.save(output_filename, transform_data)
logging.info(f"Saved {output_filename} with shape {transform_data.shape}")
else:
Expand Down
2 changes: 1 addition & 1 deletion UltrasoundSegmentation/prepare_data_config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Example config file for prepare_data.py

image_size: !!int 128
image_size: !!int 512
num_preceding_ultrasound_frames: !!int 0 # Number of extra ultrasound images preceding segmented image to be exported
388 changes: 265 additions & 123 deletions UltrasoundSegmentation/test_datafiles.ipynb

Large diffs are not rendered by default.

0 comments on commit db94eaa

Please sign in to comment.