diff --git a/pyproject.toml b/pyproject.toml
index 6b12efe6..06ac964e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,8 @@ dependencies = [
     "dask-image",
     "pyarrow",
     "readfcs",
+    "ome-types",
+    "aicsimageio",
     "tifffile>=2023.8.12",
 ]
diff --git a/scripts/macsima2m.py b/scripts/macsima2m.py
new file mode 100644
index 00000000..214208d8
--- /dev/null
+++ b/scripts/macsima2m.py
@@ -0,0 +1,660 @@
+#!/usr/bin/python
+import argparse
+import copy
+import os
+import platform
+from pathlib import Path
+from uuid import uuid4
+
+import numpy as np
+import ome_types
+import ome_types.model
+import pandas as pd
+
+# from PIL import Image
+# import PIL
+# from PIL.TiffTags import TAGS
+import tifffile as tifff
+from bs4 import BeautifulSoup
+from ome_types import to_xml
+from ome_types.model import OME, Channel, Image, Pixels, Plane, TiffData
+
+# ---CLI-BLOCK---#
+parser = argparse.ArgumentParser()
+
+
+parser.add_argument(
+    "-i",
+    "--input",
+    required=True,
+    help="Directory containing the antigen & bleaching cycles. Use forward slashes to specify the path.",
+)
+
+parser.add_argument(
+    "-o",
+    "--output",
+    required=True,
+    help="Directory where the stacks will be saved. Use forward slashes to specify the path. \
+        If the directory does not exist, it will be created.",
+)
+
+parser.add_argument(
+    "-c",
+    "--cycles",
+    required=True,
+    type=int,
+    nargs="*",
+    help="By default this input accepts two integer numbers, which mark the \
+        start and end of the cycles to be taken. Alternatively, if the flag -il is activated, \
+        this input accepts a list of any number of specific cycles.",
+)
+
+
+parser.add_argument(
+    "-ofl",
+    "--output_folders_list",
+    action="store_true",
+    help="Activate this flag to save the output images in a list of folders rather than in a tree structure. \
+        The list structure facilitates pointing to the files to run a batch job.",
+)
+
+parser.add_argument(
+    "-il",
+    "--input_mode_list",
+    action="store_true",
+    help="Activate this flag to provide the cycles argument with a list of specific cycles of interest.",
+)
+
+parser.add_argument(
+    "-he",
+    "--hi_exposure_only",
+    action="store_true",
+    help="Activate this flag to extract only the set of images with the highest exposure time.",
+)
+
+parser.add_argument(
+    "-nb",
+    "--no_bleach_cycles",
+    action="store_false",
+    help="Activate this flag to deactivate the extraction of the bleaching cycles, i.e. \
+        only the antigen images will be extracted.",
+)
+
+args = parser.parse_args()
+# ---END_CLI-BLOCK---#
+
+
+# ------INPUT BLOCK-------#
+device_name = "MACSIMA"
+cycles_dir = Path(args.input)
+stack_path = Path(args.output)
+condition = args.input_mode_list
+
+if not condition and len(args.cycles) == 2:
+    start = min(args.cycles)
+    end = max(args.cycles)
+    antigen_cycle_number = list(range(start, 1 + end))
+
+elif condition:
+    antigen_cycle_number = args.cycles
+
+else:
+    print(
+        "Wrong input, try one of the following: \n",
+        "1) Range mode: give only two numbers to the cycles argument to define the start and end of the cycles to be stacked.\n",
+        "2) List mode: activate the optional argument -il in the command line so the numbers are read as a list of specific cycles.",
+    )
+
+# ------END INPUT BLOCK----#
+
+if not os.path.exists(stack_path):
+    os.mkdir(stack_path)
+
+
+# ---- HELPER FUNCTIONS ----#
+
+
+def antigen_cycle_info(antigen_cycle_no=antigen_cycle_number, cycles_path=cycles_dir, ref_marker="DAPI"):
+    antigen_cycle = f"{antigen_cycle_no:03d}"
+    cycle_folder = "_".join([antigen_cycle, "AntigenCycle"])
+    workdir = os.path.join(cycles_path, cycle_folder)
+    images = list(filter(lambda x: x.endswith(".tif"), os.listdir(workdir)))
+    cycle_info = {
+        "img_full_path": [],
+        "image": images,
+        "marker": [],
+        "filter": [],
+        "rack": [],
+        "well": [],
+        "roi": [],
+        "fov": [],
+        "exposure": [],
+    }
+
+    for im in images:
+
+        marker_info = im.split("AntigenCycle")[-1].split("__")
+        acq_info = im.split("sensor")[-1].split("_")
+        # add the information to the cycle_info dictionary
+        # img full path
+        cycle_info["img_full_path"].append(os.path.join(workdir, im))
+        # marker and fluorophore
+        m = marker_info[0].strip("_")
+        if ref_marker in m:
+            cycle_info["marker"].append(ref_marker)
+            cycle_info["filter"].append(ref_marker)
+        else:
+            cycle_info["marker"].append(m)
+            # cycle_info['filter'].append(marker_info[-1].split('_')[2])
+            cycle_info["filter"].append(marker_info[-1].split("bit")[0].split("_")[-2])
+
+        # rack
+        cycle_info["rack"].append(acq_info[2].split("-")[-1])
+        # well
+        cycle_info["well"].append(acq_info[3].split("-")[-1])
+        # roi
+        cycle_info["roi"].append(acq_info[4].split("-")[-1])
+        # fov, i.e. tiles
+        cycle_info["fov"].append(acq_info[5].split("-")[-1])
+        # exposure
+        cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif"))
+
+    info = pd.DataFrame(cycle_info)
+
+    markers = info["marker"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0]))
+    info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref"
+    # info.to_csv(os.path.join(cycles_path,'test_antigen.csv'))
+
+    for m in markers_subset:
+        exposure = info.loc[info["marker"] == m]["exposure"].unique()
+        val = pd.to_numeric(exposure)
+        val = np.sort(val)
+        for level, value in enumerate(val):
+            info.loc[(info["marker"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1
+
+    info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned")
+    info["well"] = pd.to_numeric(info["well"], downcast="unsigned")
+    info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned")
+    info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned")
+    info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned")
+
+    return info
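+
+
+# Illustrative note on the file-name convention the parser above relies on
+# (reconstructed from the sample name documented in bleach_cycle_info below):
+# a name ending in "...sensor_B-1_R-1_W-2_G-1_F-10_E-16.0.tif" yields
+# rack=1 (R-), well=2 (W-), roi=1 (G-), fov=10 (F-) and exposure=16.0 (E-).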
+
+
+def bleach_cycle_info(antigen_cycle_no=antigen_cycle_number, cycles_path=cycles_dir, ref_marker="DAPI"):
+    bleach_cycle_no = antigen_cycle_no
+    bleach_cycle = f"{bleach_cycle_no:03d}"
+    cycle_folder = "_".join([bleach_cycle, "BleachCycle"])
+    workdir = os.path.join(cycles_path, cycle_folder)
+    images = list(filter(lambda x: x.endswith(".tif"), os.listdir(workdir)))
+    cycle_info = {
+        "img_full_path": [],
+        "image": images,
+        "marker": [],
+        "filter": [],
+        "rack": [],
+        "well": [],
+        "roi": [],
+        "fov": [],
+        "exposure": [],
+    }
+
+    for im in images:
+
+        marker_info = im.split("BleachCycle")[-1].split("_")
+        acq_info = im.split("sensor")[-1].split("_")
+        # add the information to the cycle_info dictionary
+        # img full path
+        cycle_info["img_full_path"].append(os.path.join(workdir, im))
+        # marker and fluorophore
+        # marker_info=['', 'DAPI', 'V0', 'DAPI', '16bit', 'M-20x-S Fluor full sensor', 'B-1', 'R-1', 'W-2', 'G-1', 'F-10', 'E-16.0.tif']
+        m = marker_info[1]
+        if m == ref_marker:
+            cycle_info["marker"].append(ref_marker)
+        else:
+            cycle_info["marker"].append("_".join([bleach_cycle, "bleach", marker_info[3]]))
+        cycle_info["filter"].append(marker_info[3])
+        # rack
+        cycle_info["rack"].append(acq_info[2].split("-")[-1])
+        # well
+        cycle_info["well"].append(acq_info[3].split("-")[-1])
+        # roi
+        cycle_info["roi"].append(acq_info[4].split("-")[-1])
+        # fov, i.e. tiles
+        cycle_info["fov"].append(acq_info[5].split("-")[-1])
+        # exposure
+        cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif"))
+
+    info = pd.DataFrame(cycle_info)
+
+    markers = info["filter"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0]))
+    info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref"
+    # info.to_csv(os.path.join(cycles_path,'test_bleach.csv'))
+
+    for m in markers_subset:
+        exposure = info.loc[info["filter"] == m]["exposure"].unique()
+        val = pd.to_numeric(exposure)
+        val = np.sort(val)
+        for level, value in enumerate(val):
+            info.loc[(info["filter"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1
+
+    info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned")
+    info["well"] = pd.to_numeric(info["well"], downcast="unsigned")
+    info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned")
+    info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned")
+    info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned")
+
+    return info
+
+
+def create_dir(dir_path):
+    if not os.path.exists(dir_path):
+        os.mkdir(dir_path)
+
+
+def create_ome(img_info, xy_tile_positions_units, img_path):
+    img_name = img_info["name"]
+    img_info["device"]
+    no_of_channels = img_info["no_channels"]
+    img_size = img_info["xy_img_size_pix"]
+    markers = img_info["markers"]
+    exposures = img_info["exposure_times"]
+    bit_depth = img_info["bit_depth"]
+    pixel_size = img_info["pix_size"]
+    pixel_units = img_info["pix_units"]
+    sig_bits = img_info["sig_bits"]
+    if pixel_units == "mm":
+        pixel_size = 1000 * pixel_size
+        # pixel_units='um'
+    no_of_tiles = len(xy_tile_positions_units)
+    tifff.tiffcomment(img_path, "")
+    # --Generate tiff_data_blocks--#
+    tiff_block = []
+    UUID = ome_types.model.TiffData.UUID(file_name=img_name, value=uuid4().urn)
+    for ch in range(0, no_of_channels):
+        tiff_block.append(TiffData(first_c=ch, ifd=ch, plane_count=1, uuid=UUID))
+    # --Generate planes block (contains the information of each tile)--#
+    plane_block = []
+    # length_units=ome_types.model.simple_types.UnitsLength('µm')
+    for ch in range(0, no_of_channels):
+        plane_block.append(
+            Plane(
+                the_c=ch,
+                the_t=0,
+                the_z=0,
+                position_x=0,  # x=0 is just a placeholder
+                position_y=0,  # y=0 is just a placeholder
+                position_z=0,
+                exposure_time=0,
+                # position_x_unit=pixel_units,
+                # position_y_unit=pixel_units
+            )
+        )
+    # --Generate channels block--#
+    chann_block = []
+    for ch in range(0, no_of_channels):
+        chann_block.append(
+            Channel(
+                id=f"Channel:{ch}",
+                color=ome_types.model.Color((255, 255, 255)),
+                emission_wavelength=1,  # placeholder
+                excitation_wavelength=1,  # placeholder
+            )
+        )
+    # --Generate pixels block--#
+    pix_block = []
+    ifd_counter = 0
+    for t in range(0, no_of_tiles):
+        template_plane_block = copy.deepcopy(plane_block)
+        template_chann_block = copy.deepcopy(chann_block)
+        template_tiffdata_block = copy.deepcopy(tiff_block)
+        for ch, mark in enumerate(markers):
+            template_plane_block[ch].position_x = xy_tile_positions_units[t][0]
+            template_plane_block[ch].position_y = xy_tile_positions_units[t][1]
+            template_plane_block[ch].exposure_time = exposures[ch]
+            template_chann_block[ch].id = f"Channel:{100 + t}:{ch}:{mark}"
+            template_tiffdata_block[ch].ifd = ifd_counter
+            ifd_counter += 1
+        pix_block.append(
+            Pixels(
+                id=f"Pixels:{t}",
+                dimension_order=ome_types.model.Pixels_DimensionOrder("XYCZT"),
+                size_c=no_of_channels,
+                size_t=1,
+                size_x=img_size[0],
+                size_y=img_size[1],
+                size_z=1,
+                type=bit_depth,
+                big_endian=False,
+                channels=template_chann_block,
+                interleaved=False,
+                physical_size_x=pixel_size,
+                # physical_size_x_unit=pixel_units,
+                physical_size_y=pixel_size,
+                # physical_size_y_unit=pixel_units,
+                physical_size_z=1.0,
+                planes=template_plane_block,
+                significant_bits=sig_bits,
+                tiff_data_blocks=template_tiffdata_block,
+            )
+        )
+    # --Generate image block--#
+    img_block = []
+    for t in range(0, no_of_tiles):
+        img_block.append(Image(id=f"Image:{t}", pixels=pix_block[t]))
+    # --Create the OME object with all previously defined blocks--#
+    ome_custom = OME()
+    ome_custom.creator = " ".join(
+        [ome_types.__name__, ome_types.__version__, "/ python version-", platform.python_version()]
+    )
+    ome_custom.images = img_block
+    ome_custom.uuid = uuid4().urn
+    ome_xml = to_xml(ome_custom)
+    tifff.tiffcomment(img_path, ome_xml)
+
+
+def setup_coords(x, y, pix_units):
+    if pix_units == "mm":
+        x_norm = 1000 * (np.array(x) - np.min(x))  # /pixel_size
+        y_norm = 1000 * (np.array(y) - np.min(y))  # /pixel_size
+    x_norm = np.rint(x_norm).astype("int")
+    y_norm = np.rint(y_norm).astype("int")
+    # invert y
+    y_norm = np.max(y_norm) - y_norm
+    xy_tile_positions = [(i, j) for i, j in zip(x_norm, y_norm)]
+    return xy_tile_positions
+
+
+def tile_position(metadata_string):
+    # meta_dict = {TAGS[key] : img.tag[key] for key in img.tag_v2}
+    # ome=BeautifulSoup(meta_dict['ImageDescription'][0],'xml')
+    # TODO: replace with ome_types
+    ome = BeautifulSoup(metadata_string, "xml")
+    x = float(ome.StageLabel["X"])
+    y = float(ome.StageLabel["Y"])
+    stage_units = ome.StageLabel["XUnit"]
+    pixel_size = float(ome.Pixels["PhysicalSizeX"])
+    pixel_units = ome.Pixels["PhysicalSizeXUnit"]
+    bit_depth = ome.Pixels["Type"]
+    significantBits = int(ome.Pixels["SignificantBits"])
+    tile_info = {
+        "x": x,
+        "y": y,
+        "stage_units": stage_units,
+        "pixel_size": pixel_size,
+        "pixel_units": pixel_units,
+        "bit_depth": bit_depth,
+        "sig_bits": significantBits,
+    }
+    return tile_info
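+
+
+# tile_position above reads the embedded OME-XML of each tile; a minimal,
+# illustrative snippet of the attributes it relies on looks like:
+#   <StageLabel X="12.3" Y="4.56" XUnit="mm" ... />
+#   <Pixels PhysicalSizeX="0.00032" PhysicalSizeXUnit="mm" Type="uint16"
+#           SignificantBits="16" ... />
+# (values are hypothetical; only the attribute names are taken from the code).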
+
+
+def create_stack(
+    info,
+    exp_level=1,
+    antigen_cycle_no=antigen_cycle_number,
+    cycles_path=cycles_dir,
+    isbleach=False,
+    offset=0,
+    device=device_name,
+    ref_marker="DAPI",
+    results_path=stack_path,
+):
+
+    racks = info["rack"].unique()
+    wells = info["well"].unique()
+    antigen_cycle = f"{antigen_cycle_no:03d}"
+    if isbleach:
+        cycle_prefix = "Bleach"
+    else:
+        cycle_prefix = "Antigen"
+
+    cycle_folder = "_".join([antigen_cycle, cycle_prefix + "Cycle"])
+    os.path.join(cycles_path, cycle_folder)
+    # img_ref=PIL.Image.open(info['img_full_path'][0])
+    # width=img_ref.width
+    # height=img_ref.height
+    # dtype_ref=tifff.imread(info['img_full_path'][0]).dtype
+    with tifff.TiffFile(info["img_full_path"][0]) as tif:
+        img_ref = tif.pages[0].asarray()
+        width = img_ref.shape[1]
+        height = img_ref.shape[0]
+        dtype_ref = img_ref.dtype
+    ref_marker = list(info.loc[info["exposure_level"] == "ref", "marker"])[0]
+    markers = info["marker"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    sorted_markers = np.insert(markers_subset, 0, ref_marker)
+    sorted_filters = []
+    for m in sorted_markers:
+        sorted_filters.append(info.loc[info["marker"] == m, "filter"].tolist()[0])
+    if isbleach:
+        target_name = "filters"
+        target = "_".join(sorted_filters)
+    else:
+        target_name = "markers"
+        target = "_".join(sorted_markers)
+
+    for r in racks:
+        output_levels = []
+        rack_no = "rack_{n}".format(n=f"{r:02d}")
+        output_levels.append(rack_no)
+        # rack_path=stack_path / 'rack_{n}'.format(n=f'{r:02d}')
+        # create_dir(rack_path)
+
+        for w in wells:
+
+            well_no = "well_{n}".format(n=f"{w:02d}")
+            output_levels.append(well_no)
+            # well_path=rack_path / 'well_{n}'.format(n=f'{w:02d}')
+            # create_dir(well_path)
+
+            groupA = info.loc[(info["rack"] == r) & (info["well"] == w)]
+            rois = groupA["roi"].unique()
+
+            for roi in rois:
+                roi_no = "roi_{n}".format(n=f"{roi:02d}")
+                exp_level_no = "exp_level_{n}".format(n=f"{exp_level:02d}")
+                # roi_path=well_path / 'roi_{n}'.format(n=f'{roi:02d}')
+                # create_dir(roi_path)
+                # exposure_path=roi_path / 'exp_level_{n}'.format(n=f'{exp_level:02d}')
+                # create_dir(exposure_path)
+                output_levels.append(roi_no)
+                output_levels.append(exp_level_no)
+
+                counter = 0
+                groupB = groupA.loc[groupA["roi"] == roi]
+                A = groupB.loc[(groupB["exposure_level"] == "ref")]
+                stack_size_z = A.shape[0] * len(markers)
+                fov_id = groupB.loc[groupB["marker"] == ref_marker, "fov"].unique()
+                fov_id = np.sort(fov_id)
+
+                stack_name = "cycle-{C}-{prefix}-exp-{E}-rack-{R}-well-{W}-roi-{ROI}-{T}-{M}.{img_format}".format(
+                    C=f"{(offset+antigen_cycle_no):03d}",
+                    prefix=cycle_prefix,
+                    E=exp_level,
+                    R=r,
+                    W=w,
+                    ROI=roi,
+                    T=target_name,  # markers or filters
+                    M=target,
+                    img_format="ome.tiff",
+                )
+
+                X = []
+                Y = []
+                stack = np.zeros((stack_size_z, height, width), dtype=dtype_ref)
+
+                exposure_per_marker = []
+                exp = groupB.loc[
+                    (groupB["marker"] == ref_marker) & (groupB["fov"] == 1) & (groupB["exposure_level"] == "ref"),
+                    "exposure",
+                ].tolist()[0]
+                exposure_per_marker.append(exp)
+                for s in markers_subset:
+                    exp = groupB.loc[
+                        (groupB["marker"] == s) & (groupB["fov"] == 1) & (groupB["exposure_level"] == exp_level),
+                        "exposure",
+                    ].tolist()[0]
+                    exposure_per_marker.append(exp)
+
+                for tile in fov_id:
+
+                    img_ref = groupB.loc[
+                        (groupB["marker"] == ref_marker) & (groupB["fov"] == tile), "img_full_path"
+                    ].tolist()
+
+                    if len(img_ref) > 0:
+                        # img=tifff.imread(img_ref[0])
+                        # img_PIL=PIL.Image.open(img_ref[0])
+                        with tifff.TiffFile(img_ref[0]) as tif:
+                            img = tif.pages[0].asarray()
+                            metadata = tif.ome_metadata
+                        # stack[counter,:,:]=tifff.imread(img_ref[0])
+                        stack[counter, :, :] = img
+                        tile_data = tile_position(metadata)
+                        X.append(tile_data["x"])
+                        Y.append(tile_data["y"])
+                        counter += 1
+
+                        for m in markers_subset:
+                            img_marker = groupB.loc[
+                                (groupB["marker"] == m)
+                                & (groupB["fov"] == tile)
+                                & (groupB["exposure_level"] == exp_level),
+                                "img_full_path",
+                            ].tolist()[0]
+                            img = tifff.imread(img_marker)
+                            stack[counter, :, :] = img
+                            counter += 1
+
+                if args.output_folders_list:
+                    output_folders_path = stack_path / "--".join(output_levels) / "raw"
+                else:
+                    output_folders_path = stack_path / Path("/".join(output_levels)) / "raw"
+
+                if not os.path.exists(output_folders_path):
+                    os.makedirs(output_folders_path)
+
+                final_stack_path = os.path.join(output_folders_path, stack_name)
+                tifff.imwrite(final_stack_path, stack, photometric="minisblack")
+                img_info = {
+                    "name": stack_name,
+                    "device": device_name,
+                    "no_channels": len(markers),
+                    "markers": sorted_markers,
+                    "filters": sorted_filters,
+                    "exposure_times": exposure_per_marker,
+                    "xy_img_size_pix": (width, height),
+                    "pix_size": tile_data["pixel_size"],
+                    "pix_units": tile_data["pixel_units"],
+                    "bit_depth": tile_data["bit_depth"],
+                    "sig_bits": tile_data["sig_bits"],
+                }
+                create_ome(img_info, setup_coords(X, Y, img_info["pix_units"]), img_path=final_stack_path)
+
+    return img_info
+
+
+def markers_cycle_table(info, antigen_cycle_no=antigen_cycle_number):
+    ref_marker = list(info.loc[info["exposure_level"] == "ref", "marker"])[0]
+    markers = info["marker"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    antigens = [ref_marker]
+    cycles = [antigen_cycle_no]
+    for m in markers_subset:
+        cycles.append(antigen_cycle_no)
+        antigens.append(m)
+    return cycles, antigens
+
+
+def main():
+    out_ant = {
+        "cycle_number": [],
+        "marker_name": [],
+        "Filter": [],
+        "background": [],
+        "exposure": [],
+        "remove": [],
+        "exposure_level": [],
+    }
+
+    out_ble = copy.deepcopy(out_ant)
+
+    offset_value = 1 + max(antigen_cycle_number)
+
+    for i in antigen_cycle_number:
+        antigen_info = antigen_cycle_info(antigen_cycle_no=i)
+        exp = antigen_info["exposure_level"].unique()
+        exp = exp[exp != "ref"]
+        exp.sort()
+
+        if args.hi_exposure_only:
+            exp = [max(exp)]
+
+        print("extracting antigen cycle:", i)
+
+        extract_bleach = args.no_bleach_cycles
+        if extract_bleach:
+            bcycle = i - 1
+            bleach_info = bleach_cycle_info(antigen_cycle_no=bcycle)
+
+        for e in exp:
+            antigen_stack_info = create_stack(antigen_info, antigen_cycle_no=i, exp_level=e)
+
+            out_ant["cycle_number"].extend(antigen_stack_info["no_channels"] * [i])
+            out_ant["marker_name"].extend(antigen_stack_info["markers"])
+            out_ant["Filter"].extend(antigen_stack_info["filters"])
+            out_ant["remove"].extend(antigen_stack_info["no_channels"] * [""])
+            out_ant["exposure"].extend(antigen_stack_info["exposure_times"])
+            out_ant["exposure_level"].extend(antigen_stack_info["no_channels"] * [e])
+
+            if extract_bleach:
+                print("extracting bleaching cycle:", bcycle)
+                bleach_stack_info = create_stack(
+                    bleach_info, antigen_cycle_no=bcycle, isbleach=True, offset=offset_value, exp_level=e
+                )
+                # the blank string corresponds to the reference marker; it is always
+                # the first entry in the sorted_markers list
+                background_channels = [""]
+                for m in antigen_stack_info["filters"][1::]:
+                    ch_name = [x for x in bleach_stack_info["markers"] if m in x]
+                    background_channels.extend(ch_name)
+
+                out_ant["background"].extend(background_channels)
+
+                out_ble["background"].extend(bleach_stack_info["no_channels"] * [""])
+                out_ble["cycle_number"].extend(bleach_stack_info["no_channels"] * [offset_value + bcycle])
+                out_ble["marker_name"].extend(bleach_stack_info["markers"])
+                out_ble["Filter"].extend(bleach_stack_info["filters"])
+                out_ble["remove"].extend(bleach_stack_info["no_channels"] * ["TRUE"])
+                out_ble["exposure"].extend(bleach_stack_info["exposure_times"])
+                out_ble["exposure_level"].extend(bleach_stack_info["no_channels"] * [e])
+
+            else:
+                out_ant["background"].extend(antigen_stack_info["no_channels"] * [""])
+
+    for e in exp:
+        if extract_bleach:
+            df1 = pd.DataFrame(out_ant).groupby("exposure_level").get_group(e)
+            df2 = pd.DataFrame(out_ble).groupby("exposure_level").get_group(e)
+            df = pd.concat([df1, df2], ignore_index=True)
+        else:
+            df = pd.DataFrame(out_ant).groupby("exposure_level").get_group(e)
+        df.drop("exposure_level", axis=1, inplace=True)
+        df.insert(0, "channel_number", list(range(1, 1 + df.shape[0])))
+        df.to_csv(stack_path / f"markers_exp_{e}.csv", index=False)
+
+
+if __name__ == "__main__":
+    main()
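+
+
+# Example invocations (illustrative paths):
+#   python macsima2m.py -i /data/run01/cycles -o /data/run01/stacks -c 1 10
+#   python macsima2m.py -i /data/run01/cycles -o /data/run01/stacks -il -c 2 5 7 -ofl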
diff --git a/scripts/nfmacsima.py b/scripts/nfmacsima.py
new file mode 100644
index 00000000..51a1c5d2
--- /dev/null
+++ b/scripts/nfmacsima.py
@@ -0,0 +1,534 @@
+#!/usr/bin/python
+import argparse
+import copy
+import os
+import platform
+from pathlib import Path
+from uuid import uuid4
+
+import numpy as np
+import ome_types
+import ome_types.model
+import pandas as pd
+
+# from PIL import Image
+# import PIL
+# from PIL.TiffTags import TAGS
+import tifffile as tifff
+from bs4 import BeautifulSoup
+from ome_types import to_xml
+from ome_types.model import OME, Channel, Image, Pixels, Plane, TiffData
+from ome_types.model.simple_types import ChannelID, Color, ImageID, PixelsID
+from ome_types.model.tiff_data import UUID
+
+# ---CLI-BLOCK---#
+parser = argparse.ArgumentParser()
+
+
+parser.add_argument(
+    "-i",
+    "--input",
+    nargs="*",
+    required=True,
+    help="Ordered pair of paths to the antigen and the corresponding bleaching cycle, e.g. [002_AntigenCycle, 001_BleachCycle].",
+)
+
+parser.add_argument("-o", "--output", required=True, help="Directory to save the stack.")
+
+parser.add_argument("-c", "--cycle", required=True, type=int, help="Number of the antigen cycle.")
+
+parser.add_argument(
+    "-rm",
+    "--ref_marker",
+    required=False,
+    type=str,
+    default="DAPI",
+    help="Name of the reference marker. Default value is DAPI.",
+)
+
+parser.add_argument(
+    "-he",
+    "--hi_exposure_only",
+    action="store_true",
+    help="Activate this flag to extract only the set of images with the highest exposure time.",
+)
+
+
+args = parser.parse_args()
+# ---END_CLI-BLOCK---#
+
+
+# ------FORMAT INPUT BLOCK-------#
+device_name = "MACSIMA"
+antigen_dir = Path(args.input[0])
+bleach_dir = Path(args.input[1])
+stack_path = Path(args.output)
+ref_marker = args.ref_marker
+cycle_no = args.cycle
+
+# ------END INPUT BLOCK----#
+
+if not os.path.exists(stack_path):
+    os.mkdir(stack_path)
+
+
+# ---- HELPER FUNCTIONS ----#
+
+
+def antigen_cycle_info(antigen_folder=antigen_dir, ref_marker=ref_marker):
+
+    images = list(filter(lambda x: x.endswith(".tif"), os.listdir(antigen_folder)))
+
+    cycle_info = {
+        "img_full_path": [],
+        "image": images,
+        "marker": [],
+        "filter": [],
+        "rack": [],
+        "well": [],
+        "roi": [],
+        "fov": [],
+        "exposure": [],
+    }
+
+    for im in images:
+
+        # extraction of marker name info and acquisition info (rack, well, exposure, roi)
+        marker_info = im.split("AntigenCycle")[-1].split("__")
+        acq_info = im.split("sensor")[-1].split("_")
+        # add the information to the cycle_info dictionary
+        # img full path
+        cycle_info["img_full_path"].append(os.path.join(antigen_folder, im))
+        # marker and fluorophore
+        m = marker_info[0].strip("_")
+        if ref_marker in m:
+            cycle_info["marker"].append(ref_marker)
+            cycle_info["filter"].append(ref_marker)
+        else:
+            cycle_info["marker"].append(m)
+            cycle_info["filter"].append(marker_info[-1].split("bit")[0].split("_")[-2])
+
+        # rack
+        cycle_info["rack"].append(acq_info[2].split("-")[-1])
+        # well
+        cycle_info["well"].append(acq_info[3].split("-")[-1])
+        # roi
+        cycle_info["roi"].append(acq_info[4].split("-")[-1])
+        # fov, i.e. tiles
+        cycle_info["fov"].append(acq_info[5].split("-")[-1])
+        # exposure
+        cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif"))
+
+    info = pd.DataFrame(cycle_info)
+
+    markers = info["marker"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    # insert the exposure_level column at the end of the info dataframe
+    info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0]))
+    # label the exposure level of the reference marker rows with "ref"
+    info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref"
+
+    for m in markers_subset:
+        exposure = info.loc[info["marker"] == m]["exposure"].unique()
+        val = pd.to_numeric(exposure)
+        val = np.sort(val)
+        for level, value in enumerate(val):
+            info.loc[(info["marker"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1
+
+    # change the data type to numeric
+    info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned")
+    info["well"] = pd.to_numeric(info["well"], downcast="unsigned")
+    info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned")
+    info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned")
+    info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned")
+
+    return info
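+
+
+# Illustrative example of the exposure-level ranking above: if a marker was
+# acquired at exposures {16, 32, 64}, its rows get exposure_level 1, 2 and 3
+# respectively, while all rows of the reference marker (e.g. DAPI) are
+# labelled "ref".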
+
+
+def bleach_cycle_info(bleach_folder=bleach_dir, ref_marker=ref_marker, antigen_cycle_no=cycle_no):
+
+    bleach_cycle = f"{antigen_cycle_no-1:03d}"
+
+    images = list(filter(lambda x: x.endswith(".tif"), os.listdir(bleach_folder)))
+
+    cycle_info = {
+        "img_full_path": [],
+        "image": images,
+        "marker": [],
+        "filter": [],
+        "rack": [],
+        "well": [],
+        "roi": [],
+        "fov": [],
+        "exposure": [],
+    }
+
+    for im in images:
+
+        marker_info = im.split("BleachCycle")[-1].split("_")
+        acq_info = im.split("sensor")[-1].split("_")
+        # add the information to the cycle_info dictionary
+        # img full path
+        cycle_info["img_full_path"].append(os.path.join(bleach_folder, im))
+        # marker and fluorophore
+        # marker_info=['', 'DAPI', 'V0', 'DAPI', '16bit', 'M-20x-S Fluor full sensor', 'B-1', 'R-1', 'W-2', 'G-1', 'F-10', 'E-16.0.tif']
+        m = marker_info[1]
+        if m == ref_marker:
+            cycle_info["marker"].append(ref_marker)
+        else:
+            cycle_info["marker"].append("_".join([bleach_cycle, "bleach", marker_info[3]]))
+        cycle_info["filter"].append(marker_info[3])
+        # rack
+        cycle_info["rack"].append(acq_info[2].split("-")[-1])
+        # well
+        cycle_info["well"].append(acq_info[3].split("-")[-1])
+        # roi
+        cycle_info["roi"].append(acq_info[4].split("-")[-1])
+        # fov, i.e. tiles
+        cycle_info["fov"].append(acq_info[5].split("-")[-1])
+        # exposure
+        cycle_info["exposure"].append(acq_info[6].split("-")[-1].strip(".tif"))
+
+    info = pd.DataFrame(cycle_info)
+
+    markers = info["filter"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    info.insert(len(cycle_info), "exposure_level", np.zeros(info.shape[0]))
+    info.loc[info["marker"] == ref_marker, "exposure_level"] = "ref"
+    # info.to_csv(os.path.join(cycles_path,'test_bleach.csv'))
+
+    for m in markers_subset:
+        exposure = info.loc[info["filter"] == m]["exposure"].unique()
+        val = pd.to_numeric(exposure)
+        val = np.sort(val)
+        for level, value in enumerate(val):
+            info.loc[(info["filter"] == m) & (info["exposure"] == str(value)), "exposure_level"] = level + 1
+
+    info["rack"] = pd.to_numeric(info["rack"], downcast="unsigned")
+    info["well"] = pd.to_numeric(info["well"], downcast="unsigned")
+    info["roi"] = pd.to_numeric(info["roi"], downcast="unsigned")
+    info["fov"] = pd.to_numeric(info["fov"], downcast="unsigned")
+    info["exposure"] = pd.to_numeric(info["exposure"], downcast="unsigned")
+
+    return info
+
+
+def create_dir(dir_path):
+    if not os.path.exists(dir_path):
+        os.mkdir(dir_path)
+
+
+def create_ome(img_info, xy_tile_positions_units, img_path):
+    img_name = img_info["name"]
+    img_info["device"]
+    no_of_channels = img_info["no_channels"]
+    img_size = img_info["xy_img_size_pix"]
+    markers = img_info["markers"]
+    exposures = img_info["exposure_times"]
+    bit_depth = img_info["bit_depth"]
+    pixel_size = img_info["pix_size"]
+    pixel_units = img_info["pix_units"]
+    sig_bits = img_info["sig_bits"]
+    if pixel_units == "mm":
+        pixel_size = 1000 * pixel_size
+        # pixel_units='um'
+    no_of_tiles = len(xy_tile_positions_units)
+    tifff.tiffcomment(img_path, "")
+    # --Generate tiff_data_blocks--#
+    tiff_block = []
+    # UUID=ome_types.model.tiff_data.UUID(file_name=img_name,value=uuid4().urn)
+    unique_identifier = UUID(file_name=img_name, value=uuid4().urn)
+    for ch in range(0, no_of_channels):
+        tiff_block.append(TiffData(first_c=ch, ifd=ch, plane_count=1, uuid=unique_identifier))
+    # --Generate planes block (contains the information of each tile)--#
+    plane_block = []
+    # length_units=ome_types.model.simple_types.UnitsLength('µm')
+    for ch in range(0, no_of_channels):
+        plane_block.append(
+            Plane(
+                the_c=ch,
+                the_t=0,
+                the_z=0,
+                position_x=0,  # x=0 is just a placeholder
+                position_y=0,  # y=0 is just a placeholder
+                position_z=0,
+                exposure_time=0,
+                # position_x_unit=pixel_units,
+                # position_y_unit=pixel_units
+            )
+        )
+    # --Generate channels block--#
+    chann_block = []
+    for ch in range(0, no_of_channels):
+        chann_block.append(
+            Channel(
+                id=ChannelID(f"Channel:{ch}"),
+                color=Color((255, 255, 255)),
+                emission_wavelength=1,  # placeholder
+                excitation_wavelength=1,  # placeholder
+            )
+        )
+    # --Generate pixels block--#
+    pix_block = []
+    ifd_counter = 0
+    for t in range(0, no_of_tiles):
+        template_plane_block = copy.deepcopy(plane_block)
+        template_chann_block = copy.deepcopy(chann_block)
+        template_tiffdata_block = copy.deepcopy(tiff_block)
+        for ch, mark in enumerate(markers):
+            template_plane_block[ch].position_x = xy_tile_positions_units[t][0]
+            template_plane_block[ch].position_y = xy_tile_positions_units[t][1]
+            template_plane_block[ch].exposure_time = exposures[ch]
+            template_chann_block[ch].id = f"Channel:{100 + t}:{ch}:{mark}"
+            template_tiffdata_block[ch].ifd = ifd_counter
+            ifd_counter += 1
+        pix_block.append(
+            Pixels(
+                id=PixelsID(f"Pixels:{t}"),
+                dimension_order=ome_types.model.pixels.DimensionOrder("XYCZT"),
+                size_c=no_of_channels,
+                size_t=1,
+                size_x=img_size[0],
+                size_y=img_size[1],
+                size_z=1,
+                type=bit_depth,
+                big_endian=False,
+                channels=template_chann_block,
+                interleaved=False,
+                physical_size_x=pixel_size,
+                # physical_size_x_unit=pixel_units,
+                physical_size_y=pixel_size,
+                # physical_size_y_unit=pixel_units,
+                physical_size_z=1.0,
+                planes=template_plane_block,
+                significant_bits=sig_bits,
+                tiff_data_blocks=template_tiffdata_block,
+            )
+        )
+    # --Generate image block--#
+    img_block = []
+    for t in range(0, no_of_tiles):
+        img_block.append(Image(id=ImageID(f"Image:{t}"), pixels=pix_block[t]))
+    # --Create the OME object with all previously defined blocks--#
+    ome_custom = OME()
+    ome_custom.creator = " ".join(
+        [ome_types.__name__, ome_types.__version__, "/ python version-", platform.python_version()]
+    )
+    ome_custom.images = img_block
+    ome_custom.uuid = uuid4().urn
+    ome_xml = to_xml(ome_custom)
+    tifff.tiffcomment(img_path, ome_xml)
+
+
+def setup_coords(x, y, pix_units):
+    if pix_units == "mm":
+        x_norm = 1000 * (np.array(x) - np.min(x))  # /pixel_size
+        y_norm = 1000 * (np.array(y) - np.min(y))  # /pixel_size
+    x_norm = np.rint(x_norm).astype("int")
+    y_norm = np.rint(y_norm).astype("int")
+    # invert y
+    y_norm = np.max(y_norm) - y_norm
+    xy_tile_positions = [(i, j) for i, j in zip(x_norm, y_norm)]
+    return xy_tile_positions
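+
+
+# Worked example for setup_coords (illustrative numbers): x=[10.0, 10.5] and
+# y=[20.0, 20.5] in mm become micron offsets x=[0, 500] and, after the y
+# inversion, y=[500, 0].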
+
+
+def tile_position(metadata_string):
+    # meta_dict = {TAGS[key] : img.tag[key] for key in img.tag_v2}
+    # ome=BeautifulSoup(meta_dict['ImageDescription'][0],'xml')
+    ome = BeautifulSoup(metadata_string, "xml")
+    x = float(ome.StageLabel["X"])
+    y = float(ome.StageLabel["Y"])
+    stage_units = ome.StageLabel["XUnit"]
+    pixel_size = float(ome.Pixels["PhysicalSizeX"])
+    pixel_units = ome.Pixels["PhysicalSizeXUnit"]
+    bit_depth = ome.Pixels["Type"]
+    significantBits = int(ome.Pixels["SignificantBits"])
+    tile_info = {
+        "x": x,
+        "y": y,
+        "stage_units": stage_units,
+        "pixel_size": pixel_size,
+        "pixel_units": pixel_units,
+        "bit_depth": bit_depth,
+        "sig_bits": significantBits,
+    }
+    return tile_info
+
+
+def create_stack(
+    info,
+    exp_level=1,
+    antigen_cycle_no=cycle_no,
+    cycle_folder="",
+    isbleach=False,
+    offset=0,
+    device=device_name,
+    ref_marker=ref_marker,
+    results_path=stack_path,
+):
+
+    racks = info["rack"].unique()
+    wells = info["well"].unique()
+    antigen_cycle = f"{antigen_cycle_no:03d}"
+    if isbleach:
+        cycle_prefix = "Bleach"
+    else:
+        cycle_prefix = "Antigen"
+
+    Path(cycle_folder)
+
+    with tifff.TiffFile(info["img_full_path"][0]) as tif:
+        img_ref = tif.pages[0].asarray()
+        width = img_ref.shape[1]
+        height = img_ref.shape[0]
+        dtype_ref = img_ref.dtype
+    ref_marker = list(info.loc[info["exposure_level"] == "ref", "marker"])[0]
+    markers = info["marker"].unique()
+    markers_subset = np.setdiff1d(markers, [ref_marker])
+    sorted_markers = np.insert(markers_subset, 0, ref_marker)
+    sorted_filters = []
+    for m in sorted_markers:
+        sorted_filters.append(info.loc[info["marker"] == m, "filter"].tolist()[0])
+    if isbleach:
+        target_name = "filters"
+        target = "_".join(sorted_filters)
+    else:
+        target_name = "markers"
+        target = "_".join(sorted_markers)
+
+    for r in racks:
+
+        output_levels = []
+        rack_no = "rack_{n}".format(n=f"{r:02d}")
+        output_levels.append(rack_no)
+
+        for w in wells:
+
+            well_no = "well_{n}".format(n=f"{w:02d}")
+            output_levels.append(well_no)
+            groupA = info.loc[(info["rack"] == r) & (info["well"] == w)]
+            rois = groupA["roi"].unique()
+
+            for roi in rois:
+
+                roi_no = "roi_{n}".format(n=f"{roi:02d}")
+                exp_level_no = "exp_level_{n}".format(n=f"{exp_level:02d}")
+                output_levels.append(roi_no)
+                output_levels.append(exp_level_no)
+
+                counter = 0
+                groupB = groupA.loc[groupA["roi"] == roi]
+                A = groupB.loc[(groupB["exposure_level"] == "ref")]
+                stack_size_z = A.shape[0] * len(markers)
+                fov_id = groupB.loc[groupB["marker"] == ref_marker, "fov"].unique()
+                fov_id = np.sort(fov_id)
+
+                stack_name = "cycle-{C}-{prefix}-exp-{E}-rack-{R}-well-{W}-roi-{ROI}-{T}-{M}.{img_format}".format(
+                    C=f"{(offset+antigen_cycle_no):03d}",
+                    prefix=cycle_prefix,
+                    E=exp_level,
+                    R=r,
+                    W=w,
+                    ROI=roi,
+                    T=target_name,  # markers or filters
+                    M=target,
+                    img_format="ome.tiff",
+                )
+
+                X = []
+                Y = []
+                stack = np.zeros((stack_size_z, height, width), dtype=dtype_ref)
+
+                exposure_per_marker = []
+                exp = groupB.loc[
+                    (groupB["marker"] == ref_marker) & (groupB["fov"] == 1) & (groupB["exposure_level"] == "ref"),
+                    "exposure",
+                ].tolist()[0]
+                exposure_per_marker.append(exp)
+                for s in markers_subset:
+                    exp = groupB.loc[
+                        (groupB["marker"] == s) & (groupB["fov"] == 1) & (groupB["exposure_level"] == exp_level),
+                        "exposure",
+                    ].tolist()[0]
+                    exposure_per_marker.append(exp)
+
+                for tile in fov_id:
+
+                    img_ref = groupB.loc[
+                        (groupB["marker"] == ref_marker) & (groupB["fov"] == tile), "img_full_path"
+                    ].tolist()
+
+                    if len(img_ref) > 0:
+
+                        with tifff.TiffFile(img_ref[0]) as tif:
+                            img = tif.pages[0].asarray()
+                            metadata = tif.ome_metadata
+
+                        stack[counter, :, :] = img
+                        tile_data = tile_position(metadata)
+                        X.append(tile_data["x"])
+                        Y.append(tile_data["y"])
+                        counter += 1
+
+                        for m in markers_subset:
+                            img_marker = groupB.loc[
+                                (groupB["marker"] == m)
+                                & (groupB["fov"] == tile)
+                                & (groupB["exposure_level"] == exp_level),
+                                "img_full_path",
+                            ].tolist()[0]
+                            img = tifff.imread(img_marker)
+                            stack[counter, :, :] = img
+                            counter += 1
+
+                output_folders_path = stack_path / "--".join(output_levels) / "raw"
+
+                if not os.path.exists(output_folders_path):
+                    os.makedirs(output_folders_path)
+
+                final_stack_path = os.path.join(output_folders_path, stack_name)
+                tifff.imwrite(final_stack_path, stack, photometric="minisblack")
+                img_info = {
+                    "name": stack_name,
+                    "device": device_name,
+                    "no_channels": len(markers),
+                    "markers": sorted_markers,
+                    "filters": sorted_filters,
+                    "exposure_times": exposure_per_marker,
+                    "xy_img_size_pix": (width, height),
+                    "pix_size": tile_data["pixel_size"],
+                    "pix_units": tile_data["pixel_units"],
+                    "bit_depth": tile_data["bit_depth"],
+                    "sig_bits": tile_data["sig_bits"],
+                }
+                create_ome(img_info, setup_coords(X, Y, img_info["pix_units"]), img_path=final_stack_path)
+
+    return img_info
+
+
+def main():
+
+    antigen_info = antigen_cycle_info(antigen_folder=antigen_dir)
+    bleach_info = bleach_cycle_info(bleach_folder=bleach_dir)
+    exp = antigen_info["exposure_level"].unique()
+    exp = exp[exp != "ref"]
+    exp.sort()
+
+    if args.hi_exposure_only:
+        exp = [max(exp)]
+
+    for e in exp:
+        antigen_stack_info = create_stack(antigen_info, antigen_cycle_no=cycle_no, exp_level=e)
+        bleach_stack_info = create_stack(bleach_info, antigen_cycle_no=cycle_no, isbleach=True, exp_level=e)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py
index 48f784bd..618855a5 100644
--- a/src/spatialdata_io/__init__.py
+++ b/src/spatialdata_io/__init__.py
@@ -4,6 +4,7 @@
 from spatialdata_io.readers.cosmx import cosmx
 from spatialdata_io.readers.curio import curio
 from spatialdata_io.readers.dbit import dbit
+from spatialdata_io.readers.macsima import macsima
 from spatialdata_io.readers.mcmicro import mcmicro
 from spatialdata_io.readers.merscope import merscope
 from spatialdata_io.readers.seqfish import seqfish
@@ -32,6 +33,7 @@
     "xenium_explorer_selection",
     "dbit",
     "visium_hd",
+    "macsima",
 ]
 
 __version__ = version("spatialdata-io")
diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index 34848137..7865279b 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -18,6 +18,16 @@ class CodexKeys(ModeEnum):
     IMAGE_TIF = ".tif"
 
 
+@unique
+class MacsimaKeys(ModeEnum):
+    """Keys for *MACSima* formatted dataset."""
+
+    # images
+    METADATA_SUFFIX = ".QiPattern.txt"
+    IMAGE_OMETIF = ".ome.tif"
+    IMAGE_QPTIF = ".qptif"
+
+
 class CurioKeys(ModeEnum):
     """Keys for *Curio* formatted dataset."""
diff --git a/src/spatialdata_io/readers/_utils/_utils.py b/src/spatialdata_io/readers/_utils/_utils.py
index 4623fced..bc051d29 100644
--- a/src/spatialdata_io/readers/_utils/_utils.py
+++ b/src/spatialdata_io/readers/_utils/_utils.py
@@ -6,8 +6,13 @@
 from typing import Any, Optional, Union
 
 import numpy as np
+import spatialdata as sd
+import zarr
 from anndata import AnnData, read_text
 from h5py import File
+from ome_types import from_tiff
+from ome_types.model import Pixels, UnitsLength
+from spatialdata._logging import logger
 
 from spatialdata_io.readers._utils._read_10x_h5 import _read_10x_h5
 
@@ -84,3 +89,113 @@ def _initialize_raster_models_kwargs(
     if "scale_factors" not in labels_models_kwargs:
         labels_models_kwargs["scale_factors"] = [2, 2, 2, 2]
     return image_models_kwargs, labels_models_kwargs
+
+
+def add_ome_attr(path: Path) -> None:
+    """Add OME metadata to the Zarr store of a SpatialData object."""
+    path = Path(path)
+    assert path.exists()
+    # store = zarr.open(path, mode="r")
+    sdata = sd.SpatialData.read(path)
+    logger.debug(sdata)
+    # get subdirs
+    images = [p for p in path.glob("images/*") if p.is_dir()]
+    logger.debug(images)
+    # write the image data
+    for image in images:
+        name = image.stem
+        el = sdata.images[name]
+        if "scale0" in el:
+            el = el["scale0"]
+            logger.debug("picking scale0")
+            el = el[name]
+        logger.debug(el)
+        channel_names = list(el.coords["c"].data)
+        logger.debug(channel_names)
+        # get maximum possible value for dtype
+        dtype = el.dtype
+        max_value = np.iinfo(dtype).max
+        logger.debug(f"{dtype} {max_value}")
+        # store = parse_url(image, mode="w").store
+        # list of 7 basic colors
+        colors = ["ff0000", "00ff00", "0000ff", "ffff00", "00ffff", "ff00ff", "ffffff"]
+        channel_dicts = [
+            {
+                "active": i < 3,
+                "label": c,
+                "coefficient": 1,
+                "family": "linear",
+                "inverted": False,
+                # pick one of the 7 colors, based on i
+                "color": colors[i % 7],
+                "window": {
+                    "min": 0,
+                    "start": 0,
+                    "end": max_value,
+                    "max": max_value,
+                },
+            }
+            for i, c in enumerate(channel_names)
+        ]
+        root = zarr.open_group(image)
+        root.attrs.update(
+            {
+                "omero": {
+                    "channels": channel_dicts,
+                    "rdefs": {
+                        "defaultT": 0,  # first timepoint to show the user
+                        "defaultZ": 0,  # first Z section to show the user
+                        "model": "color",  # "color" or "greyscale"
+                    },
+                    "name": name,
+                    "version": "0.4",
+                }
+            }
+        )
+    zarr.consolidate_metadata(path)
+
+
+def calc_scale_factors(stack: Any, min_size: int = 1000, default_scale_factor: int = 2) -> list[int]:
+    """Calculate scale factors based on image size to get the lowest resolution under min_size pixels."""
+    # get the lowest dimension, ignoring channels
+    lower_scale_limit = min(stack.shape[1:])
+    scale_factor = default_scale_factor
+    scale_factors = [scale_factor]
+    lower_scale_limit /= scale_factor
+    while lower_scale_limit >= min_size:
+        # scale_factors are cumulative, so we don't need to do e.g. scale_factor *= 2
+        scale_factors.append(scale_factor)
+        lower_scale_limit /= scale_factor
+    return scale_factors
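+
+
+# Worked example for calc_scale_factors (illustrative numbers): for a stack of
+# shape (c, 30000, 30000) with min_size=1000 and default_scale_factor=2, the
+# smallest spatial side shrinks 30000 -> 15000 -> 7500 -> 3750 -> 1875 -> 937.5,
+# so the cumulative factors returned are [2, 2, 2, 2, 2].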
+
+
+def parse_channels(path: Path) -> list[str]:
+    """Parse channel names from an OME-TIFF file."""
+    images = from_tiff(path).images
+    if len(images) > 1:
+        logger.warning("Found multiple images in OME-TIFF file. Only the first one will be used.")
+    channels = images[0].pixels.channels
+    logger.debug(channels)
+    names = [c.name for c in channels]
+    return names
+
+
+def parse_physical_size(path: Path | None = None, ome_pixels: Pixels | None = None) -> float:
+    """Parse the physical pixel size from an OME-TIFF and convert it to micrometer."""
+    pixels = ome_pixels or from_tiff(path).images[0].pixels
+    logger.debug(pixels)
+    if pixels.physical_size_x_unit != pixels.physical_size_y_unit:
+        logger.error("Physical units for x and y dimensions are not the same.")
+        raise NotImplementedError
+    if pixels.physical_size_x != pixels.physical_size_y:
+        logger.error("Physical sizes for x and y dimensions are not the same.")
+        raise NotImplementedError
+    # convert to micrometer if needed
+    if pixels.physical_size_x_unit == UnitsLength.NANOMETER:
+        physical_size = pixels.physical_size_x / 1000
+    elif pixels.physical_size_x_unit == UnitsLength.MICROMETER:
+        physical_size = pixels.physical_size_x
+    else:
+        logger.error(f"Physical unit not recognized: '{pixels.physical_size_x_unit}'.")
+        raise NotImplementedError
+    return float(physical_size)
diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py
new file mode 100644
index 00000000..7cbd7daf
--- /dev/null
+++ b/src/spatialdata_io/readers/macsima.py
@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from pathlib import Path
+from types import MappingProxyType
+from typing import Any
+
+import dask.array as da
+import pandas as pd
+import spatialdata as sd
+from aicsimageio import AICSImage
+from dask_image.imread import imread
+from ome_types import from_tiff
+from spatialdata import SpatialData
+from spatialdata._logging import logger
+
+from spatialdata_io._constants._constants import MacsimaKeys
+from spatialdata_io._docs import inject_docs
+from spatialdata_io.readers._utils._utils import calc_scale_factors, parse_physical_size
+
+__all__ = ["macsima"]
+
+
+@inject_docs(vx=MacsimaKeys)
+def macsima(
+    path: str | Path,
+    metadata: bool = True,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    subset: int | None = None,
+    c_subset: int | None = None,
+    max_chunk_size: int = 1024,
+    c_chunks_size: int = 1,
+    multiscale: bool = True,
+    transformations: bool = True,
+    scale_factors: list[int] | None = None,
+    default_scale_factor: int = 2,
+) -> SpatialData:
+    """
+    Read a *MACSima* formatted dataset.
+
+    This function reads images from a MACSima cyclic imaging experiment. Metadata for the cycles is parsed either
+    from a metadata file or from the image names. For parsing .qptiff files, installing bioformats is advised.
+
+    .. seealso::
+
+        - `MACSima output `_.
+
+    Parameters
+    ----------
+    path
+        Path to the directory containing the data.
+    metadata
+        Whether to search for a .txt file with metadata in the folder. If False, the metadata in the image names
+        is used.
+    imread_kwargs
+        Keyword arguments passed to :func:`dask_image.imread.imread`.
+    subset
+        Subset the image to the first ``subset`` pixels in x and y dimensions.
+    c_subset
+        Subset the image to the first ``c_subset`` channels.
+    max_chunk_size
+        Maximum chunk size for x and y dimensions.
+    c_chunks_size
+        Chunk size for the c dimension.
+    multiscale
+        Whether to create a multiscale image.
+    transformations
+        Whether to add a transformation from pixels to microns to the image.
+    scale_factors
+        Scale factors to use for downsampling. If None, scale factors are calculated based on image size.
+    default_scale_factor
+        Default scale factor to use for downsampling.
+
+    Returns
+    -------
+    :class:`spatialdata.SpatialData`
+    """
+    path = Path(path)
+    path_files = []
+    pixels_to_microns = None
+    if metadata:
+        # read metadata to get the list of images and channel names
+        path_files = list(path.glob(f"*{MacsimaKeys.METADATA_SUFFIX}"))
+        if len(path_files) > 0:
+            if len(path_files) > 1:
+                logger.warning(
+                    f"Cannot determine metadata file. Expecting a single .txt file. Got multiple files: {path_files}"
+                )
+            path_metadata = path_files[0]
+            df = pd.read_csv(path_metadata, sep="\t", header=0, index_col=None)
+            logger.debug(df)
+            df["channel"] = df["ch1"].str.split(" ").str[0]
+            df["round_channel"] = df["Round"] + " " + df["channel"]
+            path_files = [path / p for p in df.filename.values]
+            assert all(
+                p.exists() for p in path_files
+            ), f"Cannot find all images in metadata file. Missing: {[p for p in path_files if not p.exists()]}"
+            round_channels = df.round_channel.values
+            stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs)
+        else:
+            logger.warning("Cannot find metadata file. Will try to parse from image names.")
+    if not metadata or len(path_files) == 0:
+        # get the list of image paths; get the channel name from the OME data and the cycle number from the filename
+        # look for OME-TIFF files
+        ome_patt = f"*{MacsimaKeys.IMAGE_OMETIF}*"
+        path_files = list(path.glob(ome_patt))
+        if not path_files:
+            # look for .qptiff files
+            qptif_patt = f"*{MacsimaKeys.IMAGE_QPTIF}*"
+            path_files = list(path.glob(qptif_patt))
+            logger.debug(path_files)
+            if not path_files:
+                raise ValueError(f"Cannot determine data set. Expecting '{ome_patt}' or '{qptif_patt}' files")
+            # TODO: warn if there is not exactly 1 ROI with 1 .qptiff per cycle
+            # TODO: more robust parsing of {name}_cycle{round}_{scan}.qptiff
+            rounds = [f"R{int(p.stem.split('_')[1][5:])}" for p in path_files]
+            # parse .qptiff files
+            imgs = [AICSImage(img, **imread_kwargs) for img in path_files]
+            # sort based on cycle number
+            rounds, imgs = zip(*sorted(zip(rounds, imgs), key=lambda x: int(x[0][1:])))
+            channels_per_round = [img.channel_names for img in imgs]
+            # take the first image and first channel to get the physical size
+            ome_data = imgs[0].ome_metadata
+            logger.debug(ome_data)
+            pixels_to_microns = parse_physical_size(ome_pixels=ome_data.images[0].pixels)
+            da_per_round = [img.dask_data[0, :, 0, :, :] for img in imgs]
+            sorted_channels = []
+            for r, cs in zip(rounds, channels_per_round):
+                for c in cs:
+                    sorted_channels.append(f"{r} {c}")
+            stack = da.stack(da_per_round).squeeze()
+            # Parse OME XML
+            # img.ome_metadata
+            # arr = img.dask_data[0, :, 0, :, :]
+            # channel_names = img.channel_names
+            logger.debug(sorted_channels)
+            logger.debug(stack)
+        else:
+            logger.debug(path_files[0])
+            # make sure not to remove round 0 when parsing!
+            rounds = [f"R{int(p.stem.split('_')[0])}" for p in path_files]
+            channels = [from_tiff(p).images[0].pixels.channels[0].name for p in path_files]
+            round_channels = [f"{r} {c}" for r, c in zip(rounds, channels)]
+            stack, sorted_channels = get_stack(path_files, round_channels, imread_kwargs)
+
+    # do subsetting if needed
+    if subset:
+        stack = stack[:, :subset, :subset]
+    if c_subset:
+        stack = stack[:c_subset, :, :]
+        sorted_channels = sorted_channels[:c_subset]
+    if multiscale and not scale_factors:
+        scale_factors = calc_scale_factors(stack, default_scale_factor=default_scale_factor)
+    if not multiscale:
+        scale_factors = None
+    logger.debug(f"Scale factors: {scale_factors}")
+
+    t_dict = None
+    if transformations:
+        pixels_to_microns = pixels_to_microns or parse_physical_size(path_files[0])
+        t_pixels_to_microns = sd.transformations.Scale([pixels_to_microns, pixels_to_microns], axes=("x", "y"))
+        # 'microns' is also used in the merscope example
+        # no inverse needed as the transformation is already from pixels to microns
+        t_dict = {"global": t_pixels_to_microns}
+    # chunk_size can be 1 for channels
+    chunks = {
+        "x": max_chunk_size,
+        "y": max_chunk_size,
+        "c": c_chunks_size,
+    }
+    stack = sd.models.Image2DModel.parse(
+        stack,
+        # TODO: make sure y and x locations are correct
+        dims=["c", "y", "x"],
+        scale_factors=scale_factors,
+        chunks=chunks,
+        c_coords=sorted_channels,
+        transformations=t_dict,
+    )
+    sdata = sd.SpatialData(images={path.stem: stack}, table=None)
+
+    return sdata
+
+
+def get_stack(path_files: list[Path], round_channels: list[str], imread_kwargs: Mapping[str, Any]) -> Any:
+    imgs_channels = list(zip(path_files, round_channels))
+    logger.debug(imgs_channels)
+    # sort based on round number
+    imgs_channels = sorted(imgs_channels, key=lambda x: int(x[1].split(" ")[0][1:]))
+    logger.debug(f"Len imgs_channels: {len(imgs_channels)}")
+    # read in images and merge channels
+    sorted_paths, sorted_channels = list(zip(*imgs_channels))
+    imgs = [imread(img, **imread_kwargs) for img in sorted_paths]
+    stack = da.stack(imgs).squeeze()
+    return stack, sorted_channels
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
new file mode 100644
index 00000000..523dce65
--- /dev/null
+++ b/tests/test_macsima.py
@@ -0,0 +1,54 @@
+import math
+import sys
+from pathlib import Path
+
+import pytest
+
+from spatialdata_io.readers.macsima import macsima
+
+
+# The datasets should be downloaded and placed in the "data" directory.
+@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher")
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [("Lung_adc_demo", "{'y': (0, 15460), 'x': (0, 13864)}"), ("HumanLiverH35", "{'y': (0, 1154), 'x': (0, 1396)}")],
+)
+def test_image_size(dataset: str, expected: str) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f)
+    from spatialdata import get_extent
+
+    extent: dict[str, tuple[float, float]] = get_extent(sdata, exact=False)
+    extent = {ax: (math.floor(extent[ax][0]), math.ceil(extent[ax][1])) for ax in extent}
+    assert str(extent) == expected
+
+
+@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher")
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [("Lung_adc_demo", 116), ("HumanLiverH35", 151)],
+)
+def test_total_channels(dataset: str, expected: int) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f)
+
+    # get the number of channels
+    channels: int = len(sdata.images[dataset]["scale0"]["c"])
+    assert channels == expected
+
+
+@pytest.mark.skipif(sys.version_info < (3, 10), reason="Test requires Python 3.10 or higher")
+@pytest.mark.parametrize(
+    "dataset,expected",
+    [("Lung_adc_demo", ["R0 DAPI", "R1 CD68", "R1 CD163"]), ("HumanLiverH35", ["R0 DAPI", "R1 CD68", "R1 CD163"])],
+)
+def test_channel_names(dataset: str, expected: list[str]) -> None:
+    f = Path("./data") / dataset
+    assert f.is_dir()
+    sdata = macsima(f, c_subset=3)
+
+    # get the channel names
+    channels = sdata.images[dataset]["scale0"]["c"].values
+    assert list(channels) == expected
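+
+
+# Note: the datasets above are expected under ./data; run these tests with,
+# e.g., `pytest tests/test_macsima.py`. A minimal usage sketch of the reader
+# itself (paths illustrative, channel names as asserted above):
+#   sdata = macsima("./data/Lung_adc_demo", c_subset=3)
+#   list(sdata.images["Lung_adc_demo"]["scale0"]["c"].values)
+#   # -> ["R0 DAPI", "R1 CD68", "R1 CD163"]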