From 9b6fdb53e444221bd7f8ec4a6d5e42dc4bc2f5b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20H=2E=20Benedetti?= Date: Mon, 18 Nov 2024 19:01:54 +0100 Subject: [PATCH] Finished UNet + launcher + started widget --- docs/conf.py | 8 +- src/microglia_analyzer/_tests/workflow.py | 2 +- src/microglia_analyzer/_widget.py | 2 +- .../_widget_yolo_annotations.py | 2 +- src/microglia_analyzer/dl/losses.py | 60 +++ src/microglia_analyzer/dl/unet2d_training.py | 361 ++++++++++++------ src/microglia_analyzer/dl/yolov5_training.py | 31 +- .../experimental/classify_microglia.py | 144 +++++++ src/microglia_analyzer/experimental/dump.py | 60 +++ .../experimental/segment_microglia.py | 87 +++++ src/microglia_analyzer/experimental/tiles.py | 66 ++++ src/microglia_analyzer/ma_worker.py | 276 +++++++++++++ src/microglia_analyzer/microglia_analyzer.py | 106 ----- src/microglia_analyzer/models.json | 4 + src/microglia_analyzer/tiles/tiler.py | 6 - src/microglia_analyzer/utils.py | 125 ++++++ 16 files changed, 1104 insertions(+), 236 deletions(-) create mode 100644 src/microglia_analyzer/dl/losses.py create mode 100644 src/microglia_analyzer/experimental/classify_microglia.py create mode 100644 src/microglia_analyzer/experimental/dump.py create mode 100644 src/microglia_analyzer/experimental/segment_microglia.py create mode 100644 src/microglia_analyzer/experimental/tiles.py create mode 100644 src/microglia_analyzer/ma_worker.py delete mode 100644 src/microglia_analyzer/microglia_analyzer.py create mode 100644 src/microglia_analyzer/models.json create mode 100644 src/microglia_analyzer/utils.py diff --git a/docs/conf.py b/docs/conf.py index 65f50a7..370c2ee 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,12 +21,12 @@ # -- Project information ----------------------------------------------------- -project = 'Protoplasts swelling analyzer' +project = 'Microglia Analyzer' copyright = '2024, Clément H. Benedetti' -author = 'Clément H. BENEDETTI' +author = 'Clément H. BENEDETTI' # The full version, including alpha/beta/rc tags -release = '1.0.0' +release = '0.0.1' # -- General configuration --------------------------------------------------- @@ -69,4 +69,4 @@ html_favicon = 'icon.png' -autosummary_generate = True \ No newline at end of file +autosummary_generate = True diff --git a/src/microglia_analyzer/_tests/workflow.py b/src/microglia_analyzer/_tests/workflow.py index d1ae527..9dcd791 100644 --- a/src/microglia_analyzer/_tests/workflow.py +++ b/src/microglia_analyzer/_tests/workflow.py @@ -1,6 +1,6 @@ import tifffile import os -from microglia_analyzer.microglia_analyzer import MicrogliaAnalyzer +from microglia_analyzer.ma_worker import MicrogliaAnalyzer from microglia_analyzer.tiles import tiler diff --git a/src/microglia_analyzer/_widget.py b/src/microglia_analyzer/_widget.py index 315c8ab..a05473b 100644 --- a/src/microglia_analyzer/_widget.py +++ b/src/microglia_analyzer/_widget.py @@ -19,7 +19,7 @@ import re from microglia_analyzer import TIFF_REGEX -from microglia_analyzer.microglia_analyzer import MicrogliaAnalyzer +from microglia_analyzer.ma_worker import MicrogliaAnalyzer _IMAGE_LAYER_NAME = "µ-Image" diff --git a/src/microglia_analyzer/_widget_yolo_annotations.py b/src/microglia_analyzer/_widget_yolo_annotations.py index ec6de1d..ef423bc 100644 --- a/src/microglia_analyzer/_widget_yolo_annotations.py +++ b/src/microglia_analyzer/_widget_yolo_annotations.py @@ -322,7 +322,7 @@ def clear_mask(self): Clears the mask layer. 
""" if _MASKS_LAYER in self.viewer.layers: - self.viewer.layers[_MASKS_LAYER].data = np.zeros_like(self.viewer.layers[_IMAGE_LAYER].data) + self.viewer.layers[_MASKS_LAYER].data = np.zeros_like(self.viewer.layers[_MASKS_LAYER].data) def fill_current_label(self): if not _MASKS_LAYER in self.viewer.layers: diff --git a/src/microglia_analyzer/dl/losses.py b/src/microglia_analyzer/dl/losses.py new file mode 100644 index 0000000..0b32d61 --- /dev/null +++ b/src/microglia_analyzer/dl/losses.py @@ -0,0 +1,60 @@ +import tensorflow as tf + +def jaccard_loss(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + intersection = tf.reduce_sum(y_true * y_pred) + union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection + return 1 - (intersection + 1) / (union + 1) + +def dice_loss(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + intersection = tf.reduce_sum(y_true * y_pred) + return 1 - (2. * intersection + 1) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + 1) + +def bce_dice_loss(bce_coef=0.5): + def bcl(y_true, y_pred): + bce = tf.keras.losses.binary_crossentropy(y_true, y_pred) + dice = dice_loss(y_true, y_pred) + return bce_coef * bce + (1.0 - bce_coef) * dice + return bcl + +def focal_loss(gamma=2.0, alpha=5.75): + def focal_loss_fixed(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + alpha_t = y_true * alpha + (1 - y_true) * (1 - alpha) + p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred) + fl = - alpha_t * (1 - p_t) ** gamma * tf.math.log(p_t + 1e-5) + return tf.reduce_mean(fl) + return focal_loss_fixed + +def tversky_loss(alpha=0.5): + beta = 1 - alpha + def loss(y_true, y_pred): + y_true = tf.cast(y_true, tf.float32) + y_pred = tf.cast(y_pred, tf.float32) + true_pos = tf.reduce_sum(y_true * y_pred) + false_neg = tf.reduce_sum(y_true * (1 - y_pred)) + false_pos = tf.reduce_sum((1 - y_true) * y_pred) + return 1 - (true_pos + 1) / (true_pos + alpha * false_neg + beta * false_pos + 1) + return loss + +def skeleton_recall(y_true, y_pred): + intersection = tf.reduce_sum(y_true * y_pred) + recall = intersection / (tf.reduce_sum(y_true) + 1e-8) + return 1 - recall + +def dice_skeleton_loss(skeleton_coef=0.5, bce_coef=0.5): + bdl = bce_dice_loss(bce_coef) + def dsl(y_true, y_pred): + y_pred = tf.square(y_pred) + return (1.0 - skeleton_coef) * bdl(y_true, y_pred) + skeleton_coef * skeleton_recall(y_true, y_pred) + return dsl + +def skeleton_loss(y_true, y_pred): + inter = tf.reduce_sum(y_true * y_pred) / tf.reduce_sum(y_true) + mse_score = tf.reduce_mean(tf.square(y_true - y_pred)) + mean_constraint = tf.abs(tf.reduce_mean(y_pred) - tf.reduce_mean(y_true)) + return 1.0 - inter + mse_score + 0.1 * mean_constraint \ No newline at end of file diff --git a/src/microglia_analyzer/dl/unet2d_training.py b/src/microglia_analyzer/dl/unet2d_training.py index 64e3e8d..039c465 100644 --- a/src/microglia_analyzer/dl/unet2d_training.py +++ b/src/microglia_analyzer/dl/unet2d_training.py @@ -5,19 +5,25 @@ import os import shutil import re +import json -from scipy.ndimage import rotate +from scipy.ndimage import rotate, gaussian_filter +from skimage.morphology import binary_dilation, diamond, skeletonize import pandas as pd from tabulate import tabulate +from microglia_analyzer.dl.losses import (dice_loss, bce_dice_loss, + skeleton_recall, dice_skeleton_loss) + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" import tensorflow as tf 
from tensorflow.keras.models import Model -from tensorflow.keras.callbacks import (ModelCheckpoint, EarlyStopping, - ReduceLROnPlateau, TensorBoard, Callback) -from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Dropout, - UpSampling2D, concatenate, Activation) +from tensorflow.keras.callbacks import (ModelCheckpoint, EarlyStopping, + ReduceLROnPlateau, Callback) +from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Dropout, BatchNormalization, + UpSampling2D, concatenate, Activation, Conv2DTranspose) from tensorflow.keras.optimizers import Adam from tensorflow.keras.utils import plot_model from tensorflow.keras.losses import BinaryCrossentropy @@ -60,38 +66,45 @@ #@markdown ## 📍 a. Data paths -data_folder = "/home/benedetti/Documents/projects/2060-microglia/data/unet-training/" +data_folder = "/home/benedetti/Documents/projects/2060-microglia/data/training-data/clean" qc_folder = None -inputs_name = "images" +inputs_name = "inputs" masks_name = "masks" -models_path = "/home/benedetti/Documents/projects/2060-microglia/data/unet-training/models/" +models_path = "/home/benedetti/Documents/projects/2060-microglia/µnet" working_directory = "/tmp/unet_working/" model_name_prefix = "µnet" reset_local_data = True -remove_wrong_data = True +remove_wrong_data = False +data_usage = None #@markdown ## 📍 b. Network architecture validation_percentage = 0.15 -batch_size = 32 -epochs = 50 -unet_depth = 4 -num_filters_start = 16 -dropout_rate = 0.25 +batch_size = 8 +epochs = 500 +unet_depth = 2 +num_filters_start = 24 +dropout_rate = 0.2 optimizer = 'Adam' -learning_rate = 0.0001 +learning_rate = 0.001 +skeleton_coef = 0.2 +bce_coef = 0.7 +early_stop_patience = 50 +dilation_kernel = diamond(1) +loss = dice_skeleton_loss(skeleton_coef, bce_coef) #@markdown ## 📍 c. Data augmentation use_data_augmentation = True use_mirroring = True use_gaussian_noise = True +noise_scale = 0.001 use_random_rotations = True -angle_range = (-30, 30) +angle_range = (-90, 90) use_gamma_correction = True -gamma_range = (0.2, 4.0) -show_preview = False - +gamma_range = (0.2, 5.0) +use_holes = False +export_aug_sample = True # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # SANITY CHECK # @@ -347,8 +360,9 @@ def create_local_dirs(reset=False): Args: reset (bool): If True, the folders will be reset. """ - if not os.path.isdir(working_directory): - raise ValueError(f"Working directory '{working_directory}' does not exist.") + if os.path.isdir(working_directory) and reset: + shutil.rmtree(working_directory) + os.makedirs(working_directory, exist_ok=True) leaves = [inputs_name, masks_name] for r in _LOCAL_FOLDERS: for l in leaves: @@ -379,6 +393,22 @@ def check_sum(targets): acc = sum([i[1] for i in targets]) return abs(acc - 1.0) < 1e-6 +def restore_data_usage(targets, source): + """ + Restores the data usage from a JSON file. + """ + with open(data_usage, "r") as f: + data = json.load(f) + for target, _ in targets: + files = data[target] + for f in files: + src_path = os.path.join(source, inputs_name, f) + dst_path = os.path.join(working_directory, target, inputs_name, f) + shutil.copy(src_path, dst_path) + src_path = os.path.join(source, masks_name, f) + dst_path = os.path.join(working_directory, target, masks_name, f) + shutil.copy(src_path, dst_path) + def migrate_data(targets, source): """ Copies the content of the source folder to the working directory. @@ -389,6 +419,9 @@ def migrate_data(targets, source): targets (list): List of tuples. 
The first element is the name of the folder, the second is the ratio of the data to move. source (str): The source folder """ + if data_usage is not None: + restore_data_usage(targets, source) + return if not check_sum(targets): raise ValueError("The sum of the ratios must be equal to 1.") folders = [inputs_name, masks_name] @@ -411,6 +444,26 @@ def migrate_data(targets, source): #@markdown ## 📍 a. Data augmentation functions +def deteriorate_image(image, mask, num_points=25): + """ + Attempts to deteriorate the original image by making holes along the path. + """ + image = np.squeeze(image) + mask = np.squeeze(mask) + positive_points = np.argwhere(mask > 0) + if len(positive_points) < num_points: + selected_points = positive_points + else: + selected_points = positive_points[np.random.choice(len(positive_points), num_points, replace=False)] + + new_image = np.full_like(mask, 0, dtype=np.uint8) + for point in selected_points: + new_image[point[0], point[1]] = 255 + new_image = 1.0 - binary_dilation(new_image, footprint=dilation_kernel).astype(np.float32) + new_image = gaussian_filter(new_image, sigma=2.0) + image *= new_image + return np.expand_dims(image, axis=-1), np.expand_dims(mask, axis=-1) + def random_flip(image, mask): """ Applies a random horizontal or vertical flip to both the image and the mask. @@ -470,6 +523,22 @@ def gamma_correction(image, mask): return image, mask +def add_gaussian_noise(image, mask): + """ + Adds random Gaussian noise to the image. The mask remains unchanged. + + Args: + image (np.ndarray): The input image. + mask (np.ndarray): The input mask. + + Returns: + (np.ndarray, np.ndarray): The noisy image and the unchanged mask. + """ + noise = np.random.normal(0, noise_scale, image.shape) + noisy_image = image + noise + + return noisy_image, mask + def normalize(image, mask): """ Normalizes the image values to be between 0 and 1. The mask remains unchanged. @@ -503,32 +572,34 @@ def apply_data_augmentation(image, mask): image, mask = random_flip(image, mask) if use_random_rotations: image, mask = random_rotation(image, mask) + if use_holes: + image, mask = deteriorate_image(image, mask) if use_gamma_correction: image, mask = gamma_correction(image, mask) + if use_gaussian_noise: + image, mask = add_gaussian_noise(image, mask) image, mask = normalize(image, mask) return image, mask #@markdown ## 📍 b. Datasets visualization -def visualize_augmentations(num_examples=5): +def visualize_augmentations(model_path, num_examples=6): """ Visualizes original and augmented images side by side. Args: num_examples (int): The number of examples to visualize. 
""" - s = get_shape() - ds = make_dataset("training", True).batch(1).take(num_examples) - + dataset = make_dataset("training", True).batch(1).take(num_examples) grid_size = (2, num_examples) - fig, axes = plt.subplots(*grid_size, figsize=(15, 6)) + _, axes = plt.subplots(*grid_size, figsize=(15, 6)) - for i, (augmented_image, original_image) in enumerate(ds): + for i, (augmented_image, original_image) in enumerate(dataset): if i >= num_examples: break augmented_image = augmented_image[0].numpy() - original_image = original_image[0].numpy() + original_image = original_image[0].numpy() axes[0, i].imshow(original_image) axes[0, i].set_title(f"Mask {i+1}") @@ -539,7 +610,10 @@ def visualize_augmentations(num_examples=5): axes[1, i].axis('off') plt.tight_layout() - plt.show() + plot_path = os.path.join(model_path, "augmentations_preview.png") + plt.savefig(plot_path, format='png', dpi=400) + plt.clf() + print(f"📊 Augmentations preview saved to: {plot_path}") # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -553,12 +627,16 @@ def visualize_augmentations(num_examples=5): def open_pair(input_path, mask_path, training, img_only): raw_img = tifffile.imread(input_path) raw_img = np.expand_dims(raw_img, axis=-1) - raw_mask = (tifffile.imread(mask_path) > 0).astype(np.uint8) + raw_mask = tifffile.imread(mask_path) + raw_mask = skeletonize(raw_mask) + raw_mask = binary_dilation(raw_mask) + raw_mask = raw_mask.astype(np.float32) + raw_mask /= np.max(raw_mask) raw_mask = np.expand_dims(raw_mask, axis=-1) if training: raw_img, raw_mask = apply_data_augmentation(raw_img, raw_mask) image = tf.constant(raw_img, dtype=tf.float32) - mask = tf.constant(raw_mask, dtype=tf.uint8) + mask = tf.constant(raw_mask, dtype=tf.float32) if img_only: return image else: @@ -580,7 +658,7 @@ def make_dataset(source, training=False, img_only=False): output_signature=tf.TensorSpec(shape=shape, dtype=tf.float32, name=None) if not img_only: - output_signature = (output_signature, tf.TensorSpec(shape=shape, dtype=tf.uint8, name=None)) + output_signature = (output_signature, tf.TensorSpec(shape=shape, dtype=tf.float32, name=None)) ds = tf.data.Dataset.from_generator( pairs_generator, @@ -605,6 +683,37 @@ def test_ds_consumer(): print("\nDONE.") +def make_data_augmentation_sample(n_samples=100): + """ + Applies the data augmentation pipeline to n_samples images and saves them to a folder. + The folder folder is located into the working_directory. + """ + sample_folder = os.path.join(working_directory, "augmentation_sample") + if os.path.isdir(sample_folder): + shutil.rmtree(sample_folder) + os.makedirs(sample_folder, exist_ok=True) + ds = make_dataset("training", True) + for i, (img, mask) in enumerate(ds.repeat().take(n_samples)): + img = img.numpy() + mask = mask.numpy() + aug_img, _ = apply_data_augmentation(img, mask) + combined_img = np.concatenate((img.squeeze(), aug_img.squeeze()), axis=1) + tifffile.imwrite(os.path.join(sample_folder, f"img_{str(i).zfill(3)}.tif"), combined_img) + + +def export_data_usage(model_path): + """ + Produces a JSON explaining to which category belongs every file of the provided data (training, validation, testing). + It consists in a dictionary where keys are the categories and values are lists of files. 
+ """ + categories = {"training": [], "validation": [], "testing": []} + for category in _LOCAL_FOLDERS: + _, files = get_data_pools(os.path.join(working_directory, category), [inputs_name], True) + categories[category] = list(files) + json_string = json.dumps(categories, indent=4) + with open(os.path.join(model_path, "data_usage.json"), "w") as f: + f.write(json_string) + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # MODEL GENERATOR # @@ -640,89 +749,54 @@ def create_unet2d_model(input_shape): inputs = Input(shape=input_shape) x = inputs - # Encoder: + # --- Encoder --- skip_connections = [] for i in range(unet_depth): num_filters = num_filters_start * 2**i - x = Conv2D(num_filters, 3, padding='same', kernel_initializer='he_normal')(x) - x = Activation('relu')(x) - x = Conv2D(num_filters, 3, padding='same', kernel_initializer='he_normal')(x) - x = Activation('relu')(x) + coef = (unet_depth - i - 1) / unet_depth + x = Conv2D(num_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x) + x = BatchNormalization()(x) + x = Conv2D(num_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x) + x = BatchNormalization()(x) skip_connections.append(x) + x = Dropout(coef * dropout_rate)(x) x = MaxPooling2D(2)(x) - x = Dropout(dropout_rate)(x) - - # Decoder: + + # --- Bottleneck --- + num_filters = num_filters_start * 2**unet_depth + x = Conv2D(num_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x) + x = BatchNormalization()(x) + x = Conv2D(num_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x) + x = BatchNormalization()(x) + + # --- Decoder --- for i in reversed(range(unet_depth)): num_filters = num_filters_start * 2**i x = UpSampling2D(2)(x) + x = Conv2DTranspose(num_filters, (3, 3), strides=(1, 1), padding='same')(x) x = concatenate([x, skip_connections[i]]) - x = Conv2D(num_filters, 3, padding='same', kernel_initializer='he_normal')(x) - x = Activation('relu')(x) - x = Conv2D(num_filters, 3, padding='same', kernel_initializer='he_normal')(x) - x = Activation('relu')(x) - x = Dropout(dropout_rate)(x) + x = Conv2D(num_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x) + # x = BatchNormalization()(x) + x = Conv2D(num_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal')(x) + # x = BatchNormalization()(x) - # Output layer with sigmoid for binary classification outputs = Conv2D(1, 1, activation='sigmoid')(x) - model = Model(inputs=inputs, outputs=outputs) return model -#@markdown ## 📍 c. Alternative loss functions - -def jaccard_loss(y_true, y_pred): - y_true = tf.cast(y_true, tf.float32) - y_pred = tf.cast(y_pred, tf.float32) - intersection = tf.reduce_sum(y_true * y_pred) - union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection - return 1 - (intersection + 1) / (union + 1) - -def dice_loss(y_true, y_pred): - y_true = tf.cast(y_true, tf.float32) - y_pred = tf.cast(y_pred, tf.float32) - intersection = tf.reduce_sum(y_true * y_pred) - return 1 - (2. 
* intersection + 1) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + 1) - -def bce_dice_loss(y_true, y_pred): - bce = tf.keras.losses.binary_crossentropy(y_true, y_pred) - dice = dice_loss(y_true, y_pred) - return bce + dice - -def focal_loss(gamma=2., alpha=0.25): - def focal_loss_fixed(y_true, y_pred): - y_true = tf.cast(y_true, tf.float32) - y_pred = tf.cast(y_pred, tf.float32) - alpha_t = y_true * alpha + (1 - y_true) * (1 - alpha) - p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred) - fl = - alpha_t * (1 - p_t) ** gamma * tf.math.log(p_t + 1e-5) - return tf.reduce_mean(fl) - return focal_loss_fixed - -def tversky_loss(alpha=0.5, beta=0.5): - def loss(y_true, y_pred): - y_true = tf.cast(y_true, tf.float32) - y_pred = tf.cast(y_pred, tf.float32) - true_pos = tf.reduce_sum(y_true * y_pred) - false_neg = tf.reduce_sum(y_true * (1 - y_pred)) - false_pos = tf.reduce_sum((1 - y_true) * y_pred) - return 1 - (true_pos + 1) / (true_pos + alpha * false_neg + beta * false_pos + 1) - return loss - -#@markdown ## 📍 d. Model instanciator +#@markdown ## 📍 c. Model instanciator def instanciate_model(): input_shape = get_shape() model = create_unet2d_model(input_shape) model.compile( - optimizer=Adam(learning_rate=learning_rate), - loss=bce_dice_loss, + optimizer=Adam(learning_rate=learning_rate), + loss=loss, metrics=[ tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), - tf.keras.metrics.Accuracy(), - tf.keras.metrics.MeanIoU(num_classes=2) + tf.keras.metrics.Accuracy() ] ) return model @@ -737,7 +811,7 @@ def instanciate_model(): #@markdown ## 📍 a. Creating callback for validation class SavePredictionsCallback(Callback): - def __init__(self, num_examples=5): + def __init__(self, model_path, num_examples=5): """ Custom callback to save predictions to images at the end of each epoch. @@ -750,6 +824,7 @@ def __init__(self, num_examples=5): self.validation_data = make_dataset("validation", False) self.output_dir = os.path.join(working_directory, "predictions") self.num_examples = num_examples + self.model_path = model_path if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) @@ -757,54 +832,109 @@ def __init__(self, num_examples=5): def on_epoch_end(self, epoch, logs=None): val_images, val_masks = next(iter(self.validation_data.batch(self.num_examples))) predictions = self.model.predict(val_images) - - # Create a subfolder for each epoch - epoch_dir = os.path.join(self.output_dir, f'epoch_{epoch + 1}') + epoch_dir = os.path.join(self.output_dir, f'epoch_{str(epoch + 1).zfill(3)}') if not os.path.exists(epoch_dir): os.makedirs(epoch_dir) - # Save the results (input, predicted mask, and ground truth mask) - for i in range(self.num_examples): + num_samples = min(self.num_examples, len(val_images)) + + for i in range(num_samples): tifffile.imwrite(os.path.join(epoch_dir, f'input_{str(i + 1).zfill(5)}.tif'), val_images[i].numpy()) tifffile.imwrite(os.path.join(epoch_dir, f'mask_{str(i + 1).zfill(5)}.tif'), val_masks[i].numpy()) tifffile.imwrite(os.path.join(epoch_dir, f'prediction_{str(i + 1).zfill(5)}.tif'), predictions[i]) + + def on_train_end(self, logs=None): + # Move the last predictions into the model folder. 
+ all_epochs = sorted([f for f in os.listdir(self.output_dir) if f.startswith('epoch')]) + if len(all_epochs) == 0: + return + last_epoch = all_epochs[-1] + last_epoch_path = os.path.join(self.output_dir, last_epoch) + last_epoch_dest = os.path.join(self.model_path, "predictions") + os.makedirs(last_epoch_dest, exist_ok=True) + shutil.move(last_epoch_path, last_epoch_dest) #@markdown ## 📍 b. Training launcher -def train_model(model, train_dataset, val_dataset): - # Path of the folder in which the model is exported. +import math + +def get_model_path(): v = get_version() version_name = f"{model_name_prefix}-V{str(v).zfill(3)}" output_path = os.path.join(models_path, version_name) os.makedirs(output_path) - plot_model(model, to_file=os.path.join(output_path, 'architecture.png'), show_shapes=True) + return output_path +def cosine_annealing(epoch, _): + period = 50 + alpha = 0.0 + cosine_decay = 0.5 * (1 + math.cos(math.pi * (epoch % period) / period)) + decayed = (1 - alpha) * cosine_decay + alpha + return float(learning_rate * decayed) + +def train_model(model, train_dataset, val_dataset, output_path): + plot_model(model, to_file=os.path.join(output_path, 'architecture.png'), show_shapes=True) print(f"💾 Exporting model to: {output_path}") checkpoint = ModelCheckpoint(os.path.join(output_path, 'best.keras'), save_best_only=True, monitor='val_loss', mode='min') - early_stopping = EarlyStopping(monitor='val_loss', patience=10, mode='min') + lastpoint = ModelCheckpoint(os.path.join(output_path, 'last.keras'), save_best_only=False, monitor='val_loss', mode='min') + early_stopping = EarlyStopping(monitor='val_loss', patience=early_stop_patience, mode='min') reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6, mode='min') + lr_callback = tf.keras.callbacks.LearningRateScheduler(cosine_annealing) history = model.fit( train_dataset, validation_data=val_dataset, epochs=epochs, - callbacks=[checkpoint, early_stopping, reduce_lr, SavePredictionsCallback()], + callbacks=[checkpoint, lastpoint, early_stopping, lr_callback, SavePredictionsCallback(output_path)], verbose=1 ) - model.save(os.path.join(output_path, 'last.keras')) return history +def export_settings(model_path): + settings_dict = { + "Data folder" : os.path.basename(data_folder), + "QC folder" : os.path.basename(qc_folder) if qc_folder is not None else "None", + "Inputs" : inputs_name, + "Masks" : masks_name, + "Validation (%)" : validation_percentage, + "Batch size" : batch_size, + "# epochs" : epochs, + "UNet depth" : unet_depth, + "# filters" : num_filters_start, + "Dropout (%)" : dropout_rate, + "Optimizer" : optimizer, + "Learning rate" : learning_rate, + "Skeleton (%)" : skeleton_coef, + "BCE (%)" : bce_coef, + "Early stop patience": early_stop_patience, + "Dilation kernel" : str(dilation_kernel), + "Data augmentation" : use_data_augmentation, + "Mirroring" : use_mirroring, + "Gaussian noise" : use_gaussian_noise, + "Noise scale" : noise_scale, + "Random rotations" : use_random_rotations, + "Angle range" : angle_range, + "Gamma correction" : use_gamma_correction, + "Gamma range" : gamma_range, + "Holes" : use_holes, + "Loss function" : loss.__name__ + } + json_string = json.dumps(settings_dict, indent=4) + with open(os.path.join(model_path, "settings.json"), "w") as f: + f.write(json_string) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # EVALUATE THE MODEL # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #@markdown # ⭐ 8. 
EVALUATE THE MODEL -def plot_training_history(history): +def plot_training_history(history, model_path): """ Plots the training and validation metrics from the model's history. @@ -815,7 +945,7 @@ def plot_training_history(history): metrics = history.history # Create subplots - fig, axes = plt.subplots(2, 2, figsize=(12, 10)) + _, axes = plt.subplots(2, 2, figsize=(12, 10)) # Plot Training and Validation Loss axes[0, 0].plot(metrics['loss'], label='Training Loss') @@ -847,7 +977,7 @@ def plot_training_history(history): # Adjust layout plt.tight_layout() - plt.show() + plt.savefig(os.path.join(model_path, 'training_history.png'), format='png', dpi=400) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -856,7 +986,6 @@ def plot_training_history(history): def main(): - from pprint import pprint # 1. Running the sanity checks data_sanity, results = sanity_check(data_folder) @@ -892,16 +1021,20 @@ def main(): ], qc_folder) # 3. Preview the effects of data augmentation - if show_preview: - visualize_augmentations() - + model_path = get_model_path() + export_data_usage(model_path) + export_settings(model_path) + visualize_augmentations(model_path) + if export_aug_sample: + make_data_augmentation_sample() + # 4. Creating the model model = instanciate_model() model.summary() # 5. Create the datasets training_dataset = make_dataset("training", True).repeat().batch(batch_size).take(batch_size) - validation_dataset = make_dataset("validation", False).repeat().batch(16).take(16) + validation_dataset = make_dataset("validation", False).repeat().batch(batch_size).take(batch_size) print(f" • Training dataset: {len(list(training_dataset))} ({training_dataset}).") print(f" • Validation dataset: {len(list(validation_dataset))} ({validation_dataset}).") @@ -913,7 +1046,9 @@ def main(): print(" • No testing dataset provided.") # 6. Training the model - history = train_model(model, training_dataset, validation_dataset) + history = train_model(model, training_dataset, validation_dataset, model_path) + plot_training_history(history, model_path) + if __name__ == "__main__": main() \ No newline at end of file diff --git a/src/microglia_analyzer/dl/yolov5_training.py b/src/microglia_analyzer/dl/yolov5_training.py index a5dd6c8..faf3f95 100644 --- a/src/microglia_analyzer/dl/yolov5_training.py +++ b/src/microglia_analyzer/dl/yolov5_training.py @@ -65,8 +65,20 @@ batch_size = 24 epochs = 1800 classes_names = ["garbage", "amoeboid", "rod", "intermediate", "homeostatic"] -optimizer = 'Adam' -learning_rate = 0.001 +optimizer = 'AdamW' +learning_rate = 0.0001 +deterministic = True +cos_lr = True +label_smoothing = 0.0 +overlap_mask = False +dropout = 0.2 + +# optimizer: 'SGD', 'Adam', 'AdamW'. +# deterministic: True, False +# cos_lr: True, False +# label_smoothing: 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 +# overlap_mask: True, False +# dropout: 0.0, 0.1, 0.2, 0.3, 0.4, 0.5 #@markdown ## 📍 c. Constants @@ -456,9 +468,20 @@ def main(): project=models_path, name=version_name, imgsz=get_image_size(), - optimizer=optimizer + optimizer=optimizer, + deterministic=deterministic, + cos_lr=cos_lr, + label_smoothing=label_smoothing, + overlap_mask=overlap_mask, + dropout=dropout, + lr0=learning_rate ) - # label_smoothing + # optimizer: 'SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp'. 
+    # deterministic: True, False
+    # cos_lr: True, False
+    # label_smoothing: 0.0, 0.1, 0.2, 0.3, 0.4, 0.5
+    # overlap_mask: True, False
+    # dropout: 0.0, 0.1, 0.2, 0.3, 0.4, 0.5
 
 
 if __name__ == "__main__":
diff --git a/src/microglia_analyzer/experimental/classify_microglia.py b/src/microglia_analyzer/experimental/classify_microglia.py
new file mode 100644
index 0000000..c619b48
--- /dev/null
+++ b/src/microglia_analyzer/experimental/classify_microglia.py
@@ -0,0 +1,144 @@
+import torch
+import cv2
+import numpy as np
+import tifffile
+import os
+from microglia_analyzer.tiles.tiler import ImageTiler2D, normalize
+
+def calculate_iou(box1, box2):
+    """
+    Calculate the Intersection over Union (IoU) between two bounding boxes.
+
+    Parameters:
+    - box1: list [x1, y1, x2, y2], coordinates of the first box.
+    - box2: list [x1, y1, x2, y2], coordinates of the second box.
+
+    Returns:
+    - iou: float, Intersection over Union
+    """
+    x1 = max(box1[0], box2[0])
+    y1 = max(box1[1], box2[1])
+    x2 = min(box1[2], box2[2])
+    y2 = min(box1[3], box2[3])
+
+    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    union_area = box1_area + box2_area - inter_area
+
+    return 0.0 if union_area == 0 else (inter_area / union_area)
+
+class MicrogliaClassifier(object):
+    def __init__(self, model_path, image_path, iou_tr=0.8, score_tr=0.5, reload_yolo=False):
+        if not os.path.isfile(model_path):
+            raise FileNotFoundError(f"Model file {model_path} not found")
+        if not model_path.endswith(".pt"):
+            raise ValueError("Model file must be a '.pt' file")
+        self.model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path, force_reload=reload_yolo)
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.model.to(self.device)
+        if not os.path.isfile(image_path):
+            raise FileNotFoundError(f"Image file {image_path} not found")
+        self.image = normalize(tifffile.imread(image_path), 0, 255, np.uint8)
+        self.bboxes = None
+        self.iou_threshold = iou_tr
+        self.score_threshold = score_tr
+        self.classes = self.model.names
+
+    def remove_useless_boxes(self, boxes):
+        """
+        Fuses boxes that have an IoU greater than `iou_threshold`.
+        The box with the highest score is kept, regardless of the classes of the two boxes.
+        Boxes with a score below the score threshold are also removed.
+
+        Parameters:
+        - boxes: dict, the keys 'boxes', 'scores' and 'classes' hold parallel lists
+        - iou_threshold: float, IoU threshold above which two boxes are merged
+
+        Returns:
+        - clean_boxes: dict, the remaining boxes after merging and filtering
+        """
+        clean_boxes = {'boxes': [], 'scores': [], 'classes': []}
+        used = set()
+
+        for i, (box1, score1, class1) in enumerate(zip(boxes['boxes'], boxes['scores'], boxes['classes'])):
+            if i in used:
+                continue
+            chosen_box = box1
+            chosen_score = score1
+            chosen_class = class1
+            for j, (box2, score2, class2) in enumerate(zip(boxes['boxes'], boxes['scores'], boxes['classes'])):
+                if j <= i or j in used:
+                    continue
+                iou = calculate_iou(chosen_box, box2)
+                if iou > self.iou_threshold:
+                    chosen_box = chosen_box if score1 > score2 else box2
+                    chosen_score = max(score1, score2)
+                    chosen_class = class1 if score1 > score2 else class2
+                    used.add(j)
+            if chosen_score < self.score_threshold:
+                continue
+            clean_boxes['boxes'].append(chosen_box)
+            clean_boxes['scores'].append(chosen_score)
+            clean_boxes['classes'].append(chosen_class)
+            used.add(i)
+        return clean_boxes
+
+    def inference(self):
+        results = self.model(self.image)
+        for img_results in results.xyxy:
+            boxes = img_results[:, :4].tolist()
+            scores = img_results[:, 4].tolist()
+            classes = img_results[:, 5].tolist()
+        self.bboxes = {
+            'boxes'  : boxes,
+            'scores' : scores,
+            'classes': classes,
+        }
+
+    def get_cleaned_bboxes(self):
+        return self.remove_useless_boxes(self.bboxes)
+
+# -----------------------------------------------------------------
+
+def draw_bounding_boxes(image, predictions, classes, exclude_class=1, thickness=2):
+    """
+    Draws the bounding boxes on an image, excluding a specific class.
+
+    Parameters:
+    - image: np.array, input image (a copy is drawn on and returned)
+    - predictions: dict, predictions with the keys 'boxes', 'scores' and 'classes'
+      Example: {'boxes': [[x1, y1, x2, y2], ...], 'scores': [0.95, ...], 'classes': [2, ...]}
+    - exclude_class: int, the class to exclude from the drawing (default: 1)
+    - thickness: int, thickness of the bounding boxes (default: 2)
+      (the box colors are taken from the `box_colors` palette defined below)
+
+    Returns:
+    - image_with_boxes: np.array, the image with the bounding boxes drawn on it
+    """
+    box_colors=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255),
+                (255, 255, 255), (0, 0, 0), (128, 128, 128), (128, 0, 0), (0, 128, 0), (0, 0, 128), (128, 128, 0),
+                (128, 0, 128), (0, 128, 128), (128, 128, 128)]
+    image_with_boxes = image.copy()
+    image_with_boxes = cv2.cvtColor(image_with_boxes, cv2.COLOR_GRAY2BGR)
+
+    for box, cls, score in zip(predictions['boxes'], predictions['classes'], predictions['scores']):
+        if int(cls) == exclude_class:
+            continue
+        x1, y1, x2, y2 = map(int, box) # Cast into integers
+        cv2.rectangle(image_with_boxes, (x1, y1), (x2, y2), color=box_colors[int(cls)], thickness=thickness)
+        label = f"{classes[int(cls)]} ({score:.2f})"
+        cv2.putText(image_with_boxes, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_colors[int(cls)], 1)
+
+    return image_with_boxes
+
+
+if __name__ == "__main__":
+    mc = MicrogliaClassifier(
+        "/home/benedetti/Documents/projects/2060-microglia/µyolo/µyolo-V051/weights/best.pt",
+        "/home/benedetti/Documents/projects/2060-microglia/data/raw-data/tiff-data/adulte 3.tif"
+    )
+    mc.inference()
+    cleaned_bboxes = mc.get_cleaned_bboxes()
+    visual = draw_bounding_boxes(mc.image, cleaned_bboxes, mc.classes)
+    cv2.imwrite("/tmp/visual.png", visual)
\ No newline at end of file
diff --git a/src/microglia_analyzer/experimental/dump.py b/src/microglia_analyzer/experimental/dump.py
new file mode 100644
index 0000000..df1b707
--- /dev/null
+++ b/src/microglia_analyzer/experimental/dump.py
@@ -0,0 +1,60 @@
+# NOTE: scratch file; assumes the imports and calculate_iou() from classify_microglia.py.
+# Instantiate the model
+model_path = "/home/benedetti/Documents/projects/2060-microglia/µyolo/µyolo-V049/weights/best.pt"
+model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_path, force_reload=True)
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model.to(device)
+
+# Load the image
+image_path = "/home/benedetti/Documents/projects/2060-microglia/data/raw-data/tiff-data/adulte 3.tif"
+image = tifffile.imread(image_path)
+image = normalize(image, 0, 255, np.uint8)
+
+# Inference
+results = model(image)
+
+# Extract the results
+output = []
+for i, img_results in enumerate(results.xyxy):
+    boxes = img_results[:, :4].tolist()   # Bounding box coordinates [x1, y1, x2, y2]
+    scores = img_results[:, 4].tolist()   # Confidence scores
+    classes = img_results[:, 5].tolist()  # Predicted classes (class indices)
+
+    # Add the results to a Python collection
+    output.append({
+        'image_index': i,
+        'boxes': boxes,
+        'scores': scores,
+        'classes': classes,
+    })
+
+def remove_useless_boxes(boxes, iou_threshold=0.8):
+    """
+    Fuses boxes that have an IoU greater than `iou_threshold`.
+    The box with the highest score is kept, regardless of the classes of the two boxes.
+
+    Parameters:
+    - boxes: list of dict, each dict contains 'box' (coordinates), 'score' and 'class'
+    - iou_threshold: float, IoU threshold above which two boxes are merged
+
+    Returns:
+    - fused_boxes: list of dict, the boxes after merging
+    """
+    fused_boxes = []
+    used = set()
+
+    for i, box1 in enumerate(boxes):
+        if i in used:
+            continue
+        chosen_box = box1['box']
+        for j, box2 in enumerate(boxes):
+            if j <= i or j in used:
+                continue
+            iou = calculate_iou(chosen_box, box2['box'])
+            if iou > iou_threshold:
+                chosen_box = chosen_box if box1['score'] > box2['score'] else box2['box']
+                used.add(j)
+        fused_boxes.append({'box': chosen_box, 'class': box1['class'], 'score': box1['score']})
+        used.add(i)
+
+    return fused_boxes
\ No newline at end of file
diff --git a/src/microglia_analyzer/experimental/segment_microglia.py b/src/microglia_analyzer/experimental/segment_microglia.py
new file mode 100644
index 0000000..a4b8e21
--- /dev/null
+++ b/src/microglia_analyzer/experimental/segment_microglia.py
@@ -0,0 +1,87 @@
+import os
+import tifffile
+import numpy as np
+from microglia_analyzer.tiles.tiler import ImageTiler2D, normalize
+from microglia_analyzer.dl.losses import (dice_skeleton_loss, bce_dice_loss, dice_loss)
+
+# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+import tensorflow as tf
+
+def normalize_batch(batch):
+    for i in range(len(batch)):
+        batch[i] = normalize(batch[i])
+
+
+skeleton_coef = 0.2
+bce_coef = 0.7
+
+class MicrogliaSegmenter(object):
+    def __init__(self, model_path, image_path, tile_size=512, overlap=128):
+        if not os.path.isfile(model_path):
+            raise FileNotFoundError(f"Model file {model_path} not found")
+        if not model_path.endswith(".keras"):
+            raise ValueError("Model file must be a '.keras' file")
+        self.model = tf.keras.models.load_model(
+            model_path,
+            custom_objects={
+                "bcl": bce_dice_loss(bce_coef),
+                "dsl": dice_skeleton_loss(skeleton_coef, bce_coef)
+            }
+        )
+        if not os.path.isfile(image_path):
+            raise FileNotFoundError(f"Image file {image_path} not found")
+        self.image = tifffile.imread(image_path)
+        self.tile_size = tile_size
+        self.overlap = overlap
+        self.mask = None
+
+    def run(self):
+        pred = self.inference()
+        t = self.get_threshold_val(pred)
+        self.mask = self.clean_mask(pred, t)
+
+    def inference(self):
+        shape = self.image.shape
+        tiles_manager = ImageTiler2D(self.tile_size, self.overlap, shape)
+        tiles = np.array(tiles_manager.image_to_tiles(self.image))
+        predictions = np.squeeze(self.model.predict(tiles, batch_size=8))
+        tifffile.imwrite("/tmp/tiles.tif", predictions)
+        normalize_batch(predictions)
+        probabilities = tiles_manager.tiles_to_image(predictions)
+        return probabilities
+
+if __name__ == "__main__":
+    output_path = "/tmp/inference/"
+    model_path = "/home/benedetti/Documents/projects/2060-microglia/µnet/µnet-V208/best.keras"
+    folder_path = "/home/benedetti/Documents/projects/2060-microglia/data/raw-data/tiff-data"
+    content = [f for f in os.listdir(folder_path) if f.endswith(".tif")]
+    for i, image_name in enumerate(content):
+        print(f"{i+1}/{len(content)}: {image_name}")
+        image_path = os.path.join(folder_path, image_name)
+        ms = MicrogliaSegmenter(model_path, image_path)
+        tifffile.imwrite(os.path.join(output_path, image_name), ms.inference())
+    print("DONE.")
+
+
+"""
+WORKFLOW FROM THE GUI:
+
+    1. The user selects the folder containing the images.
+    2. The drop-down menu gets filled with the content of the folder; existing results are loaded.
+    3. The user selects an image.
+    4. In the "Segmentation" section, the user can choose the model to use.
+    5. They can then click on "Segment" to launch the process.
+    6. A float field controls the threshold used to turn the probability map into a mask.
+       A dilation automatically takes place after the thresholding.
+    7. The user can now move on to the classification panel.
+    8. There, they can choose the model and the minimal confidence threshold.
+    9. They can then click on "Classify" to launch the process.
+    10. At this point, the user must be able to edit the results.
+    11. We can now move on to the "Measure" section.
+        Here, the user can turn the mask into a skeleton.
+        For each instance, we get the total length, the number of branches, the number of leaves, ...
+    12. Finally, a button exports the results.
+        The CSV contains one row per instance, with its class and its measurements.
+"""
\ No newline at end of file
diff --git a/src/microglia_analyzer/experimental/tiles.py b/src/microglia_analyzer/experimental/tiles.py
new file mode 100644
index 0000000..574231b
--- /dev/null
+++ b/src/microglia_analyzer/experimental/tiles.py
@@ -0,0 +1,66 @@
+import numpy as np
+from PIL import Image
+from microglia_analyzer.tiles.tiler import ImageTiler2D
+import tifffile
+
+def generate_checkerboard(width, height, num_squares_x, num_squares_y):
+    """
+    Generates a checkerboard image to check the UV mapping.
+
+    Parameters:
+    ----------
+    width : int
+        Width of the image, in pixels.
+    height : int
+        Height of the image, in pixels.
+    num_squares_x : int
+        Number of squares horizontally.
+    num_squares_y : int
+        Number of squares vertically.
+    colors : tuple of tuples, optional
+        Colors of the squares, as RGB tuples. Black and white by default.
+
+    Returns:
+    -------
+    Image.Image
+        The generated checkerboard image.
+    """
+    # Compute the size of each square
+    square_width = width // num_squares_x
+    square_height = height // num_squares_y
+
+    # Create an empty array for the image
+    checkerboard = np.zeros((height, width), dtype=np.uint8)
+
+    for y in range(num_squares_y):
+        for x in range(num_squares_x):
+            # Determine the color of the current square
+            if (x + y) % 2 == 0:
+                color = 0
+            else:
+                color = np.random.randint(0, 256, size=1)
+
+            # Define the coordinates of the square
+            x_start = x * square_width
+            y_start = y * square_height
+            x_end = x_start + square_width
+            y_end = y_start + square_height
+
+            # Fill the square with the chosen color
+            checkerboard[y_start:y_end, x_start:x_end] = color
+
+    # Convert the NumPy array into a Pillow image
+    img = Image.fromarray(checkerboard)
+    return img
+
+# Generate a 2048x2048 image with 128x128 squares
+checkerboard_img = np.squeeze(np.array(generate_checkerboard(2048, 2048, 16, 16)))
+tifffile.imwrite("/tmp/original.tif", checkerboard_img)
+tiles_manager = ImageTiler2D(512, 128, checkerboard_img.shape)
+tiles = tiles_manager.image_to_tiles(checkerboard_img)
+tifffile.imwrite("/tmp/checkerboard.tif", tiles)
+merged = tiles_manager.tiles_to_image(tiles)
+tifffile.imwrite("/tmp/merged.tif", merged)
+
+tifffile.imwrite("/tmp/coefs.tif", tiles_manager.blending_coefs)
+tifffile.imwrite("/tmp/gradient.tif", tiles_manager.tiles_to_image(tiles_manager.blending_coefs))
\ No newline at end of file
diff --git a/src/microglia_analyzer/ma_worker.py b/src/microglia_analyzer/ma_worker.py
new file mode 100644
index 0000000..51c378d
--- /dev/null
+++ b/src/microglia_analyzer/ma_worker.py
@@ -0,0 +1,276 @@
+import os
+import shutil
+import pint
+import json
+
+import tifffile
+import numpy as np
+from skimage.measure import label, regionprops
+from skimage import morphology
+
+from microglia_analyzer.tiles.tiler import ImageTiler2D, normalize
+from microglia_analyzer.dl.losses import dice_skeleton_loss, bce_dice_loss
+from microglia_analyzer.utils import calculate_iou, normalize_batch, download_from_web
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+import tensorflow as tf
+import torch
+
+class MicrogliaAnalyzer(object):
+
+    def __init__(self, logging_f=None):
+        # Image on which we are working.
+        self.image = None
+        # Pixel size => tuple (pixel size, unit).
+        self.calibration = None
+        # Directory in which we export productions (control images, settings, ...).
+        self.working_directory = None
+        # Path of the YOLO model that we use to classify microglia.
+        self.classification_model_path = None
+        # Path of the model that we use to segment microglia.
+        self.segmentation_model_path = None
+        # Classification model.
+        self.classification_model = None
+        # Segmentation model.
+        self.segmentation_model = None
+        # Importance of the skeleton in the loss function.
+        self.unet_skeleton_coef = 0.2
+        # Importance of the BCE in the BCE-dice loss function.
+        self.unet_bce_coef = 0.7
+        # Global logging function.
+        self.logging = logging_f
+        # Object responsible for cutting images into tiles.
+        self.tiles_manager = None
+        # Size of the tiles (in pixels).
+        self.tile_size = None
+        # Overlap between the tiles (in pixels).
+        self.overlap = None
+        # Probability threshold for the segmentation (%).
+        self.segmentation_threshold = 0.5
+        # Score threshold for the classification (%).
+        self.score_threshold = 0.5
+        # Probability map of the segmentation.
+        self.probability_map = None
+        # Connected component minimum size threshold.
+        self.cc_min_size = 250
+        # Classes guessed by the classification model.
+        self.classes = None
+        # Set of bounding-boxes guessed by the classification model.
+        self.bboxes = None
+        # Maximum IoU threshold (%) for the classification. Beyond that, BBs are merged.
+        self.iou_threshold = 0.85
+        # Bounding-boxes after they were cleaned.
+        self.classifications = None
+        # Dictionary of the bindings between the segmentation and the classification.
+        self.bindings = None
+
+    def log(self, message):
+        if self.logging:
+            self.logging(message)
+
+    def set_input_image(self, image, tile_size=512, overlap=128):
+        """
+        Setter of the input image.
+        Checks that the image is 2D before using it.
+        """
+        image = np.squeeze(image)
+        if len(image.shape) != 2:
+            raise ValueError("The input image must be 2D.")
+        self.image = image
+        self.tile_size = tile_size
+        self.overlap = overlap
+        self.tiles_manager = ImageTiler2D(self.tile_size, self.overlap, image.shape)
+
+    def set_calibration(self, pixel_size, unit):
+        """
+        Setter of the calibration.
+        Checks that the pixel size is a float, and uses Pint to validate the unit, before editing the internal state.
+        """
+        if not isinstance(pixel_size, float):
+            raise ValueError("The pixel size must be a float.")
+        ureg = pint.UnitRegistry()
+        try:
+            unit = ureg(unit)
+        except pint.UndefinedUnitError:
+            raise ValueError("The unit is not recognized.")
+        self.calibration = (pixel_size, unit)
+
+    def set_working_directory(self, path):
+        """
+        If the directory already exists, a warning is logged and the directory is cleared.
+        It is then (re-)created and stored as the working directory.
+        """
+        if os.path.exists(path):
+            self.log("The working directory already exists and will be cleared.")
+            shutil.rmtree(path)
+        os.makedirs(path)
+        self.working_directory = path
+
+    def set_segmentation_model(self, path, use="best"):
+        """
+        Checks that the path is a folder.
+        In the folder, we search for the file "best.keras" or "last.keras".
+        To verify that the training was complete, we also check for the presence of "training_history.png".
+        """
+        if not os.path.isdir(path):
+            raise ValueError("The segmentation model path must be a folder.")
+        model_path = os.path.join(path, use+".keras")
+        if not os.path.exists(model_path):
+            raise ValueError("The model file does not exist.")
+        history_path = os.path.join(path, "training_history.png")
+        if not os.path.exists(history_path):
+            raise ValueError("The training of this model is not complete.")
+        self.segmentation_model_path = path
+        self.segmentation_model = tf.keras.models.load_model(
+            model_path,
+            custom_objects={
+                "bcl": bce_dice_loss(self.unet_bce_coef),
+                "dsl": dice_skeleton_loss(self.unet_skeleton_coef, self.unet_bce_coef)
+            }
+        )
+
+    def set_classification_model(self, path, use="best", reload=False):
+        """
+        Checks that the path corresponds to a folder.
+        This folder must contain a "confusion_matrix.png" file to verify that the training is complete.
+        It must also contain a "weights" folder, holding either 'best.pt' or 'last.pt'.
+
+        Args:
+        - path (str): Path of the model's folder (containing 'results.csv' and 'weights').
+        - use (str): Either 'best' or 'last', to use either 'best.pt' or 'last.pt'.
+        - reload (bool): Whether to force the reload of the model from the online repo.
+        """
+        if not os.path.isdir(path):
+            raise ValueError("The classification model path must be a folder.")
+        weights_path = os.path.join(path, "weights", use+".pt")
+        if not os.path.isfile(weights_path):
+            raise ValueError("The model file does not exist.")
+        confusion_matrix_path = os.path.join(path, "confusion_matrix.png")
+        if not os.path.isfile(confusion_matrix_path):
+            raise ValueError("The training of this model is not complete.")
+        self.classification_model_path = weights_path
+        self.classification_model = torch.hub.load(
+            'ultralytics/yolov5',
+            'custom',
+            path=self.classification_model_path,
+            force_reload=reload
+        )
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        self.classification_model.to(device)
+
+    def segmentation_inference(self):
+        tiles = np.array(self.tiles_manager.image_to_tiles(self.image))
+        predictions = np.squeeze(self.segmentation_model.predict(tiles, batch_size=8))
+        normalize_batch(predictions)
+        self.probability_map = self.tiles_manager.tiles_to_image(predictions)
+
+    def filter_cc_by_size(self, mask, connectivity=2):
+        """
+        Filters connected components in a binary mask based on their size.
+
+        Args:
+        - mask (np.ndarray): Binary mask (2D or 3D) with values 0 and 255.
+        - connectivity (int, optional): Connectivity criterion (4 or 8).
+          (The minimum size is read from `self.cc_min_size`.)
+
+        Returns:
+        (np.ndarray): Binary mask with only the connected components that meet the minimum size.
+        """
+        labeled_map = label(mask, connectivity=connectivity)
+        regions = regionprops(labeled_map)
+        labels_to_keep = [region.label for region in regions if region.area >= self.cc_min_size]
+
+        if not labels_to_keep:
+            return np.zeros_like(mask, dtype=np.uint8)
+
+        filtered_binary = np.isin(labeled_map, labels_to_keep).astype(np.uint8) * 255
+        return filtered_binary
+
+    def segmentation_postprocess(self):
+        self.mask = (self.probability_map > self.segmentation_threshold).astype(np.uint8)
+        self.mask = self.filter_cc_by_size(self.mask)
+        selem = morphology.diamond(2)
+        self.mask = morphology.binary_closing(self.mask, selem)
+        # self.mask = morphology.binary_fill_holes(self.mask)
+        self.mask = label(self.mask, connectivity=2)
+
+    def classification_inference(self):
+        yolo_input = normalize(self.image, 0, 255, np.uint8)
+        results = self.classification_model(yolo_input)
+        for img_results in results.xyxy:
+            boxes = img_results[:, :4].tolist()
+            scores = img_results[:, 4].tolist()
+            classes = img_results[:, 5].tolist()
+        self.bboxes = {
+            'boxes'  : boxes,
+            'scores' : scores,
+            'classes': classes,
+        }
+
+    def classification_postprocess(self):
+        """
+        Fuses boxes that have an IoU greater than `iou_threshold`.
+        The box with the highest score is kept, regardless of the classes of the two boxes.
+        Boxes with a score below the score threshold are also removed.
+
+        Parameters:
+        - boxes: read from `self.bboxes`, a dict whose keys 'boxes', 'scores' and 'classes' hold parallel lists
+        - iou_threshold: read from `self.iou_threshold`, the IoU above which two boxes are merged
+
+        Returns:
+        - None: the merged boxes are stored in `self.classifications`
+        """
+        clean_boxes = {'boxes': [], 'scores': [], 'classes': []}
+        used = set()
+
+        for i, (box1, score1, class1) in enumerate(zip(self.bboxes['boxes'], self.bboxes['scores'], self.bboxes['classes'])):
+            if i in used:
+                continue
+            chosen_box = box1
+            chosen_score = score1
+            chosen_class = class1
+            for j, (box2, score2, class2) in enumerate(zip(self.bboxes['boxes'], self.bboxes['scores'], self.bboxes['classes'])):
+                if j <= i or j in used:
+                    continue
+                iou = calculate_iou(chosen_box, box2)
+                if iou > self.iou_threshold:
+                    chosen_box = chosen_box if score1 > score2 else box2
+                    chosen_score = max(score1, score2)
+                    chosen_class = class1 if score1 > score2 else class2
+                    used.add(j)
+            if chosen_score < self.score_threshold:
+                continue
+            clean_boxes['boxes'].append(chosen_box)
+            clean_boxes['scores'].append(chosen_score)
+            clean_boxes['classes'].append(chosen_class)
+            used.add(i)
+        self.classifications = clean_boxes
+
+    def bind_classifications(self):
+        labeled = self.mask
+        regions = regionprops(labeled)
+        bindings = {int(l): (None, 0.0, None) for l in np.unique(labeled) if l != 0} # label: (class, IoU, bbox)
+        for region in regions:
+            seg_bbox = list(map(int, region.bbox))
+            for box, cls in zip(self.classifications['boxes'], self.classifications['classes']):
+                detect_bbox = list(map(int, box))
+                iou = calculate_iou(seg_bbox, detect_bbox)
+                if iou > bindings[region.label][1]:
+                    bindings[region.label] = (cls, iou, seg_bbox)
+        self.bindings = bindings
+
+
+if __name__ == "__main__":
+    img_path = "/home/benedetti/Documents/projects/2060-microglia/data/raw-data/tiff-data/adulte 3.tif"
+    img_data = tifffile.imread(img_path)
+    ma = MicrogliaAnalyzer(lambda x: print(x))
+    ma.set_input_image(img_data)
+    ma.set_calibration(0.325, "µm")
+    ma.set_segmentation_model("/home/benedetti/Documents/projects/2060-microglia/µnet/µnet-V208")
+    ma.set_classification_model("/home/benedetti/Documents/projects/2060-microglia/µyolo/µyolo-V051")
+    ma.segmentation_inference()
+    ma.segmentation_postprocess()
+    ma.classification_inference()
+    ma.classification_postprocess()
+    ma.bind_classifications()
\ No newline at end of file
diff --git a/src/microglia_analyzer/microglia_analyzer.py b/src/microglia_analyzer/microglia_analyzer.py
deleted file mode 100644
index d49ac16..0000000
--- a/src/microglia_analyzer/microglia_analyzer.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import tifffile
-import numpy as np
-import os
-import shutil
-
-from microglia_analyzer.tiles.recalibrate import recalibrate_image
-from microglia_analyzer.tiles.tiler import ImageTiler2D
-
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-import tensorflow as tf
-from tensorflow.keras.models import Model
-
-_PATCH_SIZE = 512
-_OVERLAP = 128
-
-class MicrogliaAnalyzer(object):
-
-    def __init__(self, logging_f=None):
-        # Path of the image on which we are working.
-        self.image_path = None
-        # Image data corresponding to the `image_path`.
-        self.input_image = None
-        # Directory in which we export stuff relative to the `image_path`.
-        self.working_directory = None
-        # Path of the YOLO model that we use to classify microglia.
-        self.classification_model_path = None
-        # Path of the model that we use to segment microglia on YOLO patches.
-        self.segmentation_model_path = None
-        # Segmentation model.
- self.segmentation_model = None - # Pixel size => tuple (pixel size, unit). - self.calibration = None - # Global logging function. - self.logging = logging_f - - def log(self, message): - if self.logging: - self.logging(message) - - def create_working_directory(self, img_path): - name = os.path.basename(img_path) - wd_name = ".".join(name.split('.')[:-1]) - wd_name = wd_name.replace(" ", "-") + "-control" - source_dir = os.path.dirname(img_path) - self.working_directory = os.path.join(source_dir, wd_name) - if os.path.isdir(self.working_directory): - shutil.rmtree(self.working_directory) - else: - os.makedirs(self.working_directory, exist_ok=True) - - def load_image(self, image_path): - self.image_path = image_path - self.input_image = np.squeeze(tifffile.imread(image_path)) - self.create_working_directory(image_path) - self.log(f"Image loaded: '{image_path}'") - - def get_image_path(self): - return self.image_path - - def get_image_data(self): - return self.input_image - - def get_working_directory(self): - return self.working_directory - - def get_image_shape(self): - return self.input_image.shape - - def set_calibration(self, pixel_size, unit): - self.calibration = (pixel_size, unit) - - def set_segmentation_model_path(self, model_path): - best_path = os.path.join(model_path, "best.keras") - if not os.path.isfile(best_path): - raise ValueError(f"Model '{os.path.basename(model_path)}' does not exist.") - self.segmentation_model_path = best_path - self.segmentation_model = tf.keras.models.load_model(self.segmentation_model_path) - self.segmentation_model.summary() - - def set_classification_model_path(self, model_path): - pass - - def export_patches(self): - rescaled_img = recalibrate_image(self.input_image, *self.calibration) - tiler = ImageTiler2D(_PATCH_SIZE, _OVERLAP, rescaled_img.shape) - patches = tiler.image_to_tiles(rescaled_img) - export_path = os.path.join(self.working_directory, "patches") - shutil.rmtree(export_path, ignore_errors=True) - os.makedirs(export_path, exist_ok=True) - for i, patch in enumerate(patches): - patch_name = f"patch_{str(i).zfill(3)}.tif" - tifffile.imwrite(os.path.join(export_path, patch_name), patch) - self.log(f"{len(patches)} patches exported to '{export_path}'") - - def segment_microglia(self): - pass - - def classify_microglia(self): - pass - - def make_skeletons(self): - pass - - def extract_metrics(self): - pass diff --git a/src/microglia_analyzer/models.json b/src/microglia_analyzer/models.json new file mode 100644 index 0000000..1673e5a --- /dev/null +++ b/src/microglia_analyzer/models.json @@ -0,0 +1,4 @@ +{ + "µnet" : "https://dev.mri.cnrs.fr/attachments/download/3574/%C2%B5net.zip", + "µyolo": "https://dev.mri.cnrs.fr/attachments/download/3575/%C2%B5yolo.zip" +} \ No newline at end of file diff --git a/src/microglia_analyzer/tiles/tiler.py b/src/microglia_analyzer/tiles/tiler.py index 02079aa..fb729a3 100644 --- a/src/microglia_analyzer/tiles/tiler.py +++ b/src/microglia_analyzer/tiles/tiler.py @@ -178,12 +178,6 @@ def get_grid_size(self): """ return self.grid_size - def get_blending_tiles(self): - """ - Returns the list containing the blending tiles. - """ - return self.blending_coefs - def _process_grid_size(self): """ Processes the final number of tiles on each axis, taking into account the overlap. 
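
Aside on the tiler change above: `get_blending_tiles()` is removed, but the `blending_coefs` attribute is still read directly (as `experimental/tiles.py` does). Below is a minimal round-trip sanity check, assuming the `ImageTiler2D` API used throughout this patch (`image_to_tiles` / `tiles_to_image`); the checkerboard script in `experimental/tiles.py` verifies the same property visually.

    import numpy as np
    from microglia_analyzer.tiles.tiler import ImageTiler2D

    img = np.random.rand(1024, 1024).astype(np.float32)
    tiler = ImageTiler2D(512, 128, img.shape)  # tile size, overlap, image shape
    tiles = tiler.image_to_tiles(img)
    rebuilt = tiler.tiles_to_image(tiles)      # blending_coefs weight the overlaps
    # If the blending coefficients sum to 1 everywhere, the round trip is lossless.
    assert rebuilt.shape == img.shape
    assert np.allclose(rebuilt, img, atol=1e-5)
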
diff --git a/src/microglia_analyzer/utils.py b/src/microglia_analyzer/utils.py
new file mode 100644
index 0000000..3d2e349
--- /dev/null
+++ b/src/microglia_analyzer/utils.py
@@ -0,0 +1,125 @@
+import cv2
+import requests
+import zipfile
+import tempfile
+import os
+import json
+import shutil
+from microglia_analyzer.tiles.tiler import normalize
+
+BBOX_COLORS = [
+    (255,   0,   0),
+    (  0, 255,   0),
+    (  0,   0, 255),
+    (255, 255,   0),
+    (255,   0, 255),
+    (  0, 255, 255),
+    (255, 255, 255),
+    (  0,   0,   0),
+    (128, 128, 128),
+    (128,   0,   0),
+    (  0, 128,   0),
+    (  0,   0, 128),
+    (128, 128,   0),
+    (128,   0, 128),
+    (  0, 128, 128),
+    (128, 128, 128)
+]
+
+def calculate_iou(box1, box2):
+    """
+    Calculate the Intersection over Union (IoU) between two bounding boxes.
+
+    Args:
+    - box1 (list): [x1, y1, x2, y2], coordinates of the first box.
+    - box2 (list): [x1, y1, x2, y2], coordinates of the second box.
+
+    Returns:
+    (float): Intersection over Union
+    """
+    x1 = max(box1[0], box2[0])
+    y1 = max(box1[1], box2[1])
+    x2 = min(box1[2], box2[2])
+    y2 = min(box1[3], box2[3])
+
+    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
+    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    union_area = box1_area + box2_area - inter_area
+    return 0.0 if union_area == 0 else (inter_area / union_area)
+
+def normalize_batch(batch):
+    for i in range(len(batch)):
+        batch[i] = normalize(batch[i])
+
+def draw_bounding_boxes(image, predictions, classes, thickness=2):
+    """
+    Draws the bounding boxes, class names and scores on an image.
+
+    Parameters:
+    - image: np.array, input image (a copy is drawn on and returned)
+    - predictions: dict, predictions with the keys 'boxes', 'scores' and 'classes'
+      Example: {'boxes': [[x1, y1, x2, y2], ...], 'scores': [0.95, ...], 'classes': [2, ...]}
+    - classes: list of str, the class names, indexed by class id
+    - thickness: int, thickness of the bounding boxes (default: 2)
+      (the box colors are taken from the module-level BBOX_COLORS palette)
+
+    Returns:
+    - image_with_boxes: np.array, the image with the bounding boxes drawn on it
+    """
+    image_with_boxes = image.copy()
+    image_with_boxes = cv2.cvtColor(image_with_boxes, cv2.COLOR_GRAY2BGR)
+
+    for box, cls, score in zip(predictions['boxes'], predictions['classes'], predictions['scores']):
+        x1, y1, x2, y2 = map(int, box) # Cast into integers
+        cv2.rectangle(image_with_boxes, (x1, y1), (x2, y2), color=BBOX_COLORS[int(cls)], thickness=thickness)
+        label = f"{classes[int(cls)]} ({score:.2f})"
+        cv2.putText(image_with_boxes, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, BBOX_COLORS[int(cls)], 1)
+
+    return image_with_boxes
+
+def download_from_web(url, target_dir, name, timeout=100):
+    extract_to = os.path.join(target_dir, name)
+    if os.path.isdir(extract_to):
+        shutil.rmtree(extract_to)
+    os.makedirs(extract_to, exist_ok=True)
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        zip_path = os.path.join(temp_dir, "downloaded.zip")
+        print(f"Downloading model from {url}...")
+        try:
+            with requests.get(url, stream=True, timeout=timeout) as response:
+                response.raise_for_status()
+                with open(zip_path, 'wb') as f:
+                    shutil.copyfileobj(response.raw, f)
+        except requests.exceptions.RequestException as e:
+            print(f"Error while downloading the models: {e}")
+            raise
+
+        try:
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                zip_ref.extractall(extract_to)
+            print(f"Model extracted to: {extract_to}")
+        except zipfile.BadZipFile as e:
+            print(f"Error while decompressing the model's ZIP: {e}")
+            raise
+        except Exception as e:
+            print(f"Unknown decompression error: {e}")
+            raise
+
+def download_ressources():
+    models_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
+    urls_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models.json")
+    if not os.path.isfile(urls_path):
+        print("The 'models.json' file is missing.")
+        return
+    if not os.path.isdir(models_path):
+        os.makedirs(models_path)
+    ressources = json.load(open(urls_path, "r"))
+    download_from_web(ressources['µnet'] , models_path, "µnet")
+    download_from_web(ressources['µyolo'], models_path, "µyolo")
+
+
+if __name__ == "__main__":
+    # download_ressources()
+    pass
\ No newline at end of file
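
One practical note on reloading the U-Net exported by this patch: `best.keras` / `last.keras` are saved with closure-based losses, so Keras records the *inner* function names of the closures, not the factory names. Reloading therefore needs `custom_objects` keyed by "bcl" (returned by `bce_dice_loss`) and "dsl" (returned by `dice_skeleton_loss`), exactly as `ma_worker.py` and `experimental/segment_microglia.py` do. A minimal sketch; the path is a placeholder, and the coefficients must match the values used at training time (0.2 / 0.7 in this patch):

    import tensorflow as tf
    from microglia_analyzer.dl.losses import bce_dice_loss, dice_skeleton_loss

    model = tf.keras.models.load_model(
        "/path/to/µnet-VXXX/best.keras",  # placeholder path
        custom_objects={
            "bcl": bce_dice_loss(0.7),            # bce_coef
            "dsl": dice_skeleton_loss(0.2, 0.7),  # (skeleton_coef, bce_coef)
        },
    )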