From 1112cece0c424d86c55df4cba7b28e14dfdd0da9 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Mon, 13 May 2024 13:13:33 -0700 Subject: [PATCH 01/23] create tf_cnn_mnist and update plan and src to match new taskrunner format Signed-off-by: kta-intel --- openfl-workspace/tf_cnn_mnist/.workspace | 2 + openfl-workspace/tf_cnn_mnist/plan/cols.yaml | 5 + openfl-workspace/tf_cnn_mnist/plan/data.yaml | 7 + openfl-workspace/tf_cnn_mnist/plan/defaults | 2 + openfl-workspace/tf_cnn_mnist/plan/plan.yaml | 42 +++++ .../tf_cnn_mnist/requirements.txt | 1 + openfl-workspace/tf_cnn_mnist/src/__init__.py | 3 + .../tf_cnn_mnist/src/dataloader.py | 143 ++++++++++++++++++ .../tf_cnn_mnist/src/keras_cnn.py | 85 +++++++++++ .../tf_cnn_mnist/src/mnist_utils.py | 118 +++++++++++++++ .../tf_cnn_mnist/src/taskrunner.py | 0 .../tf_cnn_mnist/src/tfmnist_inmemory.py | 39 +++++ 12 files changed, 447 insertions(+) create mode 100644 openfl-workspace/tf_cnn_mnist/.workspace create mode 100644 openfl-workspace/tf_cnn_mnist/plan/cols.yaml create mode 100644 openfl-workspace/tf_cnn_mnist/plan/data.yaml create mode 100644 openfl-workspace/tf_cnn_mnist/plan/defaults create mode 100644 openfl-workspace/tf_cnn_mnist/plan/plan.yaml create mode 100644 openfl-workspace/tf_cnn_mnist/requirements.txt create mode 100644 openfl-workspace/tf_cnn_mnist/src/__init__.py create mode 100644 openfl-workspace/tf_cnn_mnist/src/dataloader.py create mode 100644 openfl-workspace/tf_cnn_mnist/src/keras_cnn.py create mode 100644 openfl-workspace/tf_cnn_mnist/src/mnist_utils.py create mode 100644 openfl-workspace/tf_cnn_mnist/src/taskrunner.py create mode 100644 openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py diff --git a/openfl-workspace/tf_cnn_mnist/.workspace b/openfl-workspace/tf_cnn_mnist/.workspace new file mode 100644 index 0000000000..3c2c5d08b4 --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/.workspace @@ -0,0 +1,2 @@ +current_plan_name: default + diff --git a/openfl-workspace/tf_cnn_mnist/plan/cols.yaml b/openfl-workspace/tf_cnn_mnist/plan/cols.yaml new file mode 100644 index 0000000000..95307de3bc --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/plan/cols.yaml @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +collaborators: + \ No newline at end of file diff --git a/openfl-workspace/tf_cnn_mnist/plan/data.yaml b/openfl-workspace/tf_cnn_mnist/plan/data.yaml new file mode 100644 index 0000000000..257c7825fe --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/plan/data.yaml @@ -0,0 +1,7 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +# collaborator_name,data_directory_path +one,1 + + diff --git a/openfl-workspace/tf_cnn_mnist/plan/defaults b/openfl-workspace/tf_cnn_mnist/plan/defaults new file mode 100644 index 0000000000..fb82f9c5b6 --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/plan/defaults @@ -0,0 +1,2 @@ +../../workspace/plan/defaults + diff --git a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml new file mode 100644 index 0000000000..d3ebfe2921 --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml @@ -0,0 +1,42 @@ +# Copyright (C) 2020-2021 Intel Corporation +# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
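+#
+# Each section below binds one federation component (aggregator,
+# collaborator, data loader, task runner, tasks) to a `template` class
+# and its `settings`; the `defaults` paths are resolved relative to the
+# workspace root.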
+ +aggregator : + defaults : plan/defaults/aggregator.yaml + template : openfl.component.Aggregator + settings : + init_state_path : save/tf_cnn_mnist_init.pbuf + best_state_path : save/tf_cnn_mnist_best.pbuf + last_state_path : save/tf_cnn_mnist_last.pbuf + rounds_to_train : 10 + +collaborator : + defaults : plan/defaults/collaborator.yaml + template : openfl.component.Collaborator + settings : + delta_updates : false + opt_treatment : RESET + +data_loader : + defaults : plan/defaults/data_loader.yaml + template : src.dataloader.TensorFlowMNISTInMemory + settings : + collaborator_count : 2 + data_group_name : mnist + batch_size : 256 + +task_runner : + defaults : plan/defaults/task_runner.yaml + template : src.taskrunner.TensorFlowCNN + +network : + defaults : plan/defaults/network.yaml + +assigner : + defaults : plan/defaults/assigner.yaml + +tasks : + defaults : plan/defaults/tasks_keras.yaml + +compression_pipeline : + defaults : plan/defaults/compression_pipeline.yaml diff --git a/openfl-workspace/tf_cnn_mnist/requirements.txt b/openfl-workspace/tf_cnn_mnist/requirements.txt new file mode 100644 index 0000000000..af80212eeb --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/requirements.txt @@ -0,0 +1 @@ +tensorflow==2.13 diff --git a/openfl-workspace/tf_cnn_mnist/src/__init__.py b/openfl-workspace/tf_cnn_mnist/src/__init__.py new file mode 100644 index 0000000000..f1410b1298 --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/src/__init__.py @@ -0,0 +1,3 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""You may copy this file as the starting point of your own model.""" diff --git a/openfl-workspace/tf_cnn_mnist/src/dataloader.py b/openfl-workspace/tf_cnn_mnist/src/dataloader.py new file mode 100644 index 0000000000..e3982a149b --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/src/dataloader.py @@ -0,0 +1,143 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +from openfl.federated import TensorFlowDataLoader +from logging import getLogger + +import numpy as np +from tensorflow.python.keras.utils.data_utils import get_file + +logger = getLogger(__name__) + + +class TensorFlowMNISTInMemory(TensorFlowDataLoader): + """TensorFlow Data Loader for MNIST Dataset.""" + + def __init__(self, data_path, batch_size, **kwargs): + """ + Initialize. + + Args: + data_path: File path for the dataset + batch_size (int): The batch size for the data loader + **kwargs: Additional arguments, passed to super init and load_mnist_shard + """ + super().__init__(batch_size, **kwargs) + + num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard( + shard_num=int(data_path), **kwargs + ) + + self.X_train = X_train + self.y_train = y_train + self.X_valid = X_valid + self.y_valid = y_valid + + self.num_classes = num_classes + + +def one_hot(labels, classes): + """ + One Hot encode a vector. + + Args: + labels (list): List of labels to onehot encode + classes (int): Total number of categorical classes + + Returns: + np.array: Matrix of one-hot encoded labels + """ + return np.eye(classes)[labels] + + +def _load_raw_datashards(shard_num, collaborator_count): + """ + Load the raw data by shard. + + Returns tuples of the dataset shard divided into training and validation. 
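+
+    Shards are formed by strided slicing: shard ``n`` keeps every
+    ``collaborator_count``-th sample starting at index ``n``, so the
+    shards are disjoint and nearly equal in size.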
+ + Args: + shard_num (int): The shard number to use + collaborator_count (int): The number of collaborators in the federation + + Returns: + 2 tuples: (image, label) of the training, validation dataset + """ + origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' + path = get_file('mnist.npz', + origin=origin_folder + 'mnist.npz', + file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') + + with np.load(path) as f: + # get all of mnist + X_train_tot = f['x_train'] + y_train_tot = f['y_train'] + + X_valid_tot = f['x_test'] + y_valid_tot = f['y_test'] + + # create the shards + shard_num = int(shard_num) + X_train = X_train_tot[shard_num::collaborator_count] + y_train = y_train_tot[shard_num::collaborator_count] + + X_valid = X_valid_tot[shard_num::collaborator_count] + y_valid = y_valid_tot[shard_num::collaborator_count] + + return (X_train, y_train), (X_valid, y_valid) + + +def load_mnist_shard(shard_num, collaborator_count, categorical=True, + channels_last=True, **kwargs): + """ + Load the MNIST dataset. + + Args: + shard_num (int): The shard to use from the dataset + collaborator_count (int): The number of collaborators in the federation + categorical (bool): True = convert the labels to one-hot encoded + vectors (Default = True) + channels_last (bool): True = The input images have the channels + last (Default = True) + **kwargs: Additional parameters to pass to the function + + Returns: + list: The input shape + int: The number of classes + numpy.ndarray: The training data + numpy.ndarray: The training labels + numpy.ndarray: The validation data + numpy.ndarray: The validation labels + """ + img_rows, img_cols = 28, 28 + num_classes = 10 + + (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( + shard_num, collaborator_count + ) + + if channels_last: + X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) + X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1) + else: + X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) + X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols) + + X_train = X_train.astype('float32') + X_valid = X_valid.astype('float32') + X_train /= 255 + X_valid /= 255 + + logger.info(f'MNIST > X_train Shape : {X_train.shape}') + logger.info(f'MNIST > y_train Shape : {y_train.shape}') + logger.info(f'MNIST > Train Samples : {X_train.shape[0]}') + logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}') + + if categorical: + # convert class vectors to binary class matrices + y_train = one_hot(y_train, num_classes) + y_valid = one_hot(y_valid, num_classes) + + return num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/tf_cnn_mnist/src/keras_cnn.py b/openfl-workspace/tf_cnn_mnist/src/keras_cnn.py new file mode 100644 index 0000000000..35a71f7734 --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/src/keras_cnn.py @@ -0,0 +1,85 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +import tensorflow.keras as ke +from tensorflow.keras import Sequential +from tensorflow.keras.layers import Conv2D +from tensorflow.keras.layers import Dense +from tensorflow.keras.layers import Flatten + +from openfl.federated import KerasTaskRunner + + +class KerasCNN(KerasTaskRunner): + """A basic convolutional neural network model.""" + + def __init__(self, **kwargs): + """ + Initialize. 
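+
+        Builds the model from the data loader's feature shape and class
+        count, then registers the tensor keys required by each task.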
+ + Args: + **kwargs: Additional parameters to pass to the function + """ + super().__init__(**kwargs) + + self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs) + + self.initialize_tensorkeys_for_functions() + + self.model.summary(print_fn=self.logger.info) + + self.logger.info(f'Train Set Size : {self.get_train_data_size()}') + self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}') + + def build_model(self, + input_shape, + num_classes, + conv_kernel_size=(4, 4), + conv_strides=(2, 2), + conv1_channels_out=16, + conv2_channels_out=32, + final_dense_inputsize=100, + **kwargs): + """ + Define the model architecture. + + Args: + input_shape (numpy.ndarray): The shape of the data + num_classes (int): The number of classes of the dataset + + Returns: + tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras + + """ + model = Sequential() + + model.add(Conv2D(conv1_channels_out, + kernel_size=conv_kernel_size, + strides=conv_strides, + activation='relu', + input_shape=input_shape)) + + model.add(Conv2D(conv2_channels_out, + kernel_size=conv_kernel_size, + strides=conv_strides, + activation='relu')) + + model.add(Flatten()) + + model.add(Dense(final_dense_inputsize, activation='relu')) + + model.add(Dense(num_classes, activation='softmax')) + + model.compile(loss=ke.losses.categorical_crossentropy, + optimizer=ke.optimizers.legacy.Adam(), + metrics=['accuracy']) + + # initialize the optimizer variables + opt_vars = model.optimizer.variables() + + for v in opt_vars: + v.initializer.run(session=self.sess) + + return model diff --git a/openfl-workspace/tf_cnn_mnist/src/mnist_utils.py b/openfl-workspace/tf_cnn_mnist/src/mnist_utils.py new file mode 100644 index 0000000000..d19e13d9dd --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/src/mnist_utils.py @@ -0,0 +1,118 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +from logging import getLogger + +import numpy as np +from tensorflow.python.keras.utils.data_utils import get_file + +logger = getLogger(__name__) + + +def one_hot(labels, classes): + """ + One Hot encode a vector. + + Args: + labels (list): List of labels to onehot encode + classes (int): Total number of categorical classes + + Returns: + np.array: Matrix of one-hot encoded labels + """ + return np.eye(classes)[labels] + + +def _load_raw_datashards(shard_num, collaborator_count): + """ + Load the raw data by shard. + + Returns tuples of the dataset shard divided into training and validation. 
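+
+    The split is strided: collaborator ``shard_num`` receives samples
+    ``shard_num, shard_num + collaborator_count, ...`` from each array.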
+ + Args: + shard_num (int): The shard number to use + collaborator_count (int): The number of collaborators in the federation + + Returns: + 2 tuples: (image, label) of the training, validation dataset + """ + origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' + path = get_file('mnist.npz', + origin=origin_folder + 'mnist.npz', + file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') + + with np.load(path) as f: + # get all of mnist + X_train_tot = f['x_train'] + y_train_tot = f['y_train'] + + X_valid_tot = f['x_test'] + y_valid_tot = f['y_test'] + + # create the shards + shard_num = int(shard_num) + X_train = X_train_tot[shard_num::collaborator_count] + y_train = y_train_tot[shard_num::collaborator_count] + + X_valid = X_valid_tot[shard_num::collaborator_count] + y_valid = y_valid_tot[shard_num::collaborator_count] + + return (X_train, y_train), (X_valid, y_valid) + + +def load_mnist_shard(shard_num, collaborator_count, categorical=True, + channels_last=True, **kwargs): + """ + Load the MNIST dataset. + + Args: + shard_num (int): The shard to use from the dataset + collaborator_count (int): The number of collaborators in the federation + categorical (bool): True = convert the labels to one-hot encoded + vectors (Default = True) + channels_last (bool): True = The input images have the channels + last (Default = True) + **kwargs: Additional parameters to pass to the function + + Returns: + list: The input shape + int: The number of classes + numpy.ndarray: The training data + numpy.ndarray: The training labels + numpy.ndarray: The validation data + numpy.ndarray: The validation labels + """ + img_rows, img_cols = 28, 28 + num_classes = 10 + + (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( + shard_num, collaborator_count + ) + + if channels_last: + X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) + X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1) + input_shape = (img_rows, img_cols, 1) + else: + X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) + X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols) + input_shape = (1, img_rows, img_cols) + + X_train = X_train.astype('float32') + X_valid = X_valid.astype('float32') + X_train /= 255 + X_valid /= 255 + + logger.info(f'MNIST > X_train Shape : {X_train.shape}') + logger.info(f'MNIST > y_train Shape : {y_train.shape}') + logger.info(f'MNIST > Train Samples : {X_train.shape[0]}') + logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}') + + if categorical: + # convert class vectors to binary class matrices + y_train = one_hot(y_train, num_classes) + y_valid = one_hot(y_valid, num_classes) + + return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py b/openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py new file mode 100644 index 0000000000..dec2bc2808 --- /dev/null +++ b/openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py @@ -0,0 +1,39 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +from openfl.federated import TensorFlowDataLoader +from .mnist_utils import load_mnist_shard + + +class TensorFlowMNISTInMemory(TensorFlowDataLoader): + """TensorFlow Data Loader for 
MNIST Dataset.""" + + def __init__(self, data_path, batch_size, **kwargs): + """ + Initialize. + + Args: + data_path: File path for the dataset + batch_size (int): The batch size for the data loader + **kwargs: Additional arguments, passed to super init and load_mnist_shard + """ + super().__init__(batch_size, **kwargs) + + # TODO: We should be downloading the dataset shard into a directory + # TODO: There needs to be a method to ask how many collaborators and + # what index/rank is this collaborator. + # Then we have a way to automatically shard based on rank and size of + # collaborator list. + + _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard( + shard_num=int(data_path), **kwargs + ) + + self.X_train = X_train + self.y_train = y_train + self.X_valid = X_valid + self.y_valid = y_valid + + self.num_classes = num_classes From d09236724f8cd024db16e3afb8a4c580ef67772b Mon Sep 17 00:00:00 2001 From: kta-intel Date: Tue, 14 May 2024 15:03:03 -0700 Subject: [PATCH 02/23] updates to tf-keras taskrunner Signed-off-by: kta-intel --- .../tf_cnn_mnist/src/taskrunner.py | 114 ++++++++++++++++++ .../workspace/plan/defaults/tasks_keras.yaml | 6 +- openfl/federated/task/runner_keras.py | 34 +++--- 3 files changed, 134 insertions(+), 20 deletions(-) diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py index e69de29bb2..e9dfb6c70c 100644 --- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py @@ -0,0 +1,114 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +import numpy as np +import tensorflow as tf + +from openfl.utilities import Metric +from openfl.federated import KerasTaskRunner + + +class TensorFlowCNN(KerasTaskRunner): + """A basic convolutional neural network model.""" + + def __init__(self, **kwargs): + """ + Initialize. + + Args: + **kwargs: Additional parameters to pass to the function + """ + super().__init__(**kwargs) + + self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs) + + self.initialize_tensorkeys_for_functions() + + self.model.summary(print_fn=self.logger.info) + + self.logger.info(f'Train Set Size : {self.get_train_data_size()}') + self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}') + + def build_model(self, + input_shape, + num_classes, + conv_kernel_size=(4, 4), + conv_strides=(2, 2), + conv1_channels_out=16, + conv2_channels_out=32, + final_dense_inputsize=100, + **kwargs): + """ + Define the model architecture. 
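+
+        Two strided ``Conv2D`` layers feed a ``Flatten`` and two ``Dense``
+        layers; with the default kwargs this is a compact CNN sized for
+        28x28x1 MNIST inputs.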
+ + Args: + input_shape (numpy.ndarray): The shape of the data + num_classes (int): The number of classes of the dataset + + Returns: + tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras + + """ + model = tf.keras.models.Sequential([ + tf.keras.layers.Conv2D(conv1_channels_out, + kernel_size=conv_kernel_size, + strides=conv_strides, + activation='relu', + input_shape=input_shape), + tf.keras.layers.Conv2D(conv2_channels_out, + kernel_size=conv_kernel_size, + strides=conv_strides, + activation='relu'), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(final_dense_inputsize, activation='relu'), + tf.keras.layers.Dense(num_classes, activation='softmax') + ]) + + model.compile(loss=tf.keras.losses.categorical_crossentropy, + optimizer=tf.keras.optimizers.legacy.Adam(), + metrics=['accuracy']) + + # initialize the optimizer variables + opt_vars = model.optimizer.variables() + + for v in opt_vars: + v.initializer.run(session=self.sess) + + return model + + + def train_(self, batch_generator, metrics: list = None, **kwargs): + """Train single epoch. + + Override this function for custom training. + + Args: + batch_generator: Generator of training batches. + Each batch is a tuple of N train images and N train labels + where N is the batch size of the DataLoader of the current TaskRunner instance. + + epochs: Number of epochs to train. + metrics: Names of metrics to save. + """ + if metrics is None: + metrics = [] + + model_metrics_names = self.model.metrics_names + + for param in metrics: + if param not in model_metrics_names: + raise ValueError( + f'KerasTaskRunner does not support specifying new metrics. ' + f'Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}' + ) + + history = self.model.fit(batch_generator, + verbose=1, + **kwargs) + results = [] + for metric in metrics: + value = np.mean([history.history[metric]]) + results.append(Metric(name=metric, value=np.array(value))) + return results diff --git a/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml b/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml index 79d067d8d2..0ef460da87 100644 --- a/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml +++ b/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml @@ -1,5 +1,5 @@ aggregated_model_validation: - function : validate + function : validate_task kwargs : batch_size : 32 apply : global @@ -7,7 +7,7 @@ aggregated_model_validation: - accuracy locally_tuned_model_validation: - function : validate + function : validate_task kwargs : batch_size : 32 apply : local @@ -15,7 +15,7 @@ locally_tuned_model_validation: - accuracy train: - function : train + function : train_task kwargs : batch_size : 32 epochs : 1 diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py index c7daaa3d33..030bed9675 100644 --- a/openfl/federated/task/runner_keras.py +++ b/openfl/federated/task/runner_keras.py @@ -61,8 +61,8 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False): else: self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - def train(self, col_name, round_num, input_tensor_dict, - metrics, epochs=1, batch_size=1, **kwargs): + def train_task(self, col_name, round_num, input_tensor_dict, + metrics, epochs=1, batch_size=1, **kwargs): """ Perform the training. 
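+
+        Runs once per training round: the aggregator-supplied weights in
+        ``input_tensor_dict`` are loaded into the model before fitting.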
@@ -81,7 +81,7 @@ def train(self, col_name, round_num, input_tensor_dict, self.rebuild_model(round_num, input_tensor_dict) for epoch in range(epochs): self.logger.info(f'Run {epoch} epoch of {round_num} round') - results = self.train_iteration(self.data_loader.get_train_loader(batch_size), + results = self.train_(self.data_loader.get_train_loader(batch_size), metrics=metrics, **kwargs) @@ -145,7 +145,7 @@ def train(self, col_name, round_num, input_tensor_dict, return global_tensor_dict, local_tensor_dict - def train_iteration(self, batch_generator, metrics: list = None, **kwargs): + def train_(self, batch_generator, metrics: list = None, **kwargs): """Train single epoch. Override this function for custom training. @@ -185,7 +185,7 @@ def train_iteration(self, batch_generator, metrics: list = None, **kwargs): results.append(Metric(name=metric, value=np.array(value))) return results - def validate(self, col_name, round_num, input_tensor_dict, **kwargs): + def validate_task(self, col_name, round_num, input_tensor_dict, **kwargs): """ Run the trained model on validation data; report results. @@ -396,7 +396,7 @@ def set_required_tensorkeys_for_function(self, func_name, # of the methods in the class and declare the tensors. # For now this is done manually - if func_name == 'validate': + if func_name == 'validate_task': # Should produce 'apply=global' or 'apply=local' local_model = 'apply' + kwargs['apply'] self.required_tensorkeys_for_function[func_name][ @@ -419,7 +419,7 @@ def get_required_tensorkeys_for_function(self, func_name, **kwargs): List [TensorKey] """ - if func_name == 'validate': + if func_name == 'validate_task': local_model = 'apply=' + str(kwargs['apply']) return self.required_tensorkeys_for_function[func_name][local_model] else: @@ -449,19 +449,19 @@ def update_tensorkeys_for_functions(self): opt_names = self._get_weights_names(self.model.optimizer) tensor_names = model_layer_names + opt_names self.logger.debug(f'Updating model tensor names: {tensor_names}') - self.required_tensorkeys_for_function['train'] = [ + self.required_tensorkeys_for_function['train_task'] = [ TensorKey(tensor_name, 'GLOBAL', 0, ('model',)) for tensor_name in tensor_names ] # Validation may be performed on local or aggregated (global) model, # so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function['validate'] = {} - self.required_tensorkeys_for_function['validate']['local_model=True'] = [ + self.required_tensorkeys_for_function['validate_task'] = {} + self.required_tensorkeys_for_function['validate_task']['local_model=True'] = [ TensorKey(tensor_name, 'LOCAL', 0, ('trained',)) for tensor_name in tensor_names ] - self.required_tensorkeys_for_function['validate']['local_model=False'] = [ + self.required_tensorkeys_for_function['validate_task']['local_model=False'] = [ TensorKey(tensor_name, 'GLOBAL', 0, ('model',)) for tensor_name in tensor_names ] @@ -502,31 +502,31 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): **self.tensor_dict_split_fn_kwargs ) - self.required_tensorkeys_for_function['train'] = [ + self.required_tensorkeys_for_function['train_task'] = [ TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) for tensor_name in global_model_dict ] - self.required_tensorkeys_for_function['train'] += [ + self.required_tensorkeys_for_function['train_task'] += [ TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) for tensor_name in local_model_dict ] # Validation may be performed on local or aggregated (global) model, # so there is an extra lookup 
dimension for kwargs - self.required_tensorkeys_for_function['validate'] = {} + self.required_tensorkeys_for_function['validate_task'] = {} # TODO This is not stateless. The optimizer will not be - self.required_tensorkeys_for_function['validate']['apply=local'] = [ + self.required_tensorkeys_for_function['validate_task']['apply=local'] = [ TensorKey(tensor_name, 'LOCAL', 0, False, ('trained',)) for tensor_name in { **global_model_dict_val, **local_model_dict_val } ] - self.required_tensorkeys_for_function['validate']['apply=global'] = [ + self.required_tensorkeys_for_function['validate_task']['apply=global'] = [ TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) for tensor_name in global_model_dict_val ] - self.required_tensorkeys_for_function['validate']['apply=global'] += [ + self.required_tensorkeys_for_function['validate_task']['apply=global'] += [ TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) for tensor_name in local_model_dict_val ] From c7452e72eb6b20199c8ce71280c641311b830d43 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 6 Jun 2024 12:42:50 -0700 Subject: [PATCH 03/23] update runner_tf.py to run on keras api with non-legacy optimizers. create new tf_cnn_mnist workspace Signed-off-by: kta-intel --- openfl-workspace/tf_cnn_mnist/plan/plan.yaml | 6 +- .../tf_cnn_mnist/requirements.txt | 2 +- .../tf_cnn_mnist/src/keras_cnn.py | 85 --- .../tf_cnn_mnist/src/mnist_utils.py | 118 ---- .../tf_cnn_mnist/src/taskrunner.py | 26 +- .../tf_cnn_mnist/src/tfmnist_inmemory.py | 39 -- .../plan/defaults/tasks_tensorflow.yaml | 16 +- openfl/federated/task/__init__.py | 10 +- openfl/federated/task/runner_tf.py | 601 +++++++++++++++++- 9 files changed, 603 insertions(+), 300 deletions(-) delete mode 100644 openfl-workspace/tf_cnn_mnist/src/keras_cnn.py delete mode 100644 openfl-workspace/tf_cnn_mnist/src/mnist_utils.py delete mode 100644 openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py diff --git a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml index d3ebfe2921..72928c5196 100644 --- a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml +++ b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2024 Intel Corporation # Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
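+# NOTE: rounds_to_train is dropped to 2 below, presumably to keep the
+# default workspace fast to smoke-test.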
aggregator : @@ -8,7 +8,7 @@ aggregator : init_state_path : save/tf_cnn_mnist_init.pbuf best_state_path : save/tf_cnn_mnist_best.pbuf last_state_path : save/tf_cnn_mnist_last.pbuf - rounds_to_train : 10 + rounds_to_train : 2 collaborator : defaults : plan/defaults/collaborator.yaml @@ -36,7 +36,7 @@ assigner : defaults : plan/defaults/assigner.yaml tasks : - defaults : plan/defaults/tasks_keras.yaml + defaults : plan/defaults/tasks_tensorflow.yaml compression_pipeline : defaults : plan/defaults/compression_pipeline.yaml diff --git a/openfl-workspace/tf_cnn_mnist/requirements.txt b/openfl-workspace/tf_cnn_mnist/requirements.txt index af80212eeb..4a8d507a47 100644 --- a/openfl-workspace/tf_cnn_mnist/requirements.txt +++ b/openfl-workspace/tf_cnn_mnist/requirements.txt @@ -1 +1 @@ -tensorflow==2.13 +tensorflow==2.15.1 \ No newline at end of file diff --git a/openfl-workspace/tf_cnn_mnist/src/keras_cnn.py b/openfl-workspace/tf_cnn_mnist/src/keras_cnn.py deleted file mode 100644 index 35a71f7734..0000000000 --- a/openfl-workspace/tf_cnn_mnist/src/keras_cnn.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow.keras as ke -from tensorflow.keras import Sequential -from tensorflow.keras.layers import Conv2D -from tensorflow.keras.layers import Dense -from tensorflow.keras.layers import Flatten - -from openfl.federated import KerasTaskRunner - - -class KerasCNN(KerasTaskRunner): - """A basic convolutional neural network model.""" - - def __init__(self, **kwargs): - """ - Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - """ - super().__init__(**kwargs) - - self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs) - - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info) - - self.logger.info(f'Train Set Size : {self.get_train_data_size()}') - self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}') - - def build_model(self, - input_shape, - num_classes, - conv_kernel_size=(4, 4), - conv_strides=(2, 2), - conv1_channels_out=16, - conv2_channels_out=32, - final_dense_inputsize=100, - **kwargs): - """ - Define the model architecture. 
- - Args: - input_shape (numpy.ndarray): The shape of the data - num_classes (int): The number of classes of the dataset - - Returns: - tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras - - """ - model = Sequential() - - model.add(Conv2D(conv1_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu', - input_shape=input_shape)) - - model.add(Conv2D(conv2_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu')) - - model.add(Flatten()) - - model.add(Dense(final_dense_inputsize, activation='relu')) - - model.add(Dense(num_classes, activation='softmax')) - - model.compile(loss=ke.losses.categorical_crossentropy, - optimizer=ke.optimizers.legacy.Adam(), - metrics=['accuracy']) - - # initialize the optimizer variables - opt_vars = model.optimizer.variables() - - for v in opt_vars: - v.initializer.run(session=self.sess) - - return model diff --git a/openfl-workspace/tf_cnn_mnist/src/mnist_utils.py b/openfl-workspace/tf_cnn_mnist/src/mnist_utils.py deleted file mode 100644 index d19e13d9dd..0000000000 --- a/openfl-workspace/tf_cnn_mnist/src/mnist_utils.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from logging import getLogger - -import numpy as np -from tensorflow.python.keras.utils.data_utils import get_file - -logger = getLogger(__name__) - - -def one_hot(labels, classes): - """ - One Hot encode a vector. - - Args: - labels (list): List of labels to onehot encode - classes (int): Total number of categorical classes - - Returns: - np.array: Matrix of one-hot encoded labels - """ - return np.eye(classes)[labels] - - -def _load_raw_datashards(shard_num, collaborator_count): - """ - Load the raw data by shard. - - Returns tuples of the dataset shard divided into training and validation. - - Args: - shard_num (int): The shard number to use - collaborator_count (int): The number of collaborators in the federation - - Returns: - 2 tuples: (image, label) of the training, validation dataset - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file('mnist.npz', - origin=origin_folder + 'mnist.npz', - file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') - - with np.load(path) as f: - # get all of mnist - X_train_tot = f['x_train'] - y_train_tot = f['y_train'] - - X_valid_tot = f['x_test'] - y_valid_tot = f['y_test'] - - # create the shards - shard_num = int(shard_num) - X_train = X_train_tot[shard_num::collaborator_count] - y_train = y_train_tot[shard_num::collaborator_count] - - X_valid = X_valid_tot[shard_num::collaborator_count] - y_valid = y_valid_tot[shard_num::collaborator_count] - - return (X_train, y_train), (X_valid, y_valid) - - -def load_mnist_shard(shard_num, collaborator_count, categorical=True, - channels_last=True, **kwargs): - """ - Load the MNIST dataset. 
- - Args: - shard_num (int): The shard to use from the dataset - collaborator_count (int): The number of collaborators in the federation - categorical (bool): True = convert the labels to one-hot encoded - vectors (Default = True) - channels_last (bool): True = The input images have the channels - last (Default = True) - **kwargs: Additional parameters to pass to the function - - Returns: - list: The input shape - int: The number of classes - numpy.ndarray: The training data - numpy.ndarray: The training labels - numpy.ndarray: The validation data - numpy.ndarray: The validation labels - """ - img_rows, img_cols = 28, 28 - num_classes = 10 - - (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( - shard_num, collaborator_count - ) - - if channels_last: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) - X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - else: - X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) - X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) - - X_train = X_train.astype('float32') - X_valid = X_valid.astype('float32') - X_train /= 255 - X_valid /= 255 - - logger.info(f'MNIST > X_train Shape : {X_train.shape}') - logger.info(f'MNIST > y_train Shape : {y_train.shape}') - logger.info(f'MNIST > Train Samples : {X_train.shape[0]}') - logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}') - - if categorical: - # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) - - return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py index e9dfb6c70c..f3382b3339 100644 --- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py @@ -7,10 +7,10 @@ import tensorflow as tf from openfl.utilities import Metric -from openfl.federated import KerasTaskRunner +from openfl.federated import TensorFlowTaskRunner -class TensorFlowCNN(KerasTaskRunner): +class TensorFlowCNN(TensorFlowTaskRunner): """A basic convolutional neural network model.""" def __init__(self, **kwargs): @@ -51,31 +51,19 @@ def build_model(self, tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras """ + model = tf.keras.models.Sequential([ - tf.keras.layers.Conv2D(conv1_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu', - input_shape=input_shape), - tf.keras.layers.Conv2D(conv2_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu'), + tf.keras.layers.Conv2D(conv1_channels_out, kernel_size=conv_kernel_size, strides=conv_strides, activation='relu', input_shape=input_shape), + tf.keras.layers.Conv2D(conv2_channels_out, kernel_size=conv_kernel_size, strides=conv_strides, activation='relu'), tf.keras.layers.Flatten(), tf.keras.layers.Dense(final_dense_inputsize, activation='relu'), tf.keras.layers.Dense(num_classes, activation='softmax') ]) model.compile(loss=tf.keras.losses.categorical_crossentropy, - optimizer=tf.keras.optimizers.legacy.Adam(), + optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) - # initialize the optimizer variables - opt_vars = model.optimizer.variables() - - for v in opt_vars: - v.initializer.run(session=self.sess) - return model @@ -100,7 +88,7 @@ def train_(self, batch_generator, metrics: list = 
None, **kwargs): for param in metrics: if param not in model_metrics_names: raise ValueError( - f'KerasTaskRunner does not support specifying new metrics. ' + f'TensorFlowTaskRunner does not support specifying new metrics. ' f'Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}' ) diff --git a/openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py b/openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py deleted file mode 100644 index dec2bc2808..0000000000 --- a/openfl-workspace/tf_cnn_mnist/src/tfmnist_inmemory.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from openfl.federated import TensorFlowDataLoader -from .mnist_utils import load_mnist_shard - - -class TensorFlowMNISTInMemory(TensorFlowDataLoader): - """TensorFlow Data Loader for MNIST Dataset.""" - - def __init__(self, data_path, batch_size, **kwargs): - """ - Initialize. - - Args: - data_path: File path for the dataset - batch_size (int): The batch size for the data loader - **kwargs: Additional arguments, passed to super init and load_mnist_shard - """ - super().__init__(batch_size, **kwargs) - - # TODO: We should be downloading the dataset shard into a directory - # TODO: There needs to be a method to ask how many collaborators and - # what index/rank is this collaborator. - # Then we have a way to automatically shard based on rank and size of - # collaborator list. - - _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard( - shard_num=int(data_path), **kwargs - ) - - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid - - self.num_classes = num_classes diff --git a/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml index 6d000cc618..e3d5348ca3 100644 --- a/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml +++ b/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml @@ -1,23 +1,23 @@ aggregated_model_validation: - function : validate + function : validate_task kwargs : batch_size : 32 apply : global metrics : - - acc + - accuracy locally_tuned_model_validation: - function : validate + function : validate_task kwargs : batch_size : 32 apply : local metrics : - - acc + - accuracy train: - function : train_batches + function : train_task kwargs : - batch_size : 32 - metrics : + batch_size : 32 + metrics : - loss - epochs : 1 + epochs : 1 diff --git a/openfl/federated/task/__init__.py b/openfl/federated/task/__init__.py index b5efcdcd50..e0e16a4a81 100644 --- a/openfl/federated/task/__init__.py +++ b/openfl/federated/task/__init__.py @@ -3,23 +3,23 @@ """Task package.""" -import pkgutil +import importlib.util from warnings import catch_warnings from warnings import simplefilter with catch_warnings(): simplefilter(action='ignore', category=FutureWarning) - if pkgutil.find_loader('tensorflow'): + if importlib.util.find_spec('tensorflow') is not None: # ignore deprecation warnings in command-line interface import tensorflow # NOQA from .runner import TaskRunner # NOQA - -if pkgutil.find_loader('tensorflow'): +if importlib.util.find_spec('tensorflow') is not None: from .runner_tf import TensorFlowTaskRunner # NOQA + from .runner_tf import TensorFlowTaskRunner_v1 # NOQA from .runner_keras import KerasTaskRunner # NOQA from .fl_model import FederatedModel # NOQA -if pkgutil.find_loader('torch'): +if 
importlib.util.find_spec('torch') is not None: from .runner_pt import PyTorchTaskRunner # NOQA from .fl_model import FederatedModel # NOQA diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py index f63ffce3f8..eee5f14ecd 100644 --- a/openfl/federated/task/runner_tf.py +++ b/openfl/federated/task/runner_tf.py @@ -1,18 +1,575 @@ -# Copyright (C) 2020-2023 Intel Corporation +# Copyright (C) 2020-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """TensorFlowTaskRunner module.""" - import numpy as np -import tensorflow.compat.v1 as tf -from tqdm import tqdm +import tensorflow as tf +from openfl.utilities import change_tags, Metric, TensorKey from openfl.utilities.split import split_tensor_dict_for_holdouts -from openfl.utilities import TensorKey from .runner import TaskRunner class TensorFlowTaskRunner(TaskRunner): + """The base model for Keras models in the federation.""" + + def __init__(self, **kwargs): + """ + Initialize. + + Args: + **kwargs: Additional parameters to pass to the function + """ + super().__init__(**kwargs) + + self.model = tf.keras.Model() + + self.model_tensor_names = [] + + # this is a map of all of the required tensors for each of the public + # functions in KerasTaskRunner + self.required_tensorkeys_for_function = {} + tf.keras.backend.clear_session() + + def rebuild_model(self, round_num, input_tensor_dict, validation=False): + """ + Parse tensor names and update weights of model. Handles the optimizer treatment. + + Returns + ------- + None + """ + + if self.opt_treatment == 'RESET': + self.reset_opt_vars() + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + elif (round_num > 0 and self.opt_treatment == 'CONTINUE_GLOBAL' + and not validation): + self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) + else: + self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) + + def train_task(self, col_name, round_num, input_tensor_dict, + metrics, epochs=1, batch_size=1, **kwargs): + """ + Perform the training. + + Is expected to perform draws randomly, without replacement until data is exausted. + Then data is replaced and shuffled and draws continue. 
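+
+        The returned global dict carries metric and updated model tensors
+        for the aggregator; the local dict holds tensors that stay on the
+        collaborator for the next round.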
+ + Returns + ------- + dict + 'TensorKey: nparray' + """ + if metrics is None: + raise KeyError('metrics must be defined') + + # rebuild model with updated weights + self.rebuild_model(round_num, input_tensor_dict) + for epoch in range(epochs): + self.logger.info(f'Run {epoch} epoch of {round_num} round') + results = self.train_(self.data_loader.get_train_loader(batch_size), + metrics=metrics, + **kwargs) + + # output metric tensors (scalar) + origin = col_name + tags = ('trained',) + output_metric_dict = { + TensorKey( + metric_name, origin, round_num, True, ('metric',) + ): metric_value + for (metric_name, metric_value) in results + } + + # output model tensors (Doesn't include TensorKey) + output_model_dict = self.get_tensor_dict(with_opt_vars=True) + global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( + self.logger, output_model_dict, + **self.tensor_dict_split_fn_kwargs + ) + + # create global tensorkeys + global_tensorkey_model_dict = { + TensorKey(tensor_name, origin, round_num, False, tags): + nparray for tensor_name, nparray in global_model_dict.items() + } + # create tensorkeys that should stay local + local_tensorkey_model_dict = { + TensorKey(tensor_name, origin, round_num, False, tags): + nparray for tensor_name, nparray in local_model_dict.items() + } + # the train/validate aggregated function of the next round will look + # for the updated model parameters. + # this ensures they will be resolved locally + next_local_tensorkey_model_dict = { + TensorKey( + tensor_name, origin, round_num + 1, False, ('model',) + ): nparray for tensor_name, nparray in local_model_dict.items() + } + + global_tensor_dict = { + **output_metric_dict, + **global_tensorkey_model_dict + } + local_tensor_dict = { + **local_tensorkey_model_dict, + **next_local_tensorkey_model_dict + } + + # update the required tensors if they need to be pulled from the + # aggregator + # TODO this logic can break if different collaborators have different + # roles between rounds. + # for example, if a collaborator only performs validation in the first + # round but training in the second, it has no way of knowing the + # optimizer state tensor names to request from the aggregator because + # these are only created after training occurs. A work around could + # involve doing a single epoch of training on random data to get the + # optimizer names, and then throwing away the model. + if self.opt_treatment == 'CONTINUE_GLOBAL': + self.initialize_tensorkeys_for_functions(with_opt_vars=True) + + return global_tensor_dict, local_tensor_dict + + def train_(self, batch_generator, metrics: list = None, **kwargs): + """Train single epoch. + + Override this function for custom training. + + Args: + batch_generator: Generator of training batches. + Each batch is a tuple of N train images and N train labels + where N is the batch size of the DataLoader of the current TaskRunner instance. + + epochs: Number of epochs to train. + metrics: Names of metrics to save. + """ + if metrics is None: + metrics = [] + # TODO Currently assuming that all metrics are defined at + # initialization (build_model). + # If metrics are added (i.e. not a subset of what was originally + # defined) then the model must be recompiled. + model_metrics_names = self.model.metrics_names + + # TODO if there are new metrics in the flplan that were not included + # in the originally + # compiled model, that behavior is not currently handled. 
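+        # Requested metrics must therefore be a subset of those passed
+        # to model.compile().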
+ for param in metrics: + if param not in model_metrics_names: + raise ValueError( + f'KerasTaskRunner does not support specifying new metrics. ' + f'Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}' + ) + + history = self.model.fit(batch_generator, + verbose=1, + **kwargs) + results = [] + for metric in metrics: + value = np.mean([history.history[metric]]) + results.append(Metric(name=metric, value=np.array(value))) + return results + + def validate_task(self, col_name, round_num, input_tensor_dict, **kwargs): + """ + Run the trained model on validation data; report results. + + Parameters + ---------- + input_tensor_dict : either the last aggregated or locally trained model + + Returns + ------- + output_tensor_dict : {TensorKey: nparray} (these correspond to acc, + precision, f1_score, etc.) + """ + if 'batch_size' in kwargs: + batch_size = kwargs['batch_size'] + else: + batch_size = 1 + + self.rebuild_model(round_num, input_tensor_dict, validation=True) + param_metrics = kwargs['metrics'] + + vals = self.model.evaluate( + self.data_loader.get_valid_loader(batch_size), + verbose=1 + ) + model_metrics_names = self.model.metrics_names + if type(vals) is not list: + vals = [vals] + ret_dict = dict(zip(model_metrics_names, vals)) + + # TODO if there are new metrics in the flplan that were not included in + # the originally compiled model, that behavior is not currently + # handled. + for param in param_metrics: + if param not in model_metrics_names: + raise ValueError( + f'KerasTaskRunner does not support specifying new metrics. ' + f'Param_metrics = {param_metrics}, model_metrics_names = {model_metrics_names}' + ) + + origin = col_name + suffix = 'validate' + if kwargs['apply'] == 'local': + suffix += '_local' + else: + suffix += '_agg' + tags = ('metric',) + tags = change_tags(tags, add_field=suffix) + output_tensor_dict = { + TensorKey(metric, origin, round_num, True, tags): + np.array(ret_dict[metric]) + for metric in param_metrics} + + return output_tensor_dict, {} + + def save_native(self, filepath): + """Save model.""" + self.model.save(filepath) + + def load_native(self, filepath): + """Load model.""" + self.model = tf.keras.models.load_model(filepath) + + @staticmethod + def _get_weights_names(obj, with_opt_vars): + """ + Get the list of weight names. + + Parameters + ---------- + obj : Model or Optimizer + The target object that we want to get the weights. + + with_opt_vars (bool): Specify if we want to get optimizer weights + + Returns + ------- + dict + The weight name list + """ + if with_opt_vars: + # When acquiring optimizer weights, check optimizer version. + # Current optimizer does not use 'weights' attributes + if 'legacy' in obj.__class__.__module__: + weight_names = [weight.name for weight in obj.weights] + else: + weight_names = [weight.name for weight in obj.variables] + + weight_names = [weight.name for weight in obj.weights] + return weight_names + + @staticmethod + def _get_weights_dict(obj, suffix='', with_opt_vars=False): + """ + Get the dictionary of weights. + + Parameters + ---------- + obj : Model or Optimizer + The target object that we want to get the weights. + + with_opt_vars (bool): Specify if we want to get optimizer weights + + Returns + ------- + dict + The weight dictionary. + """ + + weights_dict = {} + if with_opt_vars: + # When acquiring optimizer weights, check optimizer version. 
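+            # Optimizers under tf.keras.optimizers.legacy still expose
+            # `.weights` / `.get_weights()`.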
+ # Current optimizer does not use 'weights' or '.get_weights()' attributes + if 'legacy' in obj.__class__.__module__: + weight_names = [weight.name for weight in obj.weights] + weight_values = obj.get_weights() + else: + weight_names = [weight.name for weight in obj.variables] + weight_values = [weight.numpy() for weight in obj.variables] + else: + weight_names = [weight.name for weight in obj.weights] + weight_values = obj.get_weights() + + + for name, value in zip(weight_names, weight_values): + weights_dict[name + suffix] = value + return weights_dict + + @staticmethod + def _set_weights_dict(obj, weights_dict, with_opt_vars=False): + """Set the object weights with a dictionary. + + The obj can be a model or an optimizer. + + Args: + obj (Model or Optimizer): The target object that we want to set + the weights. + weights_dict (dict): The weight dictionary. + with_opt_vars (bool): Specify if we want to set optimizer weights + Returns: + None + """ + + if with_opt_vars: + # When acquiring optimizer weights, check optimizer version. + # Current optimizer does not use 'weights' attributes + if 'legacy' in obj.__class__.__module__: + weight_names = [weight.name for weight in obj.weights] + else: + weight_names = [weight.name for weight in obj.variables] + else: + weight_names = [weight.name for weight in obj.weights] + + weight_values = [weights_dict[name] for name in weight_names] + + obj.set_weights(weight_values) + + def get_tensor_dict(self, with_opt_vars, suffix=''): + """ + Get the model weights as a tensor dictionary. + + Parameters + ---------- + with_opt_vars : bool + If we should include the optimizer's status. + suffix : string + Universally + + Returns: + dict: The tensor dictionary. + """ + + model_weights = self._get_weights_dict(self.model, suffix) + + if with_opt_vars: + + opt_weights = self._get_weights_dict(self.model.optimizer, suffix, with_opt_vars) + + model_weights.update(opt_weights) + + if len(opt_weights) == 0: + self.logger.debug( + "WARNING: We didn't find variables for the optimizer.") + return model_weights + + def set_tensor_dict(self, tensor_dict, with_opt_vars): + """ + Set the model weights with a tensor dictionary. + + Args: + tensor_dict: the tensor dictionary + with_opt_vars (bool): True = include the optimizer's status. + """ + if with_opt_vars is False: + # It is possible to pass in opt variables from the input tensor dict + # This will make sure that the correct layers are updated + model_weight_names = [weight.name for weight in self.model.weights] + model_weights_dict = { + name: tensor_dict[name] for name in model_weight_names + } + self._set_weights_dict(self.model, model_weights_dict) + else: + model_weight_names = [ + weight.name for weight in self.model.weights + ] + model_weights_dict = { + name: tensor_dict[name] for name in model_weight_names + } + if 'legacy' in self.model.optimizer.__class__.__module__: + opt_weight_names = [ + weight.name for weight in self.model.optimizer.weights + ] + else: + opt_weight_names = [ + weight.name for weight in self.model.optimizer.variables + ] + + opt_weights_dict = { + name: tensor_dict[name] for name in opt_weight_names + } + self._set_weights_dict(self.model, model_weights_dict) + self._set_weights_dict(self.model.optimizer, opt_weights_dict, with_opt_vars) + + def reset_opt_vars(self): + """ + Reset optimizer variables. 
+ + Resets the optimizer variables + + """ + for var in self.model.optimizer.variables(): + var.assign(tf.zeros_like(var)) + self.logger.debug('Optimizer variables reset') + + def set_required_tensorkeys_for_function(self, func_name, + tensor_key, **kwargs): + """ + Set the required tensors for specified function that could be called as part of a task. + + By default, this is just all of the layers and optimizer of the model. + Custom tensors should be added to this function + + Parameters + ---------- + func_name: string + tensor_key: TensorKey (namedtuple) + **kwargs: Any function arguments {} + + Returns + ------- + None + """ + # TODO there should be a way to programmatically iterate through all + # of the methods in the class and declare the tensors. + # For now this is done manually + + if func_name == 'validate_task': + # Should produce 'apply=global' or 'apply=local' + local_model = 'apply' + kwargs['apply'] + self.required_tensorkeys_for_function[func_name][ + local_model].append(tensor_key) + else: + self.required_tensorkeys_for_function[func_name].append(tensor_key) + + def get_required_tensorkeys_for_function(self, func_name, **kwargs): + """ + Get the required tensors for specified function that could be called as part of a task. + + By default, this is just all of the layers and optimizer of the model. + + Parameters + ---------- + None + + Returns + ------- + List + [TensorKey] + """ + if func_name == 'validate_task': + local_model = 'apply=' + str(kwargs['apply']) + return self.required_tensorkeys_for_function[func_name][local_model] + else: + return self.required_tensorkeys_for_function[func_name] + + def update_tensorkeys_for_functions(self): + """ + Update the required tensors for all publicly accessible methods \ + that could be called as part of a task. + + By default, this is just all of the layers and optimizer of the model. + Custom tensors should be added to this function + + Parameters + ---------- + None + + Returns + ------- + None + """ + # TODO complete this function. It is only needed for opt_treatment, + # and making the model stateless + + # Minimal required tensors for train function + model_layer_names = self._get_weights_names(self.model) + opt_names = self._get_weights_names(self.model.optimizer) + tensor_names = model_layer_names + opt_names + self.logger.debug(f'Updating model tensor names: {tensor_names}') + self.required_tensorkeys_for_function['train_task'] = [ + TensorKey(tensor_name, 'GLOBAL', 0, ('model',)) + for tensor_name in tensor_names + ] + + # Validation may be performed on local or aggregated (global) model, + # so there is an extra lookup dimension for kwargs + self.required_tensorkeys_for_function['validate_task'] = {} + self.required_tensorkeys_for_function['validate_task']['local_model=True'] = [ + TensorKey(tensor_name, 'LOCAL', 0, ('trained',)) + for tensor_name in tensor_names + ] + self.required_tensorkeys_for_function['validate_task']['local_model=False'] = [ + TensorKey(tensor_name, 'GLOBAL', 0, ('model',)) + for tensor_name in tensor_names + ] + + def initialize_tensorkeys_for_functions(self, with_opt_vars=False): + """ + Set the required tensors for all publicly accessible methods \ + that could be called as part of a task. + + By default, this is just all of the layers and optimizer of the model. 
+ Custom tensors should be added to this function + + Parameters + ---------- + None + + Returns + ------- + None + """ + # TODO there should be a way to programmatically iterate through all + # of the methods in the class and declare the tensors. + # For now this is done manually + + output_model_dict = self.get_tensor_dict(with_opt_vars=with_opt_vars) + global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( + self.logger, output_model_dict, + **self.tensor_dict_split_fn_kwargs + ) + if not with_opt_vars: + global_model_dict_val = global_model_dict + local_model_dict_val = local_model_dict + else: + output_model_dict = self.get_tensor_dict(with_opt_vars=False) + global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts( + self.logger, + output_model_dict, + **self.tensor_dict_split_fn_kwargs + ) + + self.required_tensorkeys_for_function['train_task'] = [ + TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) + for tensor_name in global_model_dict + ] + self.required_tensorkeys_for_function['train_task'] += [ + TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) + for tensor_name in local_model_dict + ] + + # Validation may be performed on local or aggregated (global) model, + # so there is an extra lookup dimension for kwargs + self.required_tensorkeys_for_function['validate_task'] = {} + # TODO This is not stateless. The optimizer will not be + self.required_tensorkeys_for_function['validate_task']['apply=local'] = [ + TensorKey(tensor_name, 'LOCAL', 0, False, ('trained',)) + for tensor_name in { + **global_model_dict_val, + **local_model_dict_val + } + ] + self.required_tensorkeys_for_function['validate_task']['apply=global'] = [ + TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) + for tensor_name in global_model_dict_val + ] + self.required_tensorkeys_for_function['validate_task']['apply=global'] += [ + TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) + for tensor_name in local_model_dict_val + ] + + +import tensorflow.compat.v1 +from tqdm import tqdm + + +class TensorFlowTaskRunner_v1(TaskRunner): """ Base class for TensorFlow models in the Federated Learning solution. @@ -27,7 +584,7 @@ def __init__(self, **kwargs): Args: **kwargs: Additional parameters to pass to the function """ - tf.disable_v2_behavior() + tensorflow.compat.v1.disable_v2_behavior() super().__init__(**kwargs) @@ -83,8 +640,8 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False): else: self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - def train_batches(self, col_name, round_num, input_tensor_dict, - epochs=1, use_tqdm=False, **kwargs): + def train_task(self, col_name, round_num, input_tensor_dict, + epochs=1, use_tqdm=False, **kwargs): """ Perform the training. @@ -106,7 +663,7 @@ def train_batches(self, col_name, round_num, input_tensor_dict, # rebuild model with updated weights self.rebuild_model(round_num, input_tensor_dict) - tf.keras.backend.set_learning_phase(True) + tensorflow.compat.v1.keras.backend.set_learning_phase(True) losses = [] for epoch in range(epochs): @@ -195,8 +752,8 @@ def train_batch(self, X, y): return loss - def validate(self, col_name, round_num, - input_tensor_dict, use_tqdm=False, **kwargs): + def validate_task(self, col_name, round_num, + input_tensor_dict, use_tqdm=False, **kwargs): """ Run validation. 
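+
+        Per-batch scores are weighted by batch size, so the reported
+        metric is an average over the full validation set.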
@@ -210,7 +767,7 @@ def validate(self, col_name, round_num, self.rebuild_model(round_num, input_tensor_dict, validation=True) - tf.keras.backend.set_learning_phase(False) + tensorflow.compat.v1.keras.backend.set_learning_phase(False) score = 0 @@ -220,7 +777,7 @@ def validate(self, col_name, round_num, for X, y in gen: weight = X.shape[0] / self.data_loader.get_valid_data_size() - _, s = self.validate_batch(X, y) + _, s = self.validate_(X, y) score += s * weight origin = col_name @@ -238,7 +795,7 @@ def validate(self, col_name, round_num, # return empty dict for local metrics return output_tensor_dict, {} - def validate_batch(self, X, y): + def validate_(self, X, y): """Validate the model on a single local batch. Args: @@ -317,7 +874,7 @@ def initialize_globals(self): Returns: None """ - self.sess.run(tf.global_variables_initializer()) + self.sess.run(tensorflow.compat.v1.global_variables_initializer()) def _get_weights_names(self, with_opt_vars=True): """Get the weights. @@ -380,18 +937,18 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): **self.tensor_dict_split_fn_kwargs ) - self.required_tensorkeys_for_function['train_batches'] = [ + self.required_tensorkeys_for_function['train_task'] = [ TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) for tensor_name in global_model_dict] - self.required_tensorkeys_for_function['train_batches'] += [ + self.required_tensorkeys_for_function['train_task'] += [ TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) for tensor_name in local_model_dict] # Validation may be performed on local or aggregated (global) # model, so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function['validate'] = {} + self.required_tensorkeys_for_function['validate_task'] = {} # TODO This is not stateless. 
The optimizer will not be - self.required_tensorkeys_for_function['validate']['apply=local'] = [ + self.required_tensorkeys_for_function['validate_task']['apply=local'] = [ TensorKey(tensor_name, 'LOCAL', 0, False, ('trained',)) for tensor_name in { **global_model_dict_val, @@ -432,14 +989,14 @@ def tf_set_tensor_dict(tensor_dict, session, variables, """ if placeholders is None: placeholders = { - v.name: tf.placeholder(v.dtype, shape=v.shape) for v in variables + v.name: tensorflow.compat.v1.placeholder(v.dtype, shape=v.shape) for v in variables } if assign_ops is None: assign_ops = { - v.name: tf.assign(v, placeholders[v.name]) for v in variables + v.name: tensorflow.compat.v1.assign(v, placeholders[v.name]) for v in variables } for k, v in tensor_dict.items(): session.run(assign_ops[k], feed_dict={placeholders[k]: v}) - return assign_ops, placeholders + return assign_ops, placeholders \ No newline at end of file From dd53e71bae823440668d7c4b13d59a9f69fd95c5 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 6 Jun 2024 14:11:20 -0700 Subject: [PATCH 04/23] tf workspace update Signed-off-by: kta-intel --- .../tf_cnn_histology/plan/plan.yaml | 18 ++--- .../tf_cnn_histology/requirements.txt | 2 +- .../src/{tfds_utils.py => dataloader.py} | 27 +++++++ .../src/{tf_cnn.py => taskrunner.py} | 81 +++++++++++++------ .../src/tfhistology_inmemory.py | 34 -------- openfl-workspace/tf_cnn_mnist/plan/plan.yaml | 2 +- .../tf_cnn_mnist/src/taskrunner.py | 13 +-- 7 files changed, 100 insertions(+), 77 deletions(-) rename openfl-workspace/tf_cnn_histology/src/{tfds_utils.py => dataloader.py} (82%) rename openfl-workspace/tf_cnn_histology/src/{tf_cnn.py => taskrunner.py} (58%) delete mode 100644 openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py diff --git a/openfl-workspace/tf_cnn_histology/plan/plan.yaml b/openfl-workspace/tf_cnn_histology/plan/plan.yaml index f834794113..010ee461fc 100644 --- a/openfl-workspace/tf_cnn_histology/plan/plan.yaml +++ b/openfl-workspace/tf_cnn_histology/plan/plan.yaml @@ -5,10 +5,10 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/tf_cnn_histology_init.pbuf - last_state_path : save/tf_cnn_histology_latest.pbuf - best_state_path : save/tf_cnn_histology_best.pbuf - db_store_rounds: 2 + init_state_path : save/tf_cnn_histology_init.pbuf + last_state_path : save/tf_cnn_histology_latest.pbuf + best_state_path : save/tf_cnn_histology_best.pbuf + db_store_rounds : 2 rounds_to_train : 10 collaborator : @@ -21,7 +21,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : src.tfhistology_inmemory.TensorFlowHistologyInMemory + template : src.dataloader.TensorFlowHistologyInMemory settings : batch_size: 64 percent_train: 0.8 @@ -30,7 +30,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.tf_cnn.TensorFlowCNN + template : src.taskrunner.TensorFlowCNN network : defaults : plan/defaults/network.yaml @@ -41,14 +41,14 @@ assigner : tasks: defaults: plan/defaults/tasks_tensorflow.yaml aggregated_model_validation: - function: validate + function: validate_task kwargs: apply: global batch_size: 32 metrics: - sparse_categorical_accuracy locally_tuned_model_validation: - function: validate + function: validate_task kwargs: apply: local batch_size: 32 @@ -56,7 +56,7 @@ tasks: - sparse_categorical_accuracy settings: {} train: - function: train + function: train_task kwargs: batch_size: 32 epochs: 1 diff --git 
a/openfl-workspace/tf_cnn_histology/requirements.txt b/openfl-workspace/tf_cnn_histology/requirements.txt
index 59ee6430c8..23b0b78e6f 100644
--- a/openfl-workspace/tf_cnn_histology/requirements.txt
+++ b/openfl-workspace/tf_cnn_histology/requirements.txt
@@ -1,3 +1,3 @@
 pillow
-tensorflow==2.13
+tensorflow==2.15.1
 tensorflow-datasets
diff --git a/openfl-workspace/tf_cnn_histology/src/tfds_utils.py b/openfl-workspace/tf_cnn_histology/src/dataloader.py
similarity index 82%
rename from openfl-workspace/tf_cnn_histology/src/tfds_utils.py
rename to openfl-workspace/tf_cnn_histology/src/dataloader.py
index 92977ebad0..bb8ce1557f 100644
--- a/openfl-workspace/tf_cnn_histology/src/tfds_utils.py
+++ b/openfl-workspace/tf_cnn_histology/src/dataloader.py
@@ -3,6 +3,7 @@
 
 """You may copy this file as the starting point of your own model."""
 
+from openfl.federated import TensorFlowDataLoader
 from logging import getLogger
 
 import numpy as np
@@ -10,6 +11,32 @@
 logger = getLogger(__name__)
 
+class TensorFlowHistologyInMemory(TensorFlowDataLoader):
+    """TensorFlow Data Loader for Colorectal Histology Dataset."""
+
+    def __init__(self, data_path, batch_size, **kwargs):
+        """
+        Initialize.
+
+        Args:
+            data_path: File path for the dataset
+            batch_size (int): The batch size for the data loader
+            **kwargs: Additional arguments, passed to super init and load_histology_shard
+        """
+        super().__init__(batch_size, **kwargs)
+
+        _, num_classes, X_train, y_train, X_valid, y_valid = load_histology_shard(
+            shard_num=data_path,
+            categorical=False, **kwargs
+        )
+
+        self.X_train = X_train
+        self.y_train = y_train
+        self.X_valid = X_valid
+        self.y_valid = y_valid
+
+        self.num_classes = num_classes
+
 
 def one_hot(labels, classes):
     """
diff --git a/openfl-workspace/tf_cnn_histology/src/tf_cnn.py b/openfl-workspace/tf_cnn_histology/src/taskrunner.py
similarity index 58%
rename from openfl-workspace/tf_cnn_histology/src/tf_cnn.py
rename to openfl-workspace/tf_cnn_histology/src/taskrunner.py
index 7041396678..4791f37244 100644
--- a/openfl-workspace/tf_cnn_histology/src/tf_cnn.py
+++ b/openfl-workspace/tf_cnn_histology/src/taskrunner.py
@@ -3,12 +3,14 @@
 
 """You may copy this file as the starting point of your own model."""
 
+import numpy as np
 import tensorflow as tf
 
-from openfl.federated import KerasTaskRunner
+from openfl.utilities import Metric
+from openfl.federated import TensorFlowTaskRunner
 
 
-class TensorFlowCNN(KerasTaskRunner):
+class TensorFlowCNN(TensorFlowTaskRunner):
     """Initialize.
 
     Args:
@@ -17,7 +19,8 @@ class TensorFlowCNN(KerasTaskRunner):
     """
 
     def __init__(self, **kwargs):
-        """Initialize.
+        """
+        Initialize.
 
         Args:
             **kwargs: Additional parameters to pass to the function
@@ -25,32 +28,38 @@ def __init__(self, **kwargs):
         """
         super().__init__(**kwargs)
 
-        self.model = self.create_model(
+        self.model = self.build_model(
             self.feature_shape,
             self.data_loader.num_classes,
             **kwargs
         )
         self.initialize_tensorkeys_for_functions()
 
-    def create_model(self,
-                     input_shape,
-                     num_classes,
-                     training_smoothing=32.0,
-                     validation_smoothing=1.0,
-                     **kwargs):
-        """Create the TensorFlow CNN Histology model.
+        self.model.summary(print_fn=self.logger.info)
+
+        self.logger.info(f'Train Set Size : {self.get_train_data_size()}')
+        self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}')
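The renamed runner's `train_` method (further down in this diff) folds Keras history values into `openfl.utilities.Metric` tuples before reporting them back to OpenFL. A minimal sketch of that conversion, with a made-up `history` dict standing in for the object that `model.fit` returns; it assumes only that OpenFL is installed:

```python
import numpy as np
from openfl.utilities import Metric

# Made-up stand-in for History.history as returned by model.fit.
history = {'loss': [0.92, 0.71], 'sparse_categorical_accuracy': [0.55, 0.68]}

# Same conversion as train_(): average each tracked series into one scalar Metric.
results = [
    Metric(name=name, value=np.array(np.mean(history[name])))
    for name in ('loss', 'sparse_categorical_accuracy')
]
print(results)
```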
+
+    def build_model(self,
+                    input_shape,
+                    num_classes,
+                    **kwargs):
+        """
+        Define the model architecture.
 
         Args:
-            training_smoothing (float): (Default=32.0)
-            validation_smoothing (float): (Default=1.0)
+            input_shape (numpy.ndarray): The shape of the data
+            num_classes (int): The number of classes of the dataset
             **kwargs: Additional parameters to pass to the function
 
+        Returns:
+            keras.src.engine.functional.Functional
+
+        """
         print(tf.config.threading.get_intra_op_parallelism_threads())
         print(tf.config.threading.get_inter_op_parallelism_threads())
-        # ## Define Model
-        #
-        # Convolutional neural network model
+
+        ## Define Model using Functional API
         inputs = tf.keras.layers.Input(shape=input_shape)
 
         conv = tf.keras.layers.Conv2D(
@@ -96,13 +105,39 @@ def create_model(self,
             metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
         )
 
-        self.tvars = model.layers
-        print(f'layer names: {[var.name for var in self.tvars]}')
+        return model
 
-        self.opt_vars = self.optimizer.variables()
-        print(f'optimizer vars: {self.opt_vars}')
-        # Two opt_vars for one tvar: gradient and square sum for RMSprop.
-        self.fl_vars = self.tvars + self.opt_vars
+    def train_(self, batch_generator, metrics: list = None, **kwargs):
+        """Train single epoch.
 
-        return model
+        Override this function for custom training.
+
+        Args:
+            batch_generator: Generator of training batches.
+                Each batch is a tuple of N train images and N train labels
+                where N is the batch size of the DataLoader of the current TaskRunner instance.
+
+            metrics: Names of metrics to save.
+            **kwargs: Additional parameters passed to model.fit (e.g. epochs).
+        """
+        if metrics is None:
+            metrics = []
+
+        model_metrics_names = self.model.metrics_names
+
+        for param in metrics:
+            if param not in model_metrics_names:
+                raise ValueError(
+                    f'TensorFlowTaskRunner does not support specifying new metrics. '
+                    f'Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}'
+                )
+
+        history = self.model.fit(batch_generator,
+                                 verbose=1,
+                                 **kwargs)
+        results = []
+        for metric in metrics:
+            value = np.mean([history.history[metric]])
+            results.append(Metric(name=metric, value=np.array(value)))
+        return results
diff --git a/openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py b/openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py
deleted file mode 100644
index 69cf5fc7e6..0000000000
--- a/openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""You may copy this file as the starting point of your own model."""
-
-from openfl.federated import TensorFlowDataLoader
-from .tfds_utils import load_histology_shard
-
-
-class TensorFlowHistologyInMemory(TensorFlowDataLoader):
-    """TensorFlow Data Loader for Colorectal Histology Dataset."""
-
-    def __init__(self, data_path, batch_size, **kwargs):
-        """
-        Initialize.
- - Args: - data_path: File path for the dataset - batch_size (int): The batch size for the data loader - **kwargs: Additional arguments, passed to super init and load_mnist_shard - """ - super().__init__(batch_size, **kwargs) - - _, num_classes, X_train, y_train, X_valid, y_valid = load_histology_shard( - shard_num=data_path, - categorical=False, **kwargs - ) - - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid - - self.num_classes = num_classes diff --git a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml index 72928c5196..bd6850e650 100644 --- a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml +++ b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml @@ -8,7 +8,7 @@ aggregator : init_state_path : save/tf_cnn_mnist_init.pbuf best_state_path : save/tf_cnn_mnist_best.pbuf last_state_path : save/tf_cnn_mnist_last.pbuf - rounds_to_train : 2 + rounds_to_train : 10 collaborator : defaults : plan/defaults/collaborator.yaml diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py index f3382b3339..300e8842a9 100644 --- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py @@ -34,11 +34,6 @@ def __init__(self, **kwargs): def build_model(self, input_shape, num_classes, - conv_kernel_size=(4, 4), - conv_strides=(2, 2), - conv1_channels_out=16, - conv2_channels_out=32, - final_dense_inputsize=100, **kwargs): """ Define the model architecture. @@ -48,15 +43,15 @@ def build_model(self, num_classes (int): The number of classes of the dataset Returns: - tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras + tensorflow.python.keras.engine.sequential.Sequential """ model = tf.keras.models.Sequential([ - tf.keras.layers.Conv2D(conv1_channels_out, kernel_size=conv_kernel_size, strides=conv_strides, activation='relu', input_shape=input_shape), - tf.keras.layers.Conv2D(conv2_channels_out, kernel_size=conv_kernel_size, strides=conv_strides, activation='relu'), + tf.keras.layers.Conv2D(16, kernel_size=(4, 4), strides=(2, 2), activation='relu', input_shape=input_shape), + tf.keras.layers.Conv2D(32, kernel_size=(4, 4), strides=(2, 2), activation='relu'), tf.keras.layers.Flatten(), - tf.keras.layers.Dense(final_dense_inputsize, activation='relu'), + tf.keras.layers.Dense(100, activation='relu'), tf.keras.layers.Dense(num_classes, activation='softmax') ]) From 6c6f1dcafbb0a487b356acae911044e471fe7d76 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 6 Jun 2024 14:23:36 -0700 Subject: [PATCH 05/23] update unet tensorflow workspaces Signed-off-by: kta-intel --- openfl-workspace/tf_2dunet/src/tf_2dunet.py | 4 ++-- openfl-workspace/tf_3dunet_brats/plan/plan.yaml | 6 +++--- openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py | 4 ++-- openfl/federated/__init__.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/openfl-workspace/tf_2dunet/src/tf_2dunet.py b/openfl-workspace/tf_2dunet/src/tf_2dunet.py index 5073344050..29603afe52 100644 --- a/openfl-workspace/tf_2dunet/src/tf_2dunet.py +++ b/openfl-workspace/tf_2dunet/src/tf_2dunet.py @@ -5,12 +5,12 @@ import tensorflow.compat.v1 as tf -from openfl.federated import TensorFlowTaskRunner +from openfl.federated import TensorFlowTaskRunner_v1 tf.disable_v2_behavior() -class TensorFlow2DUNet(TensorFlowTaskRunner): +class TensorFlow2DUNet(TensorFlowTaskRunner_v1): """Initialize. 
Args: diff --git a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml index fa8fb911de..b873de734b 100644 --- a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml +++ b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml @@ -56,7 +56,7 @@ task_runner: template: src.tf_3dunet_model.TensorFlow3dUNet tasks: aggregated_model_validation: - function: validate + function: validate_task kwargs: apply: global batch_size: 4 @@ -65,7 +65,7 @@ tasks: - soft_dice_coef defaults: plan/defaults/tasks_tensorflow.yaml locally_tuned_model_validation: - function: validate + function: validate_task kwargs: apply: local batch_size: 4 @@ -74,7 +74,7 @@ tasks: - soft_dice_coef settings: {} train: - function: train + function: train_task kwargs: batch_size: 4 epochs: 1 diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py index 8beeaf2375..65d6d5f54c 100644 --- a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py +++ b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py @@ -5,14 +5,14 @@ import tensorflow as tf -from openfl.federated import KerasTaskRunner +from openfl.federated import TensorFlowTaskRunner from .define_model import build_model from .define_model import dice_coef from .define_model import dice_loss from .define_model import soft_dice_coef -class TensorFlow3dUNet(KerasTaskRunner): +class TensorFlow3dUNet(TensorFlowTaskRunner): """Initialize. Args: diff --git a/openfl/federated/__init__.py b/openfl/federated/__init__.py index b2b4f4fd1f..9aa1cf7efb 100644 --- a/openfl/federated/__init__.py +++ b/openfl/federated/__init__.py @@ -9,7 +9,7 @@ from .data import DataLoader # NOQA if pkgutil.find_loader('tensorflow'): - from .task import TensorFlowTaskRunner, KerasTaskRunner, FederatedModel # NOQA + from .task import TensorFlowTaskRunner, TensorFlowTaskRunner_v1, KerasTaskRunner, FederatedModel # NOQA from .data import TensorFlowDataLoader, KerasDataLoader, FederatedDataSet # NOQA if pkgutil.find_loader('torch'): from .task import PyTorchTaskRunner, FederatedModel # NOQA From b3dd2fba5765465577c3b828502327b5159000d4 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 6 Jun 2024 14:27:34 -0700 Subject: [PATCH 06/23] find_loader deprecation notice. 
changing to importlib.util.find_spec() Signed-off-by: kta-intel --- openfl/federated/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/openfl/federated/__init__.py b/openfl/federated/__init__.py index 9aa1cf7efb..f746aff9d8 100644 --- a/openfl/federated/__init__.py +++ b/openfl/federated/__init__.py @@ -3,14 +3,15 @@ """openfl.federated package.""" -import pkgutil +import importlib.util from .plan import Plan # NOQA from .task import TaskRunner # NOQA from .data import DataLoader # NOQA -if pkgutil.find_loader('tensorflow'): +if importlib.util.find_spec('tensorflow'): from .task import TensorFlowTaskRunner, TensorFlowTaskRunner_v1, KerasTaskRunner, FederatedModel # NOQA from .data import TensorFlowDataLoader, KerasDataLoader, FederatedDataSet # NOQA -if pkgutil.find_loader('torch'): +if importlib.util.find_spec('torch'): from .task import PyTorchTaskRunner, FederatedModel # NOQA from .data import PyTorchDataLoader, FederatedDataSet # NOQA + From 2018e1f0297be9569621201ffe5f9eaba4608c81 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 6 Jun 2024 14:47:39 -0700 Subject: [PATCH 07/23] minor fix to find_spec() Signed-off-by: kta-intel --- openfl/federated/task/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openfl/federated/task/__init__.py b/openfl/federated/task/__init__.py index e0e16a4a81..084fc9506c 100644 --- a/openfl/federated/task/__init__.py +++ b/openfl/federated/task/__init__.py @@ -9,17 +9,17 @@ with catch_warnings(): simplefilter(action='ignore', category=FutureWarning) - if importlib.util.find_spec('tensorflow') is not None: + if importlib.util.find_spec('tensorflow'): # ignore deprecation warnings in command-line interface import tensorflow # NOQA from .runner import TaskRunner # NOQA -if importlib.util.find_spec('tensorflow') is not None: +if importlib.util.find_spec('tensorflow'): from .runner_tf import TensorFlowTaskRunner # NOQA from .runner_tf import TensorFlowTaskRunner_v1 # NOQA from .runner_keras import KerasTaskRunner # NOQA from .fl_model import FederatedModel # NOQA -if importlib.util.find_spec('torch') is not None: +if importlib.util.find_spec('torch'): from .runner_pt import PyTorchTaskRunner # NOQA from .fl_model import FederatedModel # NOQA From a74d45c7f27306b39b764c3c0672b086fca5c00d Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 6 Jun 2024 15:09:22 -0700 Subject: [PATCH 08/23] cleaning up tf import lines Signed-off-by: kta-intel --- openfl/federated/task/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openfl/federated/task/__init__.py b/openfl/federated/task/__init__.py index 084fc9506c..0838fc802d 100644 --- a/openfl/federated/task/__init__.py +++ b/openfl/federated/task/__init__.py @@ -16,8 +16,7 @@ from .runner import TaskRunner # NOQA if importlib.util.find_spec('tensorflow'): - from .runner_tf import TensorFlowTaskRunner # NOQA - from .runner_tf import TensorFlowTaskRunner_v1 # NOQA + from .runner_tf import TensorFlowTaskRunner, TensorFlowTaskRunner_v1 # NOQA from .runner_keras import KerasTaskRunner # NOQA from .fl_model import FederatedModel # NOQA if importlib.util.find_spec('torch'): From bea87294b32bee968b3646ca3d6e01c07da0bf74 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Fri, 7 Jun 2024 13:07:56 -0700 Subject: [PATCH 09/23] Fix lint with flake8 Signed-off-by: kta-intel --- openfl-workspace/tf_2dunet/src/tf_2dunet.py | 4 +-- .../tf_cnn_histology/src/dataloader.py | 1 + .../tf_cnn_histology/src/taskrunner.py | 3 +- 
.../tf_cnn_mnist/src/taskrunner.py | 22 +++++++++----- openfl/federated/__init__.py | 3 +- openfl/federated/task/__init__.py | 2 +- openfl/federated/task/runner_keras.py | 4 +-- openfl/federated/task/runner_tf.py | 30 +++++++++---------- 8 files changed, 37 insertions(+), 32 deletions(-) diff --git a/openfl-workspace/tf_2dunet/src/tf_2dunet.py b/openfl-workspace/tf_2dunet/src/tf_2dunet.py index 29603afe52..54c2ae2a89 100644 --- a/openfl-workspace/tf_2dunet/src/tf_2dunet.py +++ b/openfl-workspace/tf_2dunet/src/tf_2dunet.py @@ -5,12 +5,12 @@ import tensorflow.compat.v1 as tf -from openfl.federated import TensorFlowTaskRunner_v1 +from openfl.federated import TensorFlowTaskRunnerV1 tf.disable_v2_behavior() -class TensorFlow2DUNet(TensorFlowTaskRunner_v1): +class TensorFlow2DUNet(TensorFlowTaskRunnerV1): """Initialize. Args: diff --git a/openfl-workspace/tf_cnn_histology/src/dataloader.py b/openfl-workspace/tf_cnn_histology/src/dataloader.py index bb8ce1557f..30e41311f0 100644 --- a/openfl-workspace/tf_cnn_histology/src/dataloader.py +++ b/openfl-workspace/tf_cnn_histology/src/dataloader.py @@ -11,6 +11,7 @@ logger = getLogger(__name__) + class TensorFlowHistologyInMemory(TensorFlowDataLoader): """TensorFlow Data Loader for Colorectal Histology Dataset.""" diff --git a/openfl-workspace/tf_cnn_histology/src/taskrunner.py b/openfl-workspace/tf_cnn_histology/src/taskrunner.py index 4791f37244..7273c0385a 100644 --- a/openfl-workspace/tf_cnn_histology/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_histology/src/taskrunner.py @@ -59,7 +59,7 @@ def build_model(self, print(tf.config.threading.get_intra_op_parallelism_threads()) print(tf.config.threading.get_inter_op_parallelism_threads()) - ## Define Model using Functional API + # Define Model using Functional API inputs = tf.keras.layers.Input(shape=input_shape) conv = tf.keras.layers.Conv2D( @@ -107,7 +107,6 @@ def build_model(self, return model - def train_(self, batch_generator, metrics: list = None, **kwargs): """Train single epoch. diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py index 300e8842a9..3251c25171 100644 --- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py @@ -46,13 +46,22 @@ def build_model(self, tensorflow.python.keras.engine.sequential.Sequential """ - + model = tf.keras.models.Sequential([ - tf.keras.layers.Conv2D(16, kernel_size=(4, 4), strides=(2, 2), activation='relu', input_shape=input_shape), - tf.keras.layers.Conv2D(32, kernel_size=(4, 4), strides=(2, 2), activation='relu'), + tf.keras.layers.Conv2D(16, + kernel_size=(4, 4), + strides=(2, 2), + activation='relu', + input_shape=input_shape), + tf.keras.layers.Conv2D(32, + kernel_size=(4, 4), + strides=(2, 2), + activation='relu'), tf.keras.layers.Flatten(), - tf.keras.layers.Dense(100, activation='relu'), - tf.keras.layers.Dense(num_classes, activation='softmax') + tf.keras.layers.Dense(100, + activation='relu'), + tf.keras.layers.Dense(num_classes, + activation='softmax') ]) model.compile(loss=tf.keras.losses.categorical_crossentropy, @@ -60,8 +69,7 @@ def build_model(self, metrics=['accuracy']) return model - - + def train_(self, batch_generator, metrics: list = None, **kwargs): """Train single epoch. 
diff --git a/openfl/federated/__init__.py b/openfl/federated/__init__.py index f746aff9d8..54cd35515f 100644 --- a/openfl/federated/__init__.py +++ b/openfl/federated/__init__.py @@ -9,9 +9,8 @@ from .data import DataLoader # NOQA if importlib.util.find_spec('tensorflow'): - from .task import TensorFlowTaskRunner, TensorFlowTaskRunner_v1, KerasTaskRunner, FederatedModel # NOQA + from .task import TensorFlowTaskRunner, TensorFlowTaskRunnerV1, KerasTaskRunner, FederatedModel # NOQA from .data import TensorFlowDataLoader, KerasDataLoader, FederatedDataSet # NOQA if importlib.util.find_spec('torch'): from .task import PyTorchTaskRunner, FederatedModel # NOQA from .data import PyTorchDataLoader, FederatedDataSet # NOQA - diff --git a/openfl/federated/task/__init__.py b/openfl/federated/task/__init__.py index 0838fc802d..fa6628d047 100644 --- a/openfl/federated/task/__init__.py +++ b/openfl/federated/task/__init__.py @@ -16,7 +16,7 @@ from .runner import TaskRunner # NOQA if importlib.util.find_spec('tensorflow'): - from .runner_tf import TensorFlowTaskRunner, TensorFlowTaskRunner_v1 # NOQA + from .runner_tf import TensorFlowTaskRunner, TensorFlowTaskRunnerV1 # NOQA from .runner_keras import KerasTaskRunner # NOQA from .fl_model import FederatedModel # NOQA if importlib.util.find_spec('torch'): diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py index 030bed9675..8cd0cab6c3 100644 --- a/openfl/federated/task/runner_keras.py +++ b/openfl/federated/task/runner_keras.py @@ -82,8 +82,8 @@ def train_task(self, col_name, round_num, input_tensor_dict, for epoch in range(epochs): self.logger.info(f'Run {epoch} epoch of {round_num} round') results = self.train_(self.data_loader.get_train_loader(batch_size), - metrics=metrics, - **kwargs) + metrics=metrics, + **kwargs) # output metric tensors (scalar) origin = col_name diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py index eee5f14ecd..ddf36999d6 100644 --- a/openfl/federated/task/runner_tf.py +++ b/openfl/federated/task/runner_tf.py @@ -9,6 +9,9 @@ from openfl.utilities.split import split_tensor_dict_for_holdouts from .runner import TaskRunner +import tensorflow.compat.v1 +from tqdm import tqdm + class TensorFlowTaskRunner(TaskRunner): """The base model for Keras models in the federation.""" @@ -70,8 +73,8 @@ def train_task(self, col_name, round_num, input_tensor_dict, for epoch in range(epochs): self.logger.info(f'Run {epoch} epoch of {round_num} round') results = self.train_(self.data_loader.get_train_loader(batch_size), - metrics=metrics, - **kwargs) + metrics=metrics, + **kwargs) # output metric tensors (scalar) origin = col_name @@ -260,7 +263,7 @@ def _get_weights_names(obj, with_opt_vars): weight_names = [weight.name for weight in obj.weights] else: weight_names = [weight.name for weight in obj.variables] - + weight_names = [weight.name for weight in obj.weights] return weight_names @@ -296,7 +299,6 @@ def _get_weights_dict(obj, suffix='', with_opt_vars=False): weight_names = [weight.name for weight in obj.weights] weight_values = obj.get_weights() - for name, value in zip(weight_names, weight_values): weights_dict[name + suffix] = value return weights_dict @@ -315,7 +317,7 @@ def _set_weights_dict(obj, weights_dict, with_opt_vars=False): Returns: None """ - + if with_opt_vars: # When acquiring optimizer weights, check optimizer version. 
# Current optimizer does not use 'weights' attributes @@ -344,15 +346,15 @@ def get_tensor_dict(self, with_opt_vars, suffix=''): Returns: dict: The tensor dictionary. """ - + model_weights = self._get_weights_dict(self.model, suffix) if with_opt_vars: - + opt_weights = self._get_weights_dict(self.model.optimizer, suffix, with_opt_vars) model_weights.update(opt_weights) - + if len(opt_weights) == 0: self.logger.debug( "WARNING: We didn't find variables for the optimizer.") @@ -384,11 +386,11 @@ def set_tensor_dict(self, tensor_dict, with_opt_vars): if 'legacy' in self.model.optimizer.__class__.__module__: opt_weight_names = [ weight.name for weight in self.model.optimizer.weights - ] + ] else: opt_weight_names = [ weight.name for weight in self.model.optimizer.variables - ] + ] opt_weights_dict = { name: tensor_dict[name] for name in opt_weight_names @@ -565,11 +567,7 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): ] -import tensorflow.compat.v1 -from tqdm import tqdm - - -class TensorFlowTaskRunner_v1(TaskRunner): +class TensorFlowTaskRunnerV1(TaskRunner): """ Base class for TensorFlow models in the Federated Learning solution. @@ -999,4 +997,4 @@ def tf_set_tensor_dict(tensor_dict, session, variables, for k, v in tensor_dict.items(): session.run(assign_ops[k], feed_dict={placeholders[k]: v}) - return assign_ops, placeholders \ No newline at end of file + return assign_ops, placeholders From 05014d2cf34bf3535941116165306b5a2bcfc21a Mon Sep 17 00:00:00 2001 From: kta-intel Date: Fri, 7 Jun 2024 13:17:01 -0700 Subject: [PATCH 10/23] remove calls for legacy optimizer Signed-off-by: kta-intel --- openfl/federated/task/runner_tf.py | 35 ++++++------------------------ 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py index ddf36999d6..e13522666b 100644 --- a/openfl/federated/task/runner_tf.py +++ b/openfl/federated/task/runner_tf.py @@ -257,12 +257,7 @@ def _get_weights_names(obj, with_opt_vars): The weight name list """ if with_opt_vars: - # When acquiring optimizer weights, check optimizer version. - # Current optimizer does not use 'weights' attributes - if 'legacy' in obj.__class__.__module__: - weight_names = [weight.name for weight in obj.weights] - else: - weight_names = [weight.name for weight in obj.variables] + weight_names = [weight.name for weight in obj.variables] weight_names = [weight.name for weight in obj.weights] return weight_names @@ -287,14 +282,8 @@ def _get_weights_dict(obj, suffix='', with_opt_vars=False): weights_dict = {} if with_opt_vars: - # When acquiring optimizer weights, check optimizer version. - # Current optimizer does not use 'weights' or '.get_weights()' attributes - if 'legacy' in obj.__class__.__module__: - weight_names = [weight.name for weight in obj.weights] - weight_values = obj.get_weights() - else: - weight_names = [weight.name for weight in obj.variables] - weight_values = [weight.numpy() for weight in obj.variables] + weight_names = [weight.name for weight in obj.variables] + weight_values = [weight.numpy() for weight in obj.variables] else: weight_names = [weight.name for weight in obj.weights] weight_values = obj.get_weights() @@ -319,12 +308,7 @@ def _set_weights_dict(obj, weights_dict, with_opt_vars=False): """ if with_opt_vars: - # When acquiring optimizer weights, check optimizer version. 
-        # Current optimizer does not use 'weights' attributes
-        if 'legacy' in obj.__class__.__module__:
-            weight_names = [weight.name for weight in obj.weights]
-        else:
-            weight_names = [weight.name for weight in obj.variables]
+        weight_names = [weight.name for weight in obj.variables]
     else:
         weight_names = [weight.name for weight in obj.weights]
 
@@ -383,15 +367,10 @@ def set_tensor_dict(self, tensor_dict, with_opt_vars):
         model_weights_dict = {
             name: tensor_dict[name] for name in model_weight_names
         }
-        if 'legacy' in self.model.optimizer.__class__.__module__:
-            opt_weight_names = [
-                weight.name for weight in self.model.optimizer.weights
-            ]
-        else:
-            opt_weight_names = [
-                weight.name for weight in self.model.optimizer.variables
-            ]
+        opt_weight_names = [
+            weight.name for weight in self.model.optimizer.variables
+        ]
         opt_weights_dict = {
             name: tensor_dict[name] for name in opt_weight_names
         }
 

From 9003e8164ee8b5d3af566731fe72628c0ce24b3f Mon Sep 17 00:00:00 2001
From: kta-intel
Date: Fri, 7 Jun 2024 13:19:56 -0700
Subject: [PATCH 11/23] fix lint - trailing whitespaces

Signed-off-by: kta-intel
---
 .../tf_cnn_mnist/src/taskrunner.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py
index 3251c25171..da618fbb5f 100644
--- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py
+++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py
@@ -48,19 +48,19 @@ def build_model(self,
         """
 
         model = tf.keras.models.Sequential([
-            tf.keras.layers.Conv2D(16, 
-                                   kernel_size=(4, 4), 
-                                   strides=(2, 2), 
-                                   activation='relu', 
+            tf.keras.layers.Conv2D(16,
+                                   kernel_size=(4, 4),
+                                   strides=(2, 2),
+                                   activation='relu',
                                    input_shape=input_shape),
-            tf.keras.layers.Conv2D(32, 
-                                   kernel_size=(4, 4), 
-                                   strides=(2, 2), 
+            tf.keras.layers.Conv2D(32,
+                                   kernel_size=(4, 4),
+                                   strides=(2, 2),
                                    activation='relu'),
             tf.keras.layers.Flatten(),
-            tf.keras.layers.Dense(100, 
+            tf.keras.layers.Dense(100,
                                    activation='relu'),
-            tf.keras.layers.Dense(num_classes, 
+            tf.keras.layers.Dense(num_classes,
                                    activation='softmax')
         ])

From 3dc71709263147579e851f40aaf1f72769da5d0d Mon Sep 17 00:00:00 2001
From: kta-intel
Date: Thu, 11 Jul 2024 12:35:41 -0700
Subject: [PATCH 12/23] update tf_2dunet workspace to use tensorflow2 and
 remove legacy tensorflowV1 workspace

Signed-off-by: kta-intel
---
 openfl-workspace/tf_2dunet/README.md          |  30 +-
 openfl-workspace/tf_2dunet/plan/data.yaml     |   6 +-
 openfl-workspace/tf_2dunet/plan/plan.yaml     |  34 +-
 openfl-workspace/tf_2dunet/requirements.txt   |   2 +-
 openfl-workspace/tf_2dunet/src/brats_utils.py | 137 ------
 .../src/{nii_reader.py => dataloader.py}      | 163 ++++++-
 openfl-workspace/tf_2dunet/src/taskrunner.py  | 257 ++++++++++
 openfl-workspace/tf_2dunet/src/tf_2dunet.py   | 250 ----------
 .../tf_2dunet/src/tfbrats_inmemory.py         |  36 --
 .../tf_cnn_mnist/src/taskrunner.py            |  13 +-
 openfl/federated/__init__.py                  |   2 +-
 openfl/federated/task/__init__.py             |   2 +-
 openfl/federated/task/runner_tf.py            | 438 +-----------------
 13 files changed, 476 insertions(+), 894 deletions(-)
 delete mode 100644 openfl-workspace/tf_2dunet/src/brats_utils.py
 rename openfl-workspace/tf_2dunet/src/{nii_reader.py => dataloader.py} (63%)
 create mode 100644 openfl-workspace/tf_2dunet/src/taskrunner.py
 delete mode 100644 openfl-workspace/tf_2dunet/src/tf_2dunet.py
 delete mode 100644 openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py

diff --git a/openfl-workspace/tf_2dunet/README.md b/openfl-workspace/tf_2dunet/README.md
index 12dab8fc2e..a8e50ff464 100644
--- a/openfl-workspace/tf_2dunet/README.md
+++ b/openfl-workspace/tf_2dunet/README.md
@@ -14,18 +14,27 @@ To use a `tree` command, you have to install it first: `sudo apt-get install tree`
   - `HGG`: glioblastoma scans
   - `LGG`: lower grade glioma scans
 
-Let's pick `HGG`: `export SUBFOLDER=HGG`. The learning rate has been already tuned for this task, so you don't have to change it. If you pick `LGG`, all the next steps will be the same.
+Let's pick `HGG`: `export SUBFOLDER=MICCAI_BraTS_2019_Data_Training/HGG`. The learning rate has already been tuned for this task, so you don't have to change it. If you pick `LGG`, all the next steps will be the same.
 
 3) In order for each collaborator to use separate slice of data, we split main folder into `n` subfolders:
 ```bash
+#!/bin/bash
 cd $DATA_PATH/$SUBFOLDER
-i=0;
-for f in *;
-do
- d=dir_$(printf $((i%n))); # change n to number of data slices (number of collaborators in federation)
- mkdir -p $d;
- mv "$f" $d;
- let i++;
+
+n=2 # Set this to the number of directories you want to create
+
+# Get a list of all files and shuffle them
+files=($(ls | shuf))
+
+# Create the target directories if they don't exist
+for ((i=0; i<n; i++)); do
+  mkdir -p "dir_$i"
+done
+
+# Move the shuffled files into the directories in a round-robin fashion
+for i in "${!files[@]}"; do
+  mv "${files[$i]}" "dir_$((i % n))"
 done
 ```
diff --git a/openfl-workspace/tf_2dunet/requirements.txt b/openfl-workspace/tf_2dunet/requirements.txt
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/openfl-workspace/tf_2dunet/src/brats_utils.py b/openfl-workspace/tf_2dunet/src/brats_utils.py
deleted file mode 100644
index 653e26cbca..0000000000
--- a/openfl-workspace/tf_2dunet/src/brats_utils.py
+++ /dev/null
@@ -1,137 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-"""You may copy this file as the starting point of your own model."""
-
-import logging
-import os
-
-import numpy as np
-
-from .nii_reader import nii_reader
-
-logger = logging.getLogger(__name__)
-
-
-def train_val_split(features, labels, percent_train, shuffle):
-    """Train/validation splot of the BraTS dataset.
-
-    Splits incoming feature and labels into training and validation. The value
-    of shuffle determines whether shuffling occurs before the split is performed.
-
-    Args:
-        features: The input images
-        labels: The ground truth labels
-        percent_train (float): The percentage of the dataset that is training.
-        shuffle (bool): True = shuffle the dataset before the split
-
-    Returns:
-        train_features: The input images for the training dataset
-        train_labels: The ground truth labels for the training dataset
-        val_features: The input images for the validation dataset
-        val_labels: The ground truth labels for the validation dataset
-    """
-
-    def split(lst, idx):
-        """Split a Python list into 2 lists.
- - Args: - lst: The Python list to split - idx: The index where to split the list into 2 parts - - Returns: - Two lists - - """ - if idx < 0 or idx > len(lst): - raise ValueError('split was out of expected range.') - return lst[:idx], lst[idx:] - - nb_features = len(features) - nb_labels = len(labels) - if nb_features != nb_labels: - raise RuntimeError('Number of features and labels do not match.') - if shuffle: - new_order = np.random.permutation(np.arange(nb_features)) - features = features[new_order] - labels = labels[new_order] - split_idx = int(percent_train * nb_features) - train_features, val_features = split(lst=features, idx=split_idx) - train_labels, val_labels = split(lst=labels, idx=split_idx) - return train_features, train_labels, val_features, val_labels - - -def load_from_nifti(parent_dir, - percent_train, - shuffle, - channels_last=True, - task='whole_tumor', - **kwargs): - """Load the BraTS dataset from the NiFTI file format. - - Loads data from the parent directory (NIfTI files for whole brains are - assumed to be contained in subdirectories of the parent directory). - Performs a split of the data into training and validation, and the value - of shuffle determined whether shuffling is performed before this split - occurs - both split and shuffle are done in a way to - keep whole brains intact. The kwargs are passed to nii_reader. - - Args: - parent_dir: The parent directory for the BraTS data - percent_train (float): The percentage of the data to make the training dataset - shuffle (bool): True means shuffle the dataset order before the split - channels_last (bool): Input tensor uses channels as last dimension (Default is True) - task: Prediction task (Default is 'whole_tumor' prediction) - **kwargs: Variable arguments to pass to the function - - Returns: - train_features: The input images for the training dataset - train_labels: The ground truth labels for the training dataset - val_features: The input images for the validation dataset - val_labels: The ground truth labels for the validation dataset - - """ - path = os.path.join(parent_dir) - subdirs = os.listdir(path) - subdirs.sort() - if not subdirs: - raise SystemError(f'''{parent_dir} does not contain subdirectories. -Please make sure you have BraTS dataset downloaded -and located in data directory for this collaborator. 
-    ''')
-    subdir_paths = [os.path.join(path, subdir) for subdir in subdirs]
-
-    imgs_all = []
-    msks_all = []
-    for brain_path in subdir_paths:
-        these_imgs, these_msks = nii_reader(
-            brain_path=brain_path,
-            task=task,
-            channels_last=channels_last,
-            **kwargs
-        )
-        # the needed files where not present if a tuple of None is returned
-        if these_imgs is None:
-            logger.debug(f'Brain subdirectory: {brain_path} did not contain the needed files.')
-        else:
-            imgs_all.append(these_imgs)
-            msks_all.append(these_msks)
-
-    # converting to arrays to allow for numpy indexing used during split
-    imgs_all = np.array(imgs_all)
-    msks_all = np.array(msks_all)
-
-    # note here that each is a list of 155 slices per brain, and so the
-    # split keeps brains intact
-    imgs_all_train, msks_all_train, imgs_all_val, msks_all_val = train_val_split(
-        features=imgs_all,
-        labels=msks_all,
-        percent_train=percent_train,
-        shuffle=shuffle
-    )
-    # now concatenate the lists
-    imgs_train = np.concatenate(imgs_all_train, axis=0)
-    msks_train = np.concatenate(msks_all_train, axis=0)
-    imgs_val = np.concatenate(imgs_all_val, axis=0)
-    msks_val = np.concatenate(msks_all_val, axis=0)
-
-    return imgs_train, msks_train, imgs_val, msks_val
diff --git a/openfl-workspace/tf_2dunet/src/nii_reader.py b/openfl-workspace/tf_2dunet/src/dataloader.py
similarity index 63%
rename from openfl-workspace/tf_2dunet/src/nii_reader.py
rename to openfl-workspace/tf_2dunet/src/dataloader.py
index ba90a644b1..11b442c162 100644
--- a/openfl-workspace/tf_2dunet/src/nii_reader.py
+++ b/openfl-workspace/tf_2dunet/src/dataloader.py
@@ -1,13 +1,174 @@
-# Copyright (C) 2020-2021 Intel Corporation
+# Copyright (C) 2020-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """You may copy this file as the starting point of your own model."""
 
 import os
+import logging
 
 import nibabel as nib
 import numpy as np
 import numpy.ma as ma
+from openfl.federated import TensorFlowDataLoader
+
+
+logger = logging.getLogger(__name__)
+
+
+class TensorFlowBratsInMemory(TensorFlowDataLoader):
+    """TensorFlow Data Loader for the BraTS dataset."""
+
+    def __init__(self, data_path, batch_size, percent_train=0.8, pre_split_shuffle=True, num_classes=1,
+                 **kwargs):
+        """Initialize.
+
+        Args:
+            data_path: The file path for the BraTS dataset
+            batch_size (int): The batch size to use
+            percent_train (float): The percentage of the data to use for training (Default=0.8)
+            pre_split_shuffle (bool): True = shuffle the dataset before
+                                      performing the train/validate split (Default=True)
+            **kwargs: Additional arguments, passed to super init and load_from_nifti
+
+        Returns:
+            Data loader with BraTS data
+        """
+        super().__init__(batch_size, **kwargs)
+
+        X_train, y_train, X_valid, y_valid = load_from_nifti(parent_dir=data_path,
+                                                             percent_train=percent_train,
+                                                             shuffle=pre_split_shuffle,
+                                                             **kwargs)
+        self.X_train = X_train
+        self.y_train = y_train
+        self.X_valid = X_valid
+        self.y_valid = y_valid
+        self.num_classes = num_classes
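`TensorFlowBratsInMemory` delegates the actual loading and partitioning to `load_from_nifti` and the `train_val_split` helper that follows. As a quick, self-contained illustration of the split semantics (shuffle first when `pre_split_shuffle=True`, then a `percent_train` cut), here is a toy run on made-up arrays:

```python
import numpy as np

# Ten toy "brains" with matching labels.
features = np.arange(10)
labels = features * 100

# Shuffle before splitting, as pre_split_shuffle=True does.
order = np.random.permutation(len(features))
features, labels = features[order], labels[order]

# percent_train=0.8 -> first 8 entries train, last 2 validate.
split_idx = int(0.8 * len(features))
train_x, val_x = features[:split_idx], features[split_idx:]
train_y, val_y = labels[:split_idx], labels[split_idx:]
print(train_x.shape, val_x.shape)  # (8,) (2,)
```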
+
+
+def train_val_split(features, labels, percent_train, shuffle):
+    """Train/validation split of the BraTS dataset.
+
+    Splits incoming feature and labels into training and validation. The value
+    of shuffle determines whether shuffling occurs before the split is performed.
+
+    Args:
+        features: The input images
+        labels: The ground truth labels
+        percent_train (float): The percentage of the dataset that is training.
+        shuffle (bool): True = shuffle the dataset before the split
+
+    Returns:
+        train_features: The input images for the training dataset
+        train_labels: The ground truth labels for the training dataset
+        val_features: The input images for the validation dataset
+        val_labels: The ground truth labels for the validation dataset
+    """
+
+    def split(lst, idx):
+        """Split a Python list into 2 lists.
+
+        Args:
+            lst: The Python list to split
+            idx: The index where to split the list into 2 parts
+
+        Returns:
+            Two lists
+
+        """
+        if idx < 0 or idx > len(lst):
+            raise ValueError('split was out of expected range.')
+        return lst[:idx], lst[idx:]
+
+    nb_features = len(features)
+    nb_labels = len(labels)
+    if nb_features != nb_labels:
+        raise RuntimeError('Number of features and labels do not match.')
+    if shuffle:
+        new_order = np.random.permutation(np.arange(nb_features))
+        features = features[new_order]
+        labels = labels[new_order]
+    split_idx = int(percent_train * nb_features)
+    train_features, val_features = split(lst=features, idx=split_idx)
+    train_labels, val_labels = split(lst=labels, idx=split_idx)
+    return train_features, train_labels, val_features, val_labels
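Note that `train_val_split` operates on whole brains; `load_from_nifti` (next) only flattens to 2D slices after the split, so no brain contributes slices to both sets. A toy sketch of that order of operations, with made-up shapes (3 brains of 4 slices each):

```python
import numpy as np

# 3 brains x 4 slices x 2x2 pixels; split by brain first.
brains = np.random.rand(3, 4, 2, 2)
split_idx = int(0.8 * len(brains))  # 2 train brains, 1 validation brain
train_brains, val_brains = brains[:split_idx], brains[split_idx:]

# Only then flatten each side into a per-slice dataset.
train_slices = np.concatenate(train_brains, axis=0)
val_slices = np.concatenate(val_brains, axis=0)
print(train_slices.shape, val_slices.shape)  # (8, 2, 2) (4, 2, 2)
```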
+
+
+def load_from_nifti(parent_dir,
+                    percent_train,
+                    shuffle,
+                    channels_last=True,
+                    task='whole_tumor',
+                    **kwargs):
+    """Load the BraTS dataset from the NiFTI file format.
+
+    Loads data from the parent directory (NIfTI files for whole brains are
+    assumed to be contained in subdirectories of the parent directory).
+    Performs a split of the data into training and validation, and the value
+    of shuffle determines whether shuffling is performed before this split
+    occurs - both split and shuffle are done in a way to
+    keep whole brains intact. The kwargs are passed to nii_reader.
+
+    Args:
+        parent_dir: The parent directory for the BraTS data
+        percent_train (float): The percentage of the data to make the training dataset
+        shuffle (bool): True means shuffle the dataset order before the split
+        channels_last (bool): Input tensor uses channels as last dimension (Default is True)
+        task: Prediction task (Default is 'whole_tumor' prediction)
+        **kwargs: Variable arguments to pass to the function
+
+    Returns:
+        train_features: The input images for the training dataset
+        train_labels: The ground truth labels for the training dataset
+        val_features: The input images for the validation dataset
+        val_labels: The ground truth labels for the validation dataset
+
+    """
+    path = os.path.join(parent_dir)
+    subdirs = os.listdir(path)
+    subdirs.sort()
+    if not subdirs:
+        raise SystemError(f'''{parent_dir} does not contain subdirectories.
+Please make sure you have BraTS dataset downloaded
+and located in data directory for this collaborator.
+    ''')
+    subdir_paths = [os.path.join(path, subdir) for subdir in subdirs]
+
+    imgs_all = []
+    msks_all = []
+    for brain_path in subdir_paths:
+        these_imgs, these_msks = nii_reader(
+            brain_path=brain_path,
+            task=task,
+            channels_last=channels_last,
+            **kwargs
+        )
+        # the needed files were not present if a tuple of None is returned
+        if these_imgs is None:
+            logger.debug(f'Brain subdirectory: {brain_path} did not contain the needed files.')
+        else:
+            imgs_all.append(these_imgs)
+            msks_all.append(these_msks)
+
+    # converting to arrays to allow for numpy indexing used during split
+    imgs_all = np.array(imgs_all)
+    msks_all = np.array(msks_all)
+
+    # note here that each is a list of 155 slices per brain, and so the
+    # split keeps brains intact
+    imgs_all_train, msks_all_train, imgs_all_val, msks_all_val = train_val_split(
+        features=imgs_all,
+        labels=msks_all,
+        percent_train=percent_train,
+        shuffle=shuffle
+    )
+    # now concatenate the lists
+    imgs_train = np.concatenate(imgs_all_train, axis=0)
+    msks_train = np.concatenate(msks_all_train, axis=0)
+    imgs_val = np.concatenate(imgs_all_val, axis=0)
+    msks_val = np.concatenate(msks_all_val, axis=0)
+
+    return imgs_train, msks_train, imgs_val, msks_val
 
 
 def parse_segments(seg, msk_modes):
diff --git a/openfl-workspace/tf_2dunet/src/taskrunner.py b/openfl-workspace/tf_2dunet/src/taskrunner.py
new file mode 100644
index 0000000000..1d3baa690d
--- /dev/null
+++ b/openfl-workspace/tf_2dunet/src/taskrunner.py
@@ -0,0 +1,257 @@
+# Copyright (C) 2020-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""You may copy this file as the starting point of your own model."""
+
+import numpy as np
+import tensorflow as tf
+
+from openfl.utilities import Metric
+from openfl.federated import TensorFlowTaskRunner
+
+
+class TensorFlow2DUNet(TensorFlowTaskRunner):
+    """Initialize.
+
+    Args:
+        **kwargs: Additional parameters to pass to the function
+
+    """
+
+    def __init__(self, initial_filters=16,
+                 depth=5,
+                 batch_norm=True,
+                 use_upsampling=False,
+                 **kwargs):
+        """Initialize.
+
+        Args:
+            **kwargs: Additional parameters to pass to the function
+
+        """
+        super().__init__(**kwargs)
+
+        self.model = self.create_model(
+            input_shape=self.feature_shape,
+            n_cl_out=self.data_loader.num_classes,
+            initial_filters=initial_filters,
+            use_upsampling=use_upsampling,
+            depth=depth,
+            batch_norm=batch_norm,
+            **kwargs
+        )
+        self.initialize_tensorkeys_for_functions()
+
+        self.model.summary(print_fn=self.logger.info, line_length=120)
+
+    def create_model(self,
+                     input_shape,
+                     n_cl_out=1,
+                     use_upsampling=False,
+                     dropout=0.2,
+                     print_summary=True,
+                     seed=816,
+                     depth=5,
+                     dropout_at=(2, 3),
+                     initial_filters=16,
+                     batch_norm=True,
+                     **kwargs):
+        """Create the TensorFlow 2D U-Net CNN model.
+
+        Args:
+            input_shape (list): input shape of the data
+            n_cl_out (int): Number of output classes in label (Default=1)
+            **kwargs: Additional parameters to pass to the function
+
+        """
+
+        model = build_model(input_shape,
+                            n_cl_out=n_cl_out,
+                            use_upsampling=use_upsampling,
+                            dropout=dropout,
+                            print_summary=print_summary,
+                            seed=seed,
+                            depth=depth,
+                            dropout_at=dropout_at,
+                            initial_filters=initial_filters,
+                            batch_norm=batch_norm)
+
+        model.compile(
+            loss=dice_loss,
+            optimizer=tf.keras.optimizers.Adam(),
+            metrics=[dice_coef, soft_dice_coef],
+        )
+
+        return model
+
+    def train_(self, batch_generator, metrics: list = None, **kwargs):
+        """Train single epoch.
+
+        Override this function for custom training.
+
+        Args:
+            batch_generator: Generator of training batches.
+                Each batch is a tuple of N train images and N train labels
+                where N is the batch size of the DataLoader of the current TaskRunner instance.
+
+            metrics: Names of metrics to save.
+            **kwargs: Additional parameters passed to model.fit (e.g. epochs).
+        """
+        if metrics is None:
+            metrics = []
+
+        history = self.model.fit(batch_generator,
+                                 verbose=1,
+                                 **kwargs)
+        results = []
+        for metric in metrics:
+            value = np.mean([history.history[metric]])
+            results.append(Metric(name=metric, value=np.array(value)))
+        return results
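The Dice functions that follow drive both the loss and the reported metrics. A quick numeric sanity check of the coefficient on a single 2x2 mask pair, where one of two predicted pixels overlaps the target, so the expected Dice is 2*1/(2+2) = 0.5 (same smoothing constant as the code below):

```python
import tensorflow as tf

target = tf.constant([[[1., 0.], [1., 0.]]])      # 2 positive pixels
prediction = tf.constant([[[1., 0.], [0., 1.]]])  # 1 overlaps, 1 misses

intersection = tf.reduce_sum(target * prediction, axis=(1, 2))
union = tf.reduce_sum(target + prediction, axis=(1, 2))
dice = (2. * intersection + 0.0001) / (union + 0.0001)
print(float(tf.reduce_mean(dice)))  # ~0.5
```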
+
+
+def dice_coef(target, prediction, axis=(1, 2), smooth=0.0001):
+    """
+    Sørensen Dice.
+
+    Returns
+    -------
+    dice coefficient (float)
+    """
+    prediction = tf.round(prediction)  # Round to 0 or 1
+
+    intersection = tf.reduce_sum(target * prediction, axis=axis)
+    union = tf.reduce_sum(target + prediction, axis=axis)
+    numerator = tf.constant(2.) * intersection + smooth
+    denominator = union + smooth
+    coef = numerator / denominator
+
+    return tf.reduce_mean(coef)
+
+
+def soft_dice_coef(target, prediction, axis=(1, 2), smooth=0.0001):
+    """
+    Soft Sørensen Dice.
+
+    Does not round the predictions to either 0 or 1.
+
+    Returns
+    -------
+    soft dice coefficient (float)
+    """
+    intersection = tf.reduce_sum(target * prediction, axis=axis)
+    union = tf.reduce_sum(target + prediction, axis=axis)
+    numerator = tf.constant(2.) * intersection + smooth
+    denominator = union + smooth
+    coef = numerator / denominator
+
+    return tf.reduce_mean(coef)
+
+
+def dice_loss(target, prediction, axis=(1, 2), smooth=0.0001):
+    """
+    Sørensen (Soft) Dice loss.
+
+    Using -log(Dice) as the loss since it is better behaved.
+    Also, the log allows avoidance of the division which
+    can help prevent underflow when the numbers are very small.
+
+    Returns
+    -------
+    dice loss (float)
+    """
+    intersection = tf.reduce_sum(prediction * target, axis=axis)
+    p = tf.reduce_sum(prediction, axis=axis)
+    t = tf.reduce_sum(target, axis=axis)
+    numerator = tf.reduce_mean(intersection + smooth)
+    denominator = tf.reduce_mean(t + p + smooth)
+    dice_loss = -tf.math.log(2. * numerator) + tf.math.log(denominator)
+
+    return dice_loss
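`build_model`, defined next, raises a `ValueError` when the input crop dimension is not a multiple of `2**depth`, since each resolution level halves the feature map on the way down and doubles it back on the way up. A quick check of which crop sizes a depth-5 network accepts:

```python
# Each of the `depth` resolution stages halves the spatial dims, so the
# input crop must be divisible by 2**depth to survive the round trip.
depth = 5
for dim in (128, 144, 160):
    print(dim, dim % 2 ** depth == 0)  # 128 True, 144 False, 160 True
```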
+
+
+def build_model(input_shape,
+                n_cl_out=1,
+                use_upsampling=False,
+                dropout=0.2,
+                seed=816,
+                depth=5,
+                dropout_at=(2, 3),
+                initial_filters=16,
+                batch_norm=True,
+                **kwargs):
+    """Build the TensorFlow model.
+
+    Args:
+        input_shape: input shape of the model
+        use_upsampling (bool): True = use bilinear interpolation;
+                               False = use transposed convolution (Default=False)
+        n_cl_out (int): Number of channels in output layer (Default=1)
+        dropout (float): Dropout percentage (Default=0.2)
+        seed: random seed (Default=816)
+        depth (int): Number of max pooling layers in encoder (Default=5)
+        dropout_at: Layers to perform dropout after (Default=[2,3])
+        initial_filters (int): Number of filters in first convolutional
+            layer (Default=16)
+        batch_norm (bool): True = use batch normalization (Default=True)
+        **kwargs: Additional parameters to pass to the function
+    """
+    if (input_shape[0] % (2**depth)) > 0:
+        raise ValueError(f'Crop dimension must be a multiple of 2^(depth of U-Net) = {2**depth}')
+
+    inputs = tf.keras.layers.Input(input_shape, name='brats_mr_image')
+
+    activation = tf.keras.activations.relu
+
+    params = {'kernel_size': (3, 3), 'activation': activation,
+              'padding': 'same',
+              'kernel_initializer': tf.keras.initializers.he_uniform(seed=seed)}
+
+    convb_layers = {}
+
+    net = inputs
+    filters = initial_filters
+    for i in range(depth):
+        name = f'conv{i + 1}a'
+        net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net)
+        if i in dropout_at:
+            net = tf.keras.layers.Dropout(dropout)(net)
+        name = f'conv{i + 1}b'
+        net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net)
+        if batch_norm:
+            net = tf.keras.layers.BatchNormalization()(net)
+        convb_layers[name] = net
+        # only pool if not last level
+        if i != depth - 1:
+            name = f'pool{i + 1}'
+            net = tf.keras.layers.MaxPooling2D(name=name, pool_size=(2, 2))(net)
+            filters *= 2
+
+    # do the up levels
+    filters //= 2
+    for i in range(depth - 1):
+        if use_upsampling:
+            up = tf.keras.layers.UpSampling2D(
+                name=f'up{depth + i + 1}', size=(2, 2))(net)
+        else:
+            up = tf.keras.layers.Conv2DTranspose(name=f'transConv{depth + i + 1}',
+                                                 filters=filters,
+                                                 kernel_size=(2, 2),
+                                                 strides=(2, 2),
+                                                 padding='same')(net)
+        net = tf.keras.layers.concatenate(
+            [up, convb_layers[f'conv{depth - i - 1}b']],
+            axis=-1
+        )
+        net = tf.keras.layers.Conv2D(
+            name=f'conv{depth + i + 1}a',
+            filters=filters, **params)(net)
+        net = tf.keras.layers.Conv2D(
+            name=f'conv{depth + i + 1}b',
+            filters=filters, **params)(net)
+        filters //= 2
+
+    net = tf.keras.layers.Conv2D(name='prediction', filters=n_cl_out,
+                                 kernel_size=(1, 1),
+                                 activation='sigmoid')(net)
+
+    model = tf.keras.models.Model(inputs=[inputs], outputs=[net])
+
+    return model
\ No newline at end of file
diff --git a/openfl-workspace/tf_2dunet/src/tf_2dunet.py b/openfl-workspace/tf_2dunet/src/tf_2dunet.py
deleted file mode 100644
index 54c2ae2a89..0000000000
--- a/openfl-workspace/tf_2dunet/src/tf_2dunet.py
+++ /dev/null
@@ -1,250 +0,0 @@
-# Copyright (C) 2020-2021 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""You may copy this file as the starting point of your own model."""
-
-import tensorflow.compat.v1 as tf
-
-from openfl.federated import TensorFlowTaskRunnerV1
-
-tf.disable_v2_behavior()
-
-
-class TensorFlow2DUNet(TensorFlowTaskRunnerV1):
-    """Initialize.
- - Args: - **kwargs: Additional parameters to pass to the function - - """ - super().__init__(**kwargs) - - self.create_model(**kwargs) - self.initialize_tensorkeys_for_functions() - - def create_model(self, - training_smoothing=32.0, - validation_smoothing=1.0, - **kwargs): - """Create the TensorFlow 2D U-Net model. - - Args: - training_smoothing (float): (Default=32.0) - validation_smoothing (float): (Default=1.0) - **kwargs: Additional parameters to pass to the function - - """ - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.intra_op_parallelism_threads = 112 - config.inter_op_parallelism_threads = 1 - self.sess = tf.Session(config=config) - - self.X = tf.placeholder(tf.float32, self.input_shape) - self.y = tf.placeholder(tf.float32, self.input_shape) - self.output = define_model(self.X, use_upsampling=True, **kwargs) - - self.loss = dice_coef_loss(self.y, self.output, smooth=training_smoothing) - self.loss_name = dice_coef_loss.__name__ - self.validation_metric = dice_coef( - self.y, self.output, smooth=validation_smoothing) - self.validation_metric_name = dice_coef.__name__ - - self.global_step = tf.train.get_or_create_global_step() - - self.tvars = tf.trainable_variables() - - self.optimizer = tf.train.RMSPropOptimizer(1e-2) - - self.gvs = self.optimizer.compute_gradients(self.loss, self.tvars) - self.train_step = self.optimizer.apply_gradients(self.gvs, - global_step=self.global_step) - - self.opt_vars = self.optimizer.variables() - - # FIXME: Do we really need to share the opt_vars? - # Two opt_vars for one tvar: gradient and square sum for RMSprop. - self.fl_vars = self.tvars + self.opt_vars - - self.initialize_globals() - - -def dice_coef(y_true, y_pred, smooth=1.0, **kwargs): - """Dice coefficient. - - Calculate the Dice Coefficient - - Args: - y_true: Ground truth annotation array - y_pred: Prediction array from model - smooth (float): Laplace smoothing factor (Default=1.0) - **kwargs: Additional parameters to pass to the function - - Returns: - float: Dice cofficient metric - - """ - intersection = tf.reduce_sum(y_true * y_pred, axis=[1, 2, 3]) - coef = ( - (tf.constant(2.) * intersection + tf.constant(smooth)) - / (tf.reduce_sum(y_true, axis=[1, 2, 3]) - + tf.reduce_sum(y_pred, axis=[1, 2, 3]) + tf.constant(smooth)) - ) - return tf.reduce_mean(coef) - - -def dice_coef_loss(y_true, y_pred, smooth=1.0, **kwargs): - """Dice coefficient loss. 
- - Calculate the -log(Dice Coefficient) loss - - Args: - y_true: Ground truth annotation array - y_pred: Prediction array from model - smooth (float): Laplace smoothing factor (Default=1.0) - **kwargs: Additional parameters to pass to the function - - Returns: - float: -log(Dice cofficient) metric - - """ - intersection = tf.reduce_sum(y_true * y_pred, axis=(1, 2, 3)) - - term1 = -tf.log(tf.constant(2.0) * intersection + smooth) - term2 = tf.log(tf.reduce_sum(y_true, axis=(1, 2, 3)) - + tf.reduce_sum(y_pred, axis=(1, 2, 3)) + smooth) - - term1 = tf.reduce_mean(term1) - term2 = tf.reduce_mean(term2) - - loss = term1 + term2 - - return loss - - -CHANNEL_LAST = True -if CHANNEL_LAST: - concat_axis = -1 - data_format = 'channels_last' -else: - concat_axis = 1 - data_format = 'channels_first' - -tf.keras.backend.set_image_data_format(data_format) - - -def define_model(input_tensor, - use_upsampling=False, - n_cl_out=1, - dropout=0.2, - print_summary=True, - activation_function='relu', - seed=0xFEEDFACE, - depth=5, - dropout_at=None, - initial_filters=32, - batch_norm=True, - **kwargs): - """Define the TensorFlow model. - - Args: - input_tensor: input shape ot the model - use_upsampling (bool): True = use bilinear interpolation; - False = use transposed convolution (Default=False) - n_cl_out (int): Number of channels in input layer (Default=1) - dropout (float): Dropout percentage (Default=0.2) - print_summary (bool): True = print the model summary (Default = True) - activation_function: The activation function to use after convolutional - layers (Default='relu') - seed: random seed (Default=0xFEEDFACE) - depth (int): Number of max pooling layers in encoder (Default=5) - dropout_at: Layers to perform dropout after (Default=[2,3]) - initial_filters (int): Number of filters in first convolutional - layer (Default=32) - batch_norm (bool): True = use batch normalization (Default=True) - **kwargs: Additional parameters to pass to the function - - """ - if dropout_at is None: - dropout_at = [2, 3] - # Set keras learning phase to train - tf.keras.backend.set_learning_phase(True) - - # Don't initialize variables on the fly - tf.keras.backend.manual_variable_initialization(False) - - inputs = tf.keras.layers.Input(tensor=input_tensor, name='Images') - - if activation_function == 'relu': - activation = tf.nn.relu - elif activation_function == 'leakyrelu': - activation = tf.nn.leaky_relu - - params = { - 'activation': activation, - 'data_format': data_format, - 'kernel_initializer': tf.keras.initializers.he_uniform(seed=seed), - 'kernel_size': (3, 3), - 'padding': 'same', - } - - convb_layers = {} - - net = inputs - filters = initial_filters - for i in range(depth): - name = f'conv{i + 1}a' - net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net) - if i in dropout_at: - net = tf.keras.layers.Dropout(dropout)(net) - name = f'conv{i + 1}b' - net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net) - if batch_norm: - net = tf.keras.layers.BatchNormalization()(net) - convb_layers[name] = net - # only pool if not last level - if i != depth - 1: - name = f'pool{i + 1}' - net = tf.keras.layers.MaxPooling2D(name=name, pool_size=(2, 2))(net) - filters *= 2 - - # do the up levels - filters //= 2 - for i in range(depth - 1): - if use_upsampling: - up = tf.keras.layers.UpSampling2D( - name=f'up{depth + i + 1}', size=(2, 2))(net) - else: - up = tf.keras.layers.Conv2DTranspose( - name='transConv6', filters=filters, data_format=data_format, - kernel_size=(2, 2), strides=(2, 2), 
padding='same')(net) - net = tf.keras.layers.concatenate( - [up, convb_layers[f'conv{depth - i - 1}b']], - axis=concat_axis - ) - net = tf.keras.layers.Conv2D( - name=f'conv{depth + i + 1}a', - filters=filters, **params)(net) - net = tf.keras.layers.Conv2D( - name=f'conv{depth + i + 1}b', - filters=filters, **params)(net) - filters //= 2 - - net = tf.keras.layers.Conv2D(name='Mask', filters=n_cl_out, - kernel_size=(1, 1), data_format=data_format, - activation='sigmoid')(net) - - model = tf.keras.models.Model(inputs=[inputs], outputs=[net]) - - if print_summary: - print(model.summary()) - - return net diff --git a/openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py b/openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py deleted file mode 100644 index 49b4484fc2..0000000000 --- a/openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from openfl.federated import TensorFlowDataLoader -from .brats_utils import load_from_nifti - - -class TensorFlowBratsInMemory(TensorFlowDataLoader): - """TensorFlow Data Loader for the BraTS dataset.""" - - def __init__(self, data_path, batch_size, percent_train=0.8, pre_split_shuffle=True, **kwargs): - """Initialize. - - Args: - data_path: The file path for the BraTS dataset - batch_size (int): The batch size to use - percent_train (float): The percentage of the data to use for training (Default=0.8) - pre_split_shuffle (bool): True= shuffle the dataset before - performing the train/validate split (Default=True) - **kwargs: Additional arguments, passed to super init and load_from_nifti - - Returns: - Data loader with BraTS data - """ - super().__init__(batch_size, **kwargs) - - X_train, y_train, X_valid, y_valid = load_from_nifti(parent_dir=data_path, - percent_train=percent_train, - shuffle=pre_split_shuffle, - **kwargs) - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py index da618fbb5f..ea11110edd 100644 --- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py @@ -83,18 +83,7 @@ def train_(self, batch_generator, metrics: list = None, **kwargs): epochs: Number of epochs to train. metrics: Names of metrics to save. """ - if metrics is None: - metrics = [] - - model_metrics_names = self.model.metrics_names - - for param in metrics: - if param not in model_metrics_names: - raise ValueError( - f'TensorFlowTaskRunner does not support specifying new metrics. 
' - f'Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}' - ) - + history = self.model.fit(batch_generator, verbose=1, **kwargs) diff --git a/openfl/federated/__init__.py b/openfl/federated/__init__.py index 54cd35515f..7849172bf4 100644 --- a/openfl/federated/__init__.py +++ b/openfl/federated/__init__.py @@ -9,7 +9,7 @@ from .data import DataLoader # NOQA if importlib.util.find_spec('tensorflow'): - from .task import TensorFlowTaskRunner, TensorFlowTaskRunnerV1, KerasTaskRunner, FederatedModel # NOQA + from .task import TensorFlowTaskRunner, KerasTaskRunner, FederatedModel # NOQA from .data import TensorFlowDataLoader, KerasDataLoader, FederatedDataSet # NOQA if importlib.util.find_spec('torch'): from .task import PyTorchTaskRunner, FederatedModel # NOQA diff --git a/openfl/federated/task/__init__.py b/openfl/federated/task/__init__.py index fa6628d047..a0837db687 100644 --- a/openfl/federated/task/__init__.py +++ b/openfl/federated/task/__init__.py @@ -16,7 +16,7 @@ from .runner import TaskRunner # NOQA if importlib.util.find_spec('tensorflow'): - from .runner_tf import TensorFlowTaskRunner, TensorFlowTaskRunnerV1 # NOQA + from .runner_tf import TensorFlowTaskRunner # NOQA from .runner_keras import KerasTaskRunner # NOQA from .fl_model import FederatedModel # NOQA if importlib.util.find_spec('torch'): diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py index e13522666b..56b61c5b64 100644 --- a/openfl/federated/task/runner_tf.py +++ b/openfl/federated/task/runner_tf.py @@ -9,9 +9,6 @@ from openfl.utilities.split import split_tensor_dict_for_holdouts from .runner import TaskRunner -import tensorflow.compat.v1 -from tqdm import tqdm - class TensorFlowTaskRunner(TaskRunner): """The base model for Keras models in the federation.""" @@ -543,437 +540,4 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): self.required_tensorkeys_for_function['validate_task']['apply=global'] += [ TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) for tensor_name in local_model_dict_val - ] - - -class TensorFlowTaskRunnerV1(TaskRunner): - """ - Base class for TensorFlow models in the Federated Learning solution. - - child classes should have __init__ function signature (self, data, kwargs), - and should overwrite at least the following while defining the model - """ - - def __init__(self, **kwargs): - """ - Initialize. 
- - Args: - **kwargs: Additional parameters to pass to the function - """ - tensorflow.compat.v1.disable_v2_behavior() - - super().__init__(**kwargs) - - self.assign_ops = None - self.placeholders = None - - self.tvar_assign_ops = None - self.tvar_placeholders = None - - # construct the shape needed for the input features - self.input_shape = (None,) + self.data_loader.get_feature_shape() - - # Required tensorkeys for all public functions in TensorFlowTaskRunner - self.required_tensorkeys_for_function = {} - - # Required tensorkeys for all public functions in TensorFlowTaskRunner - self.required_tensorkeys_for_function = {} - - # tensorflow session - self.sess = None - # input featrures to the model - self.X = None - # input labels to the model - self.y = None - # optimizer train step operation - self.train_step = None - # model loss function - self.loss = None - # model output tensor - self.output = None - # function used to validate the model outputs against labels - self.validation_metric = None - # tensorflow trainable variables - self.tvars = None - # self.optimizer.variables() once self.optimizer is defined - self.opt_vars = None - # self.tvars + self.opt_vars - self.fl_vars = None - - def rebuild_model(self, round_num, input_tensor_dict, validation=False): - """ - Parse tensor names and update weights of model. Handles the optimizer treatment. - - Returns: - None - """ - if self.opt_treatment == 'RESET': - self.reset_opt_vars() - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - elif (round_num > 0 and self.opt_treatment == 'CONTINUE_GLOBAL' - and not validation): - self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) - else: - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - - def train_task(self, col_name, round_num, input_tensor_dict, - epochs=1, use_tqdm=False, **kwargs): - """ - Perform the training. - - Is expected to perform draws randomly, without replacement until data is exausted. Then - data is replaced and shuffled and draws continue. 
- - Args: - use_tqdm (bool): True = use tqdm to print a progress - bar (Default=False) - epochs (int): Number of epochs to train - Returns: - float: loss metric - """ - batch_size = self.data_loader.batch_size - - if kwargs['batch_size']: - batch_size = kwargs['batch_size'] - - # rebuild model with updated weights - self.rebuild_model(round_num, input_tensor_dict) - - tensorflow.compat.v1.keras.backend.set_learning_phase(True) - losses = [] - - for epoch in range(epochs): - self.logger.info(f'Run {epoch} epoch of {round_num} round') - # get iterator for batch draws (shuffling happens here) - gen = self.data_loader.get_train_loader(batch_size) - if use_tqdm: - gen = tqdm.tqdm(gen, desc='training epoch') - - for (X, y) in gen: - losses.append(self.train_batch(X, y)) - - # Output metric tensors (scalar) - origin = col_name - tags = ('trained',) - output_metric_dict = { - TensorKey( - self.loss_name, origin, round_num, True, ('metric',) - ): np.array(np.mean(losses)) - } - - # output model tensors (Doesn't include TensorKey) - output_model_dict = self.get_tensor_dict(with_opt_vars=True) - global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, - **self.tensor_dict_split_fn_kwargs - ) - - # Create global tensorkeys - global_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num, False, tags): - nparray for tensor_name, nparray in global_model_dict.items() - } - # Create tensorkeys that should stay local - local_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num, False, tags): - nparray for tensor_name, nparray in local_model_dict.items() - } - # The train/validate aggregated function of the next round will - # look for the updated model parameters. - # This ensures they will be resolved locally - next_local_tensorkey_model_dict = { - TensorKey( - tensor_name, origin, round_num + 1, False, ('model',) - ): nparray for tensor_name, nparray in local_model_dict.items()} - - global_tensor_dict = { - **output_metric_dict, - **global_tensorkey_model_dict - } - local_tensor_dict = { - **local_tensorkey_model_dict, - **next_local_tensorkey_model_dict - } - - # Update the required tensors if they need to be pulled from - # the aggregator - # TODO this logic can break if different collaborators have different - # roles between rounds. - # For example, if a collaborator only performs validation in the first - # round but training in the second, it has no way of knowing the - # optimizer state tensor names to request from the aggregator because - # these are only created after training occurs. A work around could - # involve doing a single epoch of training on random data to get the - # optimizer names, and then throwing away the model. - if self.opt_treatment == 'CONTINUE_GLOBAL': - self.initialize_tensorkeys_for_functions(with_opt_vars=True) - - return global_tensor_dict, local_tensor_dict - - def train_batch(self, X, y): - """ - Train the model on a single batch. - - Args: - X: Input to the model - y: Ground truth label to the model - - Returns: - float: loss metric - """ - feed_dict = {self.X: X, self.y: y} - - # run the train step and return the loss - _, loss = self.sess.run([self.train_step, self.loss], feed_dict=feed_dict) - - return loss - - def validate_task(self, col_name, round_num, - input_tensor_dict, use_tqdm=False, **kwargs): - """ - Run validation. 
- - Returns: - dict: {: } - """ - batch_size = self.data_loader.batch_size - - if kwargs['batch_size']: - batch_size = kwargs['batch_size'] - - self.rebuild_model(round_num, input_tensor_dict, validation=True) - - tensorflow.compat.v1.keras.backend.set_learning_phase(False) - - score = 0 - - gen = self.data_loader.get_valid_loader(batch_size) - if use_tqdm: - gen = tqdm.tqdm(gen, desc='validating') - - for X, y in gen: - weight = X.shape[0] / self.data_loader.get_valid_data_size() - _, s = self.validate_(X, y) - score += s * weight - - origin = col_name - suffix = 'validate' - if kwargs['apply'] == 'local': - suffix += '_local' - else: - suffix += '_agg' - tags = ('metric', suffix) - output_tensor_dict = { - TensorKey( - self.validation_metric_name, origin, round_num, True, tags - ): np.array(score)} - - # return empty dict for local metrics - return output_tensor_dict, {} - - def validate_(self, X, y): - """Validate the model on a single local batch. - - Args: - X: Input to the model - y: Ground truth label to the model - - Returns: - float: loss metric - - """ - feed_dict = {self.X: X, self.y: y} - - return self.sess.run( - [self.output, self.validation_metric], feed_dict=feed_dict) - - def get_tensor_dict(self, with_opt_vars=True): - """Get the dictionary weights. - - Get the weights from the tensor - - Args: - with_opt_vars (bool): Specify if we also want to get the variables - of the optimizer - - Returns: - dict: The weight dictionary {: } - - """ - if with_opt_vars is True: - variables = self.fl_vars - else: - variables = self.tvars - - # FIXME: do this in one call? - return {var.name: val for var, val in zip( - variables, self.sess.run(variables))} - - def set_tensor_dict(self, tensor_dict, with_opt_vars): - """Set the tensor dictionary. - - Set the model weights with a tensor - dictionary: {: }. - - Args: - tensor_dict (dict): The model weights dictionary - with_opt_vars (bool): Specify if we also want to set the variables - of the optimizer - - Returns: - None - """ - if with_opt_vars: - self.assign_ops, self.placeholders = tf_set_tensor_dict( - tensor_dict, self.sess, self.fl_vars, - self.assign_ops, self.placeholders - ) - else: - self.tvar_assign_ops, self.tvar_placeholders = tf_set_tensor_dict( - tensor_dict, - self.sess, - self.tvars, - self.tvar_assign_ops, - self.tvar_placeholders - ) - - def reset_opt_vars(self): - """Reinitialize the optimizer variables.""" - for v in self.opt_vars: - v.initializer.run(session=self.sess) - - def initialize_globals(self): - """Initialize Global Variables. - - Initialize all global variables - - Returns: - None - """ - self.sess.run(tensorflow.compat.v1.global_variables_initializer()) - - def _get_weights_names(self, with_opt_vars=True): - """Get the weights. - - Args: - with_opt_vars (bool): Specify if we also want to get the variables - of the optimizer. - - Returns: - list : The weight names list - """ - if with_opt_vars is True: - variables = self.fl_vars - else: - variables = self.tvars - - return [var.name for var in variables] - - def get_required_tensorkeys_for_function(self, func_name, **kwargs): - """ - Get the required tensors for specified function that could be called as part of a task. - - By default, this is just all of the layers and optimizer of the model. 
- - Returns: - list : [TensorKey] - """ - if func_name == 'validate': - local_model = 'apply=' + str(kwargs['apply']) - return self.required_tensorkeys_for_function[func_name][local_model] - else: - return self.required_tensorkeys_for_function[func_name] - - def initialize_tensorkeys_for_functions(self, with_opt_vars=False): - """ - Set the required tensors for all publicly accessible methods \ - that could be called as part of a task. - - By default, this is just all of the layers and optimizer of the model. - Custom tensors should be added to this function - - """ - # TODO there should be a way to programmatically iterate through - # all of the methods in the class and declare the tensors. - # For now this is done manually - - output_model_dict = self.get_tensor_dict(with_opt_vars=with_opt_vars) - global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, - **self.tensor_dict_split_fn_kwargs - ) - if not with_opt_vars: - global_model_dict_val = global_model_dict - local_model_dict_val = local_model_dict - else: - output_model_dict = self.get_tensor_dict(with_opt_vars=False) - global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts( - self.logger, - output_model_dict, - **self.tensor_dict_split_fn_kwargs - ) - - self.required_tensorkeys_for_function['train_task'] = [ - TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) - for tensor_name in global_model_dict] - self.required_tensorkeys_for_function['train_task'] += [ - TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) - for tensor_name in local_model_dict] - - # Validation may be performed on local or aggregated (global) - # model, so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function['validate_task'] = {} - # TODO This is not stateless. The optimizer will not be - self.required_tensorkeys_for_function['validate_task']['apply=local'] = [ - TensorKey(tensor_name, 'LOCAL', 0, False, ('trained',)) - for tensor_name in { - **global_model_dict_val, - **local_model_dict_val - } - ] - self.required_tensorkeys_for_function['validate']['apply=global'] = [ - TensorKey(tensor_name, 'GLOBAL', 0, False, ('model',)) - for tensor_name in global_model_dict_val - ] - self.required_tensorkeys_for_function['validate']['apply=global'] += [ - TensorKey(tensor_name, 'LOCAL', 0, False, ('model',)) - for tensor_name in local_model_dict_val - ] - - -# FIXME: what's a nicer construct than this? ugly interface. Perhaps we -# get an object with an assumed interface that lets is set/get these? -# Note that this will return the assign_ops and placeholder nodes it uses -# if called with None, it will create them. -# to avoid inflating the graph, caller should keep these and pass them back -# What if we want to set a different group of vars in the middle? -# It is good if it is the subset of the original variables. -def tf_set_tensor_dict(tensor_dict, session, variables, - assign_ops=None, placeholders=None): - """Tensorflow set tensor dictionary. 
-
-    Args:
-        tensor_dict: Dictionary of tensors
-        session: TensorFlow session
-        variables: TensorFlow variables
-        assign_ops: TensorFlow operations (Default=None)
-        placeholders: TensorFlow placeholders (Default=None)
-
-    Returns:
-        assign_ops, placeholders
-
-    """
-    if placeholders is None:
-        placeholders = {
-            v.name: tensorflow.compat.v1.placeholder(v.dtype, shape=v.shape) for v in variables
-        }
-    if assign_ops is None:
-        assign_ops = {
-            v.name: tensorflow.compat.v1.assign(v, placeholders[v.name]) for v in variables
-        }
-
-    for k, v in tensor_dict.items():
-        session.run(assign_ops[k], feed_dict={placeholders[k]: v})
-
-    return assign_ops, placeholders
+        ]
\ No newline at end of file

From 9671ab1a429a5fa16388a4f21a9de5cdd8890f7d Mon Sep 17 00:00:00 2001
From: kta-intel
Date: Thu, 11 Jul 2024 14:06:04 -0700
Subject: [PATCH 13/23] update data path

Signed-off-by: kta-intel

---
 openfl-workspace/tf_2dunet/README.md      | 7 +------
 openfl-workspace/tf_2dunet/plan/data.yaml | 4 ++--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/openfl-workspace/tf_2dunet/README.md b/openfl-workspace/tf_2dunet/README.md
index a8e50ff464..d7665c33ab 100644
--- a/openfl-workspace/tf_2dunet/README.md
+++ b/openfl-workspace/tf_2dunet/README.md
@@ -46,9 +46,4 @@ Output of `tree $DATA_PATH/$SUBFOLDER -L 1` in case when `n = 2`:
 
 If BraTS20 has the same structure, we can split it in the same way. Each slice contains subdirectories containing `*.nii.gz` files. According to `load_from_NIfTI` function [docstring](https://github.com/intel/openfl/blob/2e6680fedcd4d99363c94792c4a9cc272e4eebc0/openfl-workspace/tf_2dunet/src/brats_utils.py#L68), `NIfTI files for whole brains are assumed to be contained in subdirectories of the parent directory`. So we can use these slice folders as collaborator data paths.
 
-4) We are ready to train! Try executing the [Quick Start](https://openfl.readthedocs.io/en/latest/get_started/quickstart.html) steps. Make sure you have `openfl` installed in your Python virtual environment. Be sure to set the proper collaborator data paths in [plan/data.yaml](https://github.com/securefederatedai/openfl/blob/develop/openfl-workspace/tf_2dunet/plan/data.yaml) and during the `fx collaborator create` command. Alternatively, you can run a quick test with our 'Hello Federation' script:
-
-```bash
-python tests/github/test_hello_federation.py tf_2dunet fed_work12345alpha81671 one123dragons beta34unicorns localhost --col1-data-path ../$DATA_PATH/$SUBFOLDER/0 --col2-data-path ../$DATA_PATH/$SUBFOLDER/1 --rounds-to-train 5
-```
-The result of the execution of the command above is 5 completed training rounds.
+4) We are ready to train! Try executing the [Quick Start](https://openfl.readthedocs.io/en/latest/get_started/quickstart.html) steps. Make sure you have `openfl` installed in your Python virtual environment. Be sure to set the proper collaborator data paths in [plan/data.yaml](https://github.com/securefederatedai/openfl/blob/develop/openfl-workspace/tf_2dunet/plan/data.yaml) and during the `fx collaborator create -n <collaborator_name> -d <data_path>` command.
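Each uncommented row of `plan/data.yaml` pairs a collaborator name with the data path handed to that collaborator's data loader. The sketch below illustrates that pairing; `parse_data_yaml` is a hypothetical helper written for illustration, not an OpenFL API:

```python
# Illustrative only: shows how `collaborator_name,data_path` rows pair up.
# `parse_data_yaml` is a hypothetical helper, not part of OpenFL.
from pathlib import Path


def parse_data_yaml(text):
    """Map collaborator names to expanded, absolute data paths."""
    mapping = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue  # skip comments and blank lines
        name, path = line.split(',', 1)
        mapping[name] = Path(path).expanduser().resolve()
    return mapping


print(parse_data_yaml(
    'collaborator1,~/MICCAI_BraTS_2019_Data_Training/HGG/0\n'
    'collaborator2,~/MICCAI_BraTS_2019_Data_Training/HGG/1'
))
```

Note that `~` expansion only applies when the path starts with `~`, which is likely why the change below drops the space after the comma: a leading space on the path would defeat the expansion downstream.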
diff --git a/openfl-workspace/tf_2dunet/plan/data.yaml b/openfl-workspace/tf_2dunet/plan/data.yaml
index 498ff9297d..a04741e2b7 100644
--- a/openfl-workspace/tf_2dunet/plan/data.yaml
+++ b/openfl-workspace/tf_2dunet/plan/data.yaml
@@ -4,5 +4,5 @@
 # all keys under 'collaborators' correspond to a specific collaborator name; the corresponding dictionary has data_name, data_path pairs.
 # Note that in the mnist case we do not store the data locally, and the data_path is used to pass an integer that helps the data object
 # construct the shard of the mnist dataset to be used for this collaborator.
-collaborator1, ~/MICCAI_BraTS_2019_Data_Training/HGG/0
-collaborator2, ~/MICCAI_BraTS_2019_Data_Training/HGG/1
+collaborator1,~/MICCAI_BraTS_2019_Data_Training/HGG/0
+collaborator2,~/MICCAI_BraTS_2019_Data_Training/HGG/1

From a78b8364d4bff774492e6c8e349b76a1e727d109 Mon Sep 17 00:00:00 2001
From: kta-intel
Date: Thu, 11 Jul 2024 14:17:19 -0700
Subject: [PATCH 14/23] pin tensorflow=2.15.1

Signed-off-by: kta-intel

---
 openfl-workspace/tf_3dunet_brats/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openfl-workspace/tf_3dunet_brats/requirements.txt b/openfl-workspace/tf_3dunet_brats/requirements.txt
index ed58705a66..d79023f767 100644
--- a/openfl-workspace/tf_3dunet_brats/requirements.txt
+++ b/openfl-workspace/tf_3dunet_brats/requirements.txt
@@ -1,4 +1,4 @@
-tensorflow>=2
+tensorflow==2.15.1
 
 nibabel
 numpy

From e9a7364a088ca38310ed98ec8d5065498781b518 Mon Sep 17 00:00:00 2001
From: kta-intel
Date: Thu, 11 Jul 2024 14:21:54 -0700
Subject: [PATCH 15/23] use absolute paths

Signed-off-by: kta-intel

---
 openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py
index 65d6d5f54c..58f2901b80 100644
--- a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py
+++ b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py
@@ -6,10 +6,7 @@
 import tensorflow as tf
 
 from openfl.federated import TensorFlowTaskRunner
-from .define_model import build_model
-from .define_model import dice_coef
-from .define_model import dice_loss
-from .define_model import soft_dice_coef
+from src.define_model import build_model, dice_coef, dice_loss, soft_dice_coef
 
 
 class TensorFlow3dUNet(TensorFlowTaskRunner):

From 49e97c1b33e31848eda887fe01bc90f026147284 Mon Sep 17 00:00:00 2001
From: kta-intel
Date: Thu, 11 Jul 2024 14:42:08 -0700
Subject: [PATCH 16/23] tf_3dunet_brats updated to new convention

Signed-off-by: kta-intel

---
 .../plan/{defaults => }/defaults              |   0
 .../plan/defaults/aggregator.yaml             |   4 -
 .../plan/defaults/assigner.yaml               |   9 -
 .../plan/defaults/collaborator.yaml           |   5 -
 .../plan/defaults/compression_pipeline.yaml   |   1 -
 .../plan/defaults/data_loader.yaml            |   1 -
 .../plan/defaults/network.yaml                |   9 -
 .../plan/defaults/task_runner.yaml            |   1 -
 .../plan/defaults/tasks_fast_estimator.yaml   |  22 --
 .../plan/defaults/tasks_keras.yaml            |  23 --
 .../plan/defaults/tasks_tensorflow.yaml       |  23 --
 .../plan/defaults/tasks_torch.yaml            |  19 --
 .../tf_3dunet_brats/plan/plan.yaml            | 103 ++++-----
 .../tf_3dunet_brats/src/dataloader.py         |  94 +++++++-
 .../src/{define_model.py => taskrunner.py}    | 101 +++++++++
 .../tf_3dunet_brats/src/tf_3dunet_model.py    | 212 ------------------
 .../src/tf_brats_dataloader.py                |  99 --------
 17 files changed, 237 insertions(+), 489 deletions(-)
 rename openfl-workspace/tf_3dunet_brats/plan/{defaults => }/defaults (100%)
 delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml
 delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml
 delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml
 delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml
 delete mode 100644
openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml rename openfl-workspace/tf_3dunet_brats/src/{define_model.py => taskrunner.py} (62%) delete mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py delete mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults b/openfl-workspace/tf_3dunet_brats/plan/defaults similarity index 100% rename from openfl-workspace/tf_3dunet_brats/plan/defaults/defaults rename to openfl-workspace/tf_3dunet_brats/plan/defaults diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml deleted file mode 100644 index d3ef6e5082..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml +++ /dev/null @@ -1,4 +0,0 @@ -template : openfl.component.Aggregator -settings : - db_store_rounds : 1 - diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml deleted file mode 100644 index 0b7e744475..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml +++ /dev/null @@ -1,9 +0,0 @@ -template : openfl.component.RandomGroupedAssigner -settings : - task_groups : - - name : train_and_validate - percentage : 1.0 - tasks : - - aggregated_model_validation - - train - - locally_tuned_model_validation diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml deleted file mode 100644 index a9c2e6eb7b..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml +++ /dev/null @@ -1,5 +0,0 @@ -template : openfl.component.Collaborator -settings : - opt_treatment : 'CONTINUE_LOCAL' - delta_updates : True - db_store_rounds : 1 diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml deleted file mode 100644 index a508f94fd2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.pipelines.NoCompressionPipeline diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml deleted file mode 100644 index 33accd5ab2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.federated.DataLoader diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml deleted file mode 100644 index 9528631585..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml +++ /dev/null @@ -1,9 +0,0 @@ -template: openfl.federation.Network -settings: - agg_addr : auto - agg_port : auto - hash_salt : auto - disable_tls : False - client_reconnect_interval : 5 - disable_client_auth : 
False - cert_folder : cert diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml deleted file mode 100644 index b162724693..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.federated.task_runner.CoreTaskRunner diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml deleted file mode 100644 index 1548d4b225..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml +++ /dev/null @@ -1,22 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - accuracy - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - accuracy -train: - function : train - kwargs : - batch_size : 32 - epochs : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml deleted file mode 100644 index 79d067d8d2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - accuracy - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - accuracy - -train: - function : train - kwargs : - batch_size : 32 - epochs : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml deleted file mode 100644 index 586a885b40..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - acc - -train: - function : train_batches - kwargs : - batch_size : 32 - num_batches : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml deleted file mode 100644 index a240c2003b..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml +++ /dev/null @@ -1,19 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : validate - kwargs : - apply: local - metrics : - - acc - -train: - function : train_batches - kwargs : - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml index b873de734b..acecc2ea46 100644 --- a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml +++ b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml @@ -1,59 +1,42 @@ -aggregator: - defaults: plan/defaults/aggregator.yaml - settings: - best_state_path: save/tf_3dunet_brats_best.pbuf - init_state_path: save/tf_3dunet_brats_init.pbuf - last_state_path: save/tf_3dunet_brats_latest.pbuf - db_store_rounds: 2 - rounds_to_train: 10 - template: openfl.component.Aggregator -assigner: - defaults: plan/defaults/assigner.yaml - settings: - 
task_groups:
-    - name: train_and_validate
-      percentage: 1.0
-      tasks:
-      - aggregated_model_validation
-      - train
-      - locally_tuned_model_validation
-  template: openfl.component.RandomGroupedAssigner
-collaborator:
-  defaults: plan/defaults/collaborator.yaml
-  settings:
-    db_store_rounds: 2
-    delta_updates: true
-    opt_treatment: RESET
-  template: openfl.component.Collaborator
-data_loader:
-  defaults: plan/defaults/data_loader.yaml
-  settings:
-    batch_size: 4
-    crop_dim: 64
-    num_classes: 1
-    number_input_channels: 1
+# Copyright (C) 2020-2024 Intel Corporation
+# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+
+aggregator :
+  defaults : plan/defaults/aggregator.yaml
+  template : openfl.component.Aggregator
+  settings :
+    init_state_path : save/init.pbuf
+    last_state_path : save/latest.pbuf
+    best_state_path : save/best.pbuf
+    rounds_to_train : 10
+    db_store_rounds : 2
+
+collaborator :
+  defaults : plan/defaults/collaborator.yaml
+  template : openfl.component.Collaborator
+  settings :
+    delta_updates : true
+    opt_treatment : RESET
+
+data_loader :
+  defaults : plan/defaults/data_loader.yaml
+  template : src.dataloader.BratsDataLoader
+  settings :
+    batch_size: 64
     percent_train: 0.8
-  template: src.tf_brats_dataloader.TensorFlowBratsDataLoader
-network:
-  defaults: plan/defaults/network.yaml
-  settings:
-    agg_addr: DESKTOP-AOKV1IJ.localdomain
-    agg_port: auto
-    cert_folder: cert
-    client_reconnect_interval: 5
-    disable_client_auth: false
-    disable_tls: false
-    hash_salt: auto
-  template: openfl.federation.Network
-task_runner:
-  defaults: plan/defaults/task_runner.yaml
-  settings:
-    batch_norm: true
-    batch_size: 4
-    depth: 4
-    initial_filters: 16
-    use_upsampling: false
-  template: src.tf_3dunet_model.TensorFlow3dUNet
+    collaborator_count : 2
+    data_group_name : brats
+
+task_runner :
+  defaults : plan/defaults/task_runner.yaml
+  template : src.taskrunner.UNet3D
+
+network :
+  defaults : plan/defaults/network.yaml
+
+assigner :
+  defaults : plan/defaults/assigner.yaml
+
 tasks:
   aggregated_model_validation:
     function: validate_task
@@ -61,8 +44,8 @@ tasks:
     apply: global
     batch_size: 4
    metrics:
-    - dice_coef
-    - soft_dice_coef
+      - dice_coef
+      - soft_dice_coef
   defaults: plan/defaults/tasks_tensorflow.yaml
  locally_tuned_model_validation:
    function: validate_task
@@ -70,8 +53,8 @@ tasks:
     apply: local
     batch_size: 4
    metrics:
-    - dice_coef
-    - soft_dice_coef
+      - dice_coef
+      - soft_dice_coef
   settings: {}
  train:
    function: train_task
@@ -79,5 +62,5 @@ tasks:
     batch_size: 4
     epochs: 1
    metrics:
-    - loss
+      - loss
    num_batches: 1
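A note on the `template` keys in the plan above: they are resolved as dotted import paths when the plan is parsed, so `src.dataloader.BratsDataLoader` must match the class name defined in `src/dataloader.py` exactly. Roughly, and simplified from what the real plan loader does, the resolution looks like the sketch below (`instantiate` is an illustrative stand-in, not an OpenFL function):

```python
# Simplified sketch of dotted-path template resolution; the actual plan
# parser layers defaults merging and settings validation on top of this.
import importlib


def instantiate(template, **settings):
    """Import `package.module.ClassName` from a template string and build it."""
    module_path, class_name = template.rsplit('.', 1)
    module = importlib.import_module(module_path)
    cls = getattr(module, class_name)  # a template/class name mismatch fails here
    return cls(**settings)


# e.g. instantiate('src.dataloader.BratsDataLoader',
#                  data_path='~/MICCAI_BraTS_2019_Data_Training/HGG/0',
#                  batch_size=64)
```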
diff --git a/openfl-workspace/tf_3dunet_brats/src/dataloader.py b/openfl-workspace/tf_3dunet_brats/src/dataloader.py
index 80ac1cd004..19ba8a0811 100644
--- a/openfl-workspace/tf_3dunet_brats/src/dataloader.py
+++ b/openfl-workspace/tf_3dunet_brats/src/dataloader.py
@@ -3,13 +3,105 @@
 
 """You may copy this file as the starting point of your own model."""
 
-import os
 import nibabel as nib
 import numpy as np
+
+import os
+from openfl.federated import TensorFlowDataLoader
+
 import tensorflow as tf
 
 
+class BratsDataLoader(TensorFlowDataLoader):
+    """TensorFlow Data Loader for the BraTS dataset."""
+
+    def __init__(self, data_path, batch_size=4,
+                 crop_dim=64, percent_train=0.8,
+                 pre_split_shuffle=True,
+                 number_input_channels=1,
+                 num_classes=1,
+                 **kwargs):
+        """Initialize.
+
+        Args:
+            data_path: The file path for the BraTS dataset
+            batch_size (int): The batch size to use
+            crop_dim (int): Crop the original image to this size on each dimension
+            percent_train (float): The percentage of the data to use for training (Default=0.8)
+            pre_split_shuffle (bool): True = shuffle the dataset before
+                                      performing the train/validate split (Default=True)
+            number_input_channels (int): Number of input channels in the images (Default=1)
+            num_classes (int): Number of output classes/channels (Default=1)
+            **kwargs: Additional arguments, passed to super init
+
+        Returns:
+            Data loader with BraTS data
+        """
+        super().__init__(batch_size, **kwargs)
+
+        self.data_path = os.path.abspath(os.path.expanduser(data_path))
+        self.batch_size = batch_size
+        self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels]
+        self.num_input_channels = number_input_channels
+        self.num_classes = num_classes
+
+        self.train_test_split = percent_train
+
+        self.brats_data = DatasetGenerator(crop_dim,
+                                           data_path=data_path,
+                                           number_input_channels=number_input_channels,
+                                           batch_size=batch_size,
+                                           train_test_split=percent_train,
+                                           validate_test_split=0.5,
+                                           num_classes=num_classes,
+                                           random_seed=816)
+
+    def get_feature_shape(self):
+        """
+        Get the shape of an example feature array.
+
+        Returns:
+            tuple: shape of an example feature array
+        """
+        return tuple(self.brats_data.get_input_shape())
+
+    def get_train_loader(self, batch_size=None, num_batches=None):
+        """
+        Get training data loader.
+
+        Returns:
+            loader object
+        """
+        return self.brats_data.ds_train
+
+    def get_valid_loader(self, batch_size=None):
+        """
+        Get validation data loader.
+
+        Returns:
+            loader object
+        """
+        return self.brats_data.ds_val
+
+    def get_train_data_size(self):
+        """
+        Get total number of training samples.
+
+        Returns:
+            int: number of training samples
+        """
+        return self.brats_data.num_train
+
+    def get_valid_data_size(self):
+        """
+        Get total number of validation samples.
+
+        Returns:
+            int: number of validation samples
+        """
+        return self.brats_data.num_val
+
+
 class DatasetGenerator:
     """Generate a TensorFlow data loader from the BraTS .nii.gz files."""
 
diff --git a/openfl-workspace/tf_3dunet_brats/src/define_model.py b/openfl-workspace/tf_3dunet_brats/src/taskrunner.py
similarity index 62%
rename from openfl-workspace/tf_3dunet_brats/src/define_model.py
rename to openfl-workspace/tf_3dunet_brats/src/taskrunner.py
index 148d66e9ad..8a8390187d 100644
--- a/openfl-workspace/tf_3dunet_brats/src/define_model.py
+++ b/openfl-workspace/tf_3dunet_brats/src/taskrunner.py
@@ -3,8 +3,109 @@
 
 """You may copy this file as the starting point of your own model."""
 
+import numpy as np
 import tensorflow as tf
 
+from openfl.utilities import Metric
+from openfl.federated import TensorFlowTaskRunner
+
+
+class UNet3D(TensorFlowTaskRunner):
+    """Initialize.
+
+    Args:
+        **kwargs: Additional parameters to pass to the function
+
+    """
+
+    def __init__(self, initial_filters=16,
+                 depth=5,
+                 batch_norm=True,
+                 use_upsampling=False,
+                 **kwargs):
+        """Initialize.
+ + Args: + **kwargs: Additional parameters to pass to the function + + """ + super().__init__(**kwargs) + + self.model = self.create_model( + input_shape=self.feature_shape, + n_cl_out=self.data_loader.num_classes, + initial_filters=initial_filters, + use_upsampling=use_upsampling, + depth=depth, + batch_norm=batch_norm, + **kwargs + ) + self.initialize_tensorkeys_for_functions() + + self.model.summary(print_fn=self.logger.info, line_length=120) + + def create_model(self, + input_shape, + n_cl_out=1, + use_upsampling=False, + dropout=0.2, + print_summary=True, + seed=816, + depth=5, + dropout_at=(2, 3), + initial_filters=16, + batch_norm=True, + **kwargs): + """Create the TensorFlow 3D U-Net CNN model. + + Args: + input_shape (list): input shape of the data + n_cl_out (int): Number of output classes in label (Default=1) + **kwargs: Additional parameters to pass to the function + + """ + + model = build_model(input_shape, + n_cl_out=n_cl_out, + use_upsampling=use_upsampling, + dropout=dropout, + print_summary=print_summary, + seed=seed, + depth=depth, + dropout_at=dropout_at, + initial_filters=initial_filters, + batch_norm=batch_norm) + + model.compile( + loss=dice_loss, + optimizer=tf.keras.optimizers.Adam(), + metrics=[dice_coef, soft_dice_coef], + ) + + return model + + def train_(self, batch_generator, metrics: list = None, **kwargs): + """Train single epoch. + + Override this function for custom training. + + Args: + batch_generator: Generator of training batches. + Each batch is a tuple of N train images and N train labels + where N is the batch size of the DataLoader of the current TaskRunner instance. + + epochs: Number of epochs to train. + metrics: Names of metrics to save. + """ + history = self.model.fit(batch_generator, + verbose=1, + **kwargs) + results = [] + for metric in metrics: + value = np.mean([history.history[metric]]) + results.append(Metric(name=metric, value=np.array(value))) + return results + def dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): """ diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py deleted file mode 100644 index 58f2901b80..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow as tf - -from openfl.federated import TensorFlowTaskRunner -from src.define_model import build_model, dice_coef, dice_loss, soft_dice_coef - - -class TensorFlow3dUNet(TensorFlowTaskRunner): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - - def __init__(self, initial_filters=16, - depth=5, - batch_norm=True, - use_upsampling=False, - **kwargs): - """Initialize. 
- - Args: - **kwargs: Additional parameters to pass to the function - - """ - super().__init__(**kwargs) - - self.model = self.create_model( - input_shape=self.feature_shape, - n_cl_out=self.data_loader.num_classes, - initial_filters=initial_filters, - use_upsampling=use_upsampling, - depth=depth, - batch_norm=batch_norm, - **kwargs - ) - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info, line_length=120) - - def create_model(self, - input_shape, - n_cl_out=1, - use_upsampling=False, - dropout=0.2, - print_summary=True, - seed=816, - depth=5, - dropout_at=(2, 3), - initial_filters=16, - batch_norm=True, - **kwargs): - """Create the TensorFlow 3D U-Net CNN model. - - Args: - input_shape (list): input shape of the data - n_cl_out (int): Number of output classes in label (Default=1) - **kwargs: Additional parameters to pass to the function - - """ - # - # Define Model - # - model = build_model(input_shape, - n_cl_out=n_cl_out, - use_upsampling=use_upsampling, - dropout=dropout, - print_summary=print_summary, - seed=seed, - depth=depth, - dropout_at=dropout_at, - initial_filters=initial_filters, - batch_norm=batch_norm) - - self.optimizer = tf.keras.optimizers.Adam() - - model.compile( - loss=dice_loss, - optimizer=self.optimizer, - metrics=[dice_coef, soft_dice_coef], - ) - - self.tvars = model.layers - print(f'layer names: {[var.name for var in self.tvars]}') - - self.opt_vars = self.optimizer.variables() - print(f'optimizer vars: {self.opt_vars}') - - # Two opt_vars for one tvar: gradient and square sum for RMSprop. - self.fl_vars = self.tvars + self.opt_vars - - return model - - -if __name__ == '__main__': - - from tf_brats_dataloader import DatasetGenerator - import os - - import argparse - - parser = argparse.ArgumentParser( - description='Train 3D U-Net model', add_help=True, - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('--data_path', - default='~/data/MICCAI_BraTS2020_TrainingData/', - # Or wherever you unzipped the BraTS datset, - help='Root directory for BraTS 2020 dataset') - parser.add_argument('--epochs', - type=int, - default=5, - help='Number of epochs') - parser.add_argument('--crop_dim', - type=int, - default=64, - help='Crop all dimensions to this (height, width, depth)') - parser.add_argument('--batch_size', - type=int, - default=4, - help='Training batch size') - parser.add_argument('--train_test_split', - type=float, - default=0.80, - help='Train/test split (0-1)') - parser.add_argument('--validate_test_split', - type=float, - default=0.50, - help='Validation/test split (0-1)') - parser.add_argument('--number_input_channels', - type=int, - default=1, - help='Number of input channels') - parser.add_argument('--num_classes', - type=int, - default=1, - help='Number of output classes/channels') - parser.add_argument('--random_seed', - default=816, - help='Random seed for determinism') - parser.add_argument('--print_model', - action='store_true', - default=True, - help='Print the summary of the model layers') - parser.add_argument('--filters', - type=int, - default=16, - help='Number of filters in the first convolutional layer') - parser.add_argument('--use_upsampling', - action='store_true', - default=False, - help='Use upsampling instead of transposed convolution') - parser.add_argument('--use_batchnorm', - action='store_true', - default=True, - help='Use batch normalization') - parser.add_argument('--saved_model_name', - default='saved_model_3DUnet', - help='Save model to this path') - - args = 
parser.parse_args() - - print(args) - - brats_data = DatasetGenerator(args.crop_dim, - data_path=os.path.abspath(os.path.expanduser(args.data_path)), - batch_size=args.batch_size, - train_test_split=args.train_test_split, - validate_test_split=args.validate_test_split, - number_input_channels=args.number_input_channels, - num_classes=args.num_classes, - random_seed=args.random_seed - ) - - model = build_model([args.crop_dim, args.crop_dim, args.crop_dim, args.number_input_channels], - use_upsampling=args.use_upsampling, - n_cl_out=args.num_classes, - dropout=0.2, - print_summary=args.print_model, - seed=args.random_seed, - depth=5, - dropout_at=[2, 3], - initial_filters=args.filters, - batch_norm=args.use_batchnorm - ) - - model.compile(loss=dice_loss, - optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), - metrics=[dice_coef, soft_dice_coef] - ) - - checkpoint = tf.keras.callbacks.ModelCheckpoint(args.saved_model_name, - verbose=1, - save_best_only=True) - - # TensorBoard - import datetime - logs_dir = os.path.join('tensorboard_logs', - datetime.datetime.now().strftime('%Y%m%d-%H%M%S')) - tb_logs = tf.keras.callbacks.TensorBoard(log_dir=logs_dir) - - callbacks = [checkpoint, tb_logs] - - history = model.fit(brats_data.ds_train, - validation_data=brats_data.ds_val, - epochs=args.epochs, - callbacks=callbacks) diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py b/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py deleted file mode 100644 index 85e5c576c3..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - - -import os - -from openfl.federated import TensorFlowDataLoader -from .dataloader import DatasetGenerator - - -class TensorFlowBratsDataLoader(TensorFlowDataLoader): - """TensorFlow Data Loader for the BraTS dataset.""" - - def __init__(self, data_path, batch_size=4, - crop_dim=64, percent_train=0.8, - pre_split_shuffle=True, - number_input_channels=1, - num_classes=1, - **kwargs): - """Initialize. - - Args: - data_path: The file path for the BraTS dataset - batch_size (int): The batch size to use - crop_dim (int): Crop the original image to this size on each dimension - percent_train (float): The percentage of the data to use for training (Default=0.8) - pre_split_shuffle (bool): True= shuffle the dataset before - performing the train/validate split (Default=True) - **kwargs: Additional arguments, passed to super init - - Returns: - Data loader with BraTS data - """ - super().__init__(batch_size, **kwargs) - - self.data_path = os.path.abspath(os.path.expanduser(data_path)) - self.batch_size = batch_size - self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] - self.num_input_channels = number_input_channels - self.num_classes = num_classes - - self.train_test_split = percent_train - - self.brats_data = DatasetGenerator(crop_dim, - data_path=data_path, - number_input_channels=number_input_channels, - batch_size=batch_size, - train_test_split=percent_train, - validate_test_split=0.5, - num_classes=num_classes, - random_seed=816) - - def get_feature_shape(self): - """ - Get the shape of an example feature array. - - Returns: - tuple: shape of an example feature array - """ - return tuple(self.brats_data.get_input_shape()) - - def get_train_loader(self, batch_size=None, num_batches=None): - """ - Get training data loader. 
- - Returns - ------- - loader object - """ - return self.brats_data.ds_train - - def get_valid_loader(self, batch_size=None): - """ - Get validation data loader. - - Returns: - loader object - """ - return self.brats_data.ds_val - - def get_train_data_size(self): - """ - Get total number of training samples. - - Returns: - int: number of training samples - """ - return self.brats_data.num_train - - def get_valid_data_size(self): - """ - Get total number of validation samples. - - Returns: - int: number of validation samples - """ - return self.brats_data.num_val From c17b84b247473a73dbd698b1fd8ceb37d087d42b Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 11 Jul 2024 14:45:46 -0700 Subject: [PATCH 17/23] remove redundant naming Signed-off-by: kta-intel --- openfl-workspace/tf_2dunet/plan/plan.yaml | 10 +++++----- openfl-workspace/tf_2dunet/src/dataloader.py | 2 +- openfl-workspace/tf_2dunet/src/taskrunner.py | 2 +- openfl-workspace/tf_cnn_histology/plan/plan.yaml | 10 +++++----- openfl-workspace/tf_cnn_histology/src/dataloader.py | 2 +- openfl-workspace/tf_cnn_histology/src/taskrunner.py | 2 +- openfl-workspace/tf_cnn_mnist/plan/plan.yaml | 10 +++++----- openfl-workspace/tf_cnn_mnist/src/dataloader.py | 2 +- openfl-workspace/tf_cnn_mnist/src/taskrunner.py | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/openfl-workspace/tf_2dunet/plan/plan.yaml b/openfl-workspace/tf_2dunet/plan/plan.yaml index c2d4e969da..c925de767a 100644 --- a/openfl-workspace/tf_2dunet/plan/plan.yaml +++ b/openfl-workspace/tf_2dunet/plan/plan.yaml @@ -5,9 +5,9 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/tf_2dunet_brats_init.pbuf - last_state_path : save/tf_2dunet_brats_latest.pbuf - best_state_path : save/tf_2dunet_brats_best.pbuf + init_state_path : save/init.pbuf + last_state_path : save/latest.pbuf + best_state_path : save/best.pbuf rounds_to_train : 10 db_store_rounds : 2 @@ -20,7 +20,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : src.dataloader.TensorFlowBratsInMemory + template : src.dataloader.BratsDataloader settings : batch_size: 64 percent_train: 0.8 @@ -29,7 +29,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.TensorFlow2DUNet + template : src.taskrunner.UNet2D network : defaults : plan/defaults/network.yaml diff --git a/openfl-workspace/tf_2dunet/src/dataloader.py b/openfl-workspace/tf_2dunet/src/dataloader.py index 11b442c162..4e2acd9c04 100644 --- a/openfl-workspace/tf_2dunet/src/dataloader.py +++ b/openfl-workspace/tf_2dunet/src/dataloader.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -class TensorFlowBratsInMemory(TensorFlowDataLoader): +class BratsDataloader(TensorFlowDataLoader): """TensorFlow Data Loader for the BraTS dataset.""" def __init__(self, data_path, batch_size, percent_train=0.8, pre_split_shuffle=True, num_classes=1, diff --git a/openfl-workspace/tf_2dunet/src/taskrunner.py b/openfl-workspace/tf_2dunet/src/taskrunner.py index 1d3baa690d..a34a8c579a 100644 --- a/openfl-workspace/tf_2dunet/src/taskrunner.py +++ b/openfl-workspace/tf_2dunet/src/taskrunner.py @@ -9,7 +9,7 @@ from openfl.utilities import Metric from openfl.federated import TensorFlowTaskRunner -class TensorFlow2DUNet(TensorFlowTaskRunner): +class UNet2D(TensorFlowTaskRunner): """Initialize. 
Args: diff --git a/openfl-workspace/tf_cnn_histology/plan/plan.yaml b/openfl-workspace/tf_cnn_histology/plan/plan.yaml index 010ee461fc..87ecfa207f 100644 --- a/openfl-workspace/tf_cnn_histology/plan/plan.yaml +++ b/openfl-workspace/tf_cnn_histology/plan/plan.yaml @@ -5,9 +5,9 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/tf_cnn_histology_init.pbuf - last_state_path : save/tf_cnn_histology_latest.pbuf - best_state_path : save/tf_cnn_histology_best.pbuf + init_state_path : save/init.pbuf + last_state_path : save/latest.pbuf + best_state_path : save/best.pbuf db_store_rounds : 2 rounds_to_train : 10 @@ -21,7 +21,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : src.dataloader.TensorFlowHistologyInMemory + template : src.dataloader.HistologyDataloader settings : batch_size: 64 percent_train: 0.8 @@ -30,7 +30,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.TensorFlowCNN + template : src.taskrunner.CNN network : defaults : plan/defaults/network.yaml diff --git a/openfl-workspace/tf_cnn_histology/src/dataloader.py b/openfl-workspace/tf_cnn_histology/src/dataloader.py index 30e41311f0..5c9923d0c5 100644 --- a/openfl-workspace/tf_cnn_histology/src/dataloader.py +++ b/openfl-workspace/tf_cnn_histology/src/dataloader.py @@ -12,7 +12,7 @@ logger = getLogger(__name__) -class TensorFlowHistologyInMemory(TensorFlowDataLoader): +class HistologyDataloader(TensorFlowDataLoader): """TensorFlow Data Loader for Colorectal Histology Dataset.""" def __init__(self, data_path, batch_size, **kwargs): diff --git a/openfl-workspace/tf_cnn_histology/src/taskrunner.py b/openfl-workspace/tf_cnn_histology/src/taskrunner.py index 7273c0385a..3b085cfa93 100644 --- a/openfl-workspace/tf_cnn_histology/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_histology/src/taskrunner.py @@ -10,7 +10,7 @@ from openfl.federated import TensorFlowTaskRunner -class TensorFlowCNN(TensorFlowTaskRunner): +class CNN(TensorFlowTaskRunner): """Initialize. 
Args: diff --git a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml index bd6850e650..5a9164260a 100644 --- a/openfl-workspace/tf_cnn_mnist/plan/plan.yaml +++ b/openfl-workspace/tf_cnn_mnist/plan/plan.yaml @@ -5,9 +5,9 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/tf_cnn_mnist_init.pbuf - best_state_path : save/tf_cnn_mnist_best.pbuf - last_state_path : save/tf_cnn_mnist_last.pbuf + init_state_path : save/init.pbuf + best_state_path : save/best.pbuf + last_state_path : save/last.pbuf rounds_to_train : 10 collaborator : @@ -19,7 +19,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : src.dataloader.TensorFlowMNISTInMemory + template : src.dataloader.MNISTDataloader settings : collaborator_count : 2 data_group_name : mnist @@ -27,7 +27,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.TensorFlowCNN + template : src.taskrunner.CNN network : defaults : plan/defaults/network.yaml diff --git a/openfl-workspace/tf_cnn_mnist/src/dataloader.py b/openfl-workspace/tf_cnn_mnist/src/dataloader.py index e3982a149b..585000f905 100644 --- a/openfl-workspace/tf_cnn_mnist/src/dataloader.py +++ b/openfl-workspace/tf_cnn_mnist/src/dataloader.py @@ -12,7 +12,7 @@ logger = getLogger(__name__) -class TensorFlowMNISTInMemory(TensorFlowDataLoader): +class MNISTDataloader(TensorFlowDataLoader): """TensorFlow Data Loader for MNIST Dataset.""" def __init__(self, data_path, batch_size, **kwargs): diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py index ea11110edd..9211b3ce73 100644 --- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py @@ -10,7 +10,7 @@ from openfl.federated import TensorFlowTaskRunner -class TensorFlowCNN(TensorFlowTaskRunner): +class CNN(TensorFlowTaskRunner): """A basic convolutional neural network model.""" def __init__(self, **kwargs): From cf05cadcf397651a1a892ecbe0d6abe6f2e471f9 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 11 Jul 2024 14:48:21 -0700 Subject: [PATCH 18/23] remove op_parallelism_threads print statement Signed-off-by: kta-intel --- openfl-workspace/tf_cnn_histology/src/taskrunner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openfl-workspace/tf_cnn_histology/src/taskrunner.py b/openfl-workspace/tf_cnn_histology/src/taskrunner.py index 3b085cfa93..d048a34cb2 100644 --- a/openfl-workspace/tf_cnn_histology/src/taskrunner.py +++ b/openfl-workspace/tf_cnn_histology/src/taskrunner.py @@ -56,8 +56,6 @@ def build_model(self, keras.src.engine.functional.Functional """ - print(tf.config.threading.get_intra_op_parallelism_threads()) - print(tf.config.threading.get_inter_op_parallelism_threads()) # Define Model using Functional API From 4b085ecd03d093d577a94b1e3a723c7aa976432f Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 11 Jul 2024 14:54:43 -0700 Subject: [PATCH 19/23] remove extraneous oneshot function Signed-off-by: kta-intel --- .../tf_cnn_histology/src/dataloader.py | 18 ++---------------- .../tf_cnn_mnist/src/dataloader.py | 18 ++---------------- 2 files changed, 4 insertions(+), 32 deletions(-) diff --git a/openfl-workspace/tf_cnn_histology/src/dataloader.py b/openfl-workspace/tf_cnn_histology/src/dataloader.py index 5c9923d0c5..8314df8153 100644 --- a/openfl-workspace/tf_cnn_histology/src/dataloader.py +++ 
b/openfl-workspace/tf_cnn_histology/src/dataloader.py @@ -39,20 +39,6 @@ def __init__(self, data_path, batch_size, **kwargs): self.num_classes = num_classes -def one_hot(labels, classes): - """ - One Hot encode a vector. - - Args: - labels (list): List of labels to onehot encode - classes (int): Total number of categorical classes - - Returns: - np.array: Matrix of one-hot encoded labels - """ - return np.eye(classes)[labels] - - def _load_raw_datashards(shard_num, collaborator_count): """ Load the raw data by shard. @@ -148,7 +134,7 @@ def load_histology_shard(shard_num, collaborator_count, categorical=True, if categorical: # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) + y_train = np.eye(num_classes)[y_train] + y_valid = np.eye(num_classes)[y_valid] return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/tf_cnn_mnist/src/dataloader.py b/openfl-workspace/tf_cnn_mnist/src/dataloader.py index 585000f905..5adac25874 100644 --- a/openfl-workspace/tf_cnn_mnist/src/dataloader.py +++ b/openfl-workspace/tf_cnn_mnist/src/dataloader.py @@ -38,20 +38,6 @@ def __init__(self, data_path, batch_size, **kwargs): self.num_classes = num_classes -def one_hot(labels, classes): - """ - One Hot encode a vector. - - Args: - labels (list): List of labels to onehot encode - classes (int): Total number of categorical classes - - Returns: - np.array: Matrix of one-hot encoded labels - """ - return np.eye(classes)[labels] - - def _load_raw_datashards(shard_num, collaborator_count): """ Load the raw data by shard. @@ -137,7 +123,7 @@ def load_mnist_shard(shard_num, collaborator_count, categorical=True, if categorical: # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) + y_train = np.eye(num_classes)[y_train] + y_valid = np.eye(num_classes)[y_valid] return num_classes, X_train, y_train, X_valid, y_valid From 565df12d64e4d7e9a48609b1b985076633bfb5c7 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Thu, 11 Jul 2024 15:07:52 -0700 Subject: [PATCH 20/23] removing extraneous text Signed-off-by: kta-intel --- openfl-workspace/tf_cnn_mnist/src/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/openfl-workspace/tf_cnn_mnist/src/__init__.py b/openfl-workspace/tf_cnn_mnist/src/__init__.py index f1410b1298..e69de29bb2 100644 --- a/openfl-workspace/tf_cnn_mnist/src/__init__.py +++ b/openfl-workspace/tf_cnn_mnist/src/__init__.py @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""You may copy this file as the starting point of your own model.""" From a34c0ce2e5717fabcab393be5aa7f306f5be979c Mon Sep 17 00:00:00 2001 From: kta-intel Date: Fri, 12 Jul 2024 14:50:56 -0700 Subject: [PATCH 21/23] Revert "tf_3dunet_brats updated to new convention" This reverts commit 49e97c1b33e31848eda887fe01bc90f026147284. 
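For readers tracing the one_hot removal in the hunks above: indexing an identity matrix by a label vector is a standard NumPy one-hot idiom, so the inlined `np.eye(num_classes)[y_train]` reproduces the removed helper exactly. A minimal standalone sketch with illustrative values (not part of the patch):

```python
import numpy as np

# Rows of the identity matrix are the one-hot basis vectors, so indexing
# np.eye(n) by a vector of integer labels yields a one-hot encoded matrix.
labels = np.array([0, 2, 1])
print(np.eye(3)[labels])
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
```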
Signed-off-by: kta-intel --- .../plan/defaults/aggregator.yaml | 4 + .../plan/defaults/assigner.yaml | 9 + .../plan/defaults/collaborator.yaml | 5 + .../plan/defaults/compression_pipeline.yaml | 1 + .../plan/defaults/data_loader.yaml | 1 + .../plan/{ => defaults}/defaults | 0 .../plan/defaults/network.yaml | 9 + .../plan/defaults/task_runner.yaml | 1 + .../plan/defaults/tasks_fast_estimator.yaml | 22 ++ .../plan/defaults/tasks_keras.yaml | 23 ++ .../plan/defaults/tasks_tensorflow.yaml | 23 ++ .../plan/defaults/tasks_torch.yaml | 19 ++ .../tf_3dunet_brats/plan/plan.yaml | 103 +++++---- .../tf_3dunet_brats/src/dataloader.py | 94 +------- .../src/{taskrunner.py => define_model.py} | 101 --------- .../tf_3dunet_brats/src/tf_3dunet_model.py | 212 ++++++++++++++++++ .../src/tf_brats_dataloader.py | 99 ++++++++ 17 files changed, 489 insertions(+), 237 deletions(-) create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml rename openfl-workspace/tf_3dunet_brats/plan/{ => defaults}/defaults (100%) create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml rename openfl-workspace/tf_3dunet_brats/src/{taskrunner.py => define_model.py} (62%) create mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py create mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml new file mode 100644 index 0000000000..d3ef6e5082 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml @@ -0,0 +1,4 @@ +template : openfl.component.Aggregator +settings : + db_store_rounds : 1 + diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml new file mode 100644 index 0000000000..0b7e744475 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml @@ -0,0 +1,9 @@ +template : openfl.component.RandomGroupedAssigner +settings : + task_groups : + - name : train_and_validate + percentage : 1.0 + tasks : + - aggregated_model_validation + - train + - locally_tuned_model_validation diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml new file mode 100644 index 0000000000..a9c2e6eb7b --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml @@ -0,0 +1,5 @@ +template : openfl.component.Collaborator +settings : + opt_treatment : 'CONTINUE_LOCAL' + delta_updates : True + db_store_rounds : 1 diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml 
b/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml new file mode 100644 index 0000000000..a508f94fd2 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml @@ -0,0 +1 @@ +template: openfl.pipelines.NoCompressionPipeline diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml new file mode 100644 index 0000000000..33accd5ab2 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml @@ -0,0 +1 @@ +template: openfl.federated.DataLoader diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults b/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults similarity index 100% rename from openfl-workspace/tf_3dunet_brats/plan/defaults rename to openfl-workspace/tf_3dunet_brats/plan/defaults/defaults diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml new file mode 100644 index 0000000000..9528631585 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml @@ -0,0 +1,9 @@ +template: openfl.federation.Network +settings: + agg_addr : auto + agg_port : auto + hash_salt : auto + disable_tls : False + client_reconnect_interval : 5 + disable_client_auth : False + cert_folder : cert diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml new file mode 100644 index 0000000000..b162724693 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml @@ -0,0 +1 @@ +template: openfl.federated.task_runner.CoreTaskRunner diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml new file mode 100644 index 0000000000..1548d4b225 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml @@ -0,0 +1,22 @@ +aggregated_model_validation: + function : validate + kwargs : + batch_size : 32 + apply : global + metrics : + - accuracy + +locally_tuned_model_validation: + function : validate + kwargs : + batch_size : 32 + apply : local + metrics : + - accuracy +train: + function : train + kwargs : + batch_size : 32 + epochs : 1 + metrics : + - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml new file mode 100644 index 0000000000..79d067d8d2 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml @@ -0,0 +1,23 @@ +aggregated_model_validation: + function : validate + kwargs : + batch_size : 32 + apply : global + metrics : + - accuracy + +locally_tuned_model_validation: + function : validate + kwargs : + batch_size : 32 + apply : local + metrics : + - accuracy + +train: + function : train + kwargs : + batch_size : 32 + epochs : 1 + metrics : + - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml new file mode 100644 index 0000000000..586a885b40 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml @@ -0,0 +1,23 @@ +aggregated_model_validation: + function : validate + kwargs : + batch_size : 32 + apply : global + metrics : + - acc + +locally_tuned_model_validation: + function : validate + kwargs : + batch_size : 32 + apply : local + metrics : + - 
acc + +train: + function : train_batches + kwargs : + batch_size : 32 + num_batches : 1 + metrics : + - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml new file mode 100644 index 0000000000..a240c2003b --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml @@ -0,0 +1,19 @@ +aggregated_model_validation: + function : validate + kwargs : + apply : global + metrics : + - acc + +locally_tuned_model_validation: + function : validate + kwargs : + apply: local + metrics : + - acc + +train: + function : train_batches + kwargs : + metrics : + - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml index acecc2ea46..b873de734b 100644 --- a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml +++ b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml @@ -1,42 +1,59 @@ -# Copyright (C) 2020-2024 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -aggregator : - defaults : plan/defaults/aggregator.yaml - template : openfl.component.Aggregator - settings : - init_state_path : save/init.pbuf - last_state_path : save/latest.pbuf - best_state_path : save/best.pbuf - rounds_to_train : 10 - db_store_rounds : 2 - -collaborator : - defaults : plan/defaults/collaborator.yaml - template : openfl.component.Collaborator - settings : - delta_updates : true - opt_treatment : RESET - -data_loader : - defaults : plan/defaults/data_loader.yaml - template : src.dataloader.BratsDataloader - settings : - batch_size: 64 +aggregator: + defaults: plan/defaults/aggregator.yaml + settings: + best_state_path: save/tf_3dunet_brats_best.pbuf + init_state_path: save/tf_3dunet_brats_init.pbuf + last_state_path: save/tf_3dunet_brats_latest.pbuf + db_store_rounds: 2 + rounds_to_train: 10 + template: openfl.component.Aggregator +assigner: + defaults: plan/defaults/assigner.yaml + settings: + task_groups: + - name: train_and_validate + percentage: 1.0 + tasks: + - aggregated_model_validation + - train + - locally_tuned_model_validation + template: openfl.component.RandomGroupedAssigner +collaborator: + defaults: plan/defaults/collaborator.yaml + settings: + db_store_rounds: 2 + delta_updates: true + opt_treatment: RESET + template: openfl.component.Collaborator +data_loader: + defaults: plan/defaults/data_loader.yaml + settings: + batch_size: 4 + crop_dim: 64 + num_classes: 1 + number_input_channels: 1 percent_train: 0.8 - collaborator_count : 2 - data_group_name : brats - -task_runner : - defaults : plan/defaults/task_runner.yaml - template : src.taskrunner.UNet3D - -network : - defaults : plan/defaults/network.yaml - -assigner : - defaults : plan/defaults/assigner.yaml - + template: src.tf_brats_dataloader.TensorFlowBratsDataLoader +network: + defaults: plan/defaults/network.yaml + settings: + agg_addr: DESKTOP-AOKV1IJ.localdomain + agg_port: auto + cert_folder: cert + client_reconnect_interval: 5 + disable_client_auth: false + disable_tls: false + hash_salt: auto + template: openfl.federation.Network +task_runner: + defaults: plan/defaults/task_runner.yaml + settings: + batch_norm: true + batch_size: 4 + depth: 4 + initial_filters: 16 + use_upsampling: false + template: src.tf_3dunet_model.TensorFlow3dUNet tasks: aggregated_model_validation: function: validate_task @@ -44,8 +61,8 @@ tasks: apply: global batch_size: 4 metrics: - - dice_coef - - soft_dice_coef + - 
dice_coef + - soft_dice_coef defaults: plan/defaults/tasks_tensorflow.yaml locally_tuned_model_validation: function: validate_task @@ -53,8 +70,8 @@ tasks: apply: local batch_size: 4 metrics: - - dice_coef - - soft_dice_coef + - dice_coef + - soft_dice_coef settings: {} train: function: train_task @@ -62,5 +79,5 @@ tasks: batch_size: 4 epochs: 1 metrics: - - loss + - loss num_batches: 1 diff --git a/openfl-workspace/tf_3dunet_brats/src/dataloader.py b/openfl-workspace/tf_3dunet_brats/src/dataloader.py index 19ba8a0811..80ac1cd004 100644 --- a/openfl-workspace/tf_3dunet_brats/src/dataloader.py +++ b/openfl-workspace/tf_3dunet_brats/src/dataloader.py @@ -3,105 +3,13 @@ """You may copy this file as the starting point of your own model.""" +import os import nibabel as nib import numpy as np - -import os -from openfl.federated import TensorFlowDataLoader - import tensorflow as tf -class BratsDataLoader(TensorFlowDataLoader): - """TensorFlow Data Loader for the BraTS dataset.""" - - def __init__(self, data_path, batch_size=4, - crop_dim=64, percent_train=0.8, - pre_split_shuffle=True, - number_input_channels=1, - num_classes=1, - **kwargs): - """Initialize. - - Args: - data_path: The file path for the BraTS dataset - batch_size (int): The batch size to use - crop_dim (int): Crop the original image to this size on each dimension - percent_train (float): The percentage of the data to use for training (Default=0.8) - pre_split_shuffle (bool): True= shuffle the dataset before - performing the train/validate split (Default=True) - **kwargs: Additional arguments, passed to super init - - Returns: - Data loader with BraTS data - """ - super().__init__(batch_size, **kwargs) - - self.data_path = os.path.abspath(os.path.expanduser(data_path)) - self.batch_size = batch_size - self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] - self.num_input_channels = number_input_channels - self.num_classes = num_classes - - self.train_test_split = percent_train - - self.brats_data = DatasetGenerator(crop_dim, - data_path=data_path, - number_input_channels=number_input_channels, - batch_size=batch_size, - train_test_split=percent_train, - validate_test_split=0.5, - num_classes=num_classes, - random_seed=816) - - def get_feature_shape(self): - """ - Get the shape of an example feature array. - - Returns: - tuple: shape of an example feature array - """ - return tuple(self.brats_data.get_input_shape()) - - def get_train_loader(self, batch_size=None, num_batches=None): - """ - Get training data loader. - - Returns - ------- - loader object - """ - return self.brats_data.ds_train - - def get_valid_loader(self, batch_size=None): - """ - Get validation data loader. - - Returns: - loader object - """ - return self.brats_data.ds_val - - def get_train_data_size(self): - """ - Get total number of training samples. - - Returns: - int: number of training samples - """ - return self.brats_data.num_train - - def get_valid_data_size(self): - """ - Get total number of validation samples. 
- - Returns: - int: number of validation samples - """ - return self.brats_data.num_val - - class DatasetGenerator: """Generate a TensorFlow data loader from the BraTS .nii.gz files.""" diff --git a/openfl-workspace/tf_3dunet_brats/src/taskrunner.py b/openfl-workspace/tf_3dunet_brats/src/define_model.py similarity index 62% rename from openfl-workspace/tf_3dunet_brats/src/taskrunner.py rename to openfl-workspace/tf_3dunet_brats/src/define_model.py index 8a8390187d..148d66e9ad 100644 --- a/openfl-workspace/tf_3dunet_brats/src/taskrunner.py +++ b/openfl-workspace/tf_3dunet_brats/src/define_model.py @@ -3,109 +3,8 @@ """You may copy this file as the starting point of your own model.""" -import numpy as np import tensorflow as tf -from openfl.utilities import Metric -from openfl.federated import TensorFlowTaskRunner - - -class UNet3D(TensorFlowTaskRunner): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - - def __init__(self, initial_filters=16, - depth=5, - batch_norm=True, - use_upsampling=False, - **kwargs): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - super().__init__(**kwargs) - - self.model = self.create_model( - input_shape=self.feature_shape, - n_cl_out=self.data_loader.num_classes, - initial_filters=initial_filters, - use_upsampling=use_upsampling, - depth=depth, - batch_norm=batch_norm, - **kwargs - ) - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info, line_length=120) - - def create_model(self, - input_shape, - n_cl_out=1, - use_upsampling=False, - dropout=0.2, - print_summary=True, - seed=816, - depth=5, - dropout_at=(2, 3), - initial_filters=16, - batch_norm=True, - **kwargs): - """Create the TensorFlow 3D U-Net CNN model. - - Args: - input_shape (list): input shape of the data - n_cl_out (int): Number of output classes in label (Default=1) - **kwargs: Additional parameters to pass to the function - - """ - - model = build_model(input_shape, - n_cl_out=n_cl_out, - use_upsampling=use_upsampling, - dropout=dropout, - print_summary=print_summary, - seed=seed, - depth=depth, - dropout_at=dropout_at, - initial_filters=initial_filters, - batch_norm=batch_norm) - - model.compile( - loss=dice_loss, - optimizer=tf.keras.optimizers.Adam(), - metrics=[dice_coef, soft_dice_coef], - ) - - return model - - def train_(self, batch_generator, metrics: list = None, **kwargs): - """Train single epoch. - - Override this function for custom training. - - Args: - batch_generator: Generator of training batches. - Each batch is a tuple of N train images and N train labels - where N is the batch size of the DataLoader of the current TaskRunner instance. - - epochs: Number of epochs to train. - metrics: Names of metrics to save. 
- """ - history = self.model.fit(batch_generator, - verbose=1, - **kwargs) - results = [] - for metric in metrics: - value = np.mean([history.history[metric]]) - results.append(Metric(name=metric, value=np.array(value))) - return results - def dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): """ diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py new file mode 100644 index 0000000000..58f2901b80 --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py @@ -0,0 +1,212 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +import tensorflow as tf + +from openfl.federated import TensorFlowTaskRunner +from src.define_model import build_model, dice_coef, dice_loss, soft_dice_coef + + +class TensorFlow3dUNet(TensorFlowTaskRunner): + """Initialize. + + Args: + **kwargs: Additional parameters to pass to the function + + """ + + def __init__(self, initial_filters=16, + depth=5, + batch_norm=True, + use_upsampling=False, + **kwargs): + """Initialize. + + Args: + **kwargs: Additional parameters to pass to the function + + """ + super().__init__(**kwargs) + + self.model = self.create_model( + input_shape=self.feature_shape, + n_cl_out=self.data_loader.num_classes, + initial_filters=initial_filters, + use_upsampling=use_upsampling, + depth=depth, + batch_norm=batch_norm, + **kwargs + ) + self.initialize_tensorkeys_for_functions() + + self.model.summary(print_fn=self.logger.info, line_length=120) + + def create_model(self, + input_shape, + n_cl_out=1, + use_upsampling=False, + dropout=0.2, + print_summary=True, + seed=816, + depth=5, + dropout_at=(2, 3), + initial_filters=16, + batch_norm=True, + **kwargs): + """Create the TensorFlow 3D U-Net CNN model. + + Args: + input_shape (list): input shape of the data + n_cl_out (int): Number of output classes in label (Default=1) + **kwargs: Additional parameters to pass to the function + + """ + # + # Define Model + # + model = build_model(input_shape, + n_cl_out=n_cl_out, + use_upsampling=use_upsampling, + dropout=dropout, + print_summary=print_summary, + seed=seed, + depth=depth, + dropout_at=dropout_at, + initial_filters=initial_filters, + batch_norm=batch_norm) + + self.optimizer = tf.keras.optimizers.Adam() + + model.compile( + loss=dice_loss, + optimizer=self.optimizer, + metrics=[dice_coef, soft_dice_coef], + ) + + self.tvars = model.layers + print(f'layer names: {[var.name for var in self.tvars]}') + + self.opt_vars = self.optimizer.variables() + print(f'optimizer vars: {self.opt_vars}') + + # Two opt_vars for one tvar: gradient and square sum for RMSprop. 
+ self.fl_vars = self.tvars + self.opt_vars + + return model + + +if __name__ == '__main__': + + from tf_brats_dataloader import DatasetGenerator + import os + + import argparse + + parser = argparse.ArgumentParser( + description='Train 3D U-Net model', add_help=True, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('--data_path', + default='~/data/MICCAI_BraTS2020_TrainingData/', + # Or wherever you unzipped the BraTS datset, + help='Root directory for BraTS 2020 dataset') + parser.add_argument('--epochs', + type=int, + default=5, + help='Number of epochs') + parser.add_argument('--crop_dim', + type=int, + default=64, + help='Crop all dimensions to this (height, width, depth)') + parser.add_argument('--batch_size', + type=int, + default=4, + help='Training batch size') + parser.add_argument('--train_test_split', + type=float, + default=0.80, + help='Train/test split (0-1)') + parser.add_argument('--validate_test_split', + type=float, + default=0.50, + help='Validation/test split (0-1)') + parser.add_argument('--number_input_channels', + type=int, + default=1, + help='Number of input channels') + parser.add_argument('--num_classes', + type=int, + default=1, + help='Number of output classes/channels') + parser.add_argument('--random_seed', + default=816, + help='Random seed for determinism') + parser.add_argument('--print_model', + action='store_true', + default=True, + help='Print the summary of the model layers') + parser.add_argument('--filters', + type=int, + default=16, + help='Number of filters in the first convolutional layer') + parser.add_argument('--use_upsampling', + action='store_true', + default=False, + help='Use upsampling instead of transposed convolution') + parser.add_argument('--use_batchnorm', + action='store_true', + default=True, + help='Use batch normalization') + parser.add_argument('--saved_model_name', + default='saved_model_3DUnet', + help='Save model to this path') + + args = parser.parse_args() + + print(args) + + brats_data = DatasetGenerator(args.crop_dim, + data_path=os.path.abspath(os.path.expanduser(args.data_path)), + batch_size=args.batch_size, + train_test_split=args.train_test_split, + validate_test_split=args.validate_test_split, + number_input_channels=args.number_input_channels, + num_classes=args.num_classes, + random_seed=args.random_seed + ) + + model = build_model([args.crop_dim, args.crop_dim, args.crop_dim, args.number_input_channels], + use_upsampling=args.use_upsampling, + n_cl_out=args.num_classes, + dropout=0.2, + print_summary=args.print_model, + seed=args.random_seed, + depth=5, + dropout_at=[2, 3], + initial_filters=args.filters, + batch_norm=args.use_batchnorm + ) + + model.compile(loss=dice_loss, + optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), + metrics=[dice_coef, soft_dice_coef] + ) + + checkpoint = tf.keras.callbacks.ModelCheckpoint(args.saved_model_name, + verbose=1, + save_best_only=True) + + # TensorBoard + import datetime + logs_dir = os.path.join('tensorboard_logs', + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')) + tb_logs = tf.keras.callbacks.TensorBoard(log_dir=logs_dir) + + callbacks = [checkpoint, tb_logs] + + history = model.fit(brats_data.ds_train, + validation_data=brats_data.ds_val, + epochs=args.epochs, + callbacks=callbacks) diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py b/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py new file mode 100644 index 0000000000..85e5c576c3 --- /dev/null +++ 
b/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py @@ -0,0 +1,99 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + + +import os + +from openfl.federated import TensorFlowDataLoader +from .dataloader import DatasetGenerator + + +class TensorFlowBratsDataLoader(TensorFlowDataLoader): + """TensorFlow Data Loader for the BraTS dataset.""" + + def __init__(self, data_path, batch_size=4, + crop_dim=64, percent_train=0.8, + pre_split_shuffle=True, + number_input_channels=1, + num_classes=1, + **kwargs): + """Initialize. + + Args: + data_path: The file path for the BraTS dataset + batch_size (int): The batch size to use + crop_dim (int): Crop the original image to this size on each dimension + percent_train (float): The percentage of the data to use for training (Default=0.8) + pre_split_shuffle (bool): True= shuffle the dataset before + performing the train/validate split (Default=True) + **kwargs: Additional arguments, passed to super init + + Returns: + Data loader with BraTS data + """ + super().__init__(batch_size, **kwargs) + + self.data_path = os.path.abspath(os.path.expanduser(data_path)) + self.batch_size = batch_size + self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] + self.num_input_channels = number_input_channels + self.num_classes = num_classes + + self.train_test_split = percent_train + + self.brats_data = DatasetGenerator(crop_dim, + data_path=data_path, + number_input_channels=number_input_channels, + batch_size=batch_size, + train_test_split=percent_train, + validate_test_split=0.5, + num_classes=num_classes, + random_seed=816) + + def get_feature_shape(self): + """ + Get the shape of an example feature array. + + Returns: + tuple: shape of an example feature array + """ + return tuple(self.brats_data.get_input_shape()) + + def get_train_loader(self, batch_size=None, num_batches=None): + """ + Get training data loader. + + Returns + ------- + loader object + """ + return self.brats_data.ds_train + + def get_valid_loader(self, batch_size=None): + """ + Get validation data loader. + + Returns: + loader object + """ + return self.brats_data.ds_val + + def get_train_data_size(self): + """ + Get total number of training samples. + + Returns: + int: number of training samples + """ + return self.brats_data.num_train + + def get_valid_data_size(self): + """ + Get total number of validation samples. 
+ + Returns: + int: number of validation samples + """ + return self.brats_data.num_val From 6b6d6280f4a2b6138a94329d395d64c9d018692a Mon Sep 17 00:00:00 2001 From: kta-intel Date: Fri, 12 Jul 2024 15:31:07 -0700 Subject: [PATCH 22/23] update tf_3dunet_brats to newer convention and add general plan Signed-off-by: kta-intel --- .../tf_3dunet_brats/plan/data.yaml | 4 +- .../tf_3dunet_brats/plan/defaults | 1 + .../plan/defaults/aggregator.yaml | 4 - .../plan/defaults/assigner.yaml | 9 -- .../plan/defaults/collaborator.yaml | 5 - .../plan/defaults/compression_pipeline.yaml | 1 - .../plan/defaults/data_loader.yaml | 1 - .../tf_3dunet_brats/plan/defaults/defaults | 2 - .../plan/defaults/network.yaml | 9 -- .../plan/defaults/task_runner.yaml | 1 - .../plan/defaults/tasks_fast_estimator.yaml | 22 ----- .../plan/defaults/tasks_keras.yaml | 23 ----- .../plan/defaults/tasks_tensorflow.yaml | 23 ----- .../plan/defaults/tasks_torch.yaml | 19 ---- .../tf_3dunet_brats/plan/plan.yaml | 81 +++++++-------- .../tf_3dunet_brats/src/dataloader.py | 95 +++++++++++++++++- .../tf_3dunet_brats/src/define_model.py | 65 +++++++----- .../src/{tf_3dunet_model.py => taskrunner.py} | 16 +-- .../src/tf_brats_dataloader.py | 99 ------------------- 19 files changed, 173 insertions(+), 307 deletions(-) create mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/defaults delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml rename openfl-workspace/tf_3dunet_brats/src/{tf_3dunet_model.py => taskrunner.py} (97%) delete mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py diff --git a/openfl-workspace/tf_3dunet_brats/plan/data.yaml b/openfl-workspace/tf_3dunet_brats/plan/data.yaml index d006410dda..ca127225af 100644 --- a/openfl-workspace/tf_3dunet_brats/plan/data.yaml +++ b/openfl-workspace/tf_3dunet_brats/plan/data.yaml @@ -11,6 +11,6 @@ # Symbolically link the ./data directory to whereever you have BraTS stored. # e.g. 
ln -s ~/data/MICCAI_BraTS2020_TrainingData ./data/one -one,~/MICCAI_BraTS2020_TrainingData/split_0 -two,~/MICCAI_BraTS2020_TrainingData/split_1 +collaborator1,../data/split/split_0 +collaborator2,../data/split/split_1 diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults b/openfl-workspace/tf_3dunet_brats/plan/defaults new file mode 100644 index 0000000000..5042bedbcf --- /dev/null +++ b/openfl-workspace/tf_3dunet_brats/plan/defaults @@ -0,0 +1 @@ +../../workspace/plan/defaults \ No newline at end of file diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml deleted file mode 100644 index d3ef6e5082..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml +++ /dev/null @@ -1,4 +0,0 @@ -template : openfl.component.Aggregator -settings : - db_store_rounds : 1 - diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml deleted file mode 100644 index 0b7e744475..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml +++ /dev/null @@ -1,9 +0,0 @@ -template : openfl.component.RandomGroupedAssigner -settings : - task_groups : - - name : train_and_validate - percentage : 1.0 - tasks : - - aggregated_model_validation - - train - - locally_tuned_model_validation diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml deleted file mode 100644 index a9c2e6eb7b..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml +++ /dev/null @@ -1,5 +0,0 @@ -template : openfl.component.Collaborator -settings : - opt_treatment : 'CONTINUE_LOCAL' - delta_updates : True - db_store_rounds : 1 diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml deleted file mode 100644 index a508f94fd2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.pipelines.NoCompressionPipeline diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml deleted file mode 100644 index 33accd5ab2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.federated.DataLoader diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults b/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml deleted file mode 100644 index 9528631585..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml +++ /dev/null @@ -1,9 +0,0 @@ -template: openfl.federation.Network -settings: - agg_addr : auto - agg_port : auto - hash_salt : auto - disable_tls : False - client_reconnect_interval : 5 - disable_client_auth : False - cert_folder : cert diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml deleted file mode 100644 index b162724693..0000000000 --- 
a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.federated.task_runner.CoreTaskRunner diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml deleted file mode 100644 index 1548d4b225..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml +++ /dev/null @@ -1,22 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - accuracy - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - accuracy -train: - function : train - kwargs : - batch_size : 32 - epochs : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml deleted file mode 100644 index 79d067d8d2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - accuracy - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - accuracy - -train: - function : train - kwargs : - batch_size : 32 - epochs : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml deleted file mode 100644 index 586a885b40..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - acc - -train: - function : train_batches - kwargs : - batch_size : 32 - num_batches : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml deleted file mode 100644 index a240c2003b..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml +++ /dev/null @@ -1,19 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : validate - kwargs : - apply: local - metrics : - - acc - -train: - function : train_batches - kwargs : - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml index b873de734b..fe5a1edcd9 100644 --- a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml +++ b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml @@ -1,83 +1,68 @@ -aggregator: - defaults: plan/defaults/aggregator.yaml - settings: - best_state_path: save/tf_3dunet_brats_best.pbuf - init_state_path: save/tf_3dunet_brats_init.pbuf - last_state_path: save/tf_3dunet_brats_latest.pbuf - db_store_rounds: 2 - rounds_to_train: 10 - template: openfl.component.Aggregator -assigner: - defaults: plan/defaults/assigner.yaml - settings: - task_groups: - - name: train_and_validate - percentage: 1.0 - tasks: - - aggregated_model_validation - - train - - locally_tuned_model_validation - template: openfl.component.RandomGroupedAssigner -collaborator: - defaults: 
plan/defaults/collaborator.yaml - settings: - db_store_rounds: 2 - delta_updates: true - opt_treatment: RESET - template: openfl.component.Collaborator +aggregator : + defaults : plan/defaults/aggregator.yaml + template : openfl.component.Aggregator + settings : + init_state_path : save/init.pbuf + last_state_path : save/latest.pbuf + best_state_path : save/best.pbuf + rounds_to_train : 10 + db_store_rounds : 2 + +collaborator : + defaults : plan/defaults/collaborator.yaml + template : openfl.component.Collaborator + settings : + delta_updates : true + opt_treatment : RESET + data_loader: defaults: plan/defaults/data_loader.yaml + template: src.dataloader.BratsDataloader settings: batch_size: 4 crop_dim: 64 num_classes: 1 number_input_channels: 1 percent_train: 0.8 - template: src.tf_brats_dataloader.TensorFlowBratsDataLoader -network: - defaults: plan/defaults/network.yaml - settings: - agg_addr: DESKTOP-AOKV1IJ.localdomain - agg_port: auto - cert_folder: cert - client_reconnect_interval: 5 - disable_client_auth: false - disable_tls: false - hash_salt: auto - template: openfl.federation.Network + task_runner: defaults: plan/defaults/task_runner.yaml + template: src.taskrunner.UNet3D settings: batch_norm: true batch_size: 4 depth: 4 initial_filters: 16 use_upsampling: false - template: src.tf_3dunet_model.TensorFlow3dUNet + +network : + defaults : plan/defaults/network.yaml + +assigner : + defaults : plan/defaults/assigner.yaml + tasks: + defaults : plan/defaults/tasks_tensorflow.yaml aggregated_model_validation: function: validate_task kwargs: apply: global batch_size: 4 metrics: - dice_coef - soft_dice_coef + - dice_coef + - soft_dice_coef locally_tuned_model_validation: function: validate_task kwargs: apply: local batch_size: 4 metrics: - dice_coef - soft_dice_coef - settings: {} + - dice_coef + - soft_dice_coef train: function: train_task kwargs: batch_size: 4 epochs: 1 metrics: - loss - num_batches: 1 + - loss diff --git a/openfl-workspace/tf_3dunet_brats/src/dataloader.py b/openfl-workspace/tf_3dunet_brats/src/dataloader.py index 80ac1cd004..afef27ca62 100644 --- a/openfl-workspace/tf_3dunet_brats/src/dataloader.py +++ b/openfl-workspace/tf_3dunet_brats/src/dataloader.py @@ -1,14 +1,103 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """You may copy this file as the starting point of your own model.""" -import os - import nibabel as nib import numpy as np +import os import tensorflow as tf +from openfl.federated import TensorFlowDataLoader + + +class BratsDataloader(TensorFlowDataLoader): + """TensorFlow Data Loader for the BraTS dataset.""" + + def __init__(self, data_path, batch_size=4, + crop_dim=64, percent_train=0.8, + number_input_channels=1, + num_classes=1, + **kwargs): + """Initialize. 
+ + Args: + data_path: The file path for the BraTS dataset + batch_size (int): The batch size to use + crop_dim (int): Crop the original image to this size on each dimension + percent_train (float): The percentage of the data to use for training (Default=0.8) + pre_split_shuffle (bool): True= shuffle the dataset before + performing the train/validate split (Default=True) + **kwargs: Additional arguments, passed to super init + + Returns: + Data loader with BraTS data + """ + super().__init__(batch_size, **kwargs) + + self.data_path = os.path.abspath(os.path.expanduser(data_path)) + self.batch_size = batch_size + self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] + self.num_input_channels = number_input_channels + self.num_classes = num_classes + + self.train_test_split = percent_train + + self.brats_data = DatasetGenerator(crop_dim, + data_path=data_path, + number_input_channels=number_input_channels, + batch_size=batch_size, + train_test_split=percent_train, + validate_test_split=0.5, + num_classes=num_classes, + random_seed=816) + + def get_feature_shape(self): + """ + Get the shape of an example feature array. + + Returns: + tuple: shape of an example feature array + """ + return tuple(self.brats_data.get_input_shape()) + + def get_train_loader(self, batch_size=None, num_batches=None): + """ + Get training data loader. + + Returns + ------- + loader object + """ + return self.brats_data.ds_train + + def get_valid_loader(self, batch_size=None): + """ + Get validation data loader. + + Returns: + loader object + """ + return self.brats_data.ds_val + + def get_train_data_size(self): + """ + Get total number of training samples. + + Returns: + int: number of training samples + """ + return self.brats_data.num_train + + def get_valid_data_size(self): + """ + Get total number of validation samples. + + Returns: + int: number of validation samples + """ + return self.brats_data.num_val + class DatasetGenerator: """Generate a TensorFlow data loader from the BraTS .nii.gz files.""" diff --git a/openfl-workspace/tf_3dunet_brats/src/define_model.py b/openfl-workspace/tf_3dunet_brats/src/define_model.py index 148d66e9ad..df972ee30f 100644 --- a/openfl-workspace/tf_3dunet_brats/src/define_model.py +++ b/openfl-workspace/tf_3dunet_brats/src/define_model.py @@ -8,11 +8,16 @@ def dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): """ - Sorenson Dice. + Calculate the Sorenson-Dice coefficient. - Returns - ------- - dice coefficient (float) + Args: + target (tf.Tensor): The ground truth binary labels. + prediction (tf.Tensor): The predicted binary labels, rounded to 0 or 1. + axis (tuple, optional): The axes along which to compute the coefficient, typically the spatial dimensions. + smooth (float, optional): A small constant added to numerator and denominator for numerical stability. + + Returns: + tf.Tensor: The mean Dice coefficient over the batch. """ prediction = tf.round(prediction) # Round to 0 or 1 @@ -27,13 +32,18 @@ def dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): def soft_dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): """ - Soft Sorenson Dice. + Calculate the soft Sorenson-Dice coefficient. Does not round the predictions to either 0 or 1. - Returns - ------- - soft dice coefficient (float) + Args: + target (tf.Tensor): The ground truth binary labels. + prediction (tf.Tensor): The predicted probabilities. + axis (tuple, optional): The axes along which to compute the coefficient, typically the spatial dimensions. 
+ smooth (float, optional): A small constant added to numerator and denominator for numerical stability. + + Returns: + tf.Tensor: The mean soft Dice coefficient over the batch. """ intersection = tf.reduce_sum(target * prediction, axis=axis) union = tf.reduce_sum(target + prediction, axis=axis) @@ -46,15 +56,20 @@ def soft_dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): def dice_loss(target, prediction, axis=(1, 2, 3), smooth=0.0001): """ - Sorenson (Soft) Dice loss. + Calculate the (Soft) Sorenson-Dice loss. Using -log(Dice) as the loss since it is better behaved. Also, the log allows avoidance of the division which can help prevent underflow when the numbers are very small. - Returns - ------- - dice loss (float) + Args: + target (tf.Tensor): The ground truth binary labels. + prediction (tf.Tensor): The predicted probabilities. + axis (tuple, optional): The axes along which to compute the loss, typically the spatial dimensions. + smooth (float, optional): A small constant added to numerator and denominator for numerical stability. + + Returns: + tf.Tensor: The mean Dice loss over the batch. """ intersection = tf.reduce_sum(prediction * target, axis=axis) p = tf.reduce_sum(prediction, axis=axis) @@ -70,29 +85,29 @@ def build_model(input_shape, n_cl_out=1, use_upsampling=False, dropout=0.2, - print_summary=True, seed=816, depth=5, dropout_at=(2, 3), initial_filters=16, - batch_norm=True, - **kwargs): - """Build the TensorFlow model. + batch_norm=True,): + """ + Build and compile 3D UNet model. Args: - input_tensor: input shape ot the model - use_upsampling (bool): True = use bilinear interpolation; - False = use transposed convolution (Default=False) + input_shape (List[int]): The shape of the data n_cl_out (int): Number of channels in output layer (Default=1) + use_upsampling (bool): True = use bilinear interpolation; + False = use transposed convolution (Default=False) dropout (float): Dropout percentage (Default=0.2) - print_summary (bool): True = print the model summary (Default = True) seed: random seed (Default=816) depth (int): Number of max pooling layers in encoder (Default=5) - dropout_at: Layers to perform dropout after (Default=[2,3]) - initial_filters (int): Number of filters in first convolutional - layer (Default=16) - batch_norm (bool): True = use batch normalization (Default=True) - **kwargs: Additional parameters to pass to the function + dropout_at (List[int]): Layers to perform dropout after (Default=[2,3]) + initial_filters (int): Number of filters in first convolutional layer (Default=16) + batch_norm (bool): Apply batch normalization (Default=True) + + Returns: + keras.src.engine.functional.Functional + A compiled Keras model ready for training. 
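For cross-checking the rewritten docstrings above, the per-sample quantities implemented by `dice_coef` and `dice_loss` can be written in standard notation, with target voxels $t_i$, predictions $p_i$, and smoothing constant $s$ (the code additionally averages these over the batch):

```latex
\mathrm{Dice}(t, p) = \frac{2\sum_i t_i\, p_i + s}{\sum_i t_i + \sum_i p_i + s},
\qquad
\mathcal{L}_{\mathrm{Dice}}(t, p)
  = -\log\Big(2\sum_i t_i\, p_i + s\Big)
    + \log\Big(\sum_i t_i + \sum_i p_i + s\Big)
  = -\log \mathrm{Dice}(t, p).
```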
""" if (input_shape[0] % (2**depth)) > 0: raise ValueError(f'Crop dimension must be a multiple of 2^(depth of U-Net) = {2**depth}') diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py b/openfl-workspace/tf_3dunet_brats/src/taskrunner.py similarity index 97% rename from openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py rename to openfl-workspace/tf_3dunet_brats/src/taskrunner.py index 58f2901b80..d15e569598 100644 --- a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py +++ b/openfl-workspace/tf_3dunet_brats/src/taskrunner.py @@ -3,13 +3,15 @@ """You may copy this file as the starting point of your own model.""" -import tensorflow as tf - +import argparse from openfl.federated import TensorFlowTaskRunner +import os from src.define_model import build_model, dice_coef, dice_loss, soft_dice_coef +from src.dataloader import DatasetGenerator +import tensorflow as tf -class TensorFlow3dUNet(TensorFlowTaskRunner): +class UNet3D(TensorFlowTaskRunner): """Initialize. Args: @@ -48,7 +50,6 @@ def create_model(self, n_cl_out=1, use_upsampling=False, dropout=0.2, - print_summary=True, seed=816, depth=5, dropout_at=(2, 3), @@ -70,7 +71,6 @@ def create_model(self, n_cl_out=n_cl_out, use_upsampling=use_upsampling, dropout=dropout, - print_summary=print_summary, seed=seed, depth=depth, dropout_at=dropout_at, @@ -98,12 +98,6 @@ def create_model(self, if __name__ == '__main__': - - from tf_brats_dataloader import DatasetGenerator - import os - - import argparse - parser = argparse.ArgumentParser( description='Train 3D U-Net model', add_help=True, formatter_class=argparse.ArgumentDefaultsHelpFormatter) diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py b/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py deleted file mode 100644 index 85e5c576c3..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - - -import os - -from openfl.federated import TensorFlowDataLoader -from .dataloader import DatasetGenerator - - -class TensorFlowBratsDataLoader(TensorFlowDataLoader): - """TensorFlow Data Loader for the BraTS dataset.""" - - def __init__(self, data_path, batch_size=4, - crop_dim=64, percent_train=0.8, - pre_split_shuffle=True, - number_input_channels=1, - num_classes=1, - **kwargs): - """Initialize. 
- - Args: - data_path: The file path for the BraTS dataset - batch_size (int): The batch size to use - crop_dim (int): Crop the original image to this size on each dimension - percent_train (float): The percentage of the data to use for training (Default=0.8) - pre_split_shuffle (bool): True= shuffle the dataset before - performing the train/validate split (Default=True) - **kwargs: Additional arguments, passed to super init - - Returns: - Data loader with BraTS data - """ - super().__init__(batch_size, **kwargs) - - self.data_path = os.path.abspath(os.path.expanduser(data_path)) - self.batch_size = batch_size - self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] - self.num_input_channels = number_input_channels - self.num_classes = num_classes - - self.train_test_split = percent_train - - self.brats_data = DatasetGenerator(crop_dim, - data_path=data_path, - number_input_channels=number_input_channels, - batch_size=batch_size, - train_test_split=percent_train, - validate_test_split=0.5, - num_classes=num_classes, - random_seed=816) - - def get_feature_shape(self): - """ - Get the shape of an example feature array. - - Returns: - tuple: shape of an example feature array - """ - return tuple(self.brats_data.get_input_shape()) - - def get_train_loader(self, batch_size=None, num_batches=None): - """ - Get training data loader. - - Returns - ------- - loader object - """ - return self.brats_data.ds_train - - def get_valid_loader(self, batch_size=None): - """ - Get validation data loader. - - Returns: - loader object - """ - return self.brats_data.ds_val - - def get_train_data_size(self): - """ - Get total number of training samples. - - Returns: - int: number of training samples - """ - return self.brats_data.num_train - - def get_valid_data_size(self): - """ - Get total number of validation samples. - - Returns: - int: number of validation samples - """ - return self.brats_data.num_val From dad006f146d82b6c130761ca2bb97e1fa0f9210a Mon Sep 17 00:00:00 2001 From: kta-intel Date: Fri, 12 Jul 2024 15:31:35 -0700 Subject: [PATCH 23/23] update docstrings Signed-off-by: kta-intel --- openfl-workspace/tf_2dunet/plan/data.yaml | 4 +- openfl-workspace/tf_2dunet/src/taskrunner.py | 275 ++++++++---------- .../tf_cnn_histology/src/taskrunner.py | 35 +-- .../tf_cnn_mnist/src/taskrunner.py | 27 +- 4 files changed, 149 insertions(+), 192 deletions(-) diff --git a/openfl-workspace/tf_2dunet/plan/data.yaml b/openfl-workspace/tf_2dunet/plan/data.yaml index a04741e2b7..0b27947ad4 100644 --- a/openfl-workspace/tf_2dunet/plan/data.yaml +++ b/openfl-workspace/tf_2dunet/plan/data.yaml @@ -4,5 +4,5 @@ # all keys under 'collaborators' correspond to a specific collaborator name; the corresponding dictionary has data_name, data_path pairs. # Note that in the mnist case we do not store the data locally, and the data_path is used to pass an integer that helps the data object # construct the shard of the mnist dataset to be used for this collaborator. 
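As context for the `data.yaml` rows edited below: each non-comment row simply pairs a collaborator name with the local path of its data shard. A hypothetical parsing sketch, for illustration only (OpenFL's own plan parser is the authority here):

```python
# Illustration only: read plan/data.yaml rows into {collaborator: data_path},
# skipping blank lines and '#' comments. Rows have the form 'name,path'.
shards = {}
with open('plan/data.yaml') as f:
    for line in f:
        line = line.strip()
        if line and not line.startswith('#'):
            name, path = line.split(',', 1)
            shards[name] = path
print(shards)
```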
-collaborator1,~/MICCAI_BraTS_2019_Data_Training/HGG/0 -collaborator2,~/MICCAI_BraTS_2019_Data_Training/HGG/1 +collaborator1,../data/MICCAI_BraTS_2019_Data_Training/HGG/0 +collaborator2,../data/MICCAI_BraTS_2019_Data_Training/HGG/1 diff --git a/openfl-workspace/tf_2dunet/src/taskrunner.py b/openfl-workspace/tf_2dunet/src/taskrunner.py index a34a8c579a..4f8ff86b16 100644 --- a/openfl-workspace/tf_2dunet/src/taskrunner.py +++ b/openfl-workspace/tf_2dunet/src/taskrunner.py @@ -10,70 +10,115 @@ from openfl.federated import TensorFlowTaskRunner class UNet2D(TensorFlowTaskRunner): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - def __init__(self, initial_filters=16, depth=5, batch_norm=True, use_upsampling=False, **kwargs): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ super().__init__(**kwargs) - self.model = self.create_model( + self.model = self.build_model( input_shape=self.feature_shape, n_cl_out=self.data_loader.num_classes, initial_filters=initial_filters, use_upsampling=use_upsampling, depth=depth, batch_norm=batch_norm, - **kwargs ) self.initialize_tensorkeys_for_functions() self.model.summary(print_fn=self.logger.info, line_length=120) - def create_model(self, - input_shape, - n_cl_out=1, - use_upsampling=False, - dropout=0.2, - print_summary=True, - seed=816, - depth=5, - dropout_at=(2, 3), - initial_filters=16, - batch_norm=True, - **kwargs): - """Create the TensorFlow 3D U-Net CNN model. + def build_model(self, + input_shape, + n_cl_out=1, + use_upsampling=False, + dropout=0.2, + seed=816, + depth=5, + dropout_at=(2, 3), + initial_filters=16, + batch_norm=True): + """ + Build and compile 2D UNet model. Args: - input_shape (list): input shape of the data - n_cl_out (int): Number of output classes in label (Default=1) - **kwargs: Additional parameters to pass to the function - + input_shape (List[int]): The shape of the data + n_cl_out (int): Number of channels in output layer (Default=1) + use_upsampling (bool): True = use bilinear interpolation; + False = use transposed convolution (Default=False) + dropout (float): Dropout percentage (Default=0.2) + seed: random seed (Default=816) + depth (int): Number of max pooling layers in encoder (Default=5) + dropout_at (List[int]): Layers to perform dropout after (Default=[2,3]) + initial_filters (int): Number of filters in first convolutional layer (Default=16) + batch_norm (bool): Apply batch normalization (Default=True) + + Returns: + keras.src.engine.functional.Functional + A compiled Keras model ready for training. 
""" - - model = build_model(input_shape, - n_cl_out=n_cl_out, - use_upsampling=use_upsampling, - dropout=dropout, - print_summary=print_summary, - seed=seed, - depth=depth, - dropout_at=dropout_at, - initial_filters=initial_filters, - batch_norm=batch_norm) + + if (input_shape[0] % (2**depth)) > 0: + raise ValueError(f'Crop dimension must be a multiple of 2^(depth of U-Net) = {2**depth}') + + inputs = tf.keras.layers.Input(input_shape, name='brats_mr_image') + + activation = tf.keras.activations.relu + + params = {'kernel_size': (3, 3), 'activation': activation, + 'padding': 'same', + 'kernel_initializer': tf.keras.initializers.he_uniform(seed=seed)} + + convb_layers = {} + + net = inputs + filters = initial_filters + for i in range(depth): + name = f'conv{i + 1}a' + net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net) + if i in dropout_at: + net = tf.keras.layers.Dropout(dropout)(net) + name = f'conv{i + 1}b' + net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net) + if batch_norm: + net = tf.keras.layers.BatchNormalization()(net) + convb_layers[name] = net + # only pool if not last level + if i != depth - 1: + name = f'pool{i + 1}' + net = tf.keras.layers.MaxPooling2D(name=name, pool_size=(2, 2))(net) + filters *= 2 + + # do the up levels + filters //= 2 + for i in range(depth - 1): + if use_upsampling: + up = tf.keras.layers.UpSampling2D( + name=f'up{depth + i + 1}', size=(2, 2))(net) + else: + up = tf.keras.layers.Conv2DTranspose(name=f'transConv{depth + i + 1}', + filters=filters, + kernel_size=(2, 2), + strides=(2, 2), + padding='same')(net) + net = tf.keras.layers.concatenate( + [up, convb_layers[f'conv{depth - i - 1}b']], + axis=-1 + ) + net = tf.keras.layers.Conv2D( + name=f'conv{depth + i + 1}a', + filters=filters, **params)(net) + net = tf.keras.layers.Conv2D( + name=f'conv{depth + i + 1}b', + filters=filters, **params)(net) + filters //= 2 + + net = tf.keras.layers.Conv2D(name='prediction', filters=n_cl_out, + kernel_size=(1, 1), + activation='sigmoid')(net) + + model = tf.keras.models.Model(inputs=[inputs], outputs=[net]) model.compile( loss=dice_loss, @@ -84,18 +129,22 @@ def create_model(self, return model def train_(self, batch_generator, metrics: list = None, **kwargs): - """Train single epoch. + """ + Train single epoch. Override this function for custom training. Args: - batch_generator: Generator of training batches. + batch_generator (generator): Generator of training batches. Each batch is a tuple of N train images and N train labels where N is the batch size of the DataLoader of the current TaskRunner instance. + metrics (List[str]): A list of metric names to compute and save + **kwargs (dict): Additional keyword arguments - epochs: Number of epochs to train. - metrics: Names of metrics to save. + Returns: + list: Metric objects containing the computed metrics """ + import pdb; pdb.set_trace() history = self.model.fit(batch_generator, verbose=1, **kwargs) @@ -108,11 +157,16 @@ def train_(self, batch_generator, metrics: list = None, **kwargs): def dice_coef(target, prediction, axis=(1, 2), smooth=0.0001): """ - Sorenson Dice. + Calculate the Sorenson-Dice coefficient. + + Args: + target (tf.Tensor): The ground truth binary labels. + prediction (tf.Tensor): The predicted binary labels, rounded to 0 or 1. + axis (tuple, optional): The axes along which to compute the coefficient, typically the spatial dimensions. + smooth (float, optional): A small constant added to numerator and denominator for numerical stability. 

-    Returns
-    -------
-    dice coefficient (float)
+    Returns:
+        tf.Tensor: The mean Dice coefficient over the batch.
     """
     prediction = tf.round(prediction)  # Round to 0 or 1
@@ -127,13 +180,18 @@ def dice_coef(target, prediction, axis=(1, 2), smooth=0.0001):

 def soft_dice_coef(target, prediction, axis=(1, 2), smooth=0.0001):
     """
-    Soft Sorenson Dice.
+    Calculate the soft Sorenson-Dice coefficient.

     Does not round the predictions to either 0 or 1.

-    Returns
-    -------
-    soft dice coefficient (float)
+    Args:
+        target (tf.Tensor): The ground truth binary labels.
+        prediction (tf.Tensor): The predicted probabilities.
+        axis (tuple, optional): The axes along which to compute the coefficient, typically the spatial dimensions.
+        smooth (float, optional): A small constant added to numerator and denominator for numerical stability.
+
+    Returns:
+        tf.Tensor: The mean soft Dice coefficient over the batch.
     """
     intersection = tf.reduce_sum(target * prediction, axis=axis)
     union = tf.reduce_sum(target + prediction, axis=axis)
@@ -146,15 +204,20 @@ def soft_dice_coef(target, prediction, axis=(1, 2), smooth=0.0001):

 def dice_loss(target, prediction, axis=(1, 2), smooth=0.0001):
     """
-    Sorenson (Soft) Dice loss.
+    Calculate the (Soft) Sorenson-Dice loss.

     Using -log(Dice) as the loss since it is better behaved.
     Also, the log allows avoidance of the division which can help
     prevent underflow when the numbers are very small.

-    Returns
-    -------
-    dice loss (float)
+    Args:
+        target (tf.Tensor): The ground truth binary labels.
+        prediction (tf.Tensor): The predicted probabilities.
+        axis (tuple, optional): The axes along which to compute the loss, typically the spatial dimensions.
+        smooth (float, optional): A small constant added to numerator and denominator for numerical stability.
+
+    Returns:
+        tf.Tensor: The mean Dice loss over the batch.
     """
     intersection = tf.reduce_sum(prediction * target, axis=axis)
     p = tf.reduce_sum(prediction, axis=axis)
@@ -163,95 +226,4 @@ def dice_loss(target, prediction, axis=(1, 2), smooth=0.0001):
     denominator = tf.reduce_mean(t + p + smooth)
     dice_loss = -tf.math.log(2. * numerator) + tf.math.log(denominator)

-    return dice_loss
-
-
-def build_model(input_shape,
-                n_cl_out=1,
-                use_upsampling=False,
-                dropout=0.2,
-                seed=816,
-                depth=5,
-                dropout_at=(2, 3),
-                initial_filters=16,
-                batch_norm=True,
-                **kwargs):
-    """Build the TensorFlow model.
-
-    Args:
-        input_tensor: input shape ot the model
-        use_upsampling (bool): True = use bilinear interpolation;
-                               False = use transposed convolution (Default=False)
-        n_cl_out (int): Number of channels in output layer (Default=1)
-        dropout (float): Dropout percentage (Default=0.2)
-        print_summary (bool): True = print the model summary (Default = True)
-        seed: random seed (Default=816)
-        depth (int): Number of max pooling layers in encoder (Default=5)
-        dropout_at: Layers to perform dropout after (Default=[2,3])
-        initial_filters (int): Number of filters in first convolutional
-                               layer (Default=16)
-        batch_norm (bool): True = use batch normalization (Default=True)
-        **kwargs: Additional parameters to pass to the function
-    """
-    if (input_shape[0] % (2**depth)) > 0:
-        raise ValueError(f'Crop dimension must be a multiple of 2^(depth of U-Net) = {2**depth}')
-
-    inputs = tf.keras.layers.Input(input_shape, name='brats_mr_image')
-
-    activation = tf.keras.activations.relu
-
-    params = {'kernel_size': (3, 3), 'activation': activation,
-              'padding': 'same',
-              'kernel_initializer': tf.keras.initializers.he_uniform(seed=seed)}
-
-    convb_layers = {}
-
-    net = inputs
-    filters = initial_filters
-    for i in range(depth):
-        name = f'conv{i + 1}a'
-        net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net)
-        if i in dropout_at:
-            net = tf.keras.layers.Dropout(dropout)(net)
-        name = f'conv{i + 1}b'
-        net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net)
-        if batch_norm:
-            net = tf.keras.layers.BatchNormalization()(net)
-        convb_layers[name] = net
-        # only pool if not last level
-        if i != depth - 1:
-            name = f'pool{i + 1}'
-            net = tf.keras.layers.MaxPooling2D(name=name, pool_size=(2, 2))(net)
-            filters *= 2
-
-    # do the up levels
-    filters //= 2
-    for i in range(depth - 1):
-        if use_upsampling:
-            up = tf.keras.layers.UpSampling2D(
-                name=f'up{depth + i + 1}', size=(2, 2))(net)
-        else:
-            up = tf.keras.layers.Conv2DTranspose(name=f'transConv{depth + i + 1}',
-                                                 filters=filters,
-                                                 kernel_size=(2, 2),
-                                                 strides=(2, 2),
-                                                 padding='same')(net)
-        net = tf.keras.layers.concatenate(
-            [up, convb_layers[f'conv{depth - i - 1}b']],
-            axis=-1
-        )
-        net = tf.keras.layers.Conv2D(
-            name=f'conv{depth + i + 1}a',
-            filters=filters, **params)(net)
-        net = tf.keras.layers.Conv2D(
-            name=f'conv{depth + i + 1}b',
-            filters=filters, **params)(net)
-        filters //= 2
-
-    net = tf.keras.layers.Conv2D(name='prediction', filters=n_cl_out,
-                                 kernel_size=(1, 1),
-                                 activation='sigmoid')(net)
-
-    model = tf.keras.models.Model(inputs=[inputs], outputs=[net])
-
-    return model
\ No newline at end of file
+    return dice_loss
\ No newline at end of file
diff --git a/openfl-workspace/tf_cnn_histology/src/taskrunner.py b/openfl-workspace/tf_cnn_histology/src/taskrunner.py
index d048a34cb2..0f33c45317 100644
--- a/openfl-workspace/tf_cnn_histology/src/taskrunner.py
+++ b/openfl-workspace/tf_cnn_histology/src/taskrunner.py
@@ -19,13 +19,6 @@ class CNN(TensorFlowTaskRunner):
     """

     def __init__(self, **kwargs):
-        """
-        Initialize.
-
-        Args:
-            **kwargs: Additional parameters to pass to the function
-
-        """
         super().__init__(**kwargs)

         self.model = self.build_model(
@@ -45,16 +38,16 @@ def build_model(self,
                     num_classes,
                     **kwargs):
         """
-        Define the model architecture.
+        Build and compile a convolutional neural network model.

         Args:
-            input_shape (numpy.ndarray): The shape of the data
+            input_shape (List[int]): The shape of the data
             num_classes (int): The number of classes of the dataset
             **kwargs: Additional parameters to pass to the function

         Returns:
             keras.src.engine.functional.Functional
-
+                A compiled Keras model ready for training.
         """

         # Define Model using Functional API
@@ -106,29 +99,21 @@ def build_model(self,

         return model

     def train_(self, batch_generator, metrics: list = None, **kwargs):
-        """Train single epoch.
+        """
+        Train single epoch.

         Override this function for custom training.

         Args:
-            batch_generator: Generator of training batches.
+            batch_generator (generator): Generator of training batches.
                 Each batch is a tuple of N train images and
                 N train labels where N is the batch size
                 of the DataLoader of the current TaskRunner instance.
+            metrics (List[str]): A list of metric names to compute and save
+            **kwargs (dict): Additional keyword arguments

-            epochs: Number of epochs to train.
-            metrics: Names of metrics to save.
+        Returns:
+            list: Metric objects containing the computed metrics
         """
-        if metrics is None:
-            metrics = []
-
-        model_metrics_names = self.model.metrics_names
-
-        for param in metrics:
-            if param not in model_metrics_names:
-                raise ValueError(
-                    f'TensorFlowTaskRunner does not support specifying new metrics. '
-                    f'Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}'
-                )

         history = self.model.fit(batch_generator,
                                  verbose=1,
diff --git a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py
index 9211b3ce73..bb3cced194 100644
--- a/openfl-workspace/tf_cnn_mnist/src/taskrunner.py
+++ b/openfl-workspace/tf_cnn_mnist/src/taskrunner.py
@@ -14,12 +14,6 @@ class CNN(TensorFlowTaskRunner):
     """A basic convolutional neural network model."""

     def __init__(self, **kwargs):
-        """
-        Initialize.
-
-        Args:
-            **kwargs: Additional parameters to pass to the function
-        """
         super().__init__(**kwargs)

         self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs)
@@ -36,15 +30,16 @@ def build_model(self,
                     num_classes,
                     **kwargs):
         """
-        Define the model architecture.
+        Build and compile a convolutional neural network model.

         Args:
-            input_shape (numpy.ndarray): The shape of the data
+            input_shape (List[int]): The shape of the data
             num_classes (int): The number of classes of the dataset
+            **kwargs (dict): Additional keyword arguments

         Returns:
-            tensorflow.python.keras.engine.sequential.Sequential
-
+            tf.keras.models.Sequential
+                A compiled Keras Sequential model ready for training.
         """

         model = tf.keras.models.Sequential([
@@ -71,17 +66,20 @@ def build_model(self,

         return model

     def train_(self, batch_generator, metrics: list = None, **kwargs):
-        """Train single epoch.
+        """
+        Train single epoch.

         Override this function for custom training.

         Args:
-            batch_generator: Generator of training batches.
+            batch_generator (generator): Generator of training batches.
                 Each batch is a tuple of N train images and
                 N train labels where N is the batch size
                 of the DataLoader of the current TaskRunner instance.
+            metrics (List[str]): A list of metric names to compute and save
+            **kwargs (dict): Additional keyword arguments

-            epochs: Number of epochs to train.
-            metrics: Names of metrics to save.
+        Returns:
+            list: Metric objects containing the computed metrics
         """

         history = self.model.fit(batch_generator,
@@ -91,4 +89,5 @@ def train_(self, batch_generator, metrics: list = None, **kwargs):
         for metric in metrics:
             value = np.mean([history.history[metric]])
             results.append(Metric(name=metric, value=np.array(value)))
+
+        return results
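
As a quick, standalone sanity check of the -log(Dice) formulation documented in the
tf_2dunet taskrunner above, the short sketch below re-implements the same dice_loss
math on toy tensors. It is illustrative only and not part of the patch: the 4x4 mask
shapes and print statements are arbitrary, and TensorFlow 2.x is assumed (the
workspaces above pin tensorflow==2.13).

import tensorflow as tf

def dice_loss(target, prediction, axis=(1, 2), smooth=0.0001):
    # Same -log(Dice) formulation as openfl-workspace/tf_2dunet/src/taskrunner.py.
    intersection = tf.reduce_sum(prediction * target, axis=axis)
    p = tf.reduce_sum(prediction, axis=axis)
    t = tf.reduce_sum(target, axis=axis)
    numerator = tf.reduce_mean(intersection + smooth)
    denominator = tf.reduce_mean(t + p + smooth)
    return -tf.math.log(2. * numerator) + tf.math.log(denominator)

# Toy batch of one 4x4 single-channel mask; shapes are illustrative only.
target = tf.ones((1, 4, 4, 1))
print(float(dice_loss(target, target)))        # ~0.0: perfect overlap
print(float(dice_loss(target, 0.5 * target)))  # ~log(1.5) ~= 0.405: soft predictions penalized

A perfect prediction drives the loss to approximately zero while softer predictions
increase it, which matches the behavior the updated docstrings describe.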