Merge branch 'master' into hf_cli4

pranayasinghcsmpl · Aug 19, 2024 · f8c3e6a · f8c3e6a
2 parents 32a206e + e9d92ae
commit f8c3e6a
Show file tree

Hide file tree

Showing 16 changed files with 44 additions and 31 deletions.
diff --git a/.devcontainer/onCreateCommand.sh b/.devcontainer/onCreateCommand.sh
@@ -6,4 +6,4 @@ pip install wheel
 pip install openvino-dev==2023.0.1 # [OPTIONAL] to generate optimized models for inference
 pip install mlcube_docker          # [OPTIONAL] to deploy GaNDLF models as MLCube-compliant Docker containers
 pip install medmnist==2.1.0
-pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cpu
diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh
@@ -6,7 +6,7 @@
 # if running on a GPU machine, install the GPU version of pytorch
 if command -v nvidia-smi &> /dev/null
 then
-	pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118
+	pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu121
 fi
 
 pip install -e .

diff --git a/.github/workflows/mlcube-test.yml b/.github/workflows/mlcube-test.yml
@@ -70,7 +70,7 @@ jobs:
           python -m pip install --upgrade pip==24.0
           python -m pip install wheel
           python -m pip install openvino-dev==2023.0.1 mlcube_docker
-          pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+          pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cpu
           pip install -e .
       - name: Run mlcube deploy tests
         working-directory: ./testing

diff --git a/.github/workflows/openfl-test.yml b/.github/workflows/openfl-test.yml
@@ -70,7 +70,7 @@ jobs:
           sudo apt-get install libvips libvips-tools -y
           python -m pip install --upgrade pip==24.0
           python -m pip install wheel
-          pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+          pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cpu
           pip install -e .
       - name: Run generic unit tests to download data and construct CSVs
         if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change

diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
@@ -71,7 +71,7 @@ jobs:
           python -m pip install --upgrade pip==24.0
           python -m pip install wheel
           python -m pip install openvino-dev==2023.0.1 mlcube_docker
-          pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+          pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cpu
           pip install -e .
       - name: Run generic unit tests
         if: steps.changed-files-specific.outputs.only_modified == 'false' # Run on any non-docs change

diff --git a/Dockerfile-CPU b/Dockerfile-CPU
@@ -9,7 +9,7 @@ RUN add-apt-repository ppa:deadsnakes/ppa
 RUN apt-get update &&  apt-get install -y  python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1
 RUN python3.9 -m pip install --upgrade pip==24.0
 # EXPLICITLY install cpu versions of torch/torchvision (not all versions have +cpu modes on PyPI...)
-RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+RUN python3.9 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cpu
 RUN python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 
 # Do some dependency installation separately here to make layer caching more efficient

diff --git a/Dockerfile-CUDA11.8 b/Dockerfile-CUDA11.8
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y software-properties-common
 RUN add-apt-repository ppa:deadsnakes/ppa
 RUN apt-get update &&  apt-get install -y  python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1
 RUN python3.9 -m pip install --upgrade pip==24.0
-RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118
+RUN python3.9 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu118
 RUN python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 
 # Do some dependency installation separately here to make layer caching more efficient

diff --git a/Dockerfile-CUDA12.1 b/Dockerfile-CUDA12.1
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y software-properties-common
 RUN add-apt-repository ppa:deadsnakes/ppa
 RUN apt-get update &&  apt-get install -y  python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1
 RUN python3.9 -m pip install --upgrade pip==24.0
-RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121
+RUN python3.9 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu121
 RUN python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 
 # Do some dependency installation separately here to make layer caching more efficient

diff --git a/Dockerfile-ROCm b/Dockerfile-ROCm
@@ -1,4 +1,4 @@
-FROM rocm/pytorch:rocm5.7_ubuntu20.04_py3.9_pytorch_2.0.1
+FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch
 LABEL github="https://github.com/mlcommons/GaNDLF"
 LABEL docs="https://mlcommons.github.io/GaNDLF/"
 LABEL version=1.0
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y software-properties-common
 RUN add-apt-repository ppa:deadsnakes/ppa
 RUN apt-get update &&  apt-get install -y  python3.9 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.9-dev libffi-dev libgl1
 RUN python3.9 -m pip install --upgrade pip==24.0
-RUN python3.9 -m pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/rocm5.7
+RUN python3.9 -m pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/rocm6.0
 RUN python3.9 -m pip install --upgrade pip && python3.9 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 RUN apt-get update && apt-get install -y libgl1
 

diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py
@@ -337,11 +337,16 @@ def validate_network(
                     if ext in [".jpg", ".jpeg", ".png"]:
                         pred_mask = pred_mask.astype(np.uint8)
 
-                    ## special case for 2D
-                    if image.shape[-1] > 1:
-                        result_image = sitk.GetImageFromArray(pred_mask)
-                    else:
-                        result_image = sitk.GetImageFromArray(pred_mask.squeeze(0))
+                    pred_mask = (
+                        pred_mask.squeeze(0)
+                        if pred_mask.shape[0] == 1
+                        else (
+                            pred_mask.squeeze(-1)
+                            if pred_mask.shape[-1] == 1
+                            else pred_mask
+                        )
+                    )
+                    result_image = sitk.GetImageFromArray(pred_mask)
                     result_image.CopyInformation(img_for_metadata)
 
                     # this handles cases that need resampling/resizing

diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -3,7 +3,7 @@ This document will help you get started with GaNDLF using a few representative e
 
 ## Installation
 
-Please follow the [installation instructions](./setup.md) to install GaNDLF. When the installation is complete, you should end up with the shell that looks like the following, which indicates that the GaNDLF virtual environment has been activated:
+Follow the [installation instructions](./setup.md) to install GaNDLF. When the installation is complete, you should end up with the following shell, which indicates that the GaNDLF virtual environment has been activated:
 
 ```bash
 (venv_gandlf) $> ### subsequent commands go here
@@ -23,7 +23,7 @@ A codespace will open in a web-based version of [Visual Studio Code](https://cod
 
 ## Sample Data
 
-Sample data will be used for our extensive automated unit tests in all examples. You can download the sample data from [this link](https://upenn.box.com/shared/static/y8162xkq1zz5555ye3pwadry2m2e39bs.zip). Example of how to do this from the terminal is shown below:
+Sample data will be used for our extensive automated unit tests in all examples. You can download the sample data from [this link](https://upenn.box.com/shared/static/y8162xkq1zz5555ye3pwadry2m2e39bs.zip). An example is shown below:
 
 ```bash
 # continue from previous shell

diff --git a/docs/index.md b/docs/index.md
@@ -1,9 +1,14 @@
 # GaNDLF
 
-The **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework (GaNDLF) for segmentation and classification.
+The **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework (GaNDLF) for reproducible segmentation and classification.
 
 ## Why use GaNDLF?
+GaNDLF was developed to lower the barrier to AI, enabling reproducibility, translation, and deployment.
+As an out-of-the-box solution, GaNDLF alleviates the need to build from scratch. Users may kickstart their project
+by modifying only **a configuration (config) file** that provides guidelines for the envisioned pipeline
+and **CSV inputs** that describe the training data.
 
+## Range of GaNDLF functionalities:
 - Supports multiple
     - Deep Learning model architectures
     - Channels/modalities 

diff --git a/docs/setup.md b/docs/setup.md
@@ -30,13 +30,13 @@ GaNDLF's primary computational foundation is built on PyTorch, and as such it su
 (venv_gandlf) $> ### subsequent commands go here
 ### PyTorch installation - https://pytorch.org/get-started/previous-versions/#v231
 ## CUDA 12.1
-# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121
+# (venv_gandlf) $> pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu121
 ## CUDA 11.8
-# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118
-## ROCm 5.7
-# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/rocm5.7
+# (venv_gandlf) $> pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu118
+## ROCm 6.0
+# (venv_gandlf) $> pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/rocm6.0
 ## CPU-only
-# (venv_gandlf) $> pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+# (venv_gandlf) $> pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cpu
 ```
 
 ### Optional Dependencies 

diff --git a/docs/usage.md b/docs/usage.md
@@ -24,7 +24,7 @@ Please follow the [installation instructions](./setup.md#installation) to instal
 
 ### Anonymize Data
 
-A major reason why one would want to anonymize data is to ensure that trained models do not inadvertently do not encode protect health information [[1](https://doi.org/10.1145/3436755),[2](https://doi.org/10.1038/s42256-020-0186-1)]. GaNDLF can anonymize single images or a collection of images using the `gandlf anonymizer` command. It can be used as follows:
+A major reason why one would want to anonymize data is to ensure that trained models do not inadvertently encode protected health information [[1](https://doi.org/10.1145/3436755),[2](https://doi.org/10.1038/s42256-020-0186-1)]. GaNDLF can anonymize one or multiple images using the `gandlf anonymizer` command as follows:
 
 ```bash
 # continue from previous shell
@@ -81,7 +81,7 @@ Once these files are present, the patch miner can be run using the following com
 
 ### Running preprocessing before training/inference (optional)
 
-Running preprocessing before training/inference is optional, but recommended. It will significantly reduce the computational footprint during training/inference at the expense of larger storage requirements. To run preprocessing before training/inference you can use the following command, which will save the processed data in `./experiment_0/output_dir/` with a new data CSV and the corresponding model configuration:
+Running preprocessing before training/inference is optional, but recommended. It will significantly reduce the computational footprint during training/inference at the expense of larger storage requirements. Use the following command, which will save the processed data in `./experiment_0/output_dir/` with a new data CSV and the corresponding model configuration:
 
 ```bash
 # continue from previous shell
@@ -108,7 +108,7 @@ N,/full/path/N/0.nii.gz,/full/path/N/1.nii.gz,...,/full/path/N/X.nii.gz,/full/pa
 **Notes:**
 
 - `Channel` can be substituted with `Modality` or `Image`
-- `Label` can be substituted with `Mask` or `Segmentation`and is used to specify the annotation file for segmentation models
+- `Label` can be substituted with `Mask` or `Segmentation` and is used to specify the annotation file for segmentation models
 - For classification/regression, add a column called `ValueToPredict`. Currently, we are supporting only a single value prediction per model.
 - Only a single `Label` or `ValueToPredict` header should be passed 
     - Multiple segmentation classes should be in a single file with unique label numbers.
@@ -152,14 +152,14 @@ The following command shows how the script works:
 (venv_gandlf) $> gandlf construct-csv \
   # -h, --help         Show help message and exit
   -i $DATA_DIRECTORY \ # this is the main data directory
-  -c _t1.nii.gz,_t1ce.nii.gz,_t2.nii.gz,_flair.nii.gz \ # an example image identifier for 4 structural brain MR sequences for BraTS, and can be changed based on your data
+  -c _t1.nii.gz,_t1ce.nii.gz,_t2.nii.gz,_flair.nii.gz \ # an example image identifier for 4 structural brain MR sequences for BraTS, and can be changed based on your data. In the simplest case of a single modality, a ".nii.gz" will suffice
   -l _seg.nii.gz \ # an example label identifier - not needed for regression/classification, and can be changed based on your data
   -o ./experiment_0/train_data.csv # output CSV to be used for training
 ```
 
 **Notes**:
 
-- For classification/regression, add a column called `ValueToPredict`. Currently, we are supporting only a single value prediction per model.
+- For classification/regression, add a column called `ValueToPredict`. Currently, we support only a single value prediction per model.
 - `SubjectID` or `PatientName` is used to ensure that the randomized split is done per-subject rather than per-image.
 - For data arrangement different to what is described above, a customized script will need to be written to generate the CSV, or you can enter the data manually into the CSV. 
 
@@ -179,13 +179,15 @@ To split the data CSV into training, validation, and testing CSVs, the `gandlf s
 
 ## Customize the Training
 
-GaNDLF requires a YAML-based configuration that controls various aspects of the training/inference process. There are multiple samples for users to start as their baseline for further customization. A list of the available samples is presented as follows:
+Adapting GaNDLF to your needs boils down to modifying a YAML-based configuration file that controls the parameters of training and inference. Below is a list of available samples that users can start from as a baseline for further customization:
 
-- [Sample showing all the available options](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_all_options.yaml)
 - [Segmentation example](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_segmentation_brats.yaml)
 - [Regression example](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_regression.yaml)
 - [Classification example](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_classification.yaml)
 
+To see **all the parameters** that can be set in a GaNDLF config, consult the following file:
+- [All available options](https://github.com/mlcommons/GaNDLF/blob/master/samples/config_all_options.yaml)
+
 **Notes**: 
 
 - More details on the configuration options are available in the [customization page](customize.md).

diff --git a/samples/config_classification.yaml b/samples/config_classification.yaml
@@ -14,6 +14,7 @@ model:
     final_layer: None, # can be either sigmoid, softmax or none (none == regression)
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
+    class_list: [0,1,2], # this is to classify 3 classes, denoted by 0,1,2 in the main csv file - change as needed
   }
 # metrics to evaluate the validation performance
 metrics:

diff --git a/setup.py b/setup.py
@@ -37,7 +37,7 @@
 # specifying version for `black` separately because it is also used to [check for lint](https://github.com/mlcommons/GaNDLF/blob/master/.github/workflows/black.yml)
 black_version = "23.11.0"
 requirements = [
-    "torch==2.2.1",
+    "torch==2.3.1",
     f"black=={black_version}",
     "numpy==1.25.0",
     "scipy",