Merge branch 'master' into hf_cli4

pranayasinghcsmpl · Aug 20, 2024 · 4651320 · 4651320
2 parents f8c3e6a + 41a0705
commit 4651320
Show file tree

Hide file tree

Showing 18 changed files with 141 additions and 13 deletions.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -23,3 +23,4 @@ Note that if a box is left unchecked, PR merges will take longer than usual.
 - [ ] [Usage documentation](https://github.com/mlcommons/GaNDLF/blob/master/docs) has been updated, if appropriate.
 - [ ] Tests added or modified to [cover the changes](https://app.codecov.io/gh/mlcommons/GaNDLF); if coverage is reduced, please give explanation.
 - [ ] If customized dependency installation is required (i.e., a separate `pip install` step is needed for PR to be functional), please ensure it is reflected in all the files that control the CI, namely: [python-test.yml](https://github.com/mlcommons/GaNDLF/blob/master/.github/workflows/python-test.yml), and all docker files [[1](https://github.com/mlcommons/GaNDLF/blob/master/Dockerfile-CPU),[2](https://github.com/mlcommons/GaNDLF/blob/devcontainer_build_fix/Dockerfile-CUDA11.6),[3](https://github.com/mlcommons/GaNDLF/blob/master/Dockerfile-ROCm)].
+- [ ] The `logging` library is being used and no `print` statements are left.
diff --git a/GANDLF/entrypoints/anonymizer.py b/GANDLF/entrypoints/anonymizer.py
@@ -62,9 +62,16 @@ def _anonymize_images(
     type=click.Path(),
     help="Output directory or file which will contain the image(s) after anonymization.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(input_dir, config, modality, output_file):
+def new_way(input_dir, config, modality, output_file, log_file):
     """Anonymize images/scans in the data directory."""
+    logger_setup(log_file)
     _anonymize_images(input_dir, output_file, config, modality)
 
 

diff --git a/GANDLF/entrypoints/cli_tool.py b/GANDLF/entrypoints/cli_tool.py
@@ -12,7 +12,7 @@
 def gandlf(ctx):
     """GANDLF command-line tool."""
     ctx.ensure_object(dict)
-    logger_setup()
+    # logger_setup()
 
 
 # registers subcommands: `gandlf anonymizer`, `gandlf run`, etc.

diff --git a/GANDLF/entrypoints/collect_stats.py b/GANDLF/entrypoints/collect_stats.py
@@ -191,9 +191,16 @@ def _collect_stats(model_dir: str, output_dir: str):
     required=True,
     help="Output directory to save stats and plot",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(model_dir: str, output_dir: str):
+def new_way(model_dir: str, output_dir: str, log_file: str):
     """Collect statistics from different testing/validation combinations from output directory."""
+    logger_setup(log_file)
     _collect_stats(model_dir=model_dir, output_dir=output_dir)
 
 

diff --git a/GANDLF/entrypoints/config_generator.py b/GANDLF/entrypoints/config_generator.py
@@ -34,9 +34,17 @@ def _generate_config(config: str, strategy: str, output: str):
     type=click.Path(file_okay=False, dir_okay=True),
     help="Path to output directory.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(config, strategy, output):
+def new_way(config, strategy, output, log_file):
     """Generate multiple GaNDLF configurations based on a single baseline GaNDLF for experimentation."""
+
+    logger_setup(log_file)
     _generate_config(config, strategy, output)
 
 

diff --git a/GANDLF/entrypoints/construct_csv.py b/GANDLF/entrypoints/construct_csv.py
@@ -90,15 +90,24 @@ def _construct_csv(
     help="If True, paths in the output data CSV will always be relative to the location"
     " of the output data CSV itself.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
 def new_way(
     input_dir: str,
     channels_id: str,
     label_id: Optional[str],
     output_file: str,
     relativize_paths: bool,
+    log_file: str,
 ):
     """Generate training/inference CSV from data directory."""
+
+    logger_setup(log_file)
     _construct_csv(
         input_dir=input_dir,
         channels_id=channels_id,

diff --git a/GANDLF/entrypoints/debug_info.py b/GANDLF/entrypoints/debug_info.py
@@ -39,9 +39,17 @@ def _debug_info(verbose: bool):
     is_flag=True,
     help="If passed, prints all packages installed as well",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(verbose: bool):
+def new_way(verbose: bool, log_file):
     """Displays detailed info about system environment: library versions, settings, etc."""
+
+    logger_setup(log_file)
     _debug_info(verbose=verbose)
 
 

diff --git a/GANDLF/entrypoints/deploy.py b/GANDLF/entrypoints/deploy.py
@@ -126,6 +126,12 @@ def _deploy(
     help="An optional custom python entrypoint script to use instead of the default specified in mlcube.yaml."
     " (Only for inference and metrics)",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
 def new_way(
     model: Optional[str],
@@ -136,8 +142,10 @@ def new_way(
     output_dir: str,
     requires_gpu: bool,
     entrypoint: Optional[str],
+    log_file: str,
 ):
     """Generate frozen/deployable versions of trained GaNDLF models."""
+    logger_setup(log_file)
     _deploy(
         model=model,
         config=config,

diff --git a/GANDLF/entrypoints/generate_metrics.py b/GANDLF/entrypoints/generate_metrics.py
@@ -53,6 +53,12 @@ def _generate_metrics(
     default=-1,
     help="The value to use for missing predictions as penalty; if `-1`, this does not get added. This is only used in the case where the targets and predictions are passed independently.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @click.option("--raw-input", hidden=True)
 @append_copyright_to_help
 def new_way(
@@ -61,8 +67,11 @@ def new_way(
     output_file: Optional[str],
     missing_prediction: int,
     raw_input: str,
+    log_file: str,
 ):
     """Metrics calculator."""
+
+    logger_setup(log_file)
     _generate_metrics(
         input_data=input_data,
         config=config,

diff --git a/GANDLF/entrypoints/optimize_model.py b/GANDLF/entrypoints/optimize_model.py
@@ -46,11 +46,22 @@ def _optimize_model(
     required=False,
     type=click.Path(exists=True, file_okay=True, dir_okay=False),
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
 def new_way(
-    model: str, config: Optional[str] = None, output_path: Optional[str] = None
+    model: str,
+    log_file: str,
+    config: Optional[str] = None,
+    output_path: Optional[str] = None,
 ):
     """Generate optimized versions of trained GaNDLF models."""
+
+    logger_setup(log_file)
     _optimize_model(model=model, config=config, output_path=output_path)
 
 

diff --git a/GANDLF/entrypoints/patch_miner.py b/GANDLF/entrypoints/patch_miner.py
@@ -42,9 +42,17 @@ def _mine_patches(input_path: str, output_dir: str, config: Optional[str]):
     help="config (in YAML) for running the patch miner. Needs 'scale' and 'patch_size' to be defined, "
     "otherwise defaults to 16 and (256, 256), respectively.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(input_csv: str, output_dir: str, config: Optional[str]):
+def new_way(input_csv: str, output_dir: str, log_file: str, config: Optional[str]):
     """Construct patches from whole slide image(s)."""
+
+    logger_setup(log_file)
     _mine_patches(input_path=input_csv, output_dir=output_dir, config=config)
 
 

diff --git a/GANDLF/entrypoints/preprocess.py b/GANDLF/entrypoints/preprocess.py
@@ -82,6 +82,12 @@ def _preprocess(
     is_flag=True,
     help="If passed, applies zero cropping during output creation.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
 def new_way(
     config: str,
@@ -90,8 +96,11 @@ def new_way(
     label_pad: str,
     apply_augs: bool,
     crop_zero: bool,
+    log_file: str,
 ):
     """Generate training/inference data which are preprocessed to reduce resource footprint during computation."""
+
+    logger_setup(log_file)
     _preprocess(
         config=config,
         input_data=input_data,

diff --git a/GANDLF/entrypoints/recover_config.py b/GANDLF/entrypoints/recover_config.py
@@ -47,10 +47,18 @@ def _recover_config(model_dir: Optional[str], mlcube: bool, output_file: str):
     type=click.Path(file_okay=True, dir_okay=False),
     help="Path to an output file where the config will be written.",
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(model_dir, mlcube, output_file):
+def new_way(model_dir, mlcube, output_file, log_file):
     """Recovers a config file from a GaNDLF model. If used from within a deployed GaNDLF MLCube,
     attempts to extract the config from the embedded model."""
+
+    logger_setup(log_file)
     _recover_config(model_dir=model_dir, mlcube=mlcube, output_file=output_file)
 
 

diff --git a/GANDLF/entrypoints/run.py b/GANDLF/entrypoints/run.py
@@ -141,6 +141,12 @@ def _run(
     help="Location to save the output of the inference session. Not used for training.",
 )
 @click.option("--raw-input", hidden=True)
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
 def new_way(
     config: str,
@@ -152,8 +158,11 @@ def new_way(
     resume: bool,
     output_path: str,
     raw_input: str,
+    log_file: str,
 ):
     """Semantic segmentation, regression, and classification for medical images using Deep Learning."""
+
+    logger_setup(log_file)
     _run(
         config=config,
         input_data=input_data,

diff --git a/GANDLF/entrypoints/split_csv.py b/GANDLF/entrypoints/split_csv.py
@@ -49,9 +49,17 @@ def _split_csv(input_csv: str, output_dir: str, config_path: Optional[str]):
     help="The GaNDLF config (in YAML) with the `nested_training` key specified to the folds needed.",
     type=click.Path(exists=True, file_okay=True, dir_okay=False),
 )
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way(input_csv: str, output_dir: str, config: Optional[str]):
+def new_way(input_csv: str, output_dir: str, log_file: str, config: Optional[str]):
     """Split the data into training, validation, and testing sets and save them as csvs in the output directory."""
+
+    logger_setup(log_file)
     _split_csv(input_csv, output_dir, config)
 
 

diff --git a/GANDLF/entrypoints/verify_install.py b/GANDLF/entrypoints/verify_install.py
@@ -23,9 +23,16 @@ def _verify_install():
 
 
 @click.command()
+@click.option(
+    "--log-file",
+    type=click.Path(),
+    default=None,
+    help="Output file which will contain the logs.",
+)
 @append_copyright_to_help
-def new_way():
+def new_way(log_file):
     """Verify GaNDLF installation."""
+    logger_setup(log_file)
     _verify_install()
 
 

diff --git a/GANDLF/utils/gandlf_logging.py b/GANDLF/utils/gandlf_logging.py
@@ -40,9 +40,9 @@ def logger_setup(log_file=None, config_path="logging_config.yaml") -> None:
     log_tmp_file = log_file
     if log_file is None:  # create tmp file
         log_tmp_file = _create_tmp_log_file()
-        logging.info(f"The logs are saved in {log_tmp_file}")
     _create_log_file(log_tmp_file)
     _configure_logging_with_logfile(log_tmp_file, config_path)
+    logging.info(f"The logs are saved in {log_tmp_file}")
 
 
 class InfoOnlyFilter(logging.Filter):

diff --git a/docs/extending.md b/docs/extending.md
@@ -137,6 +137,16 @@ bash
 ### Use loggers instead of print
 We use the native `logging` [library](https://docs.python.org/3/library/logging.html) for log management. This gets automatically configured when GaNDLF gets launched. So, if you are extending the code, please use loggers instead of prints.
 
+Here is an example of how the root logger can be used:
+```
+def my_new_cool_function(df: pd.DataFrame):
+    logging.debug("Message for debug file only")
+    logging.info("Hi GaNDLF user, I greet you in the CLI output")
+    logging.error(f"A detailed message about any error if needed. Exception: {str(e)}, params: {params}, df shape: {df.shape}")
+    # do NOT use normal print statements
+    # print("Hi GaNDLF user!")
+```
+
 Here is an example of how a logger can be used:
 
 ```
@@ -148,16 +158,17 @@ def my_new_cool_function(df: pd.DataFrame):
     # print("Hi GaNDLF user!")  # don't use prints please.
 ```
 
+
 ### What and where is logged
 
 GaNDLF logs are split into multiple parts:
 - CLI output: only `info` messages are shown here
 - debug file: all messages are shown 
 - stderr: display `warning`, `error`, or `critical` messages
 
-By default, the logs are flushed to console.
+By default, the logs are saved in the `/tmp/.gandlf` directory.
 If the `--log-file` parameter is passed to a CLI command, the logs are **saved** in the path defined by that parameter instead.
-If the path is not provided or an error is raised, the logs will be flushed to console.
+