HopkinsIDD · TimothyWillard · Nov 4, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 25, 2024
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,41 @@
+name: Lint
+
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - 'flepimop/gempyor_pkg/**/*.py'
+  pull_request:
+    paths:
+      - '**/*.py'
+    branches:
+      - main
+
+jobs:
+  black-for-python:
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
+    env:
+      BLACK_LINE_LENGTH: 92
+      BLACK_EXTEND_EXCLUDE: 'flepimop/gempyor_pkg/src/gempyor/steps_rk4.py'
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+          sparse-checkout: |
+            *
+            !documentation/
+          sparse-checkout-cone-mode: false
+      - name: Determine Source
+        run: |
+          if [ ${{ github.event_name }} == "push" ]; then
+            echo "BLACK_SRC=flepimop/gempyor_pkg/" >> $GITHUB_ENV
+          else
+            echo "BLACK_SRC=." >> $GITHUB_ENV
+          fi
+      - name: Black Formatter Check
+        uses: psf/black@stable
+        with:
+          src: ${{ env.BLACK_SRC }}
+          options: "--line-length ${{ env.BLACK_LINE_LENGTH }} --extend-exclude '${{ env.BLACK_EXTEND_EXCLUDE }}' --check --verbose"
diff --git a/batch/inference_job_launcher.py b/batch/inference_job_launcher.py
diff --git a/batch/scenario_job.py b/batch/scenario_job.py
@@ -196,13 +196,17 @@ def launch_job_inner(
     tarfile_name = f"{job_name}.tar.gz"
     tar = tarfile.open(tarfile_name, "w:gz")
     for p in os.listdir("."):
-        if not (p.startswith(".") or p.endswith("tar.gz") or p in dvc_outputs or p == "batch"):
+        if not (
+            p.startswith(".") or p.endswith("tar.gz") or p in dvc_outputs or p == "batch"
+        ):
             tar.add(p, filter=lambda x: None if x.name.startswith(".") else x)
     tar.close()
 
     # Upload the tar'd contents of this directory and the runner script to S3
     runner_script_name = f"{job_name}-runner.sh"
-    local_runner_script = os.path.join(os.path.dirname(os.path.realpath(__file__)), "AWS_scenario_runner.sh")
+    local_runner_script = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "AWS_scenario_runner.sh"
+    )
     s3_client = boto3.client("s3")
     s3_client.upload_file(local_runner_script, s3_input_bucket, runner_script_name)
     s3_client.upload_file(tarfile_name, s3_input_bucket, tarfile_name)
@@ -219,7 +223,9 @@ def launch_job_inner(
         {"name": "S3_RESULTS_PATH", "value": results_path},
         {"name": "SLOTS_PER_JOB", "value": str(slots_per_job)},
     ]
-    s3_cp_run_script = f"aws s3 cp s3://{s3_input_bucket}/{runner_script_name} $PWD/run-flepimop-inference"
+    s3_cp_run_script = (
+        f"aws s3 cp s3://{s3_input_bucket}/{runner_script_name} $PWD/run-flepimop-inference"
+    )
     command = ["sh", "-c", f"{s3_cp_run_script}; /bin/bash $PWD/run-flepimop-inference"]
     container_overrides = {
         "vcpus": vcpu,
@@ -246,7 +252,9 @@ def launch_job_inner(
             containerOverrides=container_overrides,
         )
 
-    print(f"Batch job with id {resp['jobId']} launched; output will be written to {results_path}")
+    print(
+        f"Batch job with id {resp['jobId']} launched; output will be written to {results_path}"
+    )
 
 
 def get_dvc_outputs():

diff --git a/bin/pre-commit b/bin/pre-commit
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+if which black > /dev/null 2>&1; then
+    black --line-length 92 \
+        --extend-exclude 'flepimop/gempyor_pkg/src/gempyor/steps_rk4.py' \
+        --check --verbose .
+else
+    echo "'black' is not available so python files will not be checked."
+fi
diff --git a/documentation/gitbook/development/python-guidelines-for-developers.md b/documentation/gitbook/development/python-guidelines-for-developers.md
@@ -61,21 +61,23 @@ and to run just some subset of the tests (e.g here just the outcome tests), use:
 pytest -vvvv -k outcomes
 ```
 
-{% hint style="danger" %}
-Before committing, make sure you **format your code** using black (see below) and that the **test passes** (see above).
-{% endhint %}
+For more details on how to use `pytest` please refer to their [usage guide](https://docs.pytest.org/en/latest/how-to/usage.html).
 
 ### Formatting
 
-We try to remain close to Python conventions and to follow the updated rules and best practices. For formatting, we use [black](https://github.com/psf/black), the _Uncompromising Code Formatter_ before submitting pull requests. It provides a consistent style, which is useful when diffing. We use a custom length of 120 characters as the baseline is short for scientific code. Here is the line to use to format your code:
+We try to remain close to Python conventions and to follow the updated rules and best practices. For formatting, we use [black](https://github.com/psf/black), the _Uncompromising Code Formatter_ before submitting pull requests. It provides a consistent style, which is useful when diffing. We use a custom length of 92 characters as the baseline is short for scientific code. Here is the line to use to format your code:
 
 ```bash
-black --line-length 120 . --exclude renv*
+black --line-length 92 \
+    --extend-exclude 'flepimop/gempyor_pkg/src/gempyor/steps_rk4.py' \
+    --verbose .
 ```
 
-{% hint style="warning" %}
-Please use type-hints as much as possible, as we are trying to move towards static checks.
-{% endhint %}
+For those using a Mac or Linux system for development you can take advantage of the formatting pre-commit hook found at `bin/pre-commit`. To start using it copy this file to your git hooks folder:
+
+```bash
+cp -f bin/pre-commit .git/hooks/
+```
 
 #### Structure of the main classes
 

diff --git a/examples/test_cli.py b/examples/test_cli.py
@@ -1,38 +1,39 @@
-
 from click.testing import CliRunner
 from gempyor.simulate import simulate
 import os
 
 # See here to test click application https://click.palletsprojects.com/en/8.1.x/testing/
 # would be useful to also call the command directly
 
+
 def test_config_sample_2pop():
-  os.chdir(os.path.dirname(__file__) + "/tutorials")
-  runner = CliRunner()
-  result = runner.invoke(simulate, ['-c', 'config_sample_2pop.yml'])
-  print(result.output) # useful for debug
-  print(result.exit_code) # useful for debug
-  print(result.exception) # useful for debug
-  assert result.exit_code == 0
-  assert 'completed in' in result.output
+    os.chdir(os.path.dirname(__file__) + "/tutorials")
+    runner = CliRunner()
+    result = runner.invoke(simulate, ["-c", "config_sample_2pop.yml"])
+    print(result.output)  # useful for debug
+    print(result.exit_code)  # useful for debug
+    print(result.exception)  # useful for debug
+    assert result.exit_code == 0
+    assert "completed in" in result.output
 
 
 def test_sample_2pop_modifiers():
-  os.chdir(os.path.dirname(__file__) + "/tutorials")
-  runner = CliRunner()
-  result = runner.invoke(simulate, ['-c', 'config_sample_2pop_modifiers.yml'])
-  print(result.output) # useful for debug
-  print(result.exit_code) # useful for debug
-  print(result.exception) # useful for debug
-  assert result.exit_code == 0
-  assert 'completed in' in result.output
+    os.chdir(os.path.dirname(__file__) + "/tutorials")
+    runner = CliRunner()
+    result = runner.invoke(simulate, ["-c", "config_sample_2pop_modifiers.yml"])
+    print(result.output)  # useful for debug
+    print(result.exit_code)  # useful for debug
+    print(result.exception)  # useful for debug
+    assert result.exit_code == 0
+    assert "completed in" in result.output
+
 
 def test_simple_usa_statelevel():
-  os.chdir(os.path.dirname(__file__) + "/simple_usa_statelevel")
-  runner = CliRunner()
-  result = runner.invoke(simulate, ['-c', 'simple_usa_statelevel.yml', '-n', '1'])
-  print(result.output) # useful for debug
-  print(result.exit_code) # useful for debug
-  print(result.exception) # useful for debug
-  assert result.exit_code == 0
-  assert 'completed in' in result.output
+    os.chdir(os.path.dirname(__file__) + "/simple_usa_statelevel")
+    runner = CliRunner()
+    result = runner.invoke(simulate, ["-c", "simple_usa_statelevel.yml", "-n", "1"])
+    print(result.output)  # useful for debug
+    print(result.exit_code)  # useful for debug
+    print(result.exception)  # useful for debug
+    assert result.exit_code == 0
+    assert "completed in" in result.output
diff --git a/flepimop/gempyor_pkg/src/gempyor/NPI/MultiPeriodModifier.py b/flepimop/gempyor_pkg/src/gempyor/NPI/MultiPeriodModifier.py
@@ -21,7 +21,8 @@ def __init__(
             name=getattr(
                 npi_config,
                 "key",
-                (npi_config["scenario"].exists() and npi_config["scenario"].get()) or "unknown",
+                (npi_config["scenario"].exists() and npi_config["scenario"].get())
+                or "unknown",
             )
         )
 
@@ -68,14 +69,20 @@ def __init__(
 
         # if parameters are exceeding global start/end dates, index of parameter df will be out of range so check first
         if self.sanitize:
-            too_early = min([min(i) for i in self.parameters["start_date"]]) < self.start_date
+            too_early = (
+                min([min(i) for i in self.parameters["start_date"]]) < self.start_date
+            )
             too_late = max([max(i) for i in self.parameters["end_date"]]) > self.end_date
             if too_early or too_late:
-                raise ValueError("at least one period start or end date is not between global dates")
+                raise ValueError(
+                    "at least one period start or end date is not between global dates"
+                )
 
         for grp_config in npi_config["groups"]:
             affected_subpops_grp = self.__get_affected_subpops_grp(grp_config)
-            for sub_index in range(len(self.parameters["start_date"][affected_subpops_grp[0]])):
+            for sub_index in range(
+                len(self.parameters["start_date"][affected_subpops_grp[0]])
+            ):
                 period_range = pd.date_range(
                     self.parameters["start_date"][affected_subpops_grp[0]][sub_index],
                     self.parameters["end_date"][affected_subpops_grp[0]][sub_index],
@@ -111,7 +118,9 @@ def __checkErrors(self):
             )
 
         if not (self.parameters["start_date"] <= self.parameters["end_date"]).all():
-            raise ValueError(f"at least one period_start_date is greater than the corresponding period end date")
+            raise ValueError(
+                f"at least one period_start_date is greater than the corresponding period end date"
+            )
 
         for n in self.affected_subpops:
             if n not in self.subpops:
@@ -153,7 +162,9 @@ def __createFromConfig(self, npi_config):
             else:
                 start_dates = [self.start_date]
                 end_dates = [self.end_date]
-            this_spatial_group = helpers.get_spatial_groups(grp_config, affected_subpops_grp)
+            this_spatial_group = helpers.get_spatial_groups(
+                grp_config, affected_subpops_grp
+            )
             self.spatial_groups.append(this_spatial_group)
             # print(self.name, this_spatial_groups)
 
@@ -209,7 +220,9 @@ def __createFromDf(self, loaded_df, npi_config):
             else:
                 start_dates = [self.start_date]
                 end_dates = [self.end_date]
-            this_spatial_group = helpers.get_spatial_groups(grp_config, affected_subpops_grp)
+            this_spatial_group = helpers.get_spatial_groups(
+                grp_config, affected_subpops_grp
+            )
             self.spatial_groups.append(this_spatial_group)
 
             for subpop in this_spatial_group["ungrouped"]:
@@ -227,7 +240,9 @@ def __createFromDf(self, loaded_df, npi_config):
                     for subpop in group:
                         self.parameters.at[subpop, "start_date"] = start_dates
                         self.parameters.at[subpop, "end_date"] = end_dates
-                        self.parameters.at[subpop, "value"] = loaded_df.at[",".join(group), "value"]
+                        self.parameters.at[subpop, "value"] = loaded_df.at[
+                            ",".join(group), "value"
+                        ]
                 else:
                     dist = npi_config["value"].as_random_distribution()
                     drawn_value = dist(size=1)
@@ -258,11 +273,16 @@ def __get_affected_subpops(self, npi_config):
                 affected_subpops_grp += [str(n.get()) for n in grp_config["subpop"]]
         affected_subpops = set(affected_subpops_grp)
         if len(affected_subpops) != len(affected_subpops_grp):
-            raise ValueError(f"In NPI {self.name}, some subpops belong to several groups. This is unsupported.")
+            raise ValueError(
+                f"In NPI {self.name}, some subpops belong to several groups. This is unsupported."
+            )
         return affected_subpops
 
     def get_default(self, param):
-        if param in self.pnames_overlap_operation_sum or param in self.pnames_overlap_operation_reductionprod:
+        if (
+            param in self.pnames_overlap_operation_sum
+            or param in self.pnames_overlap_operation_reductionprod
+        ):
             return 0.0
         else:
             return 1.0
@@ -278,7 +298,9 @@ def getReductionToWrite(self):
         # self.parameters.index is a list of subpops
         for this_spatial_groups in self.spatial_groups:
             # spatially ungrouped dataframe
-            df_ungroup = self.parameters[self.parameters.index.isin(this_spatial_groups["ungrouped"])].copy()
+            df_ungroup = self.parameters[
+                self.parameters.index.isin(this_spatial_groups["ungrouped"])
+            ].copy()
             df_ungroup.index.name = "subpop"
             df_ungroup["start_date"] = df_ungroup["start_date"].apply(
                 lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l])
@@ -301,7 +323,9 @@ def getReductionToWrite(self):
                         "start_date": df_group["start_date"].apply(
                             lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l])
                         ),
-                        "end_date": df_group["end_date"].apply(lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l])),
+                        "end_date": df_group["end_date"].apply(
+                            lambda l: ",".join([d.strftime("%Y-%m-%d") for d in l])
+                        ),
                         "value": df_group["value"],
                     }
                 ).set_index("subpop")