This repository has been archived by the owner on Aug 27, 2024. It is now read-only.

Commit 6e9d172: Merge branch 'main' into release
tmke8 committed Jun 28, 2022
2 parents: 06bdf48 + f992eda
Showing 36 changed files with 294 additions and 322 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/continuous_integration.yml
@@ -28,10 +28,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.8
- name: Install pylint
run: |
python -m pip install --upgrade pip
@@ -47,10 +47,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.8
- name: Install black
run: |
python -m pip install --upgrade pip
@@ -98,10 +98,10 @@ jobs:
#----------------------------------------------
- name: Check out repository
uses: actions/checkout@v2
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.8

#----------------------------------------------
# ----- install & configure poetry -----
2 changes: 1 addition & 1 deletion README.md
@@ -34,7 +34,7 @@ Where appropriate, we even subsume some of these libraries.

## Installation

EthicML requires Python >= 3.7.
EthicML requires Python >= 3.8.
To install EthicML, just do
```
pip3 install ethicml
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -77,7 +77,7 @@
"sphinx.ext.autodoc",
"autodocsumm",
"nbsphinx",
'IPython.sphinxext.ipython_console_highlighting',
"ipython_pygments",
]

# Add any paths that contain templates here, relative to this directory.
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -4,4 +4,4 @@ typing_extensions
furo
toml
nbsphinx @ git+https://github.com/thomkeh/nbsphinx.git@e69b06f131428c595361117754b3b0fbc385d43a
ipython
ipython-pygments
15 changes: 13 additions & 2 deletions ethicml/data/csvs/make_nursery.py
@@ -1,6 +1,8 @@
"""Transparently show how the UCI Nursey dataset was modified from the raw download."""
# The UCI Nursery dataset. It needs some (mild) preprocessing before we can plug and play.

from typing import Hashable, List

import pandas as pd

if __name__ == "__main__":
@@ -29,10 +31,19 @@
for column in features:
df[column] = df[column].astype("category").cat.codes

features1 = ['form', 'health', "finance", "class", "parents", "has_nurs", "housing", "social"]
features1: List[Hashable] = [
'form',
'health',
"finance",
"class",
"parents",
"has_nurs",
"housing",
"social",
]
x1 = df.drop(features1, axis=1)

x2 = df.drop(features, axis=1)
x2 = df.drop(features, axis=1) # type: ignore[arg-type]
x2 = pd.get_dummies(x2)

df = pd.concat([x1, x2], axis=1)
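As context for the hunk above, a minimal, self-contained sketch of the split-and-dummy-encode pattern the script uses; the toy frame and column names below are illustrative, not taken from the Nursery data. The `List[Hashable]` annotation in the diff presumably exists to satisfy the pandas type stubs, which describe `DataFrame.drop` labels in terms of `Hashable`.

```python
import pandas as pd

# Toy stand-in for the raw download; the columns are purely illustrative.
df = pd.DataFrame(
    {
        "health": ["good", "bad", "good"],
        "finance": ["convenient", "inconv", "convenient"],
        "label": [1, 0, 1],
    }
)

categorical = ["health", "finance"]

# Keep the columns that are not being dummy-encoded ...
x1 = df.drop(categorical, axis=1)
# ... and one-hot encode the categorical block.
x2 = pd.get_dummies(df[categorical])

out = pd.concat([x1, x2], axis=1)
print(list(out.columns))
# ['label', 'health_bad', 'health_good', 'finance_convenient', 'finance_inconv']
```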
15 changes: 13 additions & 2 deletions ethicml/data/dataset.py
@@ -4,8 +4,19 @@
from dataclasses import dataclass
from enum import Enum, auto
from pathlib import Path
from typing import ClassVar, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union
from typing_extensions import Literal, TypedDict, final
from typing import (
ClassVar,
Dict,
List,
Literal,
NamedTuple,
Optional,
Sequence,
Tuple,
TypedDict,
Union,
final,
)

import pandas as pd
from ranzen import implements
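Many of the import changes in this commit follow the same pattern: with the minimum Python version raised to 3.8, `Literal`, `TypedDict`, and `final` can be imported from the standard-library `typing` module instead of `typing_extensions`. A small sketch (the class and alias names are made up for illustration):

```python
from typing import Literal, TypedDict, final


class SplitSettings(TypedDict):  # hypothetical, not an EthicML class
    train_percentage: float
    seed: int


Stage = Literal["fit", "transform"]


@final
class FrozenConfig:
    """Cannot be subclassed (enforced by type checkers)."""
```

Names that arrived later, such as `Self` (Python 3.11) and `TypeAlias` (3.10), still come from `typing_extensions` in the hunks below.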
5 changes: 2 additions & 3 deletions ethicml/data/tabular_data/acs.py
@@ -8,8 +8,7 @@
import contextlib
import os
from pathlib import Path
from typing import Generator, Iterable, List, Optional, Tuple, Union
from typing_extensions import Literal
from typing import Generator, Iterable, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd
@@ -487,7 +486,7 @@ def load(
target_transform=lambda x: x == 1,
group=self.split,
preprocess=lambda x: x,
postprocess=lambda x: np.nan_to_num(x, -1),
postprocess=lambda x: np.nan_to_num(x, nan=-1),
)

dataframe = data_obj._preprocess(dataframe)
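The `nan_to_num` change above is worth a note: the second positional parameter of `np.nan_to_num` is `copy`, not the fill value, so the keyword form is needed to actually replace NaNs with -1 (this assumes NumPy >= 1.17, which introduced the `nan=` parameter). A minimal sketch:

```python
import numpy as np

x = np.array([1.0, np.nan, 3.0])

# Positional: -1 is taken as `copy` (truthy), and NaN is still replaced by 0.0.
print(np.nan_to_num(x, -1))       # [1. 0. 3.]

# Keyword: NaN is replaced by -1, which is what the loader intends.
print(np.nan_to_num(x, nan=-1))   # [ 1. -1.  3.]
```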
4 changes: 3 additions & 1 deletion ethicml/data/util.py
@@ -184,7 +184,9 @@ def from_dummies(data: pd.DataFrame, categorical_cols: Mapping[str, Sequence[str
for col_parent, filter_col in categorical_cols.items():
if len(filter_col) > 1:
undummified = (
out[filter_col].idxmax(axis=1).apply(lambda x: x.split(f"{col_parent}_", 1)[1])
out[list(filter_col)]
.idxmax(axis=1)
.apply(lambda x: x.split(f"{col_parent}_", 1)[1])
)

out[col_parent] = undummified
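One reason the `list(filter_col)` conversion above can matter: if `filter_col` is a tuple (a perfectly valid `Sequence[str]`), indexing a DataFrame with it is treated as a single multi-level label rather than a list of columns. A sketch with illustrative column names:

```python
import pandas as pd

out = pd.DataFrame({"color_red": [1, 0], "color_blue": [0, 1]})
filter_col = ("color_red", "color_blue")  # a Sequence[str] that is not a list
col_parent = "color"

# out[filter_col] would raise a KeyError (tuple keys target MultiIndex columns),
# so the sequence is converted to a list before selecting the dummy columns.
undummified = (
    out[list(filter_col)]
    .idxmax(axis=1)
    .apply(lambda x: x.split(f"{col_parent}_", 1)[1])
)
print(undummified.tolist())  # ['red', 'blue']
```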
10 changes: 5 additions & 5 deletions ethicml/evaluators/evaluate_models.py
@@ -1,7 +1,6 @@
"""Runs given metrics on given algorithms for given datasets."""
from pathlib import Path
from typing import Dict, List, NamedTuple, Optional, Sequence, Union
from typing_extensions import Literal
from typing import Dict, List, Literal, NamedTuple, Optional, Sequence, Union

import pandas as pd

@@ -63,7 +62,7 @@ def run_metrics(
per_sens_metrics: Sequence[Metric] = (),
diffs_and_ratios: bool = True,
use_sens_name: bool = True,
) -> Dict[str, HyperParamValue]:
) -> Dict[str, float]:
"""Run all the given metrics on the given predictions and return the results.
:param predictions: DataFrame with predictions
@@ -73,8 +72,9 @@
:param diffs_and_ratios: if True, compute diffs and ratios per sensitive attribute (Default: True)
:param use_sens_name: if True, use the name of the sensitive variable in the returned results.
If False, refer to the sensitive variable as `S`. (Default: True)
:returns: A dictionary of all the metric results.
"""
result: Dict[str, HyperParamValue] = {}
result: Dict[str, float] = {}
if predictions.hard.isna().any(axis=None): # type: ignore[arg-type]
return {"algorithm_failed": 1.0}
for metric in metrics:
@@ -88,7 +88,6 @@
per_sens.update(diffs_ratios)
for key, value in per_sens.items():
result[f"{metric.name}_{key}"] = value
result.update(predictions.info)
return result # SUGGESTION: we could return a DataFrame here instead of a dictionary
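The narrowed return type above (from `Dict[str, HyperParamValue]` to `Dict[str, float]`) reflects that every entry is now numeric, since the `predictions.info` update has moved out of `run_metrics` and into `_gather_metrics` (next hunk). A sketch of the shape of the returned dictionary; the keys below are illustrative, following the `f"{metric.name}_{key}"` pattern visible in the loop:

```python
from typing import Dict

# Illustrative only: actual keys depend on the metrics and the sensitive attribute.
result: Dict[str, float] = {
    "Accuracy": 0.84,            # plain metric, keyed by metric.name
    "prob_pos_sex_Male": 0.31,   # per-sensitive-group value: f"{metric.name}_{key}"
    "prob_pos_sex_Female": 0.19,
}
```

When `diffs_and_ratios=True`, difference and ratio entries per sensitive group are added to the same flat dictionary.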


@@ -333,6 +332,7 @@ def _gather_metrics(
**hyperparameters,
}
df_row.update(run_metrics(predictions, data_info.test, metrics, per_sens_metrics))
df_row.update(predictions.info)

results_df = results_df.append(df_row, ignore_index=True, sort=False)

4 changes: 2 additions & 2 deletions ethicml/implementations/adv_debiasing_modules/model.py
@@ -3,8 +3,8 @@
Original implementation is modified to handle regression and multi-class
classification problems
"""
from typing import Tuple
from typing_extensions import Literal, Self
from typing import Literal, Tuple
from typing_extensions import Self

import numpy as np
import pandas as pd
17 changes: 10 additions & 7 deletions ethicml/implementations/beutel.py
@@ -21,14 +21,14 @@

from ethicml.preprocessing.adjust_labels import LabelBinarizer, assert_binary_labels
from ethicml.preprocessing.splits import train_test_split
from ethicml.utility import DataTuple, FairnessType
from ethicml.utility import DataTuple, FairnessType, SubgroupTuple

from .pytorch_common import CustomDataset, TestDataset, make_dataset_and_loader
from .utils import load_data_from_flags, save_transformations

if TYPE_CHECKING:
from ethicml.models.preprocess.beutel import BeutelArgs
from ethicml.models.preprocess.pre_subprocess import PreAlgoArgs, T
from ethicml.models.preprocess.pre_subprocess import PreAlgoArgs

STRING_TO_ACTIVATION_MAP = {"Sigmoid()": nn.Sigmoid()}

@@ -187,7 +187,7 @@ def fit(train: DataTuple, flags: BeutelArgs, seed: int = 888) -> Tuple[DataTuple
return transformed_train, enc


def transform(data: T, enc: torch.nn.Module, flags: BeutelArgs) -> T:
def transform(data: SubgroupTuple, enc: torch.nn.Module, flags: BeutelArgs) -> SubgroupTuple:
"""Transform the test data using the trained autoencoder.
:param data:
@@ -202,8 +202,8 @@ def transform(data: T, enc: torch.nn.Module, flags: BeutelArgs) -> T:


def train_and_transform(
train: DataTuple, test: T, flags: BeutelArgs, seed: int
) -> Tuple[DataTuple, T]:
train: DataTuple, test: SubgroupTuple, flags: BeutelArgs, seed: int
) -> Tuple[DataTuple, SubgroupTuple]:
"""Train the fair autoencoder on the training data and then transform both training and test.
:param train:
@@ -263,7 +263,9 @@ def encode_dataset(
return datatuple.replace(x=pd.DataFrame(data_to_return))


def encode_testset(enc: nn.Module, dataloader: torch.utils.data.DataLoader, testtuple: T) -> T:
def encode_testset(
enc: nn.Module, dataloader: torch.utils.data.DataLoader, testtuple: SubgroupTuple
) -> SubgroupTuple:
"""Encode a dataset.
:param enc:
@@ -456,7 +458,8 @@ def main() -> None:
dump(enc, Path(pre_algo_args["model"]))
elif pre_algo_args["mode"] == "transform":
model = load(Path(pre_algo_args["model"]))
transformed_test = transform(DataTuple.from_file(Path(pre_algo_args["test"])), model, flags)
test = SubgroupTuple.from_file(Path(pre_algo_args["test"]))
transformed_test = transform(test, model, flags)
transformed_test.save_to_file(Path(pre_algo_args["new_test"]))


4 changes: 2 additions & 2 deletions ethicml/implementations/fair_dummies_modules/model.py
@@ -2,8 +2,8 @@
from __future__ import annotations

import random
from typing import Callable, Tuple
from typing_extensions import Literal, Self
from typing import Callable, Literal, Tuple
from typing_extensions import Self

import numpy as np
import pandas as pd
3 changes: 2 additions & 1 deletion ethicml/implementations/hgr_modules/hgr_impl.py
@@ -4,7 +4,8 @@
# The function for measuring HGR is in the facl package, can be downloaded from
# https://github.com/criteo-research/continuous-fairness/tree/master/facl/independence
import random
from typing_extensions import Literal, Self
from typing import Literal
from typing_extensions import Self

import numpy as np
import pandas as pd
3 changes: 1 addition & 2 deletions ethicml/implementations/pytorch_common.py
@@ -1,7 +1,6 @@
"""Functions that are common to PyTorch models."""
import random
from typing import Tuple
from typing_extensions import Literal
from typing import Literal, Tuple

import numpy as np
import pandas as pd
7 changes: 5 additions & 2 deletions ethicml/implementations/vfae.py
@@ -88,7 +88,9 @@ def transform(model: VFAENetwork, dataset: T, flags: VfaeArgs) -> T:
return dataset.replace(x=pd.DataFrame(post_train))


def train_and_transform(train: DataTuple, test: T, flags: VfaeArgs) -> Tuple[DataTuple, T]:
def train_and_transform(
train: DataTuple, test: SubgroupTuple, flags: VfaeArgs
) -> Tuple[DataTuple, SubgroupTuple]:
"""Train the model and transform the dataset.
:param train:
@@ -171,7 +173,8 @@ def main() -> None:
dump(enc, Path(pre_algo_args["model"]))
elif pre_algo_args["mode"] == "transform":
model = load(Path(pre_algo_args["model"]))
transformed_test = transform(model, DataTuple.from_file(Path(pre_algo_args["test"])), flags)
test = SubgroupTuple.from_file(Path(pre_algo_args["test"]))
transformed_test = transform(model, test, flags)
transformed_test.save_to_file(Path(pre_algo_args["new_test"]))


15 changes: 7 additions & 8 deletions ethicml/implementations/zemel.py
@@ -14,10 +14,10 @@
from scipy.special import softmax # type: ignore[attr-defined]

from ethicml.implementations.utils import load_data_from_flags, save_transformations
from ethicml.utility import DataTuple, TestTuple
from ethicml.utility import DataTuple, SubgroupTuple, TestTuple

if TYPE_CHECKING:
from ethicml.models.preprocess.pre_subprocess import PreAlgoArgs, T
from ethicml.models.preprocess.pre_subprocess import PreAlgoArgs
from ethicml.models.preprocess.zemel import ZemelArgs


@@ -108,8 +108,8 @@ def get_xhat_y_hat(


def train_and_transform(
train: DataTuple, test: T, flags: ZemelArgs, seed: int
) -> (Tuple[DataTuple, T]):
train: DataTuple, test: SubgroupTuple, flags: ZemelArgs, seed: int
) -> (Tuple[DataTuple, SubgroupTuple]):
"""Train and transform.
:param train:
@@ -130,7 +130,7 @@
return train.replace(x=train_transformed), test.replace(x=test_transformed)


def transform(data: T, prototypes: np.ndarray, w: np.ndarray) -> T:
def transform(data: SubgroupTuple, prototypes: np.ndarray, w: np.ndarray) -> SubgroupTuple:
"""Transform.
:param data:
@@ -258,9 +258,8 @@ def main() -> None:
dump(model, Path(pre_algo_args["model"]))
elif pre_algo_args["mode"] == "transform":
model = load(Path(pre_algo_args["model"]))
transformed_test = transform(
DataTuple.from_file(Path(pre_algo_args["test"])), model.prototypes, model.w
)
test = SubgroupTuple.from_file(Path(pre_algo_args["test"]))
transformed_test = transform(test, model.prototypes, model.w)
transformed_test.save_to_file(Path(pre_algo_args["new_test"]))


3 changes: 1 addition & 2 deletions ethicml/models/inprocess/adv_debiasing.py
@@ -5,8 +5,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import List
from typing_extensions import Literal, TypedDict
from typing import List, Literal, TypedDict

from ranzen import implements

3 changes: 1 addition & 2 deletions ethicml/models/inprocess/fair_dummies.py
@@ -5,8 +5,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import List
from typing_extensions import Literal, TypedDict
from typing import List, Literal, TypedDict

from ranzen import implements

3 changes: 1 addition & 2 deletions ethicml/models/inprocess/hgr.py
@@ -7,8 +7,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import List
from typing_extensions import Literal, TypedDict
from typing import List, Literal, TypedDict

from ranzen import implements

4 changes: 2 additions & 2 deletions ethicml/models/inprocess/in_subprocess.py
@@ -4,8 +4,8 @@
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, List, Mapping, TypeVar, Union
from typing_extensions import Literal, TypeAlias, TypedDict, final
from typing import Any, List, Literal, Mapping, TypedDict, TypeVar, Union, final
from typing_extensions import TypeAlias

from ethicml.models.algorithm_base import SubprocessAlgorithmMixin
from ethicml.models.inprocess.in_algorithm import InAlgorithm
(Diffs for the remaining changed files are not shown here.)