added new metrics for regression tasks
Ido Amos [email protected] committed Sep 8, 2024
1 parent a108f32 commit 8aed967
Showing 2 changed files with 173 additions and 3 deletions.
56 changes: 55 additions & 1 deletion fuse/eval/metrics/libs/stat.py
@@ -1,6 +1,6 @@
import numpy as np
from typing import Sequence, Union
from scipy.stats import pearsonr
from scipy.stats import pearsonr, spearmanr


class Stat:
@@ -55,3 +55,57 @@ def pearson_correlation(
results["statistic"] = statistic
results["p_value"] = p_value
return results

@staticmethod
def spearman_correlation(
pred: Union[np.ndarray, Sequence],
target: Union[np.ndarray, Sequence],
mask: Union[np.ndarray, Sequence, None] = None,
) -> dict:
"""
Spearman correlation coefficient measuring the monotonic relationship between two datasets/vectors.
:param pred: prediction values
:param target: target values
:param mask: optional boolean mask. If provided, the metric is applied only to the samples where the mask is True
"""
if len(pred) == 0:
return dict(statistic=float("nan"), p_value=float("nan"))

if isinstance(pred, Sequence):
if np.isscalar(pred[0]):
pred = np.array(pred)
else:
pred = np.concatenate(pred)
if isinstance(target, Sequence):
if np.isscalar(target[0]):
target = np.array(target)
else:
target = np.concatenate(target)
if isinstance(mask, Sequence):
if np.isscalar(mask[0]):
mask = np.array(mask).astype("bool")
else:
mask = np.concatenate(mask).astype("bool")
if mask is not None:
pred = pred[mask]
target = target[mask]

pred = pred.squeeze()
target = target.squeeze()
if len(pred.shape) > 1 or len(target.shape) > 1:
raise ValueError(
f"expected 1D vectors. got pred shape: {pred.shape}, target shape: {target.shape}"
)

assert len(pred) == len(
target
), f"Spearman correlation expected pred and target with the same length but got pred={len(pred)} - target={len(target)}"

statistic, p_value = spearmanr(
pred, target, nan_policy="propagate"
) # nans will result in nan outputs

results = {}
results["statistic"] = statistic
results["p_value"] = p_value
return results
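
For context, a minimal usage sketch of how the new Stat.spearman_correlation helper might be called directly; the toy arrays and the boolean mask below are invented purely for illustration:

import numpy as np
from fuse.eval.metrics.libs.stat import Stat

# toy data: the mask drops the last (outlier) sample
pred = np.array([0.1, 0.4, 0.35, 0.8, 100.0])
target = np.array([0.0, 1.0, 1.0, 1.0, -1.0])
mask = np.array([True, True, True, True, False])

res = Stat.spearman_correlation(pred=pred, target=target, mask=mask)
print(res["statistic"], res["p_value"])  # rank correlation coefficient and its p-value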
120 changes: 118 additions & 2 deletions fuse/eval/metrics/stat/metrics_stat_common.py
@@ -1,8 +1,11 @@
from typing import Any, Dict, Hashable, Optional, Sequence
from typing import Any, Dict, Hashable, Optional, Sequence, Union, List
from collections import Counter
from fuse.eval.metrics.metrics_common import MetricDefault, MetricWithCollectorBase
from fuse.eval.metrics.libs.stat import Stat

import numpy as np  # note: if this import doesn't belong in this module, it can be moved elsewhere
from sklearn.metrics import mean_absolute_error, mean_squared_error


class MetricUniqueValues(MetricWithCollectorBase):
"""
@@ -31,5 +34,118 @@ def __init__(
target=target,
mask=mask,
metric_func=Stat.pearson_correlation,
**kwargs
**kwargs,
)


class MetricSpearmanCorrelation(MetricDefault):
def __init__(
self, pred: str, target: str, mask: Optional[str] = None, **kwargs: dict
) -> None:
super().__init__(
pred=pred,
target=target,
mask=mask,
metric_func=Stat.spearman_correlation,
**kwargs,
)


class MetricMAE(MetricDefault):
def __init__(
self,
pred: str,
target: str,
**kwargs: dict,
) -> None:
"""
Mean absolute error between predictions and targets.
See MetricDefault for the missing params
:param pred: scalar predictions
:param target: ground truth scalar labels
"""
super().__init__(
pred=pred,
target=target,
metric_func=self.mae,
**kwargs,
)

def mae(
self,
pred: Union[List, np.ndarray],
target: Union[List, np.ndarray],
**kwargs: dict,
) -> float:
return mean_absolute_error(y_true=target, y_pred=pred)


class MetricMSE(MetricDefault):
def __init__(
self,
pred: str,
target: str,
**kwargs: dict,
) -> None:
"""
Standard MSE, computed via scikit-learn's mean_squared_error.
See MetricDefault for the missing params
:param pred: scalar predictions
:param target: ground truth scalar labels
"""
super().__init__(
pred=pred,
target=target,
metric_func=self.mse,
**kwargs,
)

def mse(
self,
pred: Union[List, np.ndarray],
target: Union[List, np.ndarray],
**kwargs: dict,
) -> float:
return mean_squared_error(y_true=target, y_pred=pred)


class MetricRMSE(MetricDefault):
def __init__(
self,
pred: str,
target: str,
**kwargs: dict,
) -> None:
"""
Root mean squared error, computed directly (the scikit-learn version in use doesn't provide it as a standalone metric).
See MetricDefault for the missing params
:param pred: scalar predictions
:param target: ground truth scalar labels
"""
super().__init__(
pred=pred,
target=target,
metric_func=self.rmse,
**kwargs,
)

def rmse(
self,
pred: Union[List, np.ndarray],
target: Union[List, np.ndarray],
**kwargs: dict,
) -> float:

pred = np.array(pred).flatten()
target = np.array(target).flatten()

assert len(pred) == len(
target
), f"Expected pred and target to have the same dimensions but found: {len(pred)} elements in pred and {len(target)} in target"

squared_diff = (pred - target) ** 2
return np.sqrt(squared_diff.mean())
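
To show how the new metric classes fit together, a minimal sketch follows; the keys "model.pred" and "data.gt" are hypothetical placeholders for whatever the evaluation pipeline collects, and the direct method calls bypass the usual fuse collection mechanism just to demonstrate the arithmetic:

import numpy as np
from fuse.eval.metrics.stat.metrics_stat_common import MetricMAE, MetricRMSE

pred = np.array([2.5, 0.0, 2.0, 8.0])
target = np.array([3.0, -0.5, 2.0, 7.0])

# hypothetical keys - in a real pipeline they name the collected prediction/target entries
mae_metric = MetricMAE(pred="model.pred", target="data.gt")
rmse_metric = MetricRMSE(pred="model.pred", target="data.gt")

print(mae_metric.mae(pred=pred, target=target))    # 0.5
print(rmse_metric.rmse(pred=pred, target=target))  # ~0.612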
