use scipy implementation for Pearson correlation (#355)

* switch to scipy implementation
* formatting
* fix test
BiomedSciAI · Jun 18, 2024 · 8905d68
1 parent 7ed012b
commit 8905d68
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 9 deletions.
diff --git a/fuse/eval/metrics/libs/stat.py b/fuse/eval/metrics/libs/stat.py
@@ -1,5 +1,6 @@
 import numpy as np
 from typing import Sequence, Union
+from scipy.stats import pearsonr
 
 
 class Stat:
@@ -12,7 +13,7 @@ def pearson_correlation(
         pred: Union[np.ndarray, Sequence],
         target: Union[np.ndarray, Sequence],
         mask: Union[np.ndarray, Sequence, None] = None,
-    ) -> float:
+    ) -> dict:
         """
         Pearson correlation coefficient measuring the linear relationship between two datasets/vectors.
         :param pred: prediction values
@@ -36,11 +37,9 @@ def pearson_correlation(
                 f"expected 1D vectors. got pred shape: {pred.shape}, target shape: {target.shape}"
             )
 
-        mean_pred = np.mean(pred)
-        mean_target = np.mean(target)
+        statistic, p_value = pearsonr(pred, target)
 
-        r = np.sum((pred - mean_pred) * (target - mean_target)) / np.sqrt(
-            np.sum((pred - mean_pred) ** 2) * np.sum((target - mean_target) ** 2)
-        )
-
-        return r
+        results = {}
+        results["statistic"] = statistic
+        results["p_value"] = p_value
+        return results
diff --git a/fuse/eval/tests/test_eval.py b/fuse/eval/tests/test_eval.py
@@ -245,7 +245,7 @@ def test_eval_example_seq_gen_2(self) -> None:
 
     def test_pearson_correlation(self) -> None:
         res = example_pearson_correlation()
-        self.assertAlmostEqual(res["metrics.pearsonr"], 1.0, places=2)
+        self.assertAlmostEqual(res["metrics.pearsonr.statistic"], 1.0, places=2)
 
 
 if __name__ == "__main__":