From 70ba2a16a5f5ba45feaa3a4bed59cc1180ae7d0c Mon Sep 17 00:00:00 2001
From: Nicolas Tessore <n.tessore@ucl.ac.uk>
Date: Thu, 11 Jan 2024 12:43:18 +0000
Subject: [PATCH] ENH(fields): keep track of effective number of points

Add a new metadata field `neff` to maps generated by `ScalarField` and
`ComplexField` which tracks the effective number of points that made it
into the map.

Closes: #54
---
 heracles/fields.py   | 20 ++++++++++++++------
 heracles/io.py       |  9 ++++++++-
 heracles/twopoint.py |  9 +++++++--
 tests/test_fields.py |  4 ++++
 4 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/heracles/fields.py b/heracles/fields.py
index b813e06..5624b6c 100644
--- a/heracles/fields.py
+++ b/heracles/fields.py
@@ -320,7 +320,7 @@ async def __call__(
 
         # total weighted variance from online algorithm
         ngal = 0
-        wmean, var = 0.0, 0.0
+        wmean, w2mean, var = 0.0, 0.0, 0.0
 
         # go through pages in catalogue and map values
         async for page in _pages(catalog, progress):
@@ -338,6 +338,7 @@ async def __call__(
                     var += (v**2 - var).sum() / ngal
                 else:
                     wmean += (w - wmean).sum() / ngal
+                    w2mean += (w**2 - w2mean).sum() / ngal
                     var += ((w * v) ** 2 - var).sum() / ngal
 
                 del lon, lat, v, w
@@ -347,7 +348,7 @@ async def __call__(
 
         # fix mean weight if there was no column for it
         if wcol is None:
-            wmean = 1.0
+            wmean = w2mean = 1.0
 
         # compute mean visibility
         if catalog.visibility is None:
@@ -364,8 +365,11 @@ async def __call__(
         # compute bias from variance (per object)
         bias = 4 * np.pi * vbar**2 * (var / wmean**2) / ngal
 
+        # bias correction factor for intrinsic variance
+        bcor = 4 * np.pi * vbar**2 * (w2mean / wmean**2) / ngal
+
         # set metadata of array
-        update_metadata(val, self, catalog, mapper, wbar=wbar, bias=bias)
+        update_metadata(val, self, catalog, mapper, wbar=wbar, bias=bias, bcor=bcor)
 
         # return the value map
         return val
@@ -398,7 +402,7 @@ async def __call__(
 
         # total weighted variance from online algorithm
         ngal = 0
-        wmean, var = 0.0, 0.0
+        wmean, w2mean, var = 0.0, 0.0, 0.0
 
         # go through pages in catalogue and get the shear values,
         async for page in _pages(catalog, progress):
@@ -417,6 +421,7 @@ async def __call__(
                     var += (re**2 + im**2 - var).sum() / ngal
                 else:
                     wmean += (w - wmean).sum() / ngal
+                    w2mean += (w**2 - w2mean).sum() / ngal
                     var += ((w * re) ** 2 + (w * im) ** 2 - var).sum() / ngal
 
                 del lon, lat, re, im, w
@@ -425,7 +430,7 @@ async def __call__(
 
         # set mean weight if there was no column for it
         if wcol is None:
-            wmean = 1.0
+            wmean = w2mean = 1.0
 
         # compute mean visibility
         if catalog.visibility is None:
@@ -442,8 +447,11 @@ async def __call__(
         # bias from measured variance, for E/B decomposition
         bias = 2 * np.pi * vbar**2 * (var / wmean**2) / ngal
 
+        # bias correction factor for intrinsic field variance
+        bcor = 2 * np.pi * vbar**2 * (w2mean / wmean**2) / ngal
+
         # set metadata of array
-        update_metadata(val, self, catalog, mapper, wbar=wbar, bias=bias)
+        update_metadata(val, self, catalog, mapper, wbar=wbar, bias=bias, bcor=bcor)
 
         # return the shear map
         return val
diff --git a/heracles/io.py b/heracles/io.py
index c5121b9..72a8005 100644
--- a/heracles/io.py
+++ b/heracles/io.py
@@ -40,15 +40,22 @@
     "spin": "spin weight of map",
     "kernel": "mapping kernel of map",
     "nside": "NSIDE parameter of HEALPix map",
+    "nbar": "mean number density",
+    "wbar": "mean weight density",
     "catalog_1": "catalog of first map",
     "spin_1": "spin weight of first map",
     "kernel_1": "mapping kernel of first map",
     "nside_1": "NSIDE parameter of first HEALPix map",
+    "nbar_1": "mean number density of first field",
+    "wbar_1": "mean weight density of first field",
     "catalog_2": "catalog of second map",
     "spin_2": "spin weight of second map",
     "kernel_2": "mapping kernel of second map",
     "nside_2": "NSIDE parameter of second HEALPix map",
-    "noisbias": "noise bias of spectrum",
+    "nbar_2": "mean number density of second field",
+    "wbar_2": "mean weight density of second field",
+    "bias": "additive bias of spectrum",
+    "bcor": "additive bias correction",
 }
 
 
diff --git a/heracles/twopoint.py b/heracles/twopoint.py
index 800fe96..5f9cda8 100644
--- a/heracles/twopoint.py
+++ b/heracles/twopoint.py
@@ -126,22 +126,27 @@ def angular_power_spectra(
 
         # collect metadata
         md = {}
-        bias = None
+        bias, bcor = None, None
         if alm1.dtype.metadata:
             for key, value in alm1.dtype.metadata.items():
                 if key == "bias":
                     if k1 == k2 and i1 == i2:
                         bias = value
+                elif key == "bcor":
+                    if k1 == k2 and i1 == i2:
+                        bcor = value
                 else:
                     md[f"{key}_1"] = value
         if alm2.dtype.metadata:
             for key, value in alm2.dtype.metadata.items():
-                if key == "bias":
+                if key == "bias" or key == "bcor":
                     pass
                 else:
                     md[f"{key}_2"] = value
         if bias is not None:
             md["bias"] = bias
+        if bcor is not None:
+            md["bcor"] = bcor
 
         # debias cl if asked to
         if debias and bias is not None:
diff --git a/tests/test_fields.py b/tests/test_fields.py
index 3472f64..8d3417e 100644
--- a/tests/test_fields.py
+++ b/tests/test_fields.py
@@ -263,6 +263,7 @@ def test_scalar_field(mapper, catalog):
     w = next(iter(catalog))["w"]
     v = next(iter(catalog))["g1"]
     v2 = ((w * v) ** 2).sum()
+    neff = w.sum() ** 2 / (w**2).sum()
     w = w.reshape(w.size // 4, 4).sum(axis=-1)
     wbar = w.mean()
     bias = (4 * np.pi / npix / npix) * v2
@@ -275,6 +276,7 @@ def test_scalar_field(mapper, catalog):
         "kernel": "healpix",
         "nside": mapper.nside,
         "bias": pytest.approx(bias / wbar**2),
+        "bcor": pytest.approx(4 * np.pi / neff),
     }
     np.testing.assert_array_almost_equal(m, 0)
 
@@ -291,6 +293,7 @@ def test_complex_field(mapper, catalog):
     re = next(iter(catalog))["g1"]
     im = next(iter(catalog))["g2"]
     v2 = ((w * re) ** 2 + (w * im) ** 2).sum()
+    neff = w.sum() ** 2 / (w**2).sum()
     w = w.reshape(w.size // 4, 4).sum(axis=-1)
     wbar = w.mean()
     bias = (4 * np.pi / npix / npix) * v2 / 2
@@ -303,6 +306,7 @@ def test_complex_field(mapper, catalog):
         "kernel": "healpix",
         "nside": mapper.nside,
         "bias": pytest.approx(bias / wbar**2),
+        "bcor": pytest.approx(2 * np.pi / neff),
     }
     np.testing.assert_array_almost_equal(m, 0)