Skip to content

Commit

Permalink
implement histograms with variable binning
Browse files Browse the repository at this point in the history
  • Loading branch information
ManuelHu committed Jul 13, 2024
1 parent 96e6f81 commit 61cf866
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 58 deletions.
16 changes: 11 additions & 5 deletions src/lgdo/lh5/_serializers/read/composite.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,11 +418,17 @@ def _h5_read_histogram(
decompress,
)
isdensity = struct.isdensity.value
binning = [
(a.binedges.first, a.binedges.last, a.binedges.step, a.closedleft)
for _, a in struct.binning.items()
]
binning = [tuple(v.value for v in b) for b in binning]
binning = []
for _, a in struct.binning.items():
be = a.binedges
if isinstance(be, Struct):
b = (None, be.first.value, be.last.value, be.step.value, a.closedleft.value)
elif isinstance(be, Array):
b = (be, None, None, None, a.closedleft.value)
else:
msg = "unexpected binning of histogram"
raise LH5DecodeError(msg, h5g)
binning.append(Histogram.Axis(*b))
weights = struct.weights.view_as("np")
histogram = Histogram(weights, binning, isdensity)

Expand Down
135 changes: 94 additions & 41 deletions src/lgdo/types/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ class Histogram(Struct):
class Axis(Struct):
def __init__(
self,
first: float,
last: float,
step: float,
edges: np.ndarray | Array | None,
first: float | None,
last: float | None,
step: float | None,
closedleft: bool = True,
binedge_attrs: dict[str, Any] | None = None,
) -> None:
Expand All @@ -39,38 +40,67 @@ def __init__(
attributes that will be added to the ``binedges`` LGDO that
is part of the axis struct.
"""
super().__init__(
{
"binedges": Struct(
{
"first": Scalar(first),
"last": Scalar(last),
"step": Scalar(step),
},
binedge_attrs,
),
"closedleft": Scalar(closedleft),
},
)
if edges is not None and (
first is not None or last is not None or step is not None
):
msg = "can only construct Axis either from edges or from range"
raise ValueError(msg)
if edges is None and (first is None or last is None or step is None):
msg = "did not pass all range parameters"
raise ValueError(msg)

if edges is None:
edges = Struct(
{
"first": Scalar(first),
"last": Scalar(last),
"step": Scalar(step),
},
binedge_attrs,
)
else:
if not isinstance(edges, Array):
edges = Array(edges, attrs=binedge_attrs)
elif binedge_attrs is not None:
msg = "passed both binedge types.Array instance and binedge_attrs"
raise ValueError(msg)

if len(edges.nda.shape) != 1:
msg = "must pass an array<1>{real} as edges vector"
raise ValueError(msg)

super().__init__({"binedges": edges, "closedleft": Scalar(closedleft)})

@classmethod
def from_edges(cls, edges: np.ndarray) -> Histogram.Axis:
edge_diff = np.diff(edges)
if np.any(~np.isclose(edge_diff, edge_diff[0])):
msg = "only evenly distributed edges can be converted"
raise ValueError(msg)
return cls(edges[0], edges[-1], edge_diff[0], True)
return cls(edges, None, None, None, True)
return cls(None, edges[0], edges[-1], edge_diff[0], True)

@property
def is_range(self) -> bool:
return isinstance(self["binedges"], Struct)

@property
def first(self) -> float:
if not self.is_range:
msg = "Axis is not a range"
raise TypeError(msg)
return self["binedges"]["first"].value

@property
def last(self) -> float:
if not self.is_range:
msg = "Axis is not a range"
raise TypeError(msg)
return self["binedges"]["last"].value

@property
def step(self) -> float:
if not self.is_range:
msg = "Axis is not a range"
raise TypeError(msg)
return self["binedges"]["step"].value

@property
Expand All @@ -79,24 +109,43 @@ def closedleft(self) -> bool:

@property
def nbins(self) -> int:
bins = (self.last - self.first) / self.step
bins_int = int(np.rint(bins))
assert np.isclose(bins, bins_int)
return bins_int
if self.is_range:
bins = (self.last - self.first) / self.step
bins_int = int(np.rint(bins))
assert np.isclose(bins, bins_int)
return bins_int
return len(self["binedges"].nda) - 1

@property
def edges(self) -> np.ndarray:
if self.is_range:
return np.linspace(self.first, self.last, self.nbins + 1)
return self["binedges"].nda

def __str__(self) -> str:
return (
f"first={self.first}, last={self.last}, step={self.step}, "
+ f"closedleft={self.closedleft}"
)
thr_orig = np.get_printoptions()["threshold"]
np.set_printoptions(threshold=8)

if self.is_range:
string = f"first={self.first}, last={self.last}, step={self.step}"
else:
string = f"edges={self.edges}"
string += f", closedleft={self.closedleft}"

attrs = self["binedges"].getattrs()
if attrs:
string += f" with attrs={attrs}"

np.set_printoptions(threshold=thr_orig)
return string

def __init__(
self,
weights: hist.Hist | np.ndarray,
binning: None
| list[Histogram.Axis]
| list[np.ndarray]
| list[tuple[float, float, float, bool]] = None,
| list[tuple[float, float, float]] = None,
isdensity: bool = False,
attrs: dict[str, Any] | None = None,
) -> None:
Expand All @@ -112,7 +161,7 @@ def __init__(
binning
* has to by None if a :class:`hist.Hist` has been passed as ``weights``
* can be a list of pre-initialized :class:`Histogram.Axis`
* can be a list of tuples, representing arguments to :meth:`Histogram.Axis.__init__()`
* can be a list of tuples, representing a range, ``(first, last, step)``
* can be a list of numpy arrays, as returned by :func:`numpy.histogramdd`.
"""
if isinstance(weights, hist.Hist):
Expand All @@ -127,15 +176,15 @@ def __init__(
msg = "flow bins of hist.Hist cannot be represented"
raise ValueError(msg)
weights_view = weights.view(flow=False)
if not isinstance(weights_view, np.ndarray):
msg = "only numpy-backed storages can be used in a hist.Hist"
if type(weights_view) is not np.ndarray:
msg = "only simple numpy-backed storages can be used in a hist.Hist"
raise ValueError(msg)
w = Array(weights_view)

b = []
for ax in weights.axes:
if not isinstance(ax, hist.axis.Regular):
msg = "only regular axes of hist.Hist can be converted"
if not isinstance(ax, (hist.axis.Regular, hist.axis.Variable)):
msg = "only regular or variable axes of hist.Hist can be converted"
raise ValueError(msg)
b.append(Histogram.Axis.from_edges(ax.edges))
b = self._create_binning(b)
Expand All @@ -150,7 +199,7 @@ def __init__(
elif all(isinstance(ax, np.ndarray) for ax in binning):
b = [Histogram.Axis.from_edges(ax) for ax in binning]
elif all(isinstance(ax, tuple) for ax in binning):
b = [Histogram.Axis(*ax) for ax in binning]
b = [Histogram.Axis(None, *ax, True) for ax in binning]
else:
msg = "invalid binning object passed"
raise ValueError(msg)
Expand Down Expand Up @@ -254,23 +303,27 @@ def view_as(
if not a.closedleft:
msg = "hist.Hist cannot represent right-closed intervals"
raise ValueError(msg)
hist_axes.append(
hist.axis.Regular(
if a.is_range:
hist_ax = hist.axis.Regular(
bins=a.nbins,
start=a.first,
stop=a.last,
underflow=False,
overflow=False,
)
)
else:
hist_ax = hist.axis.Variable(
a.edges,
underflow=False,
overflow=False,
)
hist_axes.append(hist_ax)

return hist.Hist(*hist_axes, data=self.weights.view_as("np"))

if library == "np":
edges = []
for a in self.binning:
edges.append(np.linspace(a.first, a.last, a.nbins))
return self.weights.view_as("np"), tuple(edges)
edges = tuple([a.edges for a in self.binning])
return self.weights.view_as("np"), edges

msg = f"{library!r} is not a supported third-party format."
raise TypeError(msg)
46 changes: 46 additions & 0 deletions tests/lh5/test_lh5_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,49 @@ def test_write_histogram(caplog, tmptestdir):
write_start=1,
group="my_group",
)


def test_write_histogram_variable(caplog, tmptestdir):
caplog.set_level(logging.DEBUG)
caplog.clear()

# Start with an types.Histogram
if os.path.exists(f"{tmptestdir}/write_histogram_test.lh5"):
os.remove(f"{tmptestdir}/write_histogram_test.lh5")

h1 = types.Histogram(
np.array([[1, 1], [1, 1]]), (np.array([0, 1.2, 2]), np.array([2.1, 2.5, 2.3]))
)
h2 = types.Histogram(
np.array([[10, 10], [10, 10]]),
(np.array([2, 3.5, 4]), np.array([5, 6.5, 7])),
isdensity=True,
) # Same field name, different values
store = lh5.LH5Store()
store.write(
h1,
"my_histogram",
f"{tmptestdir}/write_histogram_test.lh5",
group="my_group",
wo_mode="write_safe",
)
store.write(
h2,
"my_histogram",
f"{tmptestdir}/write_histogram_test.lh5",
wo_mode="overwrite",
group="my_group",
)

# Now, check that the data were overwritten
h3, _ = store.read(
"my_group/my_histogram", f"{tmptestdir}/write_histogram_test.lh5"
)
assert np.array_equal(h3.weights.nda, np.array([[10, 10], [10, 10]]))
assert np.array_equal(h3.binning[0].edges, np.array([2, 3.5, 4]))
with pytest.raises(TypeError):
x = h3.binning[0].first
with pytest.raises(TypeError):
x = h3.binning[1].last # noqa: F841
assert not h3.binning[0].is_range
assert h3.isdensity
Loading

0 comments on commit 61cf866

Please sign in to comment.