Fix quantizer (#163)

simple-crypto · May 21, 2024 · 3db680e · 3db680e
1 parent 4be758f
commit 3db680e
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 72 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,7 @@ Not released
 ------------
 
 * Raise minimum supported python version to 3.9.
+* Add ``scalib.preprocessing.Quantizer``.
 
 v0.5.7 (2024/03/18)
 -------------------

diff --git a/src/scalib/metrics/information.py b/src/scalib/metrics/information.py
@@ -67,7 +67,7 @@ class RLDAInformationEstimator:
     """
 
     def __init__(self, model: RLDAClassifier.ClusteredModel, max_popped_classes: int):
-        """
+        r"""
         Parameters
         ----------
         model

diff --git a/src/scalib/preprocessing/quantization.py b/src/scalib/preprocessing/quantization.py
@@ -1,111 +1,138 @@
 import numpy as np
+import numpy.typing as npt
 from enum import Enum, auto
 
 
-class QuantFitMethod(Enum):
-    r"""An enum class used to specify how the maximum and minimum of the traces is estimated based on a set of fitting traces.
-    With method = QuantFitMethod.BOUNDS they are estimated as the minimum and maximum of the fitting trace respectively.
-    With method = QuantFitMethod.MOMENT they are estimated as the average of the fitting traces minus/plus seven standard deviations
+class QFitMethod(Enum):
+    r"""An enum class used to specify how the maximum and minimum of the traces
+    are estimated based on a set of fitting traces.
+
+    With ``QFitMethod.BOUNDS`` they are estimated as the minimum and maximum of
+    the fitting traces respectively.
+    With ``QFitMethod.MOMENT`` they are estimated as the average of the fitting
+    traces minus/plus a number of standard deviations (seven by default).
     """
 
     BOUNDS = auto()
     MOMENT = auto()
 
 
-class Quantizer:
-    r"""Quantize a side channel traces given as an array of float into an array of int16.
-        The quantizer estimates a shift and scale that minimize the loss due to the rounding operation.
+class QuantFitMethod:
+    """Method for estimating the scale and shift parameters of Quantizer."""
 
-    .. math::
-        \mathrm{Quantize}( x) = (x - \mathrm{Shift}) \cdot \mathrm{Scale}
+    @classmethod
+    def bounds(cls, margin=2.0):
+        """Take the min and max of the training traces, fit such that the [min,
+        max] range is mapped to a zero-centered interval covering a ``1/margin``
+        fraction of the quantized domain: if the quantized domain is
+        ``[-Q,Q]``, ``min`` is mapped to ``-Q/margin`` and ``max`` is mapped to
+        ``Q/margin``.
+        """
+        return cls(QFitMethod.BOUNDS, margin=margin)
 
-    The shift and scale are vectors whose j-th coordinate is computed using `n` samples as
+    @classmethod
+    def moment(cls, nstd=7.0):
+        """Take the mean and standard deviation of the training traces, fit
+        such that ``mean-nstd*std`` is mapped to `-Q` and ``mean+nstd*std`` is
+        mapped to `Q`, where `[-Q, Q]` is the quantized domain.
+        """
+        return cls(QFitMethod.MOMENT, nstd=nstd)
+
+    def __init__(self, method: QFitMethod, **kwargs):
+        self.method = method
+        self.opts = kwargs
+
+
+class Quantizer:
+    r"""Quantize side-channel traces given as an array of floats into an array
+    of int16.
+
+    The quantizer estimates a shift and scale that minimize the loss due to the
+    rounding operation.
 
     .. math::
-        \mathrm{Shift}_j = \frac{1}{2} (\max_{i=1}^n x_{i,j} + \min_{i=1}^n x_{i,j}) \qquad and \qquad  \mathrm{Scale}_j = \frac{2^{14}}{\max_{i=1}^n x_{i,j} - \min_{i=1}^n x_{i,j}}.
+        \mathrm{Quantize}( x) = \mathrm{Round}((x - \mathrm{Shift}) \cdot \mathrm{Scale})
+
+    The shift and scale parameters can be provided explicitly, or can be
+    estimated based on a few traces.
 
     Warning
     ^^^^^^^
 
-    The quantization procedure operates pointwise: each point is shifted and scaled by a different value.
-    As a consequence the quantized version of the trace probably does not look like its non quantized version.
+    The quantization procedure operates pointwise: each point is shifted and
+    scaled by a different value.
+    As a consequence the quantized version of the trace probably does not look
+    like its non quantized version.
 
     Parameters
     ----------
-    shift : np.ndarray[np.floating]
+    shift : npt.NDArray[np.floating]
         The value subtracted from every trace before scaling.
-    scale : np.ndarray[np.floating]
+    scale : npt.NDArray[np.floating]
         The factor by which every shifted trace is multiplied.
 
     Examples
     --------
     >>> from scalib.preprocessing import Quantizer
     >>> import numpy as np
     >>> # 500 traces of 200 points
-    >>> traces : np.ndarray[np.floating] = np.random.randn(500,200)
+    >>> traces : npt.NDArray[np.floating] = np.random.randn(500,200)
     >>> quantizer = Quantizer.fit(traces)
-    >>> quantized_traces : np.ndarray[np.int16] = quantizer.quantize(traces)
+    >>> quantized_traces : npt.NDArray[np.int16] = quantizer.quantize(traces)
     >>> # Can be reused directly on 5000 new traces for instance
-    >>> traces : np.ndarray[np.floating] = np.random.randn(5000,200)
-    >>> quantized_traces : np.ndarray[np.int16] = quantizer.quantize(traces)
+    >>> traces : npt.NDArray[np.floating] = np.random.randn(5000,200)
+    >>> quantized_traces : npt.NDArray[np.int16] = quantizer.quantize(traces)
     """
 
-    def __init__(self, shift: np.ndarray[np.floating], scale: np.ndarray[np.floating]):
+    def __init__(
+        self, shift: npt.NDArray[np.floating], scale: npt.NDArray[np.floating]
+    ):
         self._shift = shift
         self._scale = scale
 
     @classmethod
     def fit(
         cls,
-        traces: np.ndarray[np.floating],
-        method: QuantFitMethod = QuantFitMethod.MOMENT,
+        traces: npt.NDArray[np.floating],
+        method: QuantFitMethod = QuantFitMethod.bounds(),
     ):
         r"""Compute the shift and scale estimation from sample of `traces`
-        This class method returns an instance of Quantizer with the corresponding shift and scale.
+
+        This class method returns an instance of Quantizer with the
+        corresponding shift and scale.
 
         Parameters
         ----------
         traces : array_like, np.floating
-            Array that contains the traces to estimate the shift and scale in the quantization. The array must
-            be of dimension `(n, ns)`
+            Array that contains the traces to estimate the shift and scale in
+            the quantization. The array must be of dimension `(n, ns)`
         method : QuantFitMethod
-            A member of QuantFitMethod enum class that specifies how the minimum and maximum value of the trace to be quantized is estimated.
+            A ``QuantFitMethod`` instance (e.g. ``QuantFitMethod.bounds()`` or
+            ``QuantFitMethod.moment()``) that specifies how the minimum and
+            maximum value of the trace to be quantized is estimated.
         """
 
-        if method == QuantFitMethod.BOUNDS:
-            # Max/Min Centering and Multiplication by a constant prior to quantization to avoid information loss via rounding error
-            max: np.ndarray[np.floating] = np.amax(traces, axis=0)
-            min: np.ndarray[np.floating] = np.amin(traces, axis=0)
-
-        elif method == QuantFitMethod.MOMENT:
+        if method.method == QFitMethod.BOUNDS:
+            # Max/Min Centering and Multiplication by a constant prior to
+            # quantization to avoid information loss via rounding error
+            max = np.amax(traces, axis=0)
+            min = np.amin(traces, axis=0)
+            shift = (max + min) / 2
+            scale = 2**15 / ((max - min) / 2) / method.opts["margin"]
+        elif method.method == QFitMethod.MOMENT:
             # Gaussian Methods
-            mean: np.ndarray[np.floating] = np.amax(traces, axis=0)
-            std: np.ndarray[np.floating] = np.std(traces, axis=0, ddof=1)
-
-            # Conservative confidence interval.
-            min: np.ndarray[np.floating] = mean - 7 * std
-            max: np.ndarray[np.floating] = mean + 7 * std
-
+            mean = np.mean(traces, axis=0)
+            std = np.std(traces, axis=0, ddof=1)
+            shift = mean
+            scale = 2**15 / (method.opts["nstd"] * std)
         else:
-            raise ValueError(
-                "Method should be a member of QuantFitMethod enum class such as QuantFitMethod.MOMENT or QuantFitMethod.BOUNDS"
-            )
-
-        # Derive shift and scale accordingly to center the traces
-        shift: np.ndarray[np.floating] = (max + min) / 2
-        width: np.ndarray[np.floating] = (max - min) / 2
-        scale: np.ndarray[np.floating] = (
-            2**14
-        ) / width  # 2**14 instead of 2**15 as a safety margin.
-
-        # Create Quantizer
-        quantizer = cls(shift, scale)
+            raise ValueError("method.method should be a QFitMethod object")
 
-        return quantizer
+        return cls(shift, scale)
 
     def quantize(
-        self, traces: np.ndarray[np.floating], clip: bool = False
-    ) -> np.ndarray[np.int16]:
+        self, traces: npt.NDArray[np.floating], clip: bool = False
+    ) -> npt.NDArray[np.int16]:
         r"""Quantize the traces provided in `traces`
 
         Parameters
@@ -114,20 +141,22 @@ def quantize(
             Array that contains the traces to be quantized into int16. The array must
             be of dimension `(n, ns)`
         clip : bool
-            Boolean to bypass the overflow check prior to quantization and clip the overflowing values to the boundaries.
+            Boolean to bypass the overflow check prior to quantization and clip
+            the overflowing values to the boundaries.
             By default it is set to False.
         """
-        adjusted_traces: np.ndarray[np.floating] = (traces - self._shift) * self._scale
+        adjusted_traces: npt.NDArray[np.floating] = (traces - self._shift) * self._scale
         if clip:
             adjusted_traces = np.clip(adjusted_traces, -(2**15), 2**15 - 1)
         else:
-            overflow: bool = (adjusted_traces > 2**15 - 1).any() or (
+            overflow = (adjusted_traces > 2**15 - 1).any() or (
                 adjusted_traces < -(2**15)
             ).any()
             if overflow:
                 raise ValueError(
-                    "Overflow detected in the quantization. Update shift and scale more precisely to avoid the error. "
+                    "Overflow detected in the quantization. Update shift and "
+                    "scale more precisely to avoid the error."
                 )
 
-        quantized_traces: np.ndarray[np.int16] = adjusted_traces.astype(np.int16)
+        quantized_traces: npt.NDArray[np.int16] = adjusted_traces.astype(np.int16)
         return quantized_traces
diff --git a/tests/test_quantizer.py b/tests/test_quantizer.py
@@ -4,21 +4,22 @@
 
 
 def test_quantizer():
-    fitting_traces: np.ndarray[np.float64] = np.random.randn(500, 200)
-    traces: np.ndarray[np.float64] = np.random.randn(5000, 200)
+    ns = 200
+    fitting_traces = np.random.randn(500, ns)
+    traces = np.random.randn(5000, ns)
 
-    quantizer = Quantizer.fit(fitting_traces, QuantFitMethod.MOMENT)
-    quantized_traces: np.ndarray[np.int16] = quantizer.quantize(traces)
+    quantizer = Quantizer.fit(fitting_traces, QuantFitMethod.moment())
+    quantized_traces = quantizer.quantize(traces)
 
-    quantized_traces: np.ndarray[np.int16] = quantizer.quantize(8 * traces, True)
+    quantized_traces = quantizer.quantize(8 * traces, True)
     with pytest.raises(ValueError):
-        quantized_traces: np.ndarray[np.int16] = quantizer.quantize(8 * traces)
+        quantized_traces = quantizer.quantize(8 * traces)
 
-    quantizer = Quantizer.fit(fitting_traces, QuantFitMethod.BOUNDS)
-    quantized_traces: np.ndarray[np.int16] = quantizer.quantize(traces)
+    quantizer = Quantizer.fit(fitting_traces, QuantFitMethod.bounds(4.0))
+    quantized_traces = quantizer.quantize(traces)
 
-    reconstruction: np.ndarray[np.float64] = (
-        quantized_traces / quantizer._scale + quantizer._shift
-    ).astype(np.float64)
+    reconstruction = (quantized_traces / quantizer._scale + quantizer._shift).astype(
+        np.float64
+    )
     reconstruction_error: np.float64 = np.linalg.norm(traces - reconstruction, axis=1)
     assert (reconstruction_error <= 10**-2).all()
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,7 @@ Not released @@
     ------------
     * Raise minimum supported python version to 3.9.
+    * Add ``scalib.preprocessing.Quantizer``.
     v0.5.7 (2024/03/18)
     -------------------
@@ Expand Down @@