Add double mixture privacy losses
DP Accounting changes:
* Add accounting for mechanisms dominated by a pair of two
  mixture distributions
Jan Schuchardt committed Nov 13, 2024
1 parent 0a80e37 commit 9dfed70
Showing 2 changed files with 1,088 additions and 317 deletions.
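
For context (this is standard dominating-pair accounting, not stated in the commit itself): a pair of distributions (P, Q) dominates a mechanism M for neighboring inputs D, D' if, for every ε ≥ 0,

    H_{e^ε}(M(D) || M(D')) ≤ H_{e^ε}(P || Q),   where   H_γ(P || Q) = ∫ max(0, p(x) − γ·q(x)) dx

is the hockey-stick divergence. The functions added below handle the case where both P and Q are finite mixtures (of Gaussian or Laplace noise distributions) over a set of sensitivities, hence "double mixture".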
python/dp_accounting/dp_accounting/pld/privacy_loss_distribution.py (132 changes: 132 additions & 0 deletions)
@@ -1330,6 +1330,138 @@ def single_discrete_gaussian_pld(
sampling_prob)


def from_double_mixture_gaussian_mechanism(
    standard_deviation: float,
    sensitivities_upper: Sequence[float],
    sensitivities_lower: Sequence[float],
    sampling_probs_upper: Sequence[float],
    sampling_probs_lower: Sequence[float],
    pessimistic_estimate: bool = True,
    value_discretization_interval: float = 1e-4,
    log_mass_truncation_bound: float = -50,
    use_connect_dots: bool = True,
) -> PrivacyLossDistribution:
"""Creates the pld of a Double Mixture of Gaussians mechanism.
This method supports two algorithms for constructing the privacy loss
distribution. One given by the "Privacy Buckets" algorithm and other given by
"Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary
material for more details.
Args:
standard_deviation: the standard_deviation of the Gaussian distribution.
sensitivities_upper: the support of the first mixture's
sensitivity distribution.
Must be the same length as sampling_probs_upper, and both should be 1D.
sensitivities_lower: the support of the second mixture's
sensitivity distribution.
Must be the same length as sampling_probs_lower, and both should be 1D.
sampling_probs_upper: the probabilities associated with sensitivities_upper.
sampling_probs_lower: the probabilities associated with sensitivities_lower.
pessimistic_estimate: a value indicating whether the rounding is done in
such a way that the resulting epsilon-hockey stick divergence computation
gives an upper estimate to the real value.
value_discretization_interval: the length of the dicretization interval for
the privacy loss distribution. The values will be rounded up/down to be
integer multiples of this number. Smaller value results in more accurate
estimates of the privacy loss, at the cost of increased run-time / memory
usage.
log_mass_truncation_bound: the ln of the probability mass that might be
discarded from the noise distribution. The larger this number, the more
error it may introduce in divergence calculations.
use_connect_dots: When True (default), the connect-the-dots algorithm will
be used to construct the privacy loss distribution. When False, the
privacy buckets algorithm will be used.
Returns:
The privacy loss distribution corresponding to the Mixture of Gaussians
mechanism with given parameters.
"""

  pmf = _create_pld_pmf_from_additive_noise(
      privacy_loss_mechanism.DoubleMixtureGaussianPrivacyLoss(
          standard_deviation,
          sensitivities_upper,
          sensitivities_lower,
          sampling_probs_upper,
          sampling_probs_lower,
          pessimistic_estimate=pessimistic_estimate,
          log_mass_truncation_bound=log_mass_truncation_bound,
      ),
      pessimistic_estimate=pessimistic_estimate,
      value_discretization_interval=value_discretization_interval,
      use_connect_dots=use_connect_dots,
  )

  return PrivacyLossDistribution(pmf)
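
For illustration, a minimal usage sketch of the helper above; the parameter values are hypothetical and not taken from this commit. Here the upper mixture corresponds to Poisson subsampling with rate 0.1 and sensitivity 1, and the lower distribution is a single zero-sensitivity Gaussian.

# Illustrative sketch only; hypothetical parameter values, not part of the commit.
example_gaussian_pld = from_double_mixture_gaussian_mechanism(
    standard_deviation=1.0,
    sensitivities_upper=[0.0, 1.0],
    sensitivities_lower=[0.0],
    sampling_probs_upper=[0.9, 0.1],
    sampling_probs_lower=[1.0],
)
# Delta of the epsilon-hockey stick divergence at epsilon = 1.0.
delta = example_gaussian_pld.get_delta_for_epsilon(1.0)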


def from_double_mixture_laplace_mechanism(
    scale: float,
    sensitivities_upper: Sequence[float],
    sensitivities_lower: Sequence[float],
    sampling_probs_upper: Sequence[float],
    sampling_probs_lower: Sequence[float],
    pessimistic_estimate: bool = True,
    value_discretization_interval: float = 1e-4,
    log_mass_truncation_bound: float = -50,
    use_connect_dots: bool = True,
) -> PrivacyLossDistribution:
"""Creates the pld of a Double Mixture of Laplace mechanism.
This method supports two algorithms for constructing the privacy loss
distribution. One given by the "Privacy Buckets" algorithm and other given by
"Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary
material for more details.
Args:
scale: the scale of the Laplace distribution.
sensitivities_upper: the support of the first mixture's
sensitivity distribution.
Must be the same length as sampling_probs_upper, and both should be 1D.
sensitivities_lower: the support of the second mixture's
sensitivity distribution.
Must be the same length as sampling_probs_lower, and both should be 1D.
sampling_probs_upper: the probabilities associated with sensitivities_upper.
sampling_probs_lower: the probabilities associated with sensitivities_lower.
pessimistic_estimate: a value indicating whether the rounding is done in
such a way that the resulting epsilon-hockey stick divergence computation
gives an upper estimate to the real value.
value_discretization_interval: the length of the dicretization interval for
the privacy loss distribution. The values will be rounded up/down to be
integer multiples of this number. Smaller value results in more accurate
estimates of the privacy loss, at the cost of increased run-time / memory
usage.
log_mass_truncation_bound: the ln of the probability mass that might be
discarded from the noise distribution. The larger this number, the more
error it may introduce in divergence calculations.
use_connect_dots: When True (default), the connect-the-dots algorithm will
be used to construct the privacy loss distribution. When False, the
privacy buckets algorithm will be used.
Returns:
The privacy loss distribution corresponding to the Mixture of Gaussians
mechanism with given parameters.
"""

  pmf = _create_pld_pmf_from_additive_noise(
      privacy_loss_mechanism.DoubleMixtureLaplacePrivacyLoss(
          scale,
          sensitivities_upper,
          sensitivities_lower,
          sampling_probs_upper,
          sampling_probs_lower,
          pessimistic_estimate=pessimistic_estimate,
          log_mass_truncation_bound=log_mass_truncation_bound,
      ),
      pessimistic_estimate=pessimistic_estimate,
      value_discretization_interval=value_discretization_interval,
      use_connect_dots=use_connect_dots,
  )

  return PrivacyLossDistribution(pmf)
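
Similarly, a minimal usage sketch for the Laplace variant; again the parameter values are hypothetical.

# Illustrative sketch only; hypothetical parameter values, not part of the commit.
example_laplace_pld = from_double_mixture_laplace_mechanism(
    scale=1.0,
    sensitivities_upper=[0.0, 1.0],
    sensitivities_lower=[0.0],
    sampling_probs_upper=[0.8, 0.2],
    sampling_probs_lower=[1.0],
)
# Smallest epsilon at which the PLD certifies (epsilon, 1e-6)-DP.
epsilon = example_laplace_pld.get_epsilon_for_delta(1e-6)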


def from_mixture_gaussian_mechanism(
standard_deviation: float,
sensitivities: Sequence[float],