Skip to content

Commit

Permalink
More accurate RDP computation and other improvements to Accounting
Browse files Browse the repository at this point in the history
Python Accounting:
- [PLD] Add a standalone method for computing hockey stick divergence for a single `epsilon`. This is done to address the commonly occurring use case of computing `delta` for a single `epsilon`.
- [RDP] More stable loop termination criterion in fractional RDP order computation. Previously the loop sometimes terminated too early, resulting in underestimates of the RDP at some orders. Now it will run to convergence in most cases, and in case of too many iterations, it will return an RDP of inf at that order, guaranteeing that the resulting epsilon is a true upper bound.
- [Requirements] Change `attrs` version from `>=22` to `>=22,<24`.

Java:
- Change maxContributions from Integer to int in ApproximateBounds

Privacy on Beam:
- Formatting changes in mean.go & mean_test.go

Change-Id: I7322d5f5438ce5fe45670180470054664c297c3c
GitOrigin-RevId: afd7fcc5d03cce8632305943c37ba70fd9a9bb23
  • Loading branch information
Differential Privacy Team authored and miracvbasaran committed Sep 24, 2024
1 parent 156c8fb commit 6f7deb3
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ public abstract static class Params {

abstract InputType inputType();

abstract Integer maxContributions();
abstract int maxContributions();

public abstract Builder toBuilder();

Expand All @@ -378,7 +378,7 @@ public abstract static class Builder {
public abstract Builder inputType(InputType inputType);

/** The maximum number of contributions each privacy unit can make to the dataset. */
public abstract Builder maxContributions(Integer value);
public abstract Builder maxContributions(int value);

abstract Params autoBuild();

Expand Down
4 changes: 1 addition & 3 deletions privacy-on-beam/pbeam/mean.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,7 @@ func MeanPerKey(s beam.Scope, pcol PrivatePCollection, params MeanParams) beam.P

// Combine all values for <id, partition> into a slice.
// Result is PCollection<kv.Pair{ID,K},[]float64>.
combined := beam.CombinePerKey(s,
&expandFloat64ValuesCombineFn{},
converted)
combined := beam.CombinePerKey(s, &expandFloat64ValuesCombineFn{}, converted)

// Result is PCollection<ID, pairArrayFloat64>.
rekeyed := beam.ParDo(s, rekeyArrayFloat64, combined)
Expand Down
131 changes: 89 additions & 42 deletions privacy-on-beam/pbeam/mean_test.go

Large diffs are not rendered by default.

55 changes: 36 additions & 19 deletions python/dp_accounting/dp_accounting/pld/pld_pmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,36 @@
_MAX_PMF_SPARSE_SIZE = 1000


def _get_delta_for_epsilon(infinity_mass: float,
losses: Sequence[float],
probs: Sequence[float],
epsilon: float) -> float:
"""Computes the epsilon-hockey stick divergence.
Args:
infinity_mass: The probability of the infinite loss.
losses: The privacy losses, assumed to be sorted in ascending order.
probs: The probabilities corresponding to losses.
epsilon: The epsilon in the epsilon-hockey stick divergence.
Returns:
The epsilon-hockey stick divergence.
"""
# delta is inf_mass + sum_{loss} max(0, 1 - exp(epsilon - loss)) * prob
losses = np.asarray(losses)
probs = np.asarray(probs)
indices = losses > epsilon
return (
infinity_mass +
np.dot(-np.expm1(epsilon - losses[indices]), probs[indices])
)


def _get_delta_for_epsilon_vectorized(infinity_mass: float,
losses: Sequence[float],
probs: Sequence[float],
epsilons: Sequence[float]) -> np.ndarray:
"""Computes the epsilon-hockey stick divergence.
"""Computes the epsilon-hockey stick divergence for multiple epsilons.
Args:
infinity_mass: the probability of the infinite loss.
Expand Down Expand Up @@ -346,15 +371,11 @@ def get_delta_for_epsilon(
"""Computes the epsilon-hockey stick divergence."""
losses = (np.arange(self.size) + self._lower_loss) * self._discretization

is_scalar = isinstance(epsilon, numbers.Number)
if is_scalar:
epsilon = [epsilon]

delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses,
self._probs, epsilon)
if is_scalar:
delta = delta[0]
return delta
if isinstance(epsilon, numbers.Number):
return _get_delta_for_epsilon(self._infinity_mass, losses,
self._probs, epsilon)
return _get_delta_for_epsilon_vectorized(self._infinity_mass, losses,
self._probs, epsilon)

def get_epsilon_for_delta(self, delta: float) -> float:
"""Computes epsilon for which hockey stick divergence is at most delta."""
Expand Down Expand Up @@ -499,15 +520,11 @@ def get_delta_for_epsilon(
self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]:
"""Computes the epsilon-hockey stick divergence."""
losses, probs = self._get_losses_probs()
is_scalar = isinstance(epsilon, numbers.Number)
if is_scalar:
epsilon = [epsilon]

delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses,
probs, epsilon)
if is_scalar:
delta = delta[0]
return delta
if isinstance(epsilon, numbers.Number):
return _get_delta_for_epsilon(self._infinity_mass, losses, probs, epsilon)

return _get_delta_for_epsilon_vectorized(self._infinity_mass, losses,
probs, epsilon)

def get_epsilon_for_delta(self, delta: float) -> float:
"""Computes epsilon for which hockey stick divergence is at most delta."""
Expand Down
38 changes: 31 additions & 7 deletions python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,21 @@ def _compute_log_a_int(q: float, sigma: float, alpha: int) -> float:
return log_a


_MAX_STEPS_LOG_A_FRAC = 1000


def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float:
"""Computes log(A_alpha) for fractional alpha, 0 < q < 1."""
# Computation derived in Sec 3.3 of https://arxiv.org/pdf/1908.10530.
# The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
# initialized to 0 in the log space:
log_a0, log_a1 = -np.inf, -np.inf
z0 = sigma**2 * math.log(1 / q - 1) + .5
log1mq = math.log1p(-q)

i = 0
while True: # do ... until loop
last_s0 = last_s1 = -np.inf

for i in range(_MAX_STEPS_LOG_A_FRAC):
log_coef = _log_comb(alpha, i)
j = alpha - i

Expand All @@ -100,11 +105,30 @@ def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float:
log_a0 = _log_add(log_a0, log_s0)
log_a1 = _log_add(log_a1, log_s1)

i += 1
if max(log_s0, log_s1) < -30:
break

return _log_add(log_a0, log_a1)
total = _log_add(log_a0, log_a1)

# Terminate when both s0 and s1 are decreasing and sufficiently small
# relative to total.
if (
log_s0 < last_s0
and log_s1 < last_s1
and max(log_s0, log_s1) < total - 30
):
return total

last_s0 = log_s0
last_s1 = log_s1

logging.warning(
'_compute_log_a_frac failed to converge after %d iterations with q=%f'
', sigma=%f, alpha=%f. Excluding this order from the epsilon '
'computation.',
_MAX_STEPS_LOG_A_FRAC,
q,
sigma,
alpha,
)
return np.inf


def _log_erfc(x: float) -> float:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,22 @@ def test_repeat_and_select_gaussian_poisson(self, sigma, mean):
lb = min(rdp[j] for j in range(len(orders)) if orders[j] >= order)
self.assertLessEqual(lb, accountant_rdp)

def test_log_a_frac_positive(self):
  # Regression test: these (q, sigma, alpha) combinations previously
  # produced a negative log_a_frac.
  orders = np.linspace(58.5, 59.5, num=21)
  for alpha in orders:
    log_a = rdp_privacy_accountant._compute_log_a_frac(0.4, 12, alpha)
    self.assertGreater(log_a, 0)

def test_log_a_frac_early_termination(self):
  # This event is known not to converge at small orders; composing it should
  # emit a convergence warning and record an RDP of inf at those orders.
  gaussian = dp_event.GaussianDpEvent(1.0)
  sampled = dp_event.PoissonSampledDpEvent(0.1, gaussian)
  accountant = rdp_privacy_accountant.RdpAccountant()
  with self.assertLogs(level='WARNING') as log:
    accountant.compose(sampled)
  warnings = [msg for msg in log.output if 'failed to converge' in msg]
  self.assertNotEmpty(warnings)
  self.assertIn(np.inf, accountant._rdp)


if __name__ == '__main__':
absltest.main()
2 changes: 1 addition & 1 deletion python/dp_accounting/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# the dependencies from `../learning/requirements.txt`.

absl-py~=1.0
attrs>=22
attrs>=22,<24
dm-tree~=0.1.8
mpmath~=1.2
numpy~=1.21
Expand Down

0 comments on commit 6f7deb3

Please sign in to comment.