diff --git a/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java b/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java index 944e237f..bf88aca8 100644 --- a/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java +++ b/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java @@ -352,7 +352,7 @@ public abstract static class Params { abstract InputType inputType(); - abstract Integer maxContributions(); + abstract int maxContributions(); public abstract Builder toBuilder(); @@ -378,7 +378,7 @@ public abstract static class Builder { public abstract Builder inputType(InputType inputType); /** The maximum number of contributions each privacy unit can make to the dataset. */ - public abstract Builder maxContributions(Integer value); + public abstract Builder maxContributions(int value); abstract Params autoBuild(); diff --git a/privacy-on-beam/pbeam/mean.go b/privacy-on-beam/pbeam/mean.go index 96e769bc..4a373fad 100644 --- a/privacy-on-beam/pbeam/mean.go +++ b/privacy-on-beam/pbeam/mean.go @@ -194,9 +194,7 @@ func MeanPerKey(s beam.Scope, pcol PrivatePCollection, params MeanParams) beam.P // Combine all values for into a slice. // Result is PCollection. - combined := beam.CombinePerKey(s, - &expandFloat64ValuesCombineFn{}, - converted) + combined := beam.CombinePerKey(s, &expandFloat64ValuesCombineFn{}, converted) // Result is PCollection. 
rekeyed := beam.ParDo(s, rekeyArrayFloat64, combined) diff --git a/privacy-on-beam/pbeam/mean_test.go b/privacy-on-beam/pbeam/mean_test.go index d59a713a..a7dc0e07 100644 --- a/privacy-on-beam/pbeam/mean_test.go +++ b/privacy-on-beam/pbeam/mean_test.go @@ -138,7 +138,11 @@ func TestBoundedMeanFnAddInput(t *testing.T) { delta := 1e-23 minValue := 0.0 maxValue := 5.0 - spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) + spec := privacySpec(t, PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + }) fn, err := newBoundedMeanFn(*spec, MeanParams{ AggregationEpsilon: epsilon, PartitionSelectionParams: PartitionSelectionParams{Epsilon: epsilon, Delta: delta}, @@ -185,7 +189,11 @@ func TestBoundedMeanFnMergeAccumulators(t *testing.T) { delta := 1e-23 minValue := 0.0 maxValue := 5.0 - spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) + spec := privacySpec(t, PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + }) fn, err := newBoundedMeanFn(*spec, MeanParams{ AggregationEpsilon: epsilon, PartitionSelectionParams: PartitionSelectionParams{Epsilon: epsilon, Delta: delta}, @@ -221,12 +229,15 @@ func TestBoundedMeanFnMergeAccumulators(t *testing.T) { exactCount := 4.0 exactMean := exactSum / exactCount want := testutils.Float64Ptr(exactMean) - tolerance, err := testutils.LaplaceToleranceForMean(23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } if 
!cmp.Equal(want, got, cmpopts.EquateApprox(0, tolerance)) { - t.Errorf("MergeAccumulators: when merging 2 instances of boundedMeanAccum got: %f, want %f", *got, *want) + t.Errorf("MergeAccumulators: when merging 2 instances of boundedMeanAccum got: %f, want %f", + *got, *want) } } @@ -495,7 +506,7 @@ func TestMeanPerKeyNoNoiseFloat(t *testing.T) { exactCount := 250.0 exactMean := (1.3*100 + 2.5*150) / exactCount result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -551,7 +562,7 @@ func TestMeanPerKeyNoNoiseInt(t *testing.T) { exactCount := 250.0 exactMean := (100.0 + 2.0*150.0) / exactCount result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) @@ -584,13 +595,16 @@ func TestMeanPerKeyNoNoiseInt(t *testing.T) { // Assert want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 150.0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 150.0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyNoNoiseInt: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyNoNoiseInt: MeanPerKey(%v) = %v, want %v, error %v", + col, got, want, err) } } @@ -640,7 +654,7 @@ func TestMeanPerKeyWithPartitionsNoNoiseFloat(t *testing.T) { exactCount := 7.0 exactMean := 14.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + 
{Key: 0, Value: exactMean}, // Partition 1 will be dropped because it's not in the list of public partitions. } publicPartitionsSlice := []int{0} @@ -678,13 +692,16 @@ func TestMeanPerKeyWithPartitionsNoNoiseFloat(t *testing.T) { // Assert want = beam.ParDo(s, testutils.PairIF64ToKV, want) exactNormalizedSum := (2.0 - (tc.maxValue+tc.minValue)/2) * exactCount - tolerance, err := testutils.LaplaceToleranceForMean(24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, exactNormalizedSum, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, exactNormalizedSum, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean test case=%+v: got error %v", tc, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseFloat test case=%+v: MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) + t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseFloat test case=%+v: "+ + "MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) } } } @@ -748,7 +765,7 @@ func TestMeanPerKeyWithPartitionsNoNoiseInt(t *testing.T) { result := []testutils.PairIF64{ // Partition 0 will be dropped because it's not in the list of public partitions. 
- {1, exactMean}, + {Key: 1, Value: exactMean}, } publicPartitionsSlice := []int{1} @@ -776,13 +793,16 @@ func TestMeanPerKeyWithPartitionsNoNoiseInt(t *testing.T) { want = beam.ParDo(s, testutils.PairIF64ToKV, want) exactNormalizedSum := (1.0-(tc.maxValue+tc.minValue)/2)*100 + (2.0-(tc.maxValue+tc.minValue)/2)*150 - tolerance, err := testutils.LaplaceToleranceForMean(23, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, exactNormalizedSum, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 23, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, exactNormalizedSum, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: test case=%+v got error %v", tc, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseInt test case=%+v: MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) + t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseInt test case=%+v: "+ + "MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) } } } @@ -805,7 +825,7 @@ func TestMeanPerKeyCountsPrivacyUnitIDsWithMultipleContributionsCorrectly(t *tes exactCount := 11.0 exactMean := 1.3 result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -836,7 +856,9 @@ func TestMeanPerKeyCountsPrivacyUnitIDsWithMultipleContributionsCorrectly(t *tes }) want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -7.7, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -7.7, 
exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } @@ -889,7 +911,8 @@ func TestMeanPartitionSelection(t *testing.T) { t.Run(tc.name, func(t *testing.T) { // Verify that entriesPerPartition is sensical. if tc.entriesPerPartition <= 0 { - t.Fatalf("Invalid test case: entriesPerPartition must be positive. Got: %d", tc.entriesPerPartition) + t.Fatalf("Invalid test case: entriesPerPartition must be positive. Got: %d", + tc.entriesPerPartition) } // Build up {ID, Partition, Value} pairs such that for each of the tc.numPartitions partitions, @@ -904,7 +927,8 @@ func TestMeanPartitionSelection(t *testing.T) { ) for i := 0; i < tc.numPartitions; i++ { for j := 0; j < tc.entriesPerPartition; j++ { - triples = append(triples, testutils.TripleWithFloatValue{ID: kOffset + j, Partition: i, Value: 1.0}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: kOffset + j, Partition: i, Value: 1.0}) } kOffset += tc.entriesPerPartition } @@ -945,9 +969,9 @@ func TestMeanKeyNegativeBounds(t *testing.T) { testutils.MakeTripleWithFloatValueStartingFromKey(100, 150, 1, -1.0)) exactCount := 250.0 - exactMean := (-5.0*100 - 1.0*150) / exactCount + exactMean := (-5.0*100 - 2.0*150) / exactCount // -1.0 is clamped down to -2.0 result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) @@ -979,13 +1003,16 @@ func TestMeanKeyNegativeBounds(t *testing.T) { }) want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 200.0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 200.0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, 
got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyNegativeBounds: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyNegativeBounds: MeanPerKey(%v) = %v, want %v, error %v", + col, got, want, err) } } @@ -1007,7 +1034,7 @@ func TestMeanPerKeyCrossPartitionContributionBounding(t *testing.T) { exactCount := 51.0 exactMean := 150.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1044,18 +1071,23 @@ func TestMeanPerKeyCrossPartitionContributionBounding(t *testing.T) { want = beam.ParDo(s, testutils.PairIF64ToKV, want) // Tolerance for the partition with an extra contribution which is equal to 150. - tolerance1, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 + tolerance1, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } // Tolerance for the partition without an extra contribution. 
- tolerance2, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3700.0, 50.0, 0.0) // ≈1.074 + tolerance2, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3700.0, 50.0, 0.0) // ≈1.074 if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance1+tolerance2) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyCrossPartitionContributionBounding: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyCrossPartitionContributionBounding: MeanPerKey(%v) = %v, "+ + "want %v, error %v", col, got, want, err) } } @@ -1075,7 +1107,7 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { exactCount := 51.0 exactMean := 50.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) @@ -1111,13 +1143,16 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { got = beam.AddFixedKey(s, sumOverPartitions) // Adds a fixed key of 0. 
want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -2500.0, exactCount, exactMean) // ≈0.92 + tolerance, err := testutils.LaplaceToleranceForMean( + 23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -2500.0, exactCount, exactMean) // ≈0.92 if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyPerPartitionContributionBounding: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyPerPartitionContributionBounding: MeanPerKey(%v) = %v, "+ + "want %v, error %v", col, got, want, err) } } @@ -1125,7 +1160,8 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { func TestMeanPerKeyReturnsNonNegative(t *testing.T) { var triples []testutils.TripleWithFloatValue for key := 0; key < 100; key++ { - triples = append(triples, testutils.TripleWithFloatValue{key, key, 0.01}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: key, Partition: key, Value: 0.01}) } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1170,7 +1206,8 @@ func TestMeanPerKeyWithPartitionsReturnsNonNegative(t *testing.T) { } { var triples []testutils.TripleWithFloatValue for key := 0; key < 100; key++ { - triples = append(triples, testutils.TripleWithFloatValue{key, key, 0.01}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: key, Partition: key, Value: 0.01}) } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1206,7 +1243,8 @@ func TestMeanPerKeyWithPartitionsReturnsNonNegative(t *testing.T) { values := beam.DropKey(s, means) beam.ParDo0(s, testutils.CheckNoNegativeValuesFloat64, 
values) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsReturnsNonNegativeFloat64 in-memory=%t returned errors: %v", tc.inMemory, err) + t.Errorf("TestMeanPerKeyWithPartitionsReturnsNonNegativeFloat64 in-memory=%t "+ + "returned errors: %v", tc.inMemory, err) } } } @@ -1217,7 +1255,8 @@ func TestMeanPerKeyNoClampingForNegativeMinValue(t *testing.T) { // The probability that any given partition has a negative noisy mean is 1/2 * 0.999. // The probability of none of the partitions having a noisy negative mean is 1 - (1/2 * 0.999)^100, which is negligible. for key := 0; key < 100; key++ { - triples = append(triples, testutils.TripleWithFloatValue{key, key, 0}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: key, Partition: key, Value: 0}) } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1274,7 +1313,7 @@ func TestMeanPerKeyWithPartitionsCrossPartitionContributionBounding(t *testing.T exactCount := 51.0 exactMean := 150.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, } publicPartitionsSlice := []int{0, 1} @@ -1314,18 +1353,23 @@ func TestMeanPerKeyWithPartitionsCrossPartitionContributionBounding(t *testing.T want = beam.ParDo(s, testutils.PairIF64ToKV, want) // Tolerance for the partition with an extra contribution which is equal to 150. - tolerance1, err := testutils.LaplaceToleranceForMean(25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 + tolerance1, err := testutils.LaplaceToleranceForMean( + 25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 if err != nil { t.Fatalf("LaplaceToleranceForMean in-memory=%t: got error %v", tc.inMemory, err) } // Tolerance for the partition without an extra contribution. 
- tolerance2, err := testutils.LaplaceToleranceForMean(25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3700.0, 50.0, 0.0) // ≈1.074 + tolerance2, err := testutils.LaplaceToleranceForMean( + 25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3700.0, 50.0, 0.0) // ≈1.074 if err != nil { t.Fatalf("LaplaceToleranceForMean in-memory=%t: got error %v", tc.inMemory, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance1+tolerance2) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsPerPartitionContributionBounding in-memory=%t: MeanPerKey(%v) = %v, want %v, error %v", tc.inMemory, col, got, want, err) + t.Errorf("TestMeanPerKeyWithPartitionsPerPartitionContributionBounding in-memory=%t: "+ + "MeanPerKey(%v) = %v, want %v, error %v", tc.inMemory, col, got, want, err) } } } @@ -1383,9 +1427,9 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { epsilon := 50.0 result := []testutils.PairIF64{ - {1, midpoint}, - {2, midpoint}, - {3, midpoint}, + {Key: 1, Value: midpoint}, + {Key: 2, Value: midpoint}, + {Key: 3, Value: midpoint}, } publicPartitionsSlice := []int{1, 2, 3} @@ -1412,13 +1456,16 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { got := MeanPerKey(s, pcol, meanParams) want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 0.0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 0.0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean test case=%+v: got error %v", tc, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithEmptyPartitionsNoNoise test case=%+v: 
MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) + t.Errorf("TestMeanPerKeyWithEmptyPartitionsNoNoise test case=%+v: MeanPerKey(%v) = %v, "+ + "want %v, error %v", tc, col, got, want, err) } } } diff --git a/python/dp_accounting/dp_accounting/pld/pld_pmf.py b/python/dp_accounting/dp_accounting/pld/pld_pmf.py index 504b77ed..e798abd7 100644 --- a/python/dp_accounting/dp_accounting/pld/pld_pmf.py +++ b/python/dp_accounting/dp_accounting/pld/pld_pmf.py @@ -35,11 +35,36 @@ _MAX_PMF_SPARSE_SIZE = 1000 +def _get_delta_for_epsilon(infinity_mass: float, + losses: Sequence[float], + probs: Sequence[float], + epsilon: float) -> float: + """Computes the epsilon-hockey stick divergence. + + Args: + infinity_mass: The probability of the infinite loss. + losses: The privacy losses, assumed to be sorted in ascending order. + probs: The probabilities corresponding to losses. + epsilon: The epsilon in the epsilon-hockey stick divergence. + + Returns: + The epsilon-hockey stick divergence. + """ + # delta is inf_mass + sum_{loss} max(0, 1 - exp(epsilon - loss)) * prob + losses = np.asarray(losses) + probs = np.asarray(probs) + indices = losses > epsilon + return ( + infinity_mass + + np.dot(-np.expm1(epsilon - losses[indices]), probs[indices]) + ) + + def _get_delta_for_epsilon_vectorized(infinity_mass: float, losses: Sequence[float], probs: Sequence[float], epsilons: Sequence[float]) -> np.ndarray: - """Computes the epsilon-hockey stick divergence. + """Computes the epsilon-hockey stick divergence for multiple epsilons. Args: infinity_mass: the probability of the infinite loss. 
@@ -346,15 +371,11 @@ def get_delta_for_epsilon( """Computes the epsilon-hockey stick divergence.""" losses = (np.arange(self.size) + self._lower_loss) * self._discretization - is_scalar = isinstance(epsilon, numbers.Number) - if is_scalar: - epsilon = [epsilon] - - delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, - self._probs, epsilon) - if is_scalar: - delta = delta[0] - return delta + if isinstance(epsilon, numbers.Number): + return _get_delta_for_epsilon(self._infinity_mass, losses, + self._probs, epsilon) + return _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, + self._probs, epsilon) def get_epsilon_for_delta(self, delta: float) -> float: """Computes epsilon for which hockey stick divergence is at most delta.""" @@ -499,15 +520,11 @@ def get_delta_for_epsilon( self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]: """Computes the epsilon-hockey stick divergence.""" losses, probs = self._get_losses_probs() - is_scalar = isinstance(epsilon, numbers.Number) - if is_scalar: - epsilon = [epsilon] - - delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, - probs, epsilon) - if is_scalar: - delta = delta[0] - return delta + if isinstance(epsilon, numbers.Number): + return _get_delta_for_epsilon(self._infinity_mass, losses, probs, epsilon) + + return _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, + probs, epsilon) def get_epsilon_for_delta(self, delta: float) -> float: """Computes epsilon for which hockey stick divergence is at most delta.""" diff --git a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py index 2476a398..8e1855ae 100644 --- a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py +++ b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py @@ -75,16 +75,21 @@ def _compute_log_a_int(q: float, sigma: float, alpha: int) -> float: return log_a +_MAX_STEPS_LOG_A_FRAC 
= 1000 + + def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float: """Computes log(A_alpha) for fractional alpha, 0 < q < 1.""" + # Computation derived in Sec 3.3 of https://arxiv.org/pdf/1908.10530. # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are # initialized to 0 in the log space: log_a0, log_a1 = -np.inf, -np.inf z0 = sigma**2 * math.log(1 / q - 1) + .5 log1mq = math.log1p(-q) - i = 0 - while True: # do ... until loop + last_s0 = last_s1 = -np.inf + + for i in range(_MAX_STEPS_LOG_A_FRAC): log_coef = _log_comb(alpha, i) j = alpha - i @@ -100,11 +105,30 @@ def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float: log_a0 = _log_add(log_a0, log_s0) log_a1 = _log_add(log_a1, log_s1) - i += 1 - if max(log_s0, log_s1) < -30: - break - - return _log_add(log_a0, log_a1) + total = _log_add(log_a0, log_a1) + + # Terminate when both s0 and s1 are decreasing and sufficiently small + # relative to total. + if ( + log_s0 < last_s0 + and log_s1 < last_s1 + and max(log_s0, log_s1) < total - 30 + ): + return total + + last_s0 = log_s0 + last_s1 = log_s1 + + logging.warning( + '_compute_log_a_frac failed to converge after %d iterations with q=%f' + ', sigma=%f, alpha=%f. 
Excluding this order from the epsilon ' + 'computation.', + _MAX_STEPS_LOG_A_FRAC, + q, + sigma, + alpha, + ) + return np.inf def _log_erfc(x: float) -> float: diff --git a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py index 11418db4..34c2b76a 100644 --- a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py +++ b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py @@ -732,6 +732,22 @@ def test_repeat_and_select_gaussian_poisson(self, sigma, mean): lb = min(rdp[j] for j in range(len(orders)) if orders[j] >= order) self.assertLessEqual(lb, accountant_rdp) + def test_log_a_frac_positive(self): + # Testing a combination of q, sigma and alpha that formerly returned a + # negative log_a_frac. + for order in np.linspace(58.5, 59.5, 21): + log_a = rdp_privacy_accountant._compute_log_a_frac(0.4, 12, order) + self.assertGreater(log_a, 0) + + def test_log_a_frac_early_termination(self): + # Test an event that is known to not converge for small orders. + event = dp_event.PoissonSampledDpEvent(0.1, dp_event.GaussianDpEvent(1.0)) + accountant = rdp_privacy_accountant.RdpAccountant() + with self.assertLogs(level='WARNING') as log: + accountant.compose(event) + self.assertNotEmpty([l for l in log.output if 'failed to converge' in l]) + self.assertIn(np.inf, accountant._rdp) + if __name__ == '__main__': absltest.main() diff --git a/python/dp_accounting/requirements.txt b/python/dp_accounting/requirements.txt index 89264f11..6cb58dad 100644 --- a/python/dp_accounting/requirements.txt +++ b/python/dp_accounting/requirements.txt @@ -2,7 +2,7 @@ # the dependenices from `../learning/requirements.txt`. absl-py~=1.0 -attrs>=22 +attrs>=22,<24 dm-tree~=0.1.8 mpmath~=1.2 numpy~=1.21