From 6f7deb3108699dc9849ac0692c5e2b7546578190 Mon Sep 17 00:00:00 2001 From: Differential Privacy Team Date: Wed, 18 Sep 2024 18:38:44 -0700 Subject: [PATCH] More accurate RDP computation and other improvements to Accounting Python Accounting: - [PLD] Add a standalone method for computing hockey stick divergence for a single `epsilon`. This is done to address the commonly occurring use case of computing `delta` for a single `epsilon`. - [RDP] More stable loop termination criterion in fractional RDP order computation. Previously the loop sometimes terminated too early, resulting in underestimates of the RDP at some orders. Now it will run to convergence in most cases, and in case of too many iterations, it will return an RDP of inf at that order, guaranteeing that the resulting epsilon is a true upper bound. - [Requirements] Change `attrs` version from `>=22` to `>=22,<24`. Java: - Change maxContributions from Integer to int in ApproximateBounds Privacy on Beam: - Formatting changes in mean.go & mean_test.go Change-Id: I7322d5f5438ce5fe45670180470054664c297c3c GitOrigin-RevId: afd7fcc5d03cce8632305943c37ba70fd9a9bb23 --- .../ApproximateBounds.java | 4 +- privacy-on-beam/pbeam/mean.go | 4 +- privacy-on-beam/pbeam/mean_test.go | 131 ++++++++++++------ .../dp_accounting/pld/pld_pmf.py | 55 +++++--- .../rdp/rdp_privacy_accountant.py | 38 ++++- .../rdp/rdp_privacy_accountant_test.py | 16 +++ python/dp_accounting/requirements.txt | 2 +- 7 files changed, 176 insertions(+), 74 deletions(-) diff --git a/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java b/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java index 944e237f..bf88aca8 100644 --- a/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java +++ b/java/main/com/google/privacy/differentialprivacy/ApproximateBounds.java @@ -352,7 +352,7 @@ public abstract static class Params { abstract InputType inputType(); - abstract Integer maxContributions(); + abstract int 
maxContributions(); public abstract Builder toBuilder(); @@ -378,7 +378,7 @@ public abstract static class Builder { public abstract Builder inputType(InputType inputType); /** The maximum number of contributions each privacy unit can make to the dataset. */ - public abstract Builder maxContributions(Integer value); + public abstract Builder maxContributions(int value); abstract Params autoBuild(); diff --git a/privacy-on-beam/pbeam/mean.go b/privacy-on-beam/pbeam/mean.go index 96e769bc..4a373fad 100644 --- a/privacy-on-beam/pbeam/mean.go +++ b/privacy-on-beam/pbeam/mean.go @@ -194,9 +194,7 @@ func MeanPerKey(s beam.Scope, pcol PrivatePCollection, params MeanParams) beam.P // Combine all values for into a slice. // Result is PCollection. - combined := beam.CombinePerKey(s, - &expandFloat64ValuesCombineFn{}, - converted) + combined := beam.CombinePerKey(s, &expandFloat64ValuesCombineFn{}, converted) // Result is PCollection. rekeyed := beam.ParDo(s, rekeyArrayFloat64, combined) diff --git a/privacy-on-beam/pbeam/mean_test.go b/privacy-on-beam/pbeam/mean_test.go index d59a713a..a7dc0e07 100644 --- a/privacy-on-beam/pbeam/mean_test.go +++ b/privacy-on-beam/pbeam/mean_test.go @@ -138,7 +138,11 @@ func TestBoundedMeanFnAddInput(t *testing.T) { delta := 1e-23 minValue := 0.0 maxValue := 5.0 - spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) + spec := privacySpec(t, PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + }) fn, err := newBoundedMeanFn(*spec, MeanParams{ AggregationEpsilon: epsilon, PartitionSelectionParams: PartitionSelectionParams{Epsilon: epsilon, Delta: delta}, @@ -185,7 +189,11 @@ func TestBoundedMeanFnMergeAccumulators(t *testing.T) { delta := 1e-23 minValue := 0.0 maxValue := 5.0 - spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, 
PartitionSelectionDelta: delta}) + spec := privacySpec(t, PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + }) fn, err := newBoundedMeanFn(*spec, MeanParams{ AggregationEpsilon: epsilon, PartitionSelectionParams: PartitionSelectionParams{Epsilon: epsilon, Delta: delta}, @@ -221,12 +229,15 @@ func TestBoundedMeanFnMergeAccumulators(t *testing.T) { exactCount := 4.0 exactMean := exactSum / exactCount want := testutils.Float64Ptr(exactMean) - tolerance, err := testutils.LaplaceToleranceForMean(23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } if !cmp.Equal(want, got, cmpopts.EquateApprox(0, tolerance)) { - t.Errorf("MergeAccumulators: when merging 2 instances of boundedMeanAccum got: %f, want %f", *got, *want) + t.Errorf("MergeAccumulators: when merging 2 instances of boundedMeanAccum got: %f, want %f", + *got, *want) } } @@ -495,7 +506,7 @@ func TestMeanPerKeyNoNoiseFloat(t *testing.T) { exactCount := 250.0 exactMean := (1.3*100 + 2.5*150) / exactCount result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -551,7 +562,7 @@ func TestMeanPerKeyNoNoiseInt(t *testing.T) { exactCount := 250.0 exactMean := (100.0 + 2.0*150.0) / exactCount result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) @@ -584,13 +595,16 @@ func TestMeanPerKeyNoNoiseInt(t *testing.T) { // Assert want = beam.ParDo(s, 
testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 150.0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 150.0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyNoNoiseInt: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyNoNoiseInt: MeanPerKey(%v) = %v, want %v, error %v", + col, got, want, err) } } @@ -640,7 +654,7 @@ func TestMeanPerKeyWithPartitionsNoNoiseFloat(t *testing.T) { exactCount := 7.0 exactMean := 14.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, // Partition 1 will be dropped because it's not in the list of public partitions. 
} publicPartitionsSlice := []int{0} @@ -678,13 +692,16 @@ func TestMeanPerKeyWithPartitionsNoNoiseFloat(t *testing.T) { // Assert want = beam.ParDo(s, testutils.PairIF64ToKV, want) exactNormalizedSum := (2.0 - (tc.maxValue+tc.minValue)/2) * exactCount - tolerance, err := testutils.LaplaceToleranceForMean(24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, exactNormalizedSum, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, exactNormalizedSum, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean test case=%+v: got error %v", tc, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseFloat test case=%+v: MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) + t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseFloat test case=%+v: "+ + "MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) } } } @@ -748,7 +765,7 @@ func TestMeanPerKeyWithPartitionsNoNoiseInt(t *testing.T) { result := []testutils.PairIF64{ // Partition 0 will be dropped because it's not in the list of public partitions. 
- {1, exactMean}, + {Key: 1, Value: exactMean}, } publicPartitionsSlice := []int{1} @@ -776,13 +793,16 @@ func TestMeanPerKeyWithPartitionsNoNoiseInt(t *testing.T) { want = beam.ParDo(s, testutils.PairIF64ToKV, want) exactNormalizedSum := (1.0-(tc.maxValue+tc.minValue)/2)*100 + (2.0-(tc.maxValue+tc.minValue)/2)*150 - tolerance, err := testutils.LaplaceToleranceForMean(23, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, exactNormalizedSum, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 23, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, exactNormalizedSum, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: test case=%+v got error %v", tc, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseInt test case=%+v: MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) + t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseInt test case=%+v: "+ + "MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) } } } @@ -805,7 +825,7 @@ func TestMeanPerKeyCountsPrivacyUnitIDsWithMultipleContributionsCorrectly(t *tes exactCount := 11.0 exactMean := 1.3 result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -836,7 +856,9 @@ func TestMeanPerKeyCountsPrivacyUnitIDsWithMultipleContributionsCorrectly(t *tes }) want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -7.7, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -7.7, 
exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } @@ -889,7 +911,8 @@ func TestMeanPartitionSelection(t *testing.T) { t.Run(tc.name, func(t *testing.T) { // Verify that entriesPerPartition is sensical. if tc.entriesPerPartition <= 0 { - t.Fatalf("Invalid test case: entriesPerPartition must be positive. Got: %d", tc.entriesPerPartition) + t.Fatalf("Invalid test case: entriesPerPartition must be positive. Got: %d", + tc.entriesPerPartition) } // Build up {ID, Partition, Value} pairs such that for each of the tc.numPartitions partitions, @@ -904,7 +927,8 @@ func TestMeanPartitionSelection(t *testing.T) { ) for i := 0; i < tc.numPartitions; i++ { for j := 0; j < tc.entriesPerPartition; j++ { - triples = append(triples, testutils.TripleWithFloatValue{ID: kOffset + j, Partition: i, Value: 1.0}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: kOffset + j, Partition: i, Value: 1.0}) } kOffset += tc.entriesPerPartition } @@ -945,9 +969,9 @@ func TestMeanKeyNegativeBounds(t *testing.T) { testutils.MakeTripleWithFloatValueStartingFromKey(100, 150, 1, -1.0)) exactCount := 250.0 - exactMean := (-5.0*100 - 1.0*150) / exactCount + exactMean := (-5.0*100 - 2.0*150) / exactCount // -1.0 is clamped down to -2.0 result := []testutils.PairIF64{ - {1, exactMean}, + {Key: 1, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) @@ -979,13 +1003,16 @@ func TestMeanKeyNegativeBounds(t *testing.T) { }) want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 200.0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 200.0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, 
got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyNegativeBounds: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyNegativeBounds: MeanPerKey(%v) = %v, want %v, error %v", + col, got, want, err) } } @@ -1007,7 +1034,7 @@ func TestMeanPerKeyCrossPartitionContributionBounding(t *testing.T) { exactCount := 51.0 exactMean := 150.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1044,18 +1071,23 @@ func TestMeanPerKeyCrossPartitionContributionBounding(t *testing.T) { want = beam.ParDo(s, testutils.PairIF64ToKV, want) // Tolerance for the partition with an extra contribution which is equal to 150. - tolerance1, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 + tolerance1, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } // Tolerance for the partition without an extra contribution. 
- tolerance2, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3700.0, 50.0, 0.0) // ≈1.074 + tolerance2, err := testutils.LaplaceToleranceForMean( + 24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3700.0, 50.0, 0.0) // ≈1.074 if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance1+tolerance2) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyCrossPartitionContributionBounding: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyCrossPartitionContributionBounding: MeanPerKey(%v) = %v, "+ + "want %v, error %v", col, got, want, err) } } @@ -1075,7 +1107,7 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { exactCount := 51.0 exactMean := 50.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, } p, s, col, want := ptest.CreateList2(triples, result) @@ -1111,13 +1143,16 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { got = beam.AddFixedKey(s, sumOverPartitions) // Adds a fixed key of 0. 
want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -2500.0, exactCount, exactMean) // ≈0.92 + tolerance, err := testutils.LaplaceToleranceForMean( + 23, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -2500.0, exactCount, exactMean) // ≈0.92 if err != nil { t.Fatalf("LaplaceToleranceForMean: got error %v", err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyPerPartitionContributionBounding: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) + t.Errorf("TestMeanPerKeyPerPartitionContributionBounding: MeanPerKey(%v) = %v, "+ + "want %v, error %v", col, got, want, err) } } @@ -1125,7 +1160,8 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { func TestMeanPerKeyReturnsNonNegative(t *testing.T) { var triples []testutils.TripleWithFloatValue for key := 0; key < 100; key++ { - triples = append(triples, testutils.TripleWithFloatValue{key, key, 0.01}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: key, Partition: key, Value: 0.01}) } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1170,7 +1206,8 @@ func TestMeanPerKeyWithPartitionsReturnsNonNegative(t *testing.T) { } { var triples []testutils.TripleWithFloatValue for key := 0; key < 100; key++ { - triples = append(triples, testutils.TripleWithFloatValue{key, key, 0.01}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: key, Partition: key, Value: 0.01}) } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1206,7 +1243,8 @@ func TestMeanPerKeyWithPartitionsReturnsNonNegative(t *testing.T) { values := beam.DropKey(s, means) beam.ParDo0(s, testutils.CheckNoNegativeValuesFloat64, 
values) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsReturnsNonNegativeFloat64 in-memory=%t returned errors: %v", tc.inMemory, err) + t.Errorf("TestMeanPerKeyWithPartitionsReturnsNonNegativeFloat64 in-memory=%t "+ + "returned errors: %v", tc.inMemory, err) } } } @@ -1217,7 +1255,8 @@ func TestMeanPerKeyNoClampingForNegativeMinValue(t *testing.T) { // The probability that any given partition has a negative noisy mean is 1/2 * 0.999. // The probability of none of the partitions having a noisy negative mean is 1 - (1/2 * 0.999)^100, which is negligible. for key := 0; key < 100; key++ { - triples = append(triples, testutils.TripleWithFloatValue{key, key, 0}) + triples = append(triples, testutils.TripleWithFloatValue{ + ID: key, Partition: key, Value: 0}) } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) @@ -1274,7 +1313,7 @@ func TestMeanPerKeyWithPartitionsCrossPartitionContributionBounding(t *testing.T exactCount := 51.0 exactMean := 150.0 / exactCount result := []testutils.PairIF64{ - {0, exactMean}, + {Key: 0, Value: exactMean}, } publicPartitionsSlice := []int{0, 1} @@ -1314,18 +1353,23 @@ func TestMeanPerKeyWithPartitionsCrossPartitionContributionBounding(t *testing.T want = beam.ParDo(s, testutils.PairIF64ToKV, want) // Tolerance for the partition with an extra contribution which is equal to 150. - tolerance1, err := testutils.LaplaceToleranceForMean(25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 + tolerance1, err := testutils.LaplaceToleranceForMean( + 25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3675.0, 51.0, exactMean) // ≈0.00367 if err != nil { t.Fatalf("LaplaceToleranceForMean in-memory=%t: got error %v", tc.inMemory, err) } // Tolerance for the partition without an extra contribution. 
- tolerance2, err := testutils.LaplaceToleranceForMean(25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, -3700.0, 50.0, 0.0) // ≈1.074 + tolerance2, err := testutils.LaplaceToleranceForMean( + 25, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, -3700.0, 50.0, 0.0) // ≈1.074 if err != nil { t.Fatalf("LaplaceToleranceForMean in-memory=%t: got error %v", tc.inMemory, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance1+tolerance2) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsPerPartitionContributionBounding in-memory=%t: MeanPerKey(%v) = %v, want %v, error %v", tc.inMemory, col, got, want, err) + t.Errorf("TestMeanPerKeyWithPartitionsPerPartitionContributionBounding in-memory=%t: "+ + "MeanPerKey(%v) = %v, want %v, error %v", tc.inMemory, col, got, want, err) } } } @@ -1383,9 +1427,9 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { epsilon := 50.0 result := []testutils.PairIF64{ - {1, midpoint}, - {2, midpoint}, - {3, midpoint}, + {Key: 1, Value: midpoint}, + {Key: 2, Value: midpoint}, + {Key: 3, Value: midpoint}, } publicPartitionsSlice := []int{1, 2, 3} @@ -1412,13 +1456,16 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { got := MeanPerKey(s, pcol, meanParams) want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 0.0, exactCount, exactMean) + tolerance, err := testutils.LaplaceToleranceForMean( + 24, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, + epsilon, 0.0, exactCount, exactMean) if err != nil { t.Fatalf("LaplaceToleranceForMean test case=%+v: got error %v", tc, err) } testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithEmptyPartitionsNoNoise test case=%+v: 
MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) + t.Errorf("TestMeanPerKeyWithEmptyPartitionsNoNoise test case=%+v: MeanPerKey(%v) = %v, "+ + "want %v, error %v", tc, col, got, want, err) } } } diff --git a/python/dp_accounting/dp_accounting/pld/pld_pmf.py b/python/dp_accounting/dp_accounting/pld/pld_pmf.py index 504b77ed..e798abd7 100644 --- a/python/dp_accounting/dp_accounting/pld/pld_pmf.py +++ b/python/dp_accounting/dp_accounting/pld/pld_pmf.py @@ -35,11 +35,36 @@ _MAX_PMF_SPARSE_SIZE = 1000 +def _get_delta_for_epsilon(infinity_mass: float, + losses: Sequence[float], + probs: Sequence[float], + epsilon: float) -> float: + """Computes the epsilon-hockey stick divergence. + + Args: + infinity_mass: The probability of the infinite loss. + losses: The privacy losses, assumed to be sorted in ascending order. + probs: The probabilities corresponding to losses. + epsilon: The epsilon in the epsilon-hockey stick divergence. + + Returns: + The epsilon-hockey stick divergence. + """ + # delta is inf_mass + sum_{loss} max(0, 1 - exp(epsilon - loss)) * prob + losses = np.asarray(losses) + probs = np.asarray(probs) + indices = losses > epsilon + return ( + infinity_mass + + np.dot(-np.expm1(epsilon - losses[indices]), probs[indices]) + ) + + def _get_delta_for_epsilon_vectorized(infinity_mass: float, losses: Sequence[float], probs: Sequence[float], epsilons: Sequence[float]) -> np.ndarray: - """Computes the epsilon-hockey stick divergence. + """Computes the epsilon-hockey stick divergence for multiple epsilons. Args: infinity_mass: the probability of the infinite loss. 
@@ -346,15 +371,11 @@ def get_delta_for_epsilon( """Computes the epsilon-hockey stick divergence.""" losses = (np.arange(self.size) + self._lower_loss) * self._discretization - is_scalar = isinstance(epsilon, numbers.Number) - if is_scalar: - epsilon = [epsilon] - - delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, - self._probs, epsilon) - if is_scalar: - delta = delta[0] - return delta + if isinstance(epsilon, numbers.Number): + return _get_delta_for_epsilon(self._infinity_mass, losses, + self._probs, epsilon) + return _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, + self._probs, epsilon) def get_epsilon_for_delta(self, delta: float) -> float: """Computes epsilon for which hockey stick divergence is at most delta.""" @@ -499,15 +520,11 @@ def get_delta_for_epsilon( self, epsilon: Union[float, Sequence[float]]) -> Union[float, np.ndarray]: """Computes the epsilon-hockey stick divergence.""" losses, probs = self._get_losses_probs() - is_scalar = isinstance(epsilon, numbers.Number) - if is_scalar: - epsilon = [epsilon] - - delta = _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, - probs, epsilon) - if is_scalar: - delta = delta[0] - return delta + if isinstance(epsilon, numbers.Number): + return _get_delta_for_epsilon(self._infinity_mass, losses, probs, epsilon) + + return _get_delta_for_epsilon_vectorized(self._infinity_mass, losses, + probs, epsilon) def get_epsilon_for_delta(self, delta: float) -> float: """Computes epsilon for which hockey stick divergence is at most delta.""" diff --git a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py index 2476a398..8e1855ae 100644 --- a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py +++ b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py @@ -75,16 +75,21 @@ def _compute_log_a_int(q: float, sigma: float, alpha: int) -> float: return log_a +_MAX_STEPS_LOG_A_FRAC 
= 1000 + + def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float: """Computes log(A_alpha) for fractional alpha, 0 < q < 1.""" + # Computation derived in Sec 3.3 of https://arxiv.org/pdf/1908.10530. # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are # initialized to 0 in the log space: log_a0, log_a1 = -np.inf, -np.inf z0 = sigma**2 * math.log(1 / q - 1) + .5 log1mq = math.log1p(-q) - i = 0 - while True: # do ... until loop + last_s0 = last_s1 = -np.inf + + for i in range(_MAX_STEPS_LOG_A_FRAC): log_coef = _log_comb(alpha, i) j = alpha - i @@ -100,11 +105,30 @@ def _compute_log_a_frac(q: float, sigma: float, alpha: float) -> float: log_a0 = _log_add(log_a0, log_s0) log_a1 = _log_add(log_a1, log_s1) - i += 1 - if max(log_s0, log_s1) < -30: - break - - return _log_add(log_a0, log_a1) + total = _log_add(log_a0, log_a1) + + # Terminate when both s0 and s1 are decreasing and sufficiently small + # relative to total. + if ( + log_s0 < last_s0 + and log_s1 < last_s1 + and max(log_s0, log_s1) < total - 30 + ): + return total + + last_s0 = log_s0 + last_s1 = log_s1 + + logging.warning( + '_compute_log_a_frac failed to converge after %d iterations with q=%f' + ', sigma=%f, alpha=%f. 
Excluding this order from the epsilon ' + 'computation.', + _MAX_STEPS_LOG_A_FRAC, + q, + sigma, + alpha, + ) + return np.inf def _log_erfc(x: float) -> float: diff --git a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py index 11418db4..34c2b76a 100644 --- a/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py +++ b/python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py @@ -732,6 +732,22 @@ def test_repeat_and_select_gaussian_poisson(self, sigma, mean): lb = min(rdp[j] for j in range(len(orders)) if orders[j] >= order) self.assertLessEqual(lb, accountant_rdp) + def test_log_a_frac_positive(self): + # Testing a combination of q, sigma and alpha that formerly returned a + # negative log_a_frac. + for order in np.linspace(58.5, 59.5, 21): + log_a = rdp_privacy_accountant._compute_log_a_frac(0.4, 12, order) + self.assertGreater(log_a, 0) + + def test_log_a_frac_early_termination(self): + # Test an event that is known to not converge for small orders. + event = dp_event.PoissonSampledDpEvent(0.1, dp_event.GaussianDpEvent(1.0)) + accountant = rdp_privacy_accountant.RdpAccountant() + with self.assertLogs(level='WARNING') as log: + accountant.compose(event) + self.assertNotEmpty([l for l in log.output if 'failed to converge' in l]) + self.assertIn(np.inf, accountant._rdp) + if __name__ == '__main__': absltest.main() diff --git a/python/dp_accounting/requirements.txt b/python/dp_accounting/requirements.txt index 89264f11..6cb58dad 100644 --- a/python/dp_accounting/requirements.txt +++ b/python/dp_accounting/requirements.txt @@ -2,7 +2,7 @@ # the dependenices from `../learning/requirements.txt`. absl-py~=1.0 -attrs>=22 +attrs>=22,<24 dm-tree~=0.1.8 mpmath~=1.2 numpy~=1.21