From e61a5e23b1f3edff50aeb958bda513f0dfbd4092 Mon Sep 17 00:00:00 2001 From: Differential Privacy Team Date: Fri, 9 Feb 2024 09:25:54 -0800 Subject: [PATCH] Change in test setup for Java and Privacy on Beam Java DP Lib: * Migrate usages of `Truth8.assertThat` to equivalent usages of `Truth.assertThat` Privacy on Beam: * Use ptest.MainWithDefault for pbeamtest similar to pbeam * Deprecate pbeamtest usage for enabling test mode in favor of the new PrivacySpec API. Tests and examples now use the new API * Switch Privacy on Beam tests to the new PrivacySpec API, also includes minor test fixes ZetaSQL Examples: * Update dependencies PiperOrigin-RevId: 605398011 Change-Id: I761849299652918132bad28b67d121de40f8b056 GitOrigin-RevId: cc25bcfe0b9e79d82ae9a07193eaee2bb9e818ba --- examples/zetasql/.bazelversion | 2 +- examples/zetasql/WORKSPACE | 49 +- java/dp_java_deps.bzl | 6 +- java/maven_install.json | 40 +- .../differentialprivacy/CountTest.java | 11 +- privacy-on-beam/pbeam/count_test.go | 270 ++++------ privacy-on-beam/pbeam/distinct_id_test.go | 268 +++++----- .../pbeam/distinct_per_key_test.go | 266 ++++------ .../pbeam/example_pbeamtest_test.go | 36 +- privacy-on-beam/pbeam/mean_test.go | 343 +++++-------- privacy-on-beam/pbeam/pardo_test.go | 66 +-- privacy-on-beam/pbeam/pbeam_main_test.go | 12 +- privacy-on-beam/pbeam/pbeam_test.go | 35 +- .../pbeam/pbeamtest/pbeamtest_test.go | 133 ++--- .../pbeam/public_partitions_test.go | 16 +- privacy-on-beam/pbeam/quantiles_test.go | 291 ++++------- .../pbeam/select_partitions_test.go | 35 +- privacy-on-beam/pbeam/sum_test.go | 461 ++++++++---------- 18 files changed, 957 insertions(+), 1383 deletions(-) diff --git a/examples/zetasql/.bazelversion b/examples/zetasql/.bazelversion index fcdb2e10..f22d756d 100644 --- a/examples/zetasql/.bazelversion +++ b/examples/zetasql/.bazelversion @@ -1 +1 @@ -4.0.0 +6.5.0 diff --git a/examples/zetasql/WORKSPACE b/examples/zetasql/WORKSPACE index 74b75a9c..a6dd026b 100644 --- a/examples/zetasql/WORKSPACE +++ b/examples/zetasql/WORKSPACE @@ -22,41 +22,11 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( name = "com_google_zetasql", - sha256 = "9d3c2149ffe21512fd50f873f718fe663546f2c2ee355af479b06ff31e228b8c", - strip_prefix = "zetasql-2023.03.2", - url = "https://github.com/google/zetasql/archive/refs/tags/2023.03.2.tar.gz", + strip_prefix = "zetasql-2023.11.1", + url = "https://github.com/google/zetasql/archive/refs/tags/2023.11.1.tar.gz", ) # Transitive dependencies from the ZetaSQL WORKSPACE file: -RULES_JVM_EXTERNAL_TAG = "4.5" - -RULES_JVM_EXTERNAL_SHA = "b17d7388feb9bfa7f2fa09031b32707df529f26c91ab9e5d909eb1676badd9a6" - -http_archive( - name = "rules_jvm_external", - sha256 = RULES_JVM_EXTERNAL_SHA, - strip_prefix = "rules_jvm_external-%s" % RULES_JVM_EXTERNAL_TAG, - url = "https://github.com/bazelbuild/rules_jvm_external/archive/refs/tags/%s.zip" % RULES_JVM_EXTERNAL_TAG, -) - -http_archive( - name = "io_grpc_grpc_java", - sha256 = "6c39c5feecda4f1ccafe88d8928d9a0f2a686d9a9a9c03888a2e5ac92f7ee34a", - strip_prefix = "grpc-java-1.43.2", - url = "https://github.com/grpc/grpc-java/archive/v1.43.2.tar.gz", -) - -load("@rules_jvm_external//:repositories.bzl", "rules_jvm_external_deps") - -rules_jvm_external_deps() - -load("@rules_jvm_external//:setup.bzl", "rules_jvm_external_setup") - -rules_jvm_external_setup() - -load("@com_google_zetasql//bazel:zetasql_java_deps.bzl", "zetasql_java_deps") - -zetasql_java_deps() load("@com_google_zetasql//bazel:zetasql_deps_step_1.bzl", "zetasql_deps_step_1") @@ -64,11 +34,20 @@ zetasql_deps_step_1() load("@com_google_zetasql//bazel:zetasql_deps_step_2.bzl", "zetasql_deps_step_2") -zetasql_deps_step_2() +zetasql_deps_step_2( + java_deps = False, + testing_deps = False, +) + +# Only need to load a subset of zetasql_deps_step_3: + +load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") + +protobuf_deps() -load("@com_google_zetasql//bazel:zetasql_deps_step_3.bzl", "zetasql_deps_step_3") +load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") -zetasql_deps_step_3() +grpc_deps() load("@com_google_zetasql//bazel:zetasql_deps_step_4.bzl", "zetasql_deps_step_4") diff --git a/java/dp_java_deps.bzl b/java/dp_java_deps.bzl index 5336d48d..ffcbaf1d 100644 --- a/java/dp_java_deps.bzl +++ b/java/dp_java_deps.bzl @@ -12,13 +12,13 @@ def dp_java_deps(): "com.google.auto.value:auto-value:1.10.4", "com.google.code.findbugs:jsr305:3.0.2", "com.google.errorprone:error_prone_annotations:2.23.0", - "com.google.guava:guava:32.1.3-jre", + "com.google.guava:guava:33.0.0-jre", "com.google.protobuf:protobuf-java:3.25.0", # artifacts for testing only "org.mockito:mockito-core:5.7.0", "junit:junit:4.13.2", - "com.google.truth:truth:1.1.5", - "com.google.truth.extensions:truth-java8-extension:1.1.5", + "com.google.truth:truth:1.4.0", + "com.google.truth.extensions:truth-java8-extension:1.4.0", "com.google.testparameterinjector:test-parameter-injector:1.15", ], repositories = [ diff --git a/java/maven_install.json b/java/maven_install.json index cc2393dd..69714383 100644 --- a/java/maven_install.json +++ b/java/maven_install.json @@ -1,7 +1,10 @@ { "__AUTOGENERATED_FILE_DO_NOT_MODIFY_THIS_FILE_MANUALLY": "THERE_IS_NO_DATA_ONLY_ZUUL", - "__INPUT_ARTIFACTS_HASH": -131668409, - "__RESOLVED_ARTIFACTS_HASH": -1554454602, + "__INPUT_ARTIFACTS_HASH": -303031740, + "__RESOLVED_ARTIFACTS_HASH": 903391609, + "conflict_resolution": { + "com.google.errorprone:error_prone_annotations:2.23.0": "com.google.errorprone:error_prone_annotations:2.24.1" + }, "artifacts": { "com.google.auto.value:auto-value": { "shasums": { @@ -23,21 +26,21 @@ }, "com.google.errorprone:error_prone_annotations": { "shasums": { - "jar": "ec6f39f068b6ff9ac323c68e28b9299f8c0a80ca512dccb1d4a70f40ac3ec054" + "jar": "19fe2f7155d20ea093168527999da98108103ee546d1e8b726bc4b27c31a3c30" }, - "version": "2.23.0" + "version": "2.24.1" }, "com.google.guava:failureaccess": { "shasums": { - "jar": "a171ee4c734dd2da837e4b16be9df4661afab72a41adaf31eb84dfdaf936ca26" + "jar": "8a8f81cf9b359e3f6dfa691a1e776985c061ef2f223c9b2c80753e1b458e8064" }, - "version": "1.0.1" + "version": "1.0.2" }, "com.google.guava:guava": { "shasums": { - "jar": "6d4e2b5a118aab62e6e5e29d185a0224eed82c85c40ac3d33cf04a270c3b3744" + "jar": "f4d85c3e4d411694337cb873abea09b242b664bb013320be6105327c45991537" }, - "version": "32.1.3-jre" + "version": "33.0.0-jre" }, "com.google.guava:listenablefuture": { "shasums": { @@ -65,15 +68,15 @@ }, "com.google.truth.extensions:truth-java8-extension": { "shasums": { - "jar": "9e3c437ef76c0028d1c87d9f81d599301459333cfb3b50e5bf815ed712745140" + "jar": "293f4e4c59ce48e8b68651321d2d9f2355534412b221369b2af8ff76e6acf381" }, - "version": "1.1.5" + "version": "1.4.0" }, "com.google.truth:truth": { "shasums": { - "jar": "7f6d50d6f43a102942ef2c5a05f37a84f77788bb448cf33cceebf86d34e575c0" + "jar": "235c28e96ee6701ab01cc852fb294cb0f34756f636a8154b9aef08fb1215bbc4" }, - "version": "1.1.5" + "version": "1.4.0" }, "junit:junit": { "shasums": { @@ -101,9 +104,9 @@ }, "org.checkerframework:checker-qual": { "shasums": { - "jar": "e4ce1376cc2735e1dde220b62ad0913f51297704daad155a33f386bc5db0d9f7" + "jar": "ccaedd33af0b7894d9f2f3b644f4d19e43928e32902e61ac4d10777830f5aac7" }, - "version": "3.37.0" + "version": "3.42.0" }, "org.hamcrest:hamcrest-core": { "shasums": { @@ -125,9 +128,9 @@ }, "org.ow2.asm:asm": { "shasums": { - "jar": "b62e84b5980729751b0458c534cf1366f727542bb8d158621335682a460f0353" + "jar": "3c6fac2424db3d4a853b669f4e3d1d9c3c552235e19a319673f887083c2303a1" }, - "version": "9.5" + "version": "9.6" }, "org.yaml:snakeyaml": { "shasums": { @@ -299,9 +302,6 @@ "com.google.testparameterinjector:test-parameter-injector": [ "com.google.testing.junit.testparameterinjector" ], - "com.google.truth.extensions:truth-java8-extension": [ - "com.google.common.truth" - ], "com.google.truth:truth": [ "com.google.common.truth" ], @@ -487,7 +487,7 @@ "org.checkerframework.common.reflection.qual", "org.checkerframework.common.returnsreceiver.qual", "org.checkerframework.common.subtyping.qual", - "org.checkerframework.common.util.report.qual", + "org.checkerframework.common.util.count.report.qual", "org.checkerframework.common.value.qual", "org.checkerframework.dataflow.qual", "org.checkerframework.framework.qual" diff --git a/java/tests/com/google/privacy/differentialprivacy/CountTest.java b/java/tests/com/google/privacy/differentialprivacy/CountTest.java index 6fe1961f..7a580844 100644 --- a/java/tests/com/google/privacy/differentialprivacy/CountTest.java +++ b/java/tests/com/google/privacy/differentialprivacy/CountTest.java @@ -28,7 +28,6 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import com.google.common.truth.Truth8; import com.google.privacy.differentialprivacy.proto.SummaryOuterClass.CountSummary; import com.google.privacy.differentialprivacy.proto.SummaryOuterClass.MechanismType; import com.google.protobuf.InvalidProtocolBufferException; @@ -531,7 +530,7 @@ public void computeThresholdedResult_countGreaterThanThreshold_returnsCount() { // noise) count is equal to 12, which passes the threshold and therefore 12 should be returned. count.incrementBy(12); Optional actualResult = count.computeThresholdedResult(THRESHOLD_DELTA); - Truth8.assertThat(actualResult).hasValue(12); + assertThat(actualResult).hasValue(12); } @Test @@ -573,8 +572,8 @@ public void computeThresholdedResult_forLaplace_appliesCorrectThreshold() { count.incrementBy(122); Optional ceiledThreshold = count.computeThresholdedResult(0.1); - Truth8.assertThat(flooredThreshold).isEmpty(); - Truth8.assertThat(ceiledThreshold).hasValue(122); + assertThat(flooredThreshold).isEmpty(); + assertThat(ceiledThreshold).hasValue(122); } @Test @@ -613,8 +612,8 @@ public void computeThresholdedResult_forGaussian_appliesCorrectThreshold() { count.incrementBy(72); Optional ceiledThreshold = count.computeThresholdedResult(0.1); - Truth8.assertThat(flooredThreshold).isEmpty(); - Truth8.assertThat(ceiledThreshold).hasValue(72); + assertThat(flooredThreshold).isEmpty(); + assertThat(ceiledThreshold).hasValue(72); } private Count.Params.Builder getCountBuilderWithFields() { diff --git a/privacy-on-beam/pbeam/count_test.go b/privacy-on-beam/pbeam/count_test.go index 45623d52..bc35cef6 100644 --- a/privacy-on-beam/pbeam/count_test.go +++ b/privacy-on-beam/pbeam/count_test.go @@ -61,8 +61,12 @@ func TestCountNoNoise(t *testing.T) { // To see the logic and the math behind flakiness and tolerance calculation, // See https://github.com/google/differential-privacy/blob/main/privacy-on-beam/docs/Tolerance_Calculation.pdf. epsilon, delta, k, l1Sensitivity := 25.0, 1e-200, 24.0, 2.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) got := Count(s, pcol, CountParams{MaxValue: 2, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) @@ -105,7 +109,11 @@ func TestCountWithPartitionsNoNoise(t *testing.T) { // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 2.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) countParams := CountParams{MaxValue: 2, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := Count(s, pcol, countParams) @@ -121,21 +129,22 @@ func TestCountWithPartitionsNoNoise(t *testing.T) { // Checks that Count applies partition selection. func TestCountPartitionSelection(t *testing.T) { for _, tc := range []struct { - name string - noiseKind NoiseKind - epsilon float64 - delta float64 - numPartitions int - countPerValue int + name string + noiseKind NoiseKind + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 + numPartitions int + countPerValue int }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in a ε=1, - // δ=0.3 partition selection budget. - epsilon: 2, - delta: 0.6, + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.3, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // countPerValue=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). countPerValue: 1, @@ -144,14 +153,11 @@ func TestCountPartitionSelection(t *testing.T) { numPartitions: 143, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in the - // partition selection portion of the budget being ε_selectPartition=1, - // δ_selectPartition=0.3. - epsilon: 2, - delta: 0.3, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // countPerValue=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). countPerValue: 1, @@ -183,7 +189,13 @@ func TestCountPartitionSelection(t *testing.T) { col = beam.ParDo(s, testutils.PairToKV, col) // Run Count on pairs - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) got := Count(s, pcol, CountParams{MaxValue: 1, MaxPartitionsContributed: 1, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairII64, got) @@ -202,20 +214,25 @@ func TestCountAddsNoise(t *testing.T) { name string noiseKind NoiseKind // Differential privacy params used. - epsilon float64 - delta float64 + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 2 * 1e-15, // It is split by 2: 1e-15 for the noise and 1e-15 for the partition selection. - delta: 2 * 1e-5, // It is split by 2: 1e-5 for the noise and 1e-5 for the partition selection. + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1e-15, + aggregationDelta: 1e-5, + partitionSelectionEpsilon: 1e-15, + partitionSelectionDelta: 1e-5, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 2 * 1e-15, // It is split by 2: 1e-15 for the noise and 1e-15 for the partition selection. - delta: 0.01, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1e-15, + partitionSelectionEpsilon: 1e-15, + partitionSelectionDelta: 1e-5, }, } { // Because this is an integer aggregation, we can't use the regular complementary @@ -235,16 +252,12 @@ func TestCountAddsNoise(t *testing.T) { // about tests taking long. tolerance := 0.0 l0Sensitivity, lInfSensitivity := int64(1), int64(1) - partitionSelectionEpsilon, partitionSelectionDelta := tc.epsilon/2, tc.delta - if tc.noiseKind == gaussianNoise { - partitionSelectionDelta = tc.delta / 2 - } // Compute the number of IDs needed to keep the partition. sp, err := dpagg.NewPreAggSelectPartition( &dpagg.PreAggSelectPartitionOptions{ - Epsilon: partitionSelectionEpsilon, - Delta: partitionSelectionDelta, + Epsilon: tc.partitionSelectionEpsilon, + Delta: tc.partitionSelectionDelta, MaxPartitionsContributed: l0Sensitivity, }) if err != nil { @@ -260,7 +273,13 @@ func TestCountAddsNoise(t *testing.T) { p, s, col := ptest.CreateList(pairs) col = beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) got := Count(s, pcol, CountParams{MaxPartitionsContributed: l0Sensitivity, MaxValue: lInfSensitivity, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairII64, got) testutils.CheckInt64MetricsAreNoisy(s, got, numIDs, tolerance) @@ -340,7 +359,11 @@ func TestCountAddsNoiseWithPartitions(t *testing.T) { } col = beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.epsilon, + AggregationDelta: tc.delta, + })) countParams := CountParams{MaxPartitionsContributed: l0Sensitivity, MaxValue: lInfSensitivity, NoiseKind: tc.noiseKind, PublicPartitions: publicPartitions} got := Count(s, pcol, countParams) got = beam.ParDo(s, testutils.KVToPairII64, got) @@ -368,8 +391,12 @@ func TestCountCrossPartitionContributionBounding(t *testing.T) { // We have 10 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.01, 25.0, 3.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) got := Count(s, pcol, CountParams{MaxPartitionsContributed: 3, MaxValue: 1, NoiseKind: LaplaceNoise{}}) // With a max contribution of 3, 70% of the data should be // dropped. The sum of all elements must then be 150. @@ -415,7 +442,11 @@ func TestCountWithPartitionsCrossPartitionContributionBounding(t *testing.T) { // We have 5 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 3.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) countParams := CountParams{MaxPartitionsContributed: 3, MaxValue: 1, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := Count(s, pcol, countParams) // With a max contribution of 3, 40% of the data from the public partitions should be dropped. @@ -431,47 +462,6 @@ func TestCountWithPartitionsCrossPartitionContributionBounding(t *testing.T) { } } -// Check that no negative values are returned from Count with partitions. -func TestCountWithPartitionsReturnsNonNegative(t *testing.T) { - // We have two test cases, one for public partitions as a PCollection and one for public partitions as a slice (i.e., in-memory). - for _, tc := range []struct { - inMemory bool - }{ - {true}, - {false}, - } { - var pairs []testutils.PairII - var publicPartitionsSlice []int - for i := 0; i < 100; i++ { - pairs = append(pairs, testutils.PairII{i, i}) - } - for i := 0; i < 200; i++ { - publicPartitionsSlice = append(publicPartitionsSlice, i) - } - p, s, col := ptest.CreateList(pairs) - var publicPartitions any - if tc.inMemory { - publicPartitions = publicPartitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicPartitionsSlice) - } - - col = beam.ParDo(s, testutils.PairToKV, col) - // Using a low epsilon and high maxValue adds a lot of noise and using - // a high delta keeps many partitions. - epsilon, delta, maxValue := 0.001, 0.999, int64(1e8) - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) - countParams := CountParams{MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}, PublicPartitions: publicPartitions} - counts := Count(s, pcol, countParams) - values := beam.DropKey(s, counts) - // Check if we have negative elements. - beam.ParDo0(s, testutils.CheckNoNegativeValuesInt64, values) - if err := ptest.Run(p); err != nil { - t.Errorf("TestCountWithPartitionsReturnsNonNegative in-memory=%t returned errors: %v", tc.inMemory, err) - } - } -} - func TestCheckCountParams(t *testing.T) { _, _, partitions := ptest.CreateList([]int{0}) for _, tc := range []struct { @@ -829,97 +819,6 @@ func TestCheckCountParams(t *testing.T) { } } -// The logic mirrors TestCountNoNoise, but with the new privacy budget API. -func TestCountNoNoiseTemp(t *testing.T) { - // In this test, we set the per-partition l1Sensitivity to 2, and: - // - value 0 is associated with 7 privacy units, so it should be thresholded; - // - value 1 is associated with 30 privacy units appearing twice each, so each of - // them should be counted twice; - // - value 2 is associated with 50 privacy units appearing 3 times each, but the - // l1Sensitivity is 2, so each should only be counted twice. - // Each privacy unit contributes to at most 1 partition. - pairs := testutils.ConcatenatePairs( - testutils.MakePairsWithFixedVStartingFromKey(0, 7, 0), - testutils.MakePairsWithFixedVStartingFromKey(7, 30, 1), - testutils.MakePairsWithFixedVStartingFromKey(7, 30, 1), - testutils.MakePairsWithFixedVStartingFromKey(7+30, 50, 2), - testutils.MakePairsWithFixedVStartingFromKey(7+30, 50, 2), - testutils.MakePairsWithFixedVStartingFromKey(7+30, 50, 2), - ) - result := []testutils.PairII64{ - {1, 60}, // 30*2 - {2, 100}, // 50*2 - } - p, s, col, want := ptest.CreateList2(pairs, result) - col = beam.ParDo(s, testutils.PairToKV, col) - - // ε=25, δ=10⁻²⁰⁰ and l0Sensitivity=2 gives a threshold of ≈21. - // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, - // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 25.0, 1e-200, 24.0, 2.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) - if err != nil { - t.Fatalf("TestCountNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - got := Count(s, pcol, CountParams{MaxValue: 2, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestCountNoNoiseTemp: Count(%v) = %v, expected %v: %v", col, got, want, err) - } -} - -// The logic mirrors TestCountWithPartitionsNoNoise, but with the new privacy budget API. -func TestCountWithPartitionsNoNoiseTemp(t *testing.T) { - // We have two test cases, one for public partitions as a PCollection and one for public partitions as a slice (i.e., in-memory). - for _, tc := range []struct { - inMemory bool - }{ - {true}, - {false}, - } { - var pairs []testutils.PairII - for i := 0; i < 10; i++ { - pairs = append(pairs, testutils.PairII{1, i}) - } - result := []testutils.PairII64{ - // Drop partitions 0 to 8 as they are not in public - // partitions. - {9, 1}, // Keep partition 9. - {10, 0}, // Add partition 10. - } - - p, s, col, want := ptest.CreateList2(pairs, result) - col = beam.ParDo(s, testutils.PairToKV, col) - publicPartitionsSlice := []int{9, 10} - var publicPartitions any - if tc.inMemory { - publicPartitions = publicPartitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicPartitionsSlice) - } - - // We use ε=50, δ=0 and l1Sensitivity=2. - // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, - // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, k, l1Sensitivity := 50.0, 24.0, 2.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon}) - if err != nil { - t.Fatalf("TestCountWithPartitionsNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - countParams := CountParams{MaxValue: 2, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} - - got := Count(s, pcol, countParams) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestCountWithPartitionsNoNoiseTemp in-memory=%t: Count(%v) = %v, expected %v: %v", tc.inMemory, col, got, want, err) - } - } -} - func TestCountPreThresholding(t *testing.T) { // In this test, we set pre-threshold to 10, per-partition l1 sensitivity to 2, and: // - value 0 is associated with 9 privacy units, so it should be thresholded; @@ -943,15 +842,12 @@ func TestCountPreThresholding(t *testing.T) { // we can have each partition fail with 10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 1e9, dpagg.LargestRepresentableDelta, 23.0, 2.0 preThreshold := int64(10) - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ - AggregationEpsilon: epsilon, - PartitionSelectionEpsilon: epsilon, - PartitionSelectionDelta: delta, - PreThreshold: preThreshold}) - if err != nil { - t.Fatalf("TestCountPreThresholding: %v", err) - } - pcol := MakePrivate(s, col, spec) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + PreThreshold: preThreshold})) got := Count(s, pcol, CountParams{MaxValue: int64(l1Sensitivity), MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) diff --git a/privacy-on-beam/pbeam/distinct_id_test.go b/privacy-on-beam/pbeam/distinct_id_test.go index eefa1975..c1f72c6f 100644 --- a/privacy-on-beam/pbeam/distinct_id_test.go +++ b/privacy-on-beam/pbeam/distinct_id_test.go @@ -53,7 +53,17 @@ func TestProtoAggregation(t *testing.T) { {Bar: proto.String("zero")}, } p, s, col := ptest.CreateList(values) - pcol := MakePrivateFromProto(s, col, NewPrivacySpec(1, 1e-10), "foo") + pcol := MakePrivateFromProto( + s, + col, + privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: 1, + PartitionSelectionEpsilon: 1, + PartitionSelectionDelta: 1e-10, + }), + "foo", + ) got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) // All values are distinct and should be thresholded. passert.Empty(s, got) @@ -85,7 +95,11 @@ func TestDistinctPrivacyIDNoNoise(t *testing.T) { // To see the logic and the math behind flakiness and tolerance calculation, // See https://github.com/google/differential-privacy/blob/main/privacy-on-beam/docs/Tolerance_Calculation.pdf. epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 4.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 4, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) @@ -135,7 +149,11 @@ func TestDistinctPrivacyIDWithPartitionsNoNoise(t *testing.T) { // We have 4 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 500.0, 0.0, 24.0, 4.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) distinctPrivacyIDParams := DistinctPrivacyIDParams{MaxPartitionsContributed: 4, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := DistinctPrivacyID(s, pcol, distinctPrivacyIDParams) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -147,12 +165,13 @@ func TestDistinctPrivacyIDWithPartitionsNoNoise(t *testing.T) { } type distinctThresholdTestCase struct { - name string - noiseKind NoiseKind - epsilon float64 - delta float64 - numPartitions int - minAllowedValue int + name string + noiseKind NoiseKind + epsilon float64 + aggregationDelta float64 + partitionSelectionDelta float64 + numPartitions int + minAllowedValue int } func computeGaussianThreshold(l0Sensitivity int64, lInfSensitivity, epsilon, noiseDelta, thresholdDelta float64) float64 { @@ -167,22 +186,22 @@ func computeLaplaceThreshold(l0Sensitivity int64, lInfSensitivity, epsilon, nois var distinctThresholdTestCases = []distinctThresholdTestCase{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 1, - delta: 0.01, - numPartitions: 25, - // We use δ = 0.005 in these calculations since the δ = 0.01 budget is split - // in half (50% for adding noise, 50% for thresholding). - minAllowedValue: int(computeGaussianThreshold(25, 1, 1, 0.005, 0.005)), + name: "Gaussian", + noiseKind: GaussianNoise{}, + epsilon: 1, + aggregationDelta: 0.005, + partitionSelectionDelta: 0.005, + numPartitions: 25, + minAllowedValue: int(computeGaussianThreshold(25, 1, 1, 0.005, 0.005)), }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 1, - delta: 0.01, - numPartitions: 25, - minAllowedValue: int(computeLaplaceThreshold(25, 1, 1, 0, 0.01)), + name: "Laplace", + noiseKind: LaplaceNoise{}, + epsilon: 1, + aggregationDelta: 0, + partitionSelectionDelta: 0.005, + numPartitions: 25, + minAllowedValue: int(computeLaplaceThreshold(25, 1, 1, 0, 0.01)), }, } @@ -197,7 +216,8 @@ func (fn *checkNothingBelowThresholdFn) ProcessElement(c testutils.PairII64) err return nil } -func buildDistinctPrivacyIDThresholdPipeline(tc distinctThresholdTestCase) (p *beam.Pipeline, s beam.Scope, col beam.PCollection, got beam.PCollection) { +func buildDistinctPrivacyIDThresholdPipeline(t *testing.T, tc distinctThresholdTestCase) (p *beam.Pipeline, s beam.Scope, col beam.PCollection, got beam.PCollection) { + t.Helper() // pairs contains {1,0}, {2,0}, …, {minAllowedValue,0}, {1,1}, …, {minAllowedValue,1}, {1,2}, …, {minAllowedValue,9}. var pairs []testutils.PairII for i := 0; i < tc.numPartitions; i++ { @@ -208,7 +228,12 @@ func buildDistinctPrivacyIDThresholdPipeline(tc distinctThresholdTestCase) (p *b p, s, col = ptest.CreateList(pairs) col = beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.epsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) got = DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: int64(tc.numPartitions), NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairII64, got) return p, s, col, got @@ -223,7 +248,7 @@ func TestDistinctPrivacyIDThresholdsSmallEntries(t *testing.T) { t.Errorf("Invalid test case: minAllowedValue must be positive. Got: %d", tc.minAllowedValue) } - p, s, col, got := buildDistinctPrivacyIDThresholdPipeline(tc) + p, s, col, got := buildDistinctPrivacyIDThresholdPipeline(t, tc) beam.ParDo0(s, &checkNothingBelowThresholdFn{tc.minAllowedValue}, got) if err := ptest.Run(p); err != nil { t.Errorf("%s: DistinctPrivacyID(%v) = %v, found an unexpected value below minAllowedValue: %v", tc.name, col, got, err) @@ -242,7 +267,7 @@ func TestDistinctPrivacyIDThresholdLeavesSomeEntries(t *testing.T) { t.Errorf("Invalid test case: minAllowedValue must be positive. Got: %d", tc.minAllowedValue) } - p, s, col, got := buildDistinctPrivacyIDThresholdPipeline(tc) + p, s, col, got := buildDistinctPrivacyIDThresholdPipeline(t, tc) passert.Empty(s, got) // We want this to be an error. if err := ptest.Run(p); err == nil { t.Errorf("%s: DistinctPrivacyID(%v) returned an empty result.", tc.name, col) @@ -256,21 +281,23 @@ func TestDistinctPrivacyIDAddsNoise(t *testing.T) { for _, tc := range []struct { name string noiseKind NoiseKind - // Differential privacy params used. The test assumes sensitivities of 1. - epsilon float64 - delta float64 + // Differential privacy params used. + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 2 * 1e-5, - delta: 2 * 1e-5, // It is split by 2: 1e-5 for the noise and 1e-5 for the partition selection + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 2 * 1e-5, + aggregationDelta: 1e-5, + partitionSelectionDelta: 1e-5, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 4 * 1e-5, - delta: 0.5, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 4 * 1e-5, + partitionSelectionDelta: 0.5, }, } { // Because this is an integer aggregation, we can't use the regular complementary @@ -287,25 +314,26 @@ func TestDistinctPrivacyIDAddsNoise(t *testing.T) { // We want to keep numIDs low (otherwise the tests take a long time) while // also keeping P low. This means we can't have a tiny ε & δ. tolerance := 0.0 - noiseEpsilon, noiseDelta := tc.epsilon, 0.0 k := 5.0 // k leads to 1e-5 and both P's are close to 1e-5. l0Sensitivity, lInfSensitivity := 1.0, 1.0 - partitionSelectionDelta := tc.delta l1Sensitivity := l0Sensitivity * lInfSensitivity - thresholdTolerance := testutils.LaplaceTolerance(k, l1Sensitivity, noiseEpsilon) - numIDs := int(math.Ceil(computeLaplaceThreshold(int64(l0Sensitivity), lInfSensitivity, noiseEpsilon, noiseDelta, partitionSelectionDelta) + thresholdTolerance)) + thresholdTolerance := testutils.LaplaceTolerance(k, l1Sensitivity, tc.aggregationEpsilon) + numIDs := int(math.Ceil(computeLaplaceThreshold(int64(l0Sensitivity), lInfSensitivity, tc.aggregationEpsilon, tc.aggregationDelta, tc.partitionSelectionDelta) + thresholdTolerance)) if tc.noiseKind == gaussianNoise { - noiseDelta = tc.delta / 2 - partitionSelectionDelta = tc.delta / 2 - thresholdTolerance = testutils.GaussianTolerance(k, l0Sensitivity, lInfSensitivity, noiseEpsilon, noiseDelta) - numIDs = int(math.Ceil(computeGaussianThreshold(int64(l0Sensitivity), lInfSensitivity, noiseEpsilon, noiseDelta, partitionSelectionDelta) + thresholdTolerance)) + thresholdTolerance = testutils.GaussianTolerance(k, l0Sensitivity, lInfSensitivity, tc.aggregationEpsilon, tc.aggregationDelta) + numIDs = int(math.Ceil(computeGaussianThreshold(int64(l0Sensitivity), lInfSensitivity, tc.aggregationEpsilon, tc.aggregationDelta, tc.partitionSelectionDelta) + thresholdTolerance)) } // pairs{privacy_id, partition_key} contains {1,0}, {2,0}, …, {numIDs,0}. pairs := testutils.MakePairsWithFixedV(numIDs, 0) p, s, col := ptest.CreateList(pairs) col = beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: int64(lInfSensitivity), NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairII64, got) @@ -377,7 +405,11 @@ func TestDistinctPrivacyIDWithPartitionsAddsNoise(t *testing.T) { p, s, col := ptest.CreateList(pairs) col = beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.epsilon, + AggregationDelta: tc.delta, + })) publicPartitionsSlice := []int{0} var publicPartitions any if tc.inMemory { @@ -414,7 +446,11 @@ func TestDistinctPrivacyIDCrossPartitionContributionBounding(t *testing.T) { // We have 10 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.01, 24.0, 3.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}}) // With a max contribution of 3, 70% of the data should be // dropped. The sum of all elements must then be 150. @@ -461,7 +497,11 @@ func TestDistinctPrivacyIDWithPartitionsCrossPartitionContributionBounding(t *te // We have 5 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 3.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + AggregationDelta: delta, + })) distinctPrivacyIDParams := DistinctPrivacyIDParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := DistinctPrivacyID(s, pcol, distinctPrivacyIDParams) // With a max contribution of 3, 40% of the public partitions should be dropped. @@ -488,8 +528,12 @@ func TestDistinctPrivacyIDReturnsNonNegative(t *testing.T) { // Using a low epsilon adds a lot of noise and using a high delta keeps // many partitions. epsilon, delta := 0.001, 0.999 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) - counts := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}}) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) + counts := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) values := beam.DropKey(s, counts) // Check if we have negative elements. beam.ParDo0(s, testutils.CheckNoNegativeValuesInt64, values) @@ -525,11 +569,10 @@ func TestDistinctPrivacyIDWithPartitionsReturnsNonNegative(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - // Using a low epsilon adds a lot of noise and using a high delta keeps - // many partitions. - epsilon, delta := 0.001, 0.999 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) - distinctPrivacyIDParams := DistinctPrivacyIDParams{MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}, PublicPartitions: publicPartitions} + // Using a low epsilon adds a lot of noise. + epsilon := 0.001 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) + distinctPrivacyIDParams := DistinctPrivacyIDParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} counts := DistinctPrivacyID(s, pcol, distinctPrivacyIDParams) values := beam.DropKey(s, counts) // Check if we have negative elements. @@ -568,7 +611,11 @@ func TestDistinctPrivacyIDOptimizedContrib(t *testing.T) { // We have 4 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 4.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 4, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) @@ -1077,96 +1124,6 @@ func TestCheckDistinctPrivacyIDParams(t *testing.T) { } } -// The logic mirrors TestDistinctPrivacyIDNoNoiseTemp, but with the new privacy budget API. -func TestDistinctPrivacyIDNoNoiseTemp(t *testing.T) { - // pairs{privacy_id, partition_key} contain input data belonging to partitions 0, 1, and 2. - pairs := testutils.ConcatenatePairs( - testutils.MakePairsWithFixedV(7, 0), - testutils.MakePairsWithFixedV(52, 1), - testutils.MakePairsWithFixedV(99, 2), - testutils.MakePairsWithFixedV(7, 0)) // duplicated values should have no influence. - result := []testutils.PairII64{ - // Only 7 privacy units are associated with value 0: should be thresholded. - {1, 52}, - {2, 99}, - } - p, s, col, want := ptest.CreateList2(pairs, result) - col = beam.ParDo(s, testutils.PairToKV, col) - - // ε=50, δ=10⁻²⁰⁰ and l1Sensitivity=4 gives a post-aggregation threshold of 38. - // We have 4 partitions. So, to get an overall flakiness of 10⁻²³, - // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - // To see the logic and the math behind flakiness and tolerance calculation, - // See https://github.com/google/differential-privacy/blob/main/privacy-on-beam/docs/Tolerance_Calculation.pdf. - epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 4.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) - if err != nil { - t.Fatalf("TestDistinctPrivacyIDNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 4, NoiseKind: LaplaceNoise{}}) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestDistinctPrivacyIDNoNoiseTemp: DistinctPrivacyID(%v) = %v, expected %v: %v", col, got, want, err) - } -} - -// The logic mirrors TestDistinctPrivacyIDWithPartitionsNoNoise, but with the new privacy budget API. -func TestDistinctPrivacyIDWithPartitionsNoNoiseTemp(t *testing.T) { - // We have two test cases, one for public partitions as a PCollection and one for public partitions as a slice (i.e., in-memory). - for _, tc := range []struct { - inMemory bool - }{ - {true}, - {false}, - } { - // pairs{privacy_id, partition_key} contain input data belonging to partitions 0, 1, 2 and 3. - pairs := testutils.ConcatenatePairs( - testutils.MakePairsWithFixedV(7, 0), - testutils.MakePairsWithFixedV(52, 1), - testutils.MakePairsWithFixedV(99, 2), - testutils.MakePairsWithFixedV(7, 0), // duplicated values should have no influence. - testutils.MakePairsWithFixedV(20, 3)) - result := []testutils.PairII64{ - // Public partitions include 0, which would otherwise be thresholded. - {0, 7}, - {1, 52}, - // Drop non-public partition 2. - {3, 20}, - {4, 0}, // Add public partition 4. - } - - p, s, col, want := ptest.CreateList2(pairs, result) - col = beam.ParDo(s, testutils.PairToKV, col) - - publicPartitionsSlice := []int{0, 1, 3, 4} - var publicPartitions any - if tc.inMemory { - publicPartitions = publicPartitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicPartitionsSlice) - } - - // We have ε=500, δ=0, and l1Sensitivity=4. - // We have 4 partitions. So, to get an overall flakiness of 10⁻²³, - // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, k, l1Sensitivity := 500.0, 24.0, 4.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon}) - if err != nil { - t.Fatalf("TestDistinctPrivacyIDWithPartitionsNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - distinctPrivacyIDParams := DistinctPrivacyIDParams{MaxPartitionsContributed: 4, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} - got := DistinctPrivacyID(s, pcol, distinctPrivacyIDParams) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestDistinctPrivacyIDWithPartitionsNoNoiseTemp in-memory=%t: DistinctPrivacyID(%v) = %v, expected %v: %v", tc.inMemory, col, got, want, err) - } - } -} - func TestDistinctPrivacyIDPreThresholding(t *testing.T) { // In this test, we set pre-threshold to 10, per-partition l1 sensitivity to 2, and: // - value 0 is associated with 9 privacy units, so it should be thresholded; @@ -1189,15 +1146,12 @@ func TestDistinctPrivacyIDPreThresholding(t *testing.T) { // we can have each partition fail with 10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 1e9, dpagg.LargestRepresentableDelta, 23.0, 1.0 preThreshold := int64(10) - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ - AggregationEpsilon: epsilon, - PartitionSelectionEpsilon: epsilon, - PartitionSelectionDelta: delta, - PreThreshold: preThreshold}) - if err != nil { - t.Fatalf("TestDistinctPrivacyIDPreThresholding: %v", err) - } - pcol := MakePrivate(s, col, spec) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + PreThreshold: preThreshold})) got := DistinctPrivacyID(s, pcol, DistinctPrivacyIDParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) diff --git a/privacy-on-beam/pbeam/distinct_per_key_test.go b/privacy-on-beam/pbeam/distinct_per_key_test.go index 60eb2151..d1b1f530 100644 --- a/privacy-on-beam/pbeam/distinct_per_key_test.go +++ b/privacy-on-beam/pbeam/distinct_per_key_test.go @@ -66,8 +66,12 @@ func TestDistinctPerKeyNoNoise(t *testing.T) { // To see the logic and the math behind flakiness and tolerance calculation, // See https://github.com/google/differential-privacy/blob/main/privacy-on-beam/docs/Tolerance_Calculation.pdf. epsilon, delta, k, l1Sensitivity := 50.0, 1e-100, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 2}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -83,20 +87,25 @@ func TestDistinctPerKeyAddsNoise(t *testing.T) { name string noiseKind NoiseKind // Differential privacy params used. - epsilon float64 - delta float64 + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 2 * 1e-15, // It is split by 2: 1e-15 for the noise and 1e-15 for the partition selection. - delta: 2 * 1e-5, // It is split by 2: 1e-5 for the noise and 1e-5 for the partition selection. + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1e-15, + aggregationDelta: 1e-5, + partitionSelectionEpsilon: 1e-15, + partitionSelectionDelta: 1e-5, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 2 * 1e-15, // It is split by 2: 1e-15 for the noise and 1e-15 for the partition selection. - delta: 0.01, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1e-15, + partitionSelectionEpsilon: 1e-15, + partitionSelectionDelta: 0.01, }, } { // Because this is an integer aggregation, we can't use the regular complementary @@ -116,16 +125,12 @@ func TestDistinctPerKeyAddsNoise(t *testing.T) { // about tests taking long. tolerance := 0.0 l0Sensitivity, lInfSensitivity := int64(1), int64(1) - partitionSelectionEpsilon, partitionSelectionDelta := tc.epsilon/2, tc.delta - if tc.noiseKind == gaussianNoise { - partitionSelectionDelta = tc.delta / 2 - } // Compute the number of IDs needed to keep the partition. sp, err := dpagg.NewPreAggSelectPartition( &dpagg.PreAggSelectPartitionOptions{ - Epsilon: partitionSelectionEpsilon, - Delta: partitionSelectionDelta, + Epsilon: tc.partitionSelectionEpsilon, + Delta: tc.partitionSelectionDelta, MaxPartitionsContributed: l0Sensitivity, }) if err != nil { @@ -143,9 +148,15 @@ func TestDistinctPerKeyAddsNoise(t *testing.T) { p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: int64(l0Sensitivity), NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: int64(lInfSensitivity)}) + got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: int64(l0Sensitivity), NoiseKind: tc.noiseKind, MaxContributionsPerPartition: int64(lInfSensitivity)}) got = beam.ParDo(s, testutils.KVToPairII64, got) testutils.CheckInt64MetricsAreNoisy(s, got, numIDs, tolerance) @@ -175,8 +186,12 @@ func TestDistinctPerKeyPerKeyCrossPartitionContributionBounding(t *testing.T) { // We have 10 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.01, 24.0, 3.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 1}) // With a max contribution of 3, 70% of the data should have be @@ -214,7 +229,6 @@ func TestDistinctPerKeyWithPartitionsCrossPartitionContributionBounding(t *testi k := 25.0 l1Sensitivity := 6.0 epsilon := 50.0 - delta := 0.0 publicPartitionsSlice := []int{0, 1, 2, 3, 4} var publicPartitions any if tc.inMemory { @@ -223,7 +237,7 @@ func TestDistinctPerKeyWithPartitionsCrossPartitionContributionBounding(t *testi publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 2, PublicPartitions: publicPartitions}) maxs := beam.DropKey(s, got) @@ -278,14 +292,20 @@ func TestDistinctPerKeyWithPartitionsPerPartitionContributionBounding(t *testing p, s, col, want := ptest.CreateList2(triples, result) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - // ε=50, δ=10⁻¹⁰⁰ and l0Sensitivity=3 gives a threshold of ≈17. + publicPartitionsSlice := []int{0, 1, 2} + var publicPartitions any + if tc.inMemory { + publicPartitions = publicPartitionsSlice + } else { + publicPartitions = beam.CreateList(s, publicPartitionsSlice) + } + // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, // we can have each partition fail with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 1e-100, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 24.0, 6.0 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 2}) + got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 2, PublicPartitions: publicPartitions}) want = beam.ParDo(s, testutils.PairII64ToKV, want) testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.LaplaceTolerance(k, l1Sensitivity, epsilon)) if err := ptest.Run(p); err != nil { @@ -324,8 +344,12 @@ func TestDistinctPerKeyCrossPartitionContributionBounding_IsAppliedBeforeDedupli // We have 100 partitions. So, to get an overall flakiness of 10⁻²³, // we can have each partition fail with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 1-1e-15, 24.0, 1.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 1}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -371,8 +395,12 @@ func TestDistinctPerKeyPerPartitionContributionBounding(t *testing.T) { // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, // we can have each partition fail with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 1e-100, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 2}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -409,8 +437,12 @@ func TestDistinctPerKeyPerPartitionContributionBounding_IsAppliedBeforeDeduplica // We have 1 partition. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 50.0, 1e-100, 23.0, 1.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 1}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -421,21 +453,22 @@ func TestDistinctPerKeyPerPartitionContributionBounding_IsAppliedBeforeDeduplica } var distinctPerKeyPartitionSelectionTestCases = []struct { - name string - noiseKind NoiseKind - epsilon float64 - delta float64 - numPartitions int - entriesPerPartition int + name string + noiseKind NoiseKind + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 + numPartitions int + entriesPerPartition int }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in a ε=1, - // δ=0.3 partition selection budget. - epsilon: 2, - delta: 0.6, + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.3, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -444,14 +477,11 @@ var distinctPerKeyPartitionSelectionTestCases = []struct { numPartitions: 143, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in the - // partition selection portion of the budget being ε_selectPartition=1, - // δ_selectPartition=0.3. - epsilon: 2, - delta: 0.3, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -488,7 +518,13 @@ func TestDistinctPerKeyPartitionSelection(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) // Run DistinctPerKey on triples - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: int64(tc.numPartitions), NoiseKind: tc.noiseKind, MaxContributionsPerPartition: 1}) got = beam.ParDo(s, testutils.KVToPairII64, got) @@ -520,8 +556,12 @@ func TestDistinctPerKeyThresholdsOnPrivacyIDs(t *testing.T) { // We have 1 partition. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 50.0, 1e-10, 23.0, 1.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 1}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -695,102 +735,6 @@ func TestCheckDistinctPerKeyParams(t *testing.T) { } } -// The logic mirrors TestDistinctPerKeyNoNoise, but with the new privacy budget API. -func TestDistinctPerKeyNoNoiseTemp(t *testing.T) { - var triples []testutils.TripleWithIntValue - for i := 0; i < 100; i++ { // Add 200 distinct values to Partition 0. - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 0, Value: i}) - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 0, Value: 100 + i}) - } - for i := 100; i < 200; i++ { // Add 200 additional values, all of which are duplicates of the existing distinct values, to Partition 0. - // The duplicates come from users different from the 100 users above in order to not discard - // any distinct values during the initial per-partition contribution bounding step. - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 0, Value: i - 100}) // Duplicate. Should be discarded by DistinctPerKey. - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 0, Value: i}) // Duplicate. Should be discarded by DistinctPerKey. - } - for i := 0; i < 50; i++ { // Add 200 values of which 100 are distinct to Partition 1. - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 1, Value: i}) - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 1, Value: 50 + i}) - // Have 2 users contribute to the same 100 distinct values. - triples = append(triples, testutils.TripleWithIntValue{ID: 100 + i, Partition: 1, Value: i}) // Should be discarded. - triples = append(triples, testutils.TripleWithIntValue{ID: 100 + i, Partition: 1, Value: 50 + i}) // Should be discarded. - } - for i := 0; i < 7; i++ { // Add 7 distinct values to Partition 2. Should be thresholded. - triples = append(triples, testutils.TripleWithIntValue{ID: i, Partition: 2, Value: i}) - } - result := []testutils.PairII64{ - {0, 200}, - {1, 100}, - // Only 7 distinct values in partition 2: should be thresholded. - } - p, s, col, want := ptest.CreateList2(triples, result) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - - // ε=50, δ=10⁻¹⁰⁰ and l0Sensitivity=3 gives a threshold of ≈17. - // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, - // we can have each partition fail with 1-10⁻²⁴ probability (k=24). - // To see the logic and the math behind flakiness and tolerance calculation, - // See https://github.com/google/differential-privacy/blob/main/privacy-on-beam/docs/Tolerance_Calculation.pdf. - epsilon, delta, k, l1Sensitivity := 50.0, 1e-100, 24.0, 6.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta}) - if err != nil { - t.Fatalf("TestDistinctPerKeyNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - got := DistinctPerKey(s, pcol, DistinctPerKeyParams{MaxPartitionsContributed: 3, NoiseKind: LaplaceNoise{}, MaxContributionsPerPartition: 2}) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestDistinctPerKeyNoNoiseTemp: DistinctPerKey(%v) = %v, expected %v: %v", col, got, want, err) - } -} - -// The logic mirrors TestDistinctPerKeyWithPartitionNoNoise, but with the new privacy budget API. -func TestDistinctPerKeyWithPartitionNoNoiseTemp(t *testing.T) { - for _, tc := range []struct { - inMemory bool - }{ - {true}, - {false}, - } { - var triples []testutils.TripleWithIntValue // this is causing the int struct - for i := 0; i < 10; i++ { - triples = append(triples, testutils.TripleWithIntValue{1, i, i}) - } - result := []testutils.PairII64{ - {9, 1}, // keep partition 9. - {10, 0}, // Add partition 10. - } - p, s, col, want := ptest.CreateList2(triples, result) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - publicParitionsSlice := []int{9, 10} - var publicPartitions any - if tc.inMemory { - publicPartitions = publicParitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicParitionsSlice) - } - // We use ε=50, δ=0 and l1Sensitivity=1. - // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, - // we need to have each partition pass with 1-10⁻²⁴ probability (k=24) - epsilon, k, l1Sensitivity := 50.0, 24.0, 1.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon}) - if err != nil { - t.Fatalf("TestDistinctPerKeyWithPartitionNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - DistinctPerKeyParams := DistinctPerKeyParams{MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} - got := DistinctPerKey(s, pcol, DistinctPerKeyParams) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestDistinctPerKeyWithPartitionNoNoiseTemp in-memory=%t: Count(%v) = %v, expected %v: %v", tc.inMemory, col, got, want, err) - } - } -} - func TestDistinctPerKeyPreThresholding(t *testing.T) { // Arrange var triples []testutils.TripleWithIntValue @@ -813,11 +757,13 @@ func TestDistinctPerKeyPreThresholding(t *testing.T) { // We have 1 partition. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 1e9, dpagg.LargestRepresentableDelta, 23.0, 2.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta, PreThreshold: 10}) - if err != nil { - t.Fatalf("TestDistinctPerKeyPreThresholding: %v", err) - } - pcol := MakePrivate(s, col, spec) + preThreshold := int64(10) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + PreThreshold: preThreshold})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) // Act diff --git a/privacy-on-beam/pbeam/example_pbeamtest_test.go b/privacy-on-beam/pbeam/example_pbeamtest_test.go index 46e88c15..4a506c6a 100644 --- a/privacy-on-beam/pbeam/example_pbeamtest_test.go +++ b/privacy-on-beam/pbeam/example_pbeamtest_test.go @@ -21,23 +21,24 @@ import ( "fmt" "github.com/google/differential-privacy/privacy-on-beam/v2/pbeam" - "github.com/google/differential-privacy/privacy-on-beam/v2/pbeam/pbeamtest" "github.com/apache/beam/sdks/v2/go/pkg/beam" "github.com/apache/beam/sdks/v2/go/pkg/beam/io/textio" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/direct" ) -// This example demonstrates how to write test pipelines for pbeam using package -// pbeamtest where pbeam does not add any noise, disables partition selection +// This example demonstrates how to write test pipelines for pbeam using test +// modes where pbeam does not add any noise, disables partition selection // and might disable or enable contribution bounding depending on the particular // test mode used. // // This mirrors the default example with two differences: -// 1. pbeamtest is used to create a PrivacySpec instead of pbeam. +// 1. TestMode is specified when creating the PrivacySpec. // 2. Code comments are different. // -// Package pbeamtest does not provide any privacy guarantees and is only meant -// to be used in test code. DO NOT use this for production code. +// Using Privacy on Beam with test mode enabled does not provide any privacy guarantees +// and is only meant to be used in test code or for performing an analysis of +// the utility of differential privacy by comparing "true" results with "private" +// results. DO NOT use this for production pipelines. func Example_testPipelines() { // This example computes the "Sum-up revenue per day of the week" example // from the Go Differential Privacy Library documentation, available at @@ -75,17 +76,26 @@ func Example_testPipelines() { // ε and δ are the differential privacy parameters that quantify the privacy // provided by the pipeline. Even though noise will not be added since we are using - // pbeamtest, ε and δ will still be used for validation of parameters; so use the + // test mode, ε and δ will still be used for validation of parameters; so use the // same parameters you use for production. const ε, δ = 1, 1e-3 - // Instead of calling pbeam.NewPrivacySpec(), we call the corresponding function in - // package pbeamtest. This is the only difference with a production pipeline with - // privacy that uses pbeam.NewPrivacySpec(), everything else remains the same. + // We enable test mode by setting TestMode field to pbeam.TestModeWithContributionBounding. + // This is the only difference with a production pipeline with privacy + // that uses pbeam.NewPrivacySpec(), everything else remains the same. // This enables per-partition and cross-partition contribution bounding. If you // wish to disable both types of contribution bounding altogether, use - // pbeamtest.NewPrivacySpecNoNoiseWithoutContributionBounding() instead. - privacySpec := pbeamtest.NewPrivacySpecNoNoiseWithContributionBounding(ε, δ) + // pbeam.TestModeWithoutContributionBounding instead. + privacySpec, err := pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + AggregationEpsilon: ε / 2, + AggregationDelta: δ / 2, + PartitionSelectionEpsilon: ε / 2, + PartitionSelectionDelta: δ / 2, + TestMode: pbeam.TestModeWithContributionBounding, + }) + if err != nil { + fmt.Printf("Couldn't create PrivacySpec: %v", err) + } pcol := pbeam.MakePrivateFromStruct(s, icol, privacySpec, "visitorID") // pcol is now a PrivatePCollection. @@ -116,7 +126,7 @@ func Example_testPipelines() { MinValue: 0, MaxValue: 50, } - // Since pbeamtest is used, this will produce a non-differentially private + // Since test mode is used, this will produce a non-differentially private // sum of revenue per day. ocol := pbeam.SumPerKey(s, pWeekdayEuros, sumParams) diff --git a/privacy-on-beam/pbeam/mean_test.go b/privacy-on-beam/pbeam/mean_test.go index 5a334bc3..5baf05f9 100644 --- a/privacy-on-beam/pbeam/mean_test.go +++ b/privacy-on-beam/pbeam/mean_test.go @@ -381,35 +381,36 @@ func TestMeanPerKeyAddsNoise(t *testing.T) { name string noiseKind NoiseKind // Differential privacy params used - epsilon float64 - delta float64 + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 2, // It is split by 2: 1 for the noise and 1 for the partition selection. - delta: 0.01, // It is split by 2: 0.005 for the noise and 0.005 for the partition selection. + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.005, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.005, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 0.2, // It is split by 2: 0.1 for the noise and 0.1 for the partition selection. - delta: 0.01, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 0.1, + partitionSelectionEpsilon: 0.1, + partitionSelectionDelta: 0.01, }, } { minValue := 0.0 maxValue := 3.0 maxPartitionsContributed, maxContributionsPerPartition := int64(1), int64(1) - partitionSelectionEpsilon, partitionSelectionDelta := tc.epsilon/2, tc.delta - if tc.noiseKind == gaussianNoise { - partitionSelectionDelta = tc.delta / 2 - } // Compute the number of IDs needed to keep the partition. sp, err := dpagg.NewPreAggSelectPartition( &dpagg.PreAggSelectPartitionOptions{ - Epsilon: partitionSelectionEpsilon, - Delta: partitionSelectionDelta, + Epsilon: tc.partitionSelectionEpsilon, + Delta: tc.partitionSelectionDelta, MaxPartitionsContributed: 1, }) if err != nil { @@ -425,7 +426,13 @@ func TestMeanPerKeyAddsNoise(t *testing.T) { p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -503,7 +510,11 @@ func TestMeanPerKeyWithPartitionsAddsNoise(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.epsilon, + AggregationDelta: tc.delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) meanParams := MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -552,8 +563,12 @@ func TestMeanPerKeyNoNoiseFloat(t *testing.T) { maxValue := 3.0 // Act - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -604,8 +619,12 @@ func TestMeanPerKeyNoNoiseInt(t *testing.T) { maxValue := 2.0 // Act - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -694,11 +713,9 @@ func TestMeanPerKeyWithPartitionsNoNoiseFloat(t *testing.T) { maxContributionsPerPartition := int64(1) maxPartitionsContributed := int64(1) epsilon := 50.0 - delta := 0.0 // Act - // ε is not split because partitions are public. - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) meanParams := MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -780,7 +797,6 @@ func TestMeanPerKeyWithPartitionsNoNoiseInt(t *testing.T) { maxContributionsPerPartition := int64(1) maxPartitionsContributed := int64(1) epsilon := 50.0 - delta := 0.0 // Using Laplace noise, and partitions are public. result := []testutils.PairIF64{ // Partition 0 will be dropped because it's not in the list of public partitions. @@ -798,8 +814,7 @@ func TestMeanPerKeyWithPartitionsNoNoiseInt(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - // ε is not split, because partitions are public. - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) meanParams := MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -857,8 +872,12 @@ func TestMeanPerKeyCountsPrivacyUnitIDsWithMultipleContributionsCorrectly(t *tes minValue := 1.0 maxValue := 3.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -880,21 +899,22 @@ func TestMeanPerKeyCountsPrivacyUnitIDsWithMultipleContributionsCorrectly(t *tes } var meanPartitionSelectionTestCases = []struct { - name string - noiseKind NoiseKind - epsilon float64 - delta float64 - numPartitions int - entriesPerPartition int + name string + noiseKind NoiseKind + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 + numPartitions int + entriesPerPartition int }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in a ε=1, - // δ=0.3 partition selection budget. - epsilon: 2, - delta: 0.6, + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.3, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -903,14 +923,11 @@ var meanPartitionSelectionTestCases = []struct { numPartitions: 143, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in the - // partition selection portion of the budget being ε_selectPartition=1, - // δ_selectPartition=0.3. - epsilon: 2, - delta: 0.3, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -947,7 +964,13 @@ func TestMeanPartitionSelection(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) // Run MeanPerKey on triples - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MinValue: 0.0, @@ -992,8 +1015,12 @@ func TestMeanKeyNegativeBounds(t *testing.T) { minValue := -6.0 maxValue := -2.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -1047,8 +1074,12 @@ func TestMeanPerKeyCrossPartitionContributionBounding(t *testing.T) { minValue := 0.0 maxValue := 150.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -1112,8 +1143,12 @@ func TestMeanPerKeyPerPartitionContributionBounding(t *testing.T) { minValue := 0.0 maxValue := 100.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := MeanPerKey(s, pcol, MeanParams{ MaxContributionsPerPartition: maxContributionsPerPartition, @@ -1155,8 +1190,12 @@ func TestMeanPerKeyReturnsNonNegative(t *testing.T) { minValue := 0.0 maxValue := 1e8 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) means := MeanPerKey(s, pcol, MeanParams{ MaxContributionsPerPartition: maxContributionsPerPartition, @@ -1199,15 +1238,13 @@ func TestMeanPerKeyWithPartitionsReturnsNonNegative(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - // Using a low ε, zero δ, and a high maxValue to add a lot of noise. + // Using a low ε and a high maxValue to add a lot of noise. maxContributionsPerPartition := int64(1) epsilon := 0.001 - delta := 0.0 minValue := 0.0 maxValue := 1e8 - // ε is not split, because partitions are public. - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) meanParams := MeanParams{ MaxContributionsPerPartition: maxContributionsPerPartition, @@ -1244,8 +1281,12 @@ func TestMeanPerKeyNoClampingForNegativeMinValue(t *testing.T) { minValue := -100.0 maxValue := 100.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) means := MeanPerKey(s, pcol, MeanParams{ MaxContributionsPerPartition: maxContributionsPerPartition, @@ -1302,12 +1343,11 @@ func TestMeanPerKeyWithPartitionsCrossPartitionContributionBounding(t *testing.T maxContributionsPerPartition := int64(1) maxPartitionsContributed := int64(1) epsilon := 60.0 - delta := 0.0 // Zero delta because partitions are public. minValue := 0.0 maxValue := 150.0 // ε is not split, because partitions are public. - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) meanParams := MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -1393,7 +1433,6 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { maxContributionsPerPartition := int64(1) maxPartitionsContributed := int64(1) epsilon := 50.0 - delta := 0.0 // Using Laplace noise, and partitions are public. result := []testutils.PairIF64{ {1, midpoint}, @@ -1412,8 +1451,7 @@ func TestMeanPerKeyWithEmptyPartitionsNoNoise(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - // ε is not split, because partitions are public. - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) meanParams := MeanParams{ MaxPartitionsContributed: maxPartitionsContributed, @@ -1860,166 +1898,3 @@ func TestCheckMeanPerKeyParams(t *testing.T) { } } } - -// The logic mirrors TestMeanPerKeyNoNoiseFloat, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestMeanPerKeyNoNoiseFloatTemp(t *testing.T) { - // Arrange - triples := testutils.ConcatenateTriplesWithFloatValue( - testutils.MakeTripleWithFloatValue(7, 0, 2.0), - testutils.MakeTripleWithFloatValueStartingFromKey(7, 100, 1, 1.3), - testutils.MakeTripleWithFloatValueStartingFromKey(107, 150, 1, 2.5)) - - exactCount := 250.0 - exactMean := (1.3*100 + 2.5*150) / exactCount - result := []testutils.PairIF64{ - {1, exactMean}, - } - p, s, col, want := ptest.CreateList2(triples, result) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - - // ε=50, δ=10⁻²⁰⁰ and l0Sensitivity=1 gives a threshold of =11. - // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, - // we can have each partition fail with 10⁻²⁴ probability (k=24). - maxContributionsPerPartition := int64(1) - maxPartitionsContributed := int64(1) - epsilon := 50.0 - delta := 1e-200 - minValue := 1.0 - maxValue := 3.0 - - // Act - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ - AggregationEpsilon: epsilon, - AggregationDelta: 0, - PartitionSelectionEpsilon: epsilon, - PartitionSelectionDelta: delta, - }) - if err != nil { - t.Fatalf("TestMeanPerKeyNoNoiseFloatTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) - got := MeanPerKey(s, pcol, MeanParams{ - MaxPartitionsContributed: maxPartitionsContributed, - MaxContributionsPerPartition: maxContributionsPerPartition, - MinValue: minValue, - MaxValue: maxValue, - NoiseKind: LaplaceNoise{}, - }) - - // Assert - want = beam.ParDo(s, testutils.PairIF64ToKV, want) - tolerance, err := testutils.LaplaceToleranceForMean(24, minValue, maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, 5, exactCount, exactMean) - if err != nil { - t.Fatalf("LaplaceToleranceForMean: got error %v", err) - } - testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) - if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyNoNoiseFloatTemp: MeanPerKey(%v) = %v, want %v, error %v", col, got, want, err) - } -} - -// The logic mirrors TestMeanPerKeyWithPartitionsNoNoiseInt, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestMeanPerKeyWithPartitionsNoNoiseIntTemp(t *testing.T) { - for _, tc := range []struct { - minValue float64 - maxValue float64 - inMemory bool - }{ - { - minValue: 1.0, - maxValue: 3.0, - inMemory: false, - }, - { - minValue: 1.0, - maxValue: 3.0, - inMemory: true, - }, - { - minValue: 0.0, - maxValue: 2.0, - inMemory: false, - }, - { - minValue: 0.0, - maxValue: 2.0, - inMemory: true, - }, - { - minValue: -10.0, - maxValue: 10.0, - inMemory: false, - }, - { - minValue: -10.0, - maxValue: 10.0, - inMemory: true, - }, - } { - // Arrange - triples := testutils.ConcatenateTriplesWithIntValue( - testutils.MakeTripleWithIntValue(7, 0, 2), - testutils.MakeTripleWithIntValueStartingFromKey(7, 100, 1, 1), - testutils.MakeTripleWithIntValueStartingFromKey(107, 150, 1, 2), - ) - - exactCount := 250.0 - exactMean := (100.0 + 2.0*150.0) / exactCount - - // We have ε=50, δ=0 and l0Sensitivity=1. - // We do not use thresholding because partitions are public. - // We have 1 partition. So, to get an overall flakiness of 10⁻²³, - // we can have each partition fail with 10⁻²³ probability (k=23). - maxContributionsPerPartition := int64(1) - maxPartitionsContributed := int64(1) - epsilon := 50.0 - - result := []testutils.PairIF64{ - // Partition 0 will be dropped because it's not in the list of public partitions. - {1, exactMean}, - } - publicPartitionsSlice := []int{1} - - p, s, col, want := ptest.CreateList2(triples, result) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - want = beam.ParDo(s, testutils.PairIF64ToKV, want) - - var publicPartitions any - if tc.inMemory { - publicPartitions = publicPartitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicPartitionsSlice) - } - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, AggregationDelta: 0}) - if err != nil { - t.Fatalf("TestMeanPerKeyWithPartitionsNoNoiseIntTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - meanParams := MeanParams{ - MaxPartitionsContributed: maxPartitionsContributed, - MaxContributionsPerPartition: maxContributionsPerPartition, - MinValue: tc.minValue, - MaxValue: tc.maxValue, - NoiseKind: LaplaceNoise{}, - PublicPartitions: publicPartitions, - } - - // Act - got := MeanPerKey(s, pcol, meanParams) - - // Assert - exactNormalizedSum := (1.0-(tc.maxValue+tc.minValue)/2)*100 + (2.0-(tc.maxValue+tc.minValue)/2)*150 - tolerance, err := testutils.LaplaceToleranceForMean(23, tc.minValue, tc.maxValue, maxContributionsPerPartition, maxPartitionsContributed, epsilon, exactNormalizedSum, exactCount, exactMean) - if err != nil { - t.Fatalf("LaplaceToleranceForMean: test case=%+v got error %v", tc, err) - } - testutils.ApproxEqualsKVFloat64(t, s, got, want, tolerance) - if err := ptest.Run(p); err != nil { - t.Errorf("TestMeanPerKeyWithPartitionsNoNoiseIntTemp test case=%+v: MeanPerKey(%v) = %v, want %v, error %v", tc, col, got, want, err) - } - } -} diff --git a/privacy-on-beam/pbeam/pardo_test.go b/privacy-on-beam/pbeam/pardo_test.go index c314bead..a41f82ba 100644 --- a/privacy-on-beam/pbeam/pardo_test.go +++ b/privacy-on-beam/pbeam/pardo_test.go @@ -147,7 +147,7 @@ func TestParDo1x1(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→26 and 99→5 in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -165,7 +165,7 @@ func TestParDo1x2(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -203,7 +203,7 @@ func TestParDoCtx1x2(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -238,7 +238,7 @@ func TestParDo1x2Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -288,7 +288,7 @@ func TestParDoCtx1x2Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -315,7 +315,7 @@ func TestParDo2x1(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -350,7 +350,7 @@ func TestParDoCtx2x1(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -381,7 +381,7 @@ func TestParDo2x1Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -427,7 +427,7 @@ func TestParDoCtx2x1Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -452,7 +452,7 @@ func TestParDo2x2(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -490,7 +490,7 @@ func TestParDoCtx2x2(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -525,7 +525,7 @@ func TestParDo2x2Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -575,7 +575,7 @@ func TestParDoCtx2x2Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -646,7 +646,7 @@ func TestParDoCtx1x1(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // If the context was cancelled, we change that to 17→0 and 99→0 in the PrivatePCollection, // Otherwise, we change that to 17→26 and 99→5 pcol = ParDo(s, doFnWithContext, pcol) @@ -666,7 +666,7 @@ func TestParDo1x1Err(t *testing.T) { p, s, col, wantCol := ptest.CreateList2(values, goodResult) colKV := beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) passert.Equals(s, gotCol, wantCol) @@ -682,7 +682,7 @@ func TestParDo1x1ErrReturnsError(t *testing.T) { p, s, col, wantCol := ptest.CreateList2(values, zeroResult) colKV := beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) passert.Equals(s, gotCol, wantCol) @@ -711,7 +711,7 @@ func TestParDoCtx1x1Err(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // If the context was cancelled, we change that to 17→0 and 99→0 in the PrivatePCollection, // Otherwise, we change that to 17→26 and 99→5 pcol = ParDo(s, doFn, pcol) @@ -743,7 +743,7 @@ func TestParDo1x1Emit(t *testing.T) { p, s, col := ptest.CreateList(values) colKV := beam.ParDo(s, testutils.PairToKV, col) - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) pcol = ParDo(s, doFnPair, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) passert.Equals(s, gotCol, col) @@ -752,7 +752,7 @@ func TestParDo1x1Emit(t *testing.T) { } // Check for values with ctx passed in doFn - pcol = MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol = MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) pcol = ParDo(s, doFnPairWithCtx, pcol) gotCol = beam.ParDo(s, testutils.KVToPair, pcol.col) passert.Equals(s, gotCol, col) @@ -783,7 +783,7 @@ func TestParDo1x1ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→26 and 99→5 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -831,7 +831,7 @@ func TestParDoCtx1x1ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→26 and 99→5 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -887,7 +887,7 @@ func TestParDo1x2Emit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -926,7 +926,7 @@ func TestParDoCtx1x2Emit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -962,7 +962,7 @@ func TestParDo1x2ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -1014,7 +1014,7 @@ func TestParDoCtx1x2ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -1073,7 +1073,7 @@ func TestParDo2x1Emit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -1108,7 +1108,7 @@ func TestParDoCtx2x1Emit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -1140,7 +1140,7 @@ func TestParDo2x1ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -1188,7 +1188,7 @@ func TestParDoCtx2x1ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→106 and 99→1 in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPair, pcol.col) @@ -1245,7 +1245,7 @@ func TestParDo2x2Emit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -1284,7 +1284,7 @@ func TestParDoCtx2x2Emit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -1320,7 +1320,7 @@ func TestParDo2x2ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) @@ -1372,7 +1372,7 @@ func TestParDoCtx2x2ErrEmit(t *testing.T) { colKV := beam.ParDo(s, testutils.PairICodedKVToKV, col) // pcol should contain 17→ and 99→. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) // We change that to 17→ and 99→ in the PrivatePCollection pcol = ParDo(s, tc.doFn, pcol) gotCol := beam.ParDo(s, testutils.KVToPairICodedKV, pcol.col) diff --git a/privacy-on-beam/pbeam/pbeam_main_test.go b/privacy-on-beam/pbeam/pbeam_main_test.go index 8e192536..b368c202 100644 --- a/privacy-on-beam/pbeam/pbeam_main_test.go +++ b/privacy-on-beam/pbeam/pbeam_main_test.go @@ -25,5 +25,15 @@ func TestMain(m *testing.M) { ptest.MainWithDefault(m, "direct") } -// Used in various tests. +// Below are used in various tests. var gaussianNoise = GaussianNoise{} + +// Helper function to create a PrivacySpec that deals with error handling. +func privacySpec(t *testing.T, params PrivacySpecParams) *PrivacySpec { + t.Helper() + spec, err := NewPrivacySpecTemp(params) + if err != nil { + t.Fatalf("Failed to create PrivacySpec") + } + return spec +} diff --git a/privacy-on-beam/pbeam/pbeam_test.go b/privacy-on-beam/pbeam/pbeam_test.go index 32871cfc..499cd988 100644 --- a/privacy-on-beam/pbeam/pbeam_test.go +++ b/privacy-on-beam/pbeam/pbeam_test.go @@ -171,7 +171,7 @@ func TestMakePrivate(t *testing.T) { colKV := beam.ParDo(s, testutils.PairToKV, col) // pcol should contain 17→42 and 99→0. - pcol := MakePrivate(s, colKV, NewPrivacySpec(1, 1e-10)) + pcol := MakePrivate(s, colKV, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1})) got := beam.ParDo(s, testutils.KVToPair, pcol.col) passert.Equals(s, got, col) if err := ptest.Run(p); err != nil { @@ -248,7 +248,7 @@ func TestMakePrivateFromStruct(t *testing.T) { } { p, s, col, want := ptest.CreateList2(tc.values, tc.want) - pcol := MakePrivateFromStruct(s, col, NewPrivacySpec(1, 1e-10), tc.idFieldPath) + pcol := MakePrivateFromStruct(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1}), tc.idFieldPath) got := beam.ParDo(s, kvToStructPair, pcol.col) passert.Equals(s, got, want) if err := ptest.Run(p); err != nil { @@ -325,7 +325,7 @@ func TestMakePrivateFromProto(t *testing.T) { } p, s, col, want := ptest.CreateList2(values, result) - pcol := MakePrivateFromProto(s, col, NewPrivacySpec(1, 1e-10), "foo") + pcol := MakePrivateFromProto(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1}), "foo") got := beam.ParDo(s, kvToProtoPair, pcol.col) passert.Equals(s, got, want) if err := ptest.Run(p); err != nil { @@ -483,7 +483,7 @@ func TestBudgetFullyConsumed(t *testing.T) { } p, s, col := ptest.CreateList(values) colKV := beam.ParDo(s, testutils.PairToKV, col) - spec := NewPrivacySpec(1, 1e-30) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1, PartitionSelectionEpsilon: 1, PartitionSelectionDelta: 1e-30}) pcol := MakePrivate(s, colKV, spec) got := Count(s, pcol, CountParams{MaxValue: 1, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) passert.Empty(s, got) @@ -491,7 +491,10 @@ func TestBudgetFullyConsumed(t *testing.T) { t.Errorf("expected no error but got error: %v", err) } // Try consuming 1% of the initial budget. - if eps, del, err := spec.budget.consume(0.01, 1e-32); err == nil { + if eps, del, err := spec.aggregationBudget.consume(0.01, 0); err == nil { + t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del) + } + if eps, del, err := spec.partitionSelectionBudget.consume(0.01, 1e-32); err == nil { t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del) } } @@ -504,8 +507,8 @@ func TestTwoDistinctBudgets(t *testing.T) { } p, s, col := ptest.CreateList(values) colKV := beam.ParDo(s, testutils.PairToKV, col) - spec1 := NewPrivacySpec(1, 1e-30) - spec2 := NewPrivacySpec(1, 1e-30) + spec1 := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1, PartitionSelectionEpsilon: 1, PartitionSelectionDelta: 1e-30}) + spec2 := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1, PartitionSelectionEpsilon: 1, PartitionSelectionDelta: 1e-30}) pcol1 := MakePrivate(s, colKV, spec1) pcol2 := MakePrivate(s, colKV, spec2) got1 := Count(s, pcol1, CountParams{MaxValue: 1, MaxPartitionsContributed: 1, NoiseKind: LaplaceNoise{}}) @@ -516,11 +519,17 @@ func TestTwoDistinctBudgets(t *testing.T) { t.Errorf("expected no error but got error: %v", err) } // Try consuming 1% of the initial budget independently for ε and δ. - if eps, del, err := spec1.budget.consume(0, 1e-32); err == nil { - t.Errorf("expected spec1 to be out of budget, but could consume (%f,%e) without any error", eps, del) + if eps, del, err := spec1.aggregationBudget.consume(0.01, 0); err == nil { + t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del) + } + if eps, del, err := spec1.partitionSelectionBudget.consume(0.01, 1e-32); err == nil { + t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del) + } + if eps, del, err := spec2.aggregationBudget.consume(0.01, 0); err == nil { + t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del) } - if eps, del, err := spec2.budget.consume(0.01, 0); err == nil { - t.Errorf("expected spec2 to be out of budget, but could consume (%f,%e) without any error", eps, del) + if eps, del, err := spec2.partitionSelectionBudget.consume(0.01, 1e-32); err == nil { + t.Errorf("expected spec to be out of budget, but could consume (%f,%e) without any error", eps, del) } } @@ -557,7 +566,7 @@ func TestDropKey(t *testing.T) { } p, s, col, want := ptest.CreateList2(values, result) colKV := beam.ParDo(s, testutils.PairToKV, col) - spec := NewPrivacySpec(1e10, 0) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e10}) pcol := MakePrivate(s, colKV, spec) pcol = ParDo(s, addZeroIntKeyFn, pcol) pcol = DropKey(s, pcol) @@ -589,7 +598,7 @@ func TestDropValue(t *testing.T) { } p, s, col, want := ptest.CreateList2(values, result) colKV := beam.ParDo(s, testutils.PairToKV, col) - spec := NewPrivacySpec(1e10, 0) + spec := privacySpec(t, PrivacySpecParams{AggregationEpsilon: 1e10}) pcol := MakePrivate(s, colKV, spec) pcol = ParDo(s, addZeroIntValueFn, pcol) pcol = DropValue(s, pcol) diff --git a/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go b/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go index e586aa9e..8ae2bca4 100644 --- a/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go +++ b/privacy-on-beam/pbeam/pbeamtest/pbeamtest_test.go @@ -29,7 +29,7 @@ import ( ) func TestMain(m *testing.M) { - ptest.Main(m) + ptest.MainWithDefault(m, "direct") } const ( @@ -40,6 +40,29 @@ const ( zeroDelta = 0.0 ) +func privacySpec(t *testing.T, testMode pbeam.TestMode, publicPartitions bool) *pbeam.PrivacySpec { + t.Helper() + var spec *pbeam.PrivacySpec + var err error + if publicPartitions { + spec, err = pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + AggregationEpsilon: tinyEpsilon, + TestMode: testMode, + }) + } else { + spec, err = pbeam.NewPrivacySpecTemp(pbeam.PrivacySpecParams{ + AggregationEpsilon: tinyEpsilon / 2, + PartitionSelectionEpsilon: tinyEpsilon / 2, + PartitionSelectionDelta: tinyDelta, + TestMode: testMode, + }) + } + if err != nil { + t.Fatalf("NewPrivacySpecTemp: %v", err) + } + return spec +} + // Tests that DistinctPrivacyID bounds per-partition and cross-partition contributions // correctly, adds no noise and keeps all partitions in test mode. func TestDistinctPrivacyIDTestMode(t *testing.T) { @@ -51,7 +74,7 @@ func TestDistinctPrivacyIDTestMode(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 3, // The same privacy ID contributes to 10 partitions, which implies that count of each // partition is 1. With a max contribution of 3, 7 partitions should be dropped. The sum @@ -61,7 +84,7 @@ func TestDistinctPrivacyIDTestMode(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 3, // The same privacy ID contributes to 10 partitions, which implies that count of each // partition is 1. Contribution bounding is disabled. The sum of all counts must then be 10. @@ -108,7 +131,7 @@ func TestDistinctPrivacyIDWithPartitionsTestMode(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxPartitionsContributed: 3, // The same privacy ID contributes to 10 partitions, which implies that count of each // partition is 1. With a max contribution of 3, 2 out of 5 public partitions should be @@ -117,7 +140,7 @@ func TestDistinctPrivacyIDWithPartitionsTestMode(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxPartitionsContributed: 3, // The same privacy ID contributes to 10 partitions, which implies that count of each // partition is 1. Contribution bounding is disabled and 5 out of 10 partitions are @@ -166,11 +189,11 @@ func TestDistinctPrivacyIDWithPartitionsTestModeAddsEmptyPartitions(t *testing.T }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), }, } { // pairs{privacy_id, partition_key} contains {0,0}, {0,1}, {0,2}, …, {0,9}. @@ -215,7 +238,7 @@ func TestCountTestMode(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 3, maxValue: 2, // The same privacy ID contributes twice (third contribution is dropped due per-partition @@ -229,7 +252,7 @@ func TestCountTestMode(t *testing.T) { desc: "test mode without contribution bounding", maxPartitionsContributed: 3, maxValue: 2, - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), // The same privacy ID contributes thrice to 10 partitions, which implies that count of each // partition is 3. Contribution bounding is disabled. The sum of all counts must then be 30. // This also ensures that no partitions (each with a single privacy id) gets thresholded. @@ -279,7 +302,7 @@ func TestCountWithPartitionsTestMode(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxPartitionsContributed: 3, maxValue: 2, // The same privacy ID contributes twice (third contribution is dropped due per-partition @@ -290,7 +313,7 @@ func TestCountWithPartitionsTestMode(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxPartitionsContributed: 3, maxValue: 2, // The same privacy ID contributes thrice to 10 partitions, which implies that count of each @@ -342,11 +365,11 @@ func TestCountWithPartitionsTestModeAddsEmptyPartitions(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), }, } { var pairs []testutils.PairII @@ -390,7 +413,7 @@ func TestSumPerKeyTestModeInt(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -402,7 +425,7 @@ func TestSumPerKeyTestModeInt(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -458,7 +481,7 @@ func TestSumPerKeyWithPartitionsTestModeInt(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -469,7 +492,7 @@ func TestSumPerKeyWithPartitionsTestModeInt(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -523,11 +546,11 @@ func TestSumPerKeyWithPartitionsTestModeAddsEmptyPartitionsInt(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), }, } { // triples{privacy_id, partition_key, value} contains {0,0,1}, {0,1,1}, {0,2,1}, …, {0,9,1}. @@ -573,7 +596,7 @@ func TestSumPerKeyTestModeFloat(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -585,7 +608,7 @@ func TestSumPerKeyTestModeFloat(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -641,7 +664,7 @@ func TestSumPerKeyWithPartitionsTestModeFloat(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -652,7 +675,7 @@ func TestSumPerKeyWithPartitionsTestModeFloat(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxPartitionsContributed: 3, minValue: 0.0, maxValue: 1.0, @@ -706,11 +729,11 @@ func TestSumPerKeyWithPartitionsTestModeAddsEmptyPartitionsFloat(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), }, } { // triples{privacy_id, partition_key, value} contains {0,0,1}, {0,1,1}, {0,2,1}, …, {0,9,1}. @@ -753,7 +776,7 @@ func TestMeanPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each // partition is 1.0. With a max contribution of 3, 7 partitions should be dropped. The sum @@ -763,7 +786,7 @@ func TestMeanPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each // partition is 1.0. Cross-partition contribution bounding is disabled. The sum of all means @@ -815,7 +838,7 @@ func TestMeanPerKeyTestModePerPartitionContributionBounding(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 50.0, @@ -827,7 +850,7 @@ func TestMeanPerKeyTestModePerPartitionContributionBounding(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 50.0, @@ -876,7 +899,7 @@ func TestMeanPerKeyWithPartitionsTestModeCrossPartitionContributionBounding(t *t }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each // partition is 1.0. With a max contribution of 3, 2 out of 5 public partitions should be @@ -885,7 +908,7 @@ func TestMeanPerKeyWithPartitionsTestModeCrossPartitionContributionBounding(t *t }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each // partition is 1.0. Cross-partition contribution bounding is disabled and 5 out of 10 partitions @@ -940,7 +963,7 @@ func TestMeanPerKeyWithPartitionsTestModePerPartitionContributionBoundingAddsEmp }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 50.0, @@ -952,7 +975,7 @@ func TestMeanPerKeyWithPartitionsTestModePerPartitionContributionBoundingAddsEmp }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 50.0, @@ -1009,7 +1032,7 @@ func TestQuantilesPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxContributionsPerPartition: 20, maxPartitionsContributed: 1, minValue: 0.0, @@ -1025,7 +1048,7 @@ func TestQuantilesPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxContributionsPerPartition: 20, maxPartitionsContributed: 1, minValue: 0.0, @@ -1090,7 +1113,7 @@ func TestQuantilesPerKeyWithPartitionsTestModeCrossPartitionContributionBounding }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxContributionsPerPartition: 20, maxPartitionsContributed: 1, minValue: 0.0, @@ -1106,7 +1129,7 @@ func TestQuantilesPerKeyWithPartitionsTestModeCrossPartitionContributionBounding }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxContributionsPerPartition: 20, maxPartitionsContributed: 1, minValue: 0.0, @@ -1172,7 +1195,7 @@ func TestQuantilesPerKeyTestModePerPartitionContributionBounding(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 1.0, @@ -1186,7 +1209,7 @@ func TestQuantilesPerKeyTestModePerPartitionContributionBounding(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 1.0, @@ -1241,7 +1264,7 @@ func TestQuantilesPerKeyWithPartitionsTestModePerPartitionContributionBounding(t }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 1.0, @@ -1255,7 +1278,7 @@ func TestQuantilesPerKeyWithPartitionsTestModePerPartitionContributionBounding(t }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxContributionPerPartition: 1, minValue: 0.0, maxValue: 1.0, @@ -1307,11 +1330,11 @@ func TestQuantilesPerKeyWithPartitionsAppliesPublicPartitions(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), }, } { triples := testutils.ConcatenateTriplesWithFloatValue( @@ -1378,14 +1401,14 @@ func TestSelectPartitionsTestModeCrossPartitionContributionBoundingV(t *testing. }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 1, // With a max contribution of 1, only 1 partition should be outputted. want: 1, }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 1, // Cross-partition contribution bounding is disabled, so all 10 partitions should be outputted. want: 10, @@ -1420,14 +1443,14 @@ func TestSelectPartitionsTestModeCrossPartitionContributionBoundingKV(t *testing }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 1, // With a max contribution of 1, only 1 partition should be outputted. want: 1, }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 1, // Cross-partition contribution bounding is disabled, so all 10 partitions should be outputted. want: 10, @@ -1463,7 +1486,7 @@ func TestDistinctPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxPartitionsContributed: 3, // The same privacy ID contributes once to 10 partitions, which implies that count of each // partition is 1. With a max contribution of 3, 7 partitions should be dropped. The sum of @@ -1473,7 +1496,7 @@ func TestDistinctPerKeyTestModeCrossPartitionContributionBounding(t *testing.T) }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxPartitionsContributed: 3, // The same privacy ID contributes once to 10 partitions, which implies that count of each // partition is 3. Cross-partition contribution bounding is disabled. The sum of all counts @@ -1521,7 +1544,7 @@ func TestDistinctPerKeyTestModePerPartitionContributionBounding(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, false), maxContributionsPerPartition: 3, // MaxContributionsPerPartition = 3, but id = 0 contributes 10 distinct values to partition 0. // There will be a per-partition contribution bounding stage and only 3 of 10 distinct values @@ -1531,7 +1554,7 @@ func TestDistinctPerKeyTestModePerPartitionContributionBounding(t *testing.T) { }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, tinyDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, false), maxContributionsPerPartition: 3, // MaxContributionsPerPartition = 3, but id = 0 contributes 10 distinct values to partition 0. // There will not be a per-partition contribution bounding stage, so all 10 distinct values will @@ -1580,7 +1603,7 @@ func TestDistinctPerKeyWithPartitionsTestModeCrossPartitionContributionBounding( }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each // partition is 1.0. With a max contribution of 3, 2 out of 5 public partitions should be @@ -1590,7 +1613,7 @@ func TestDistinctPerKeyWithPartitionsTestModeCrossPartitionContributionBounding( }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), maxPartitionsContributed: 3, // The same privacy ID contributes "1.0" to 10 partitions, which implies that mean of each // partition is 1.0. Cross-partition contribution bounding is disabled and 5 out of 10 partitions @@ -1640,11 +1663,11 @@ func TestDistinctPerKeyWithPartitionsTestModeEmptyPartitionsInt(t *testing.T) { }{ { desc: "test mode with contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithContributionBounding, true), }, { desc: "test mode without contribution bounding", - privacySpec: NewPrivacySpecNoNoiseWithoutContributionBounding(tinyEpsilon, zeroDelta), + privacySpec: privacySpec(t, pbeam.TestModeWithoutContributionBounding, true), }, } { // triples{privacy_id, partition_key, value} contains {0,0,1}, {0,1,1}, {0,2,1}, …, {0,9,1}. diff --git a/privacy-on-beam/pbeam/public_partitions_test.go b/privacy-on-beam/pbeam/public_partitions_test.go index 42d54966..eddef57a 100644 --- a/privacy-on-beam/pbeam/public_partitions_test.go +++ b/privacy-on-beam/pbeam/public_partitions_test.go @@ -95,8 +95,8 @@ func TestDropNonPublicPartitionsVFn(t *testing.T) { partitions := []int{0, 2} partitionsCol := beam.CreateList(s, partitions) - epsilon, delta := 50.0, 1e-200 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon := 50.0 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) got := dropNonPublicPartitionsVFn(s, partitionsCol, pcol) testutils.EqualsKVInt(t, s, got, want) if err := ptest.Run(p); err != nil { @@ -127,16 +127,16 @@ func TestDropNonPublicPartitionsKVFn(t *testing.T) { partitionsCol := beam.CreateList(s, []int{0, 2, 3, 4, 5, 6, 9, 10}) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) col2 = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col2) - epsilon, delta := 50.0, 1e-200 + epsilon := 50.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) idT, _ := beam.ValidateKVType(pcol.col) got := dropNonPublicPartitionsKVFn(s, partitionsCol, pcol, idT) got = beam.SwapKV(s, got) - pcol2 := MakePrivate(s, col2, NewPrivacySpec(epsilon, delta)) + pcol2 := MakePrivate(s, col2, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol2 = ParDo(s, testutils.TripleWithIntValueToKV, pcol2) want := pcol2.col want = beam.SwapKV(s, want) @@ -171,16 +171,16 @@ func TestDropNonPublicPartitionsFloat(t *testing.T) { partitionsCol := beam.CreateList(s, []int{0, 2, 3, 4, 5, 6, 7}) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) col2 = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col2) - epsilon, delta := 50.0, 1e-200 + epsilon := 50.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) idT, _ := beam.ValidateKVType(pcol.col) got := dropNonPublicPartitionsKVFn(s, partitionsCol, pcol, idT) got = beam.SwapKV(s, got) - pcol2 := MakePrivate(s, col2, NewPrivacySpec(epsilon, delta)) + pcol2 := MakePrivate(s, col2, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol2 = ParDo(s, testutils.TripleWithFloatValueToKV, pcol2) want := pcol2.col want = beam.SwapKV(s, want) diff --git a/privacy-on-beam/pbeam/quantiles_test.go b/privacy-on-beam/pbeam/quantiles_test.go index b8d88578..c68c7826 100644 --- a/privacy-on-beam/pbeam/quantiles_test.go +++ b/privacy-on-beam/pbeam/quantiles_test.go @@ -389,20 +389,24 @@ func TestQuantilesPerKeyAddsNoise(t *testing.T) { name string noiseKind NoiseKind // Differential privacy params used - epsilon float64 - delta float64 + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 0.1, // It is split in two: 0.05 for the noise and 0.05 for the partition selection. - delta: 2e-3, // It is split in two: 1e-3 for the noise and 1e-3 for the partition selection. - }, + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 0.05, + aggregationDelta: 1e-3, + partitionSelectionEpsilon: 0.05, + partitionSelectionDelta: 1e-3}, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 0.1, // It is split in two: 0.05 for the noise and 0.05 for the partition selection. - delta: 1e-3, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 0.05, + partitionSelectionEpsilon: 0.05, + partitionSelectionDelta: 1e-3, }, } { ranks := []float64{0.50} @@ -417,28 +421,30 @@ func TestQuantilesPerKeyAddsNoise(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) // Use twice epsilon & delta because we compute Quantiles twice. - pcol := MakePrivate(s, col, NewPrivacySpec(2*tc.epsilon, 2*tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: 2 * tc.aggregationEpsilon, + AggregationDelta: 2 * tc.aggregationDelta, + PartitionSelectionEpsilon: 2 * tc.partitionSelectionEpsilon, + PartitionSelectionDelta: 2 * tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) - got1 := QuantilesPerKey(s, pcol, QuantilesParams{ - Epsilon: tc.epsilon, - Delta: tc.delta, - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 1, - MinValue: 0.0, - MaxValue: 2.0, - NoiseKind: tc.noiseKind, - Ranks: ranks, - }) - got2 := QuantilesPerKey(s, pcol, QuantilesParams{ - Epsilon: tc.epsilon, - Delta: tc.delta, + quantilesParams := QuantilesParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionParams: PartitionSelectionParams{ + Epsilon: tc.partitionSelectionEpsilon, + Delta: tc.partitionSelectionDelta, + }, MaxPartitionsContributed: 1, MaxContributionsPerPartition: 1, MinValue: 0.0, MaxValue: 2.0, NoiseKind: tc.noiseKind, Ranks: ranks, - }) + } + got1 := QuantilesPerKey(s, pcol, quantilesParams) + got2 := QuantilesPerKey(s, pcol, quantilesParams) got1 = beam.ParDo(s, testutils.DereferenceFloat64Slice, got1) got2 = beam.ParDo(s, testutils.DereferenceFloat64Slice, got2) @@ -505,11 +511,15 @@ func TestQuantilesWithPartitionsPerKeyAddsNoise(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) // Use twice epsilon & delta because we compute Quantiles twice. - pcol := MakePrivate(s, col, NewPrivacySpec(2*tc.epsilon, 2*tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: 2 * tc.epsilon, + AggregationDelta: 2 * tc.delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) quantilesParams := QuantilesParams{ - Epsilon: tc.epsilon, - Delta: tc.delta, + AggregationEpsilon: tc.epsilon, + AggregationDelta: tc.delta, MaxPartitionsContributed: 100, MaxContributionsPerPartition: 100, MinValue: 0.0, @@ -551,8 +561,12 @@ func TestQuantilesPerKeyNoNoise(t *testing.T) { ranks := []float64{0.00, 0.25, 0.75, 1.00} // Act - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := QuantilesPerKey(s, pcol, QuantilesParams{ MaxPartitionsContributed: 1, @@ -590,7 +604,6 @@ func TestQuantilesPerKeyWithPartitionsNoNoise(t *testing.T) { p, s, col, want := ptest.CreateList2(triples, wantMetric) epsilon := 900.0 - delta := 0.0 lower := 0.0 upper := 5.0 ranks := []float64{0.00, 0.25, 0.75, 1.00} @@ -605,7 +618,7 @@ func TestQuantilesPerKeyWithPartitionsNoNoise(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) // Act - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) quantilesParams := QuantilesParams{ MaxPartitionsContributed: 1, @@ -643,7 +656,6 @@ func TestQuantilesPerKeyWithPartitionsAppliesPublicPartitions(t *testing.T) { p, s, col := ptest.CreateList(triples) epsilon := 900.0 - delta := 0.0 lower := 0.0 upper := 5.0 ranks := []float64{0.00, 0.25, 0.75, 1.00} @@ -657,7 +669,7 @@ func TestQuantilesPerKeyWithPartitionsAppliesPublicPartitions(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) quantilesParams := QuantilesParams{ MaxPartitionsContributed: 1, @@ -686,21 +698,22 @@ func TestQuantilesPerKeyWithPartitionsAppliesPublicPartitions(t *testing.T) { } var quantilesPartitionSelectionTestCases = []struct { - name string - noiseKind NoiseKind - epsilon float64 - delta float64 - numPartitions int - entriesPerPartition int + name string + noiseKind NoiseKind + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 + numPartitions int + entriesPerPartition int }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in a ε=1, - // δ=0.3 partition selection budget. - epsilon: 2, - delta: 0.6, + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.3, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -709,14 +722,11 @@ var quantilesPartitionSelectionTestCases = []struct { numPartitions: 143, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in the - // partition selection portion of the budget being ε_selectPartition=1, - // δ_selectPartition=0.3. - epsilon: 2, - delta: 0.3, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -754,7 +764,13 @@ func TestQuantilesPartitionSelection(t *testing.T) { // Run QuantilesPerKey on triples ranks := []float64{0.00, 0.25, 0.75, 1.00} - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := QuantilesPerKey(s, pcol, QuantilesParams{ MinValue: 0.0, @@ -804,8 +820,12 @@ func TestQuantilesPerKeyCrossPartitionContributionBounding(t *testing.T) { delta := 1e-200 ranks := []float64{0.60} - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := QuantilesPerKey(s, pcol, QuantilesParams{ MaxPartitionsContributed: 1, @@ -857,7 +877,6 @@ func TestQuantilesPerKeyWithPartitionsCrossPartitionContributionBounding(t *test p, s, col, want := ptest.CreateList2(triples, wantMetric) epsilon := 900.0 - delta := 0.0 ranks := []float64{0.60} publicPartitionsSlice := []int{0, 1} var publicPartitions any @@ -869,7 +888,7 @@ func TestQuantilesPerKeyWithPartitionsCrossPartitionContributionBounding(t *test col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) quantilesParams := QuantilesParams{ MaxPartitionsContributed: 1, @@ -916,8 +935,12 @@ func TestQuantilesPerKeyPerPartitionContributionBounding(t *testing.T) { upper := 5.0 ranks := []float64{0.49, 0.51} - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := QuantilesPerKey(s, pcol, QuantilesParams{ MaxPartitionsContributed: 1, @@ -957,7 +980,6 @@ func TestQuantilesPerKeyWithPartitionsPerPartitionContributionBounding(t *testin p, s, col, want := ptest.CreateList2(triples, wantMetric) epsilon := 900.0 - delta := 0.0 lower := 0.0 upper := 5.0 ranks := []float64{0.49, 0.51} @@ -971,8 +993,7 @@ func TestQuantilesPerKeyWithPartitionsPerPartitionContributionBounding(t *testin col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) quantilesParams := QuantilesParams{ MaxPartitionsContributed: 1, @@ -1011,8 +1032,12 @@ func TestQuantilesPerKeyAppliesClamping(t *testing.T) { upper := 5.0 ranks := []float64{0.00, 0.25, 0.75, 1.00} - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := QuantilesPerKey(s, pcol, QuantilesParams{ MaxPartitionsContributed: 1, @@ -1048,7 +1073,6 @@ func TestQuantilesPerKeyWithPartitionsAppliesClamping(t *testing.T) { p, s, col, want := ptest.CreateList2(triples, wantMetric) epsilon := 900.0 - delta := 0.0 lower := 0.0 upper := 5.0 ranks := []float64{0.00, 0.25, 0.75, 1.00} @@ -1062,8 +1086,7 @@ func TestQuantilesPerKeyWithPartitionsAppliesClamping(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - // ε is split in two for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) quantilesParams := QuantilesParams{ MaxPartitionsContributed: 1, @@ -1575,115 +1598,6 @@ func TestCheckQuantilesPerKeyParams(t *testing.T) { } } -// The logic mirrors TestQuantilesPerKeyNoNoise, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestQuantilesPerKeyNoNoiseTemp(t *testing.T) { - // Arrange - triples := testutils.ConcatenateTriplesWithFloatValue( - testutils.MakeTripleWithFloatValue(100, 0, 1.0), - testutils.MakeTripleWithFloatValue(100, 0, 4.0)) - - wantMetric := []testutils.PairIF64Slice{ - {0, []float64{1.0, 1.0, 4.0, 4.0}}, - } - p, s, col, want := ptest.CreateList2(triples, wantMetric) - want = beam.ParDo(s, testutils.PairIF64SliceToKV, want) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - - // ε=900, δ=10⁻²⁰⁰ and l0Sensitivity=1 gives a threshold of =2. - epsilon := 900.0 - delta := 1e-200 - lower := 0.0 - upper := 5.0 - ranks := []float64{0.00, 0.25, 0.75, 1.00} - - // Act - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ - AggregationEpsilon: epsilon, - AggregationDelta: 0, - PartitionSelectionEpsilon: epsilon, - PartitionSelectionDelta: delta, - }) - if err != nil { - t.Fatalf("TestQuantilesPerKeyNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) - got := QuantilesPerKey(s, pcol, QuantilesParams{ - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 2, - MinValue: lower, - MaxValue: upper, - Ranks: ranks, - }) - - // Assert - testutils.ApproxEqualsKVFloat64Slice(t, s, got, want, testutils.QuantilesTolerance(lower, upper)) - if err := ptest.Run(p); err != nil { - t.Errorf("QuantilesPerKey did not return approximate quantile: %v", err) - } -} - -// The logic mirrors TestQuantilesPerKeyWithPartitionsNoNoise, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestQuantilesPerKeyWithPartitionsNoNoiseTemp(t *testing.T) { - // We have two test cases, one for public partitions as a PCollection and one for public partitions as a slice (i.e., in-memory). - for _, tc := range []struct { - inMemory bool - }{ - {true}, - {false}, - } { - // Arrange - triples := testutils.ConcatenateTriplesWithFloatValue( - testutils.MakeTripleWithFloatValue(100, 0, 1.0), - testutils.MakeTripleWithFloatValue(100, 0, 4.0)) - - wantMetric := []testutils.PairIF64Slice{ - {0, []float64{1.0, 1.0, 4.0, 4.0}}, - } - p, s, col, want := ptest.CreateList2(triples, wantMetric) - want = beam.ParDo(s, testutils.PairIF64SliceToKV, want) - - epsilon := 900.0 - lower := 0.0 - upper := 5.0 - ranks := []float64{0.00, 0.25, 0.75, 1.00} - publicPartitionsSlice := []int{0} - var publicPartitions any - if tc.inMemory { - publicPartitions = publicPartitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicPartitionsSlice) - } - - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - - // Act - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, AggregationDelta: 0}) - if err != nil { - t.Fatalf("TestQuantilesPerKeyWithPartitionsNoNoiseTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) - quantilesParams := QuantilesParams{ - MaxPartitionsContributed: 1, - MaxContributionsPerPartition: 2, - MinValue: lower, - MaxValue: upper, - Ranks: ranks, - PublicPartitions: publicPartitions, - } - got := QuantilesPerKey(s, pcol, quantilesParams) - - // Assert - testutils.ApproxEqualsKVFloat64Slice(t, s, got, want, testutils.QuantilesTolerance(lower, upper)) - if err := ptest.Run(p); err != nil { - t.Errorf("QuantilesPerKey with partitions in-memory=%t did not return approximate quantile: %v", tc.inMemory, err) - } - } -} - func TestQuantilesPerKeyPreThresholding(t *testing.T) { // Arrange // ε=10⁹, δ≈1 and l0Sensitivity=1 gives a threshold of ≈1. @@ -1692,16 +1606,13 @@ func TestQuantilesPerKeyPreThresholding(t *testing.T) { lower := 0.0 upper := 5.0 ranks := []float64{0.00, 0.25, 0.75, 1.00} - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ - AggregationEpsilon: epsilon, - AggregationDelta: 0, - PartitionSelectionEpsilon: epsilon, - PartitionSelectionDelta: delta, - PreThreshold: 10, - }) - if err != nil { - t.Fatalf("TestQuantilesPerKeyPreThresholding: %v", err) - } + spec := privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + PreThreshold: 10, + }) triples := testutils.ConcatenateTriplesWithFloatValue( testutils.MakeTripleWithFloatValue(9, 0, 1.0), testutils.MakeTripleWithFloatValueStartingFromKey(10, 10, 1, 1.0), diff --git a/privacy-on-beam/pbeam/select_partitions_test.go b/privacy-on-beam/pbeam/select_partitions_test.go index fcd41a00..399ba53f 100644 --- a/privacy-on-beam/pbeam/select_partitions_test.go +++ b/privacy-on-beam/pbeam/select_partitions_test.go @@ -56,7 +56,11 @@ func TestSelectPartitionsIsNonDeterministicV(t *testing.T) { col = beam.ParDo(s, testutils.PairToKV, col) // Run SelectPartitions on pairs - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + PartitionSelectionEpsilon: tc.epsilon, + PartitionSelectionDelta: tc.delta, + })) got := SelectPartitions(s, pcol, PartitionSelectionParams{MaxPartitionsContributed: 1}) // Validate that partitions are selected randomly (i.e., some emitted and some dropped). @@ -106,7 +110,11 @@ func TestSelectPartitionsIsNonDeterministicKV(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) // Run SelectPartitions on triples - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + PartitionSelectionEpsilon: tc.epsilon, + PartitionSelectionDelta: tc.delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SelectPartitions(s, pcol, PartitionSelectionParams{MaxPartitionsContributed: 1}) @@ -132,7 +140,11 @@ func TestSelectPartitionsBoundsCrossPartitionContributionsV(t *testing.T) { // ε=50, δ=~1 and l0Sensitivity=1 gives a threshold of 2. epsilon, delta, l0Sensitivity := 50.0, dpagg.LargestRepresentableDelta, 1 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) got := SelectPartitions(s, pcol, PartitionSelectionParams{MaxPartitionsContributed: int64(l0Sensitivity)}) // With a max contribution of 1, only 1 partition should be outputted. testutils.CheckNumPartitions(s, got, 1) @@ -154,7 +166,11 @@ func TestSelectPartitionsBoundsCrossPartitionContributionsKV(t *testing.T) { // ε=50, δ=~1 and l0Sensitivity=1 gives a threshold of 2. epsilon, delta, l0Sensitivity := 50.0, dpagg.LargestRepresentableDelta, 1 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SelectPartitions(s, pcol, PartitionSelectionParams{MaxPartitionsContributed: int64(l0Sensitivity)}) // With a max contribution of 1, only 1 partition should be outputted. @@ -174,11 +190,12 @@ func TestSelectPartitionsPrethresholding(t *testing.T) { // We set very large epsilon & delta, and a pre-threshold of 5, so the partition // with 5 users should be kept and the one with 4 users should not be kept. epsilon, delta, preThreshold, l0Sensitivity := 1e9, dpagg.LargestRepresentableDelta, int64(5), int64(1) - spec, err := NewPrivacySpecTemp(PrivacySpecParams{PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta, PreThreshold: preThreshold}) - if err != nil { - t.Fatalf("Failed to create PrivacySpec: %v", err) - } - pcol := MakePrivate(s, col, spec) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + PreThreshold: preThreshold, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SelectPartitions(s, pcol, PartitionSelectionParams{MaxPartitionsContributed: l0Sensitivity}) diff --git a/privacy-on-beam/pbeam/sum_test.go b/privacy-on-beam/pbeam/sum_test.go index 367acce2..7884ba50 100644 --- a/privacy-on-beam/pbeam/sum_test.go +++ b/privacy-on-beam/pbeam/sum_test.go @@ -55,8 +55,12 @@ func TestSumPerKeyNoNoiseInt(t *testing.T) { // To see the logic and the math behind flakiness and tolerance calculation, // See https://github.com/google/differential-privacy/blob/main/privacy-on-beam/docs/Tolerance_Calculation.pdf. epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 3.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 3, MinValue: 0.0, MaxValue: 1, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -147,8 +151,9 @@ func TestSumPerKeyWithPartitionsNoNoiseInt(t *testing.T) { } // We have ε=50, δ=0, and l1Sensitivity=3*lInfSensitivity, to scale the noise with different MinValues and MaxValues. - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 25.0, 3.0*tc.lInfSensitivity - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 25.0, 3.0*tc.lInfSensitivity + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) + pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: 3, MinValue: tc.minValue, MaxValue: tc.maxValue, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -176,8 +181,12 @@ func TestSumPerKeyNegativeBoundsInt(t *testing.T) { // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 2, MinValue: -3, MaxValue: -2, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -217,8 +226,8 @@ func TestSumPerKeyWithPartitionsNegativeBoundsInt(t *testing.T) { // We have ε=50, δ=0 and l1Sensitivity=6. // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 6.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 24.0, 6.0 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: 2, MinValue: -3, MaxValue: -2, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -249,8 +258,12 @@ func TestSumPerKeyNoNoiseFloat(t *testing.T) { // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 3.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 3, MinValue: 0.0, MaxValue: 1.0, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairIF64ToKV, want) @@ -334,8 +347,8 @@ func TestSumPerKeyWithPartitionsNoNoiseFloat(t *testing.T) { // We have ε=50, δ=0 and l1Sensitivity=3*tc.lInfSensitivity. // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 3.0*tc.lInfSensitivity - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 24.0, 3.0*tc.lInfSensitivity + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: 3, MinValue: tc.minValue, MaxValue: tc.maxValue, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -363,8 +376,12 @@ func TestSumPerKeyNegativeBoundsFloat(t *testing.T) { // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 2, MinValue: -3.0, MaxValue: -2.0, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairIF64ToKV, want) @@ -405,8 +422,8 @@ func TestSumPerKeyWithPartitionsNegativeBoundsFloat(t *testing.T) { // We have ε=50, δ=0 and l1Sensitivity=6. // We have 2 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 6.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 24.0, 6.0 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: 2, MinValue: -3.0, MaxValue: -2.0, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -425,20 +442,25 @@ func TestSumPerKeyAddsNoiseInt(t *testing.T) { name string noiseKind NoiseKind // Differential privacy params used. - epsilon float64 - delta float64 + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 2 * 1e-15, // It is split by 2: 1e-15 for the noise and 1e-15 for the partition selection. - delta: 2 * 1e-5, // It is split by 2: 1e-5 for the noise and 1e-5 for the partition selection. + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1e-15, + aggregationDelta: 1e-5, + partitionSelectionEpsilon: 1e-15, + partitionSelectionDelta: 1e-5, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 2 * 1e-15, // It is split by 2: 1e-15 for the noise and 1e-15 for the partition selection. - delta: 0.01, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1e-15, + partitionSelectionEpsilon: 1e-15, + partitionSelectionDelta: 0.01, }, } { // Because this is an integer aggregation, we can't use the regular complementary @@ -458,16 +480,12 @@ func TestSumPerKeyAddsNoiseInt(t *testing.T) { // about tests taking long. tolerance := 0.0 l0Sensitivity, minValue, maxValue := int64(1), 0.0, 1.0 - partitionSelectionEpsilon, partitionSelectionDelta := tc.epsilon/2, tc.delta - if tc.noiseKind == gaussianNoise { - partitionSelectionDelta = tc.delta / 2 - } // Compute the number of IDs needed to keep the partition. sp, err := dpagg.NewPreAggSelectPartition( &dpagg.PreAggSelectPartitionOptions{ - Epsilon: partitionSelectionEpsilon, - Delta: partitionSelectionDelta, + Epsilon: tc.partitionSelectionEpsilon, + Delta: tc.partitionSelectionDelta, MaxPartitionsContributed: l0Sensitivity, }) if err != nil { @@ -483,7 +501,13 @@ func TestSumPerKeyAddsNoiseInt(t *testing.T) { p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: l0Sensitivity, MinValue: minValue, MaxValue: maxValue, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairII64, got) @@ -566,7 +590,11 @@ func TestSumPerKeyWithPartitionsAddsNoiseInt(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.epsilon, + AggregationDelta: tc.delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: l0Sensitivity, MinValue: minValue, MaxValue: maxValue, NoiseKind: tc.noiseKind, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -586,41 +614,42 @@ func TestSumPerKeyAddsNoiseFloat(t *testing.T) { name string noiseKind NoiseKind // Differential privacy params used. - epsilon float64 - delta float64 + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - epsilon: 2, // It is split by 2: 1 for the noise and 1 for the partition selection. - delta: 0.01, // It is split by 2: 0.005 for the noise and 0.005 for the partition selection. + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.005, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.005, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - epsilon: 0.2, // It is split by 2: 0.1 for the noise and 0.1 for the partition selection. - delta: 0.01, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 0.1, + partitionSelectionEpsilon: 0.1, + partitionSelectionDelta: 0.01, }, } { // We have 1 partition. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²³ probability (k=23). - noiseEpsilon, noiseDelta := tc.epsilon/2, 0.0 k := 23.0 l0Sensitivity, lInfSensitivity := 1.0, 1.0 - partitionSelectionEpsilon, partitionSelectionDelta := tc.epsilon/2, tc.delta l1Sensitivity := l0Sensitivity * lInfSensitivity - tolerance := testutils.ComplementaryLaplaceTolerance(k, l1Sensitivity, noiseEpsilon) + tolerance := testutils.ComplementaryLaplaceTolerance(k, l1Sensitivity, tc.aggregationEpsilon) if tc.noiseKind == gaussianNoise { - noiseDelta = tc.delta / 2 - partitionSelectionDelta = tc.delta / 2 - tolerance = testutils.ComplementaryGaussianTolerance(k, l0Sensitivity, lInfSensitivity, noiseEpsilon, noiseDelta) + tolerance = testutils.ComplementaryGaussianTolerance(k, l0Sensitivity, lInfSensitivity, tc.aggregationEpsilon, tc.aggregationDelta) } // Compute the number of IDs needed to keep the partition. sp, err := dpagg.NewPreAggSelectPartition( &dpagg.PreAggSelectPartitionOptions{ - Epsilon: partitionSelectionEpsilon, - Delta: partitionSelectionDelta, + Epsilon: tc.partitionSelectionEpsilon, + Delta: tc.partitionSelectionDelta, MaxPartitionsContributed: 1, }) if err != nil { @@ -636,7 +665,13 @@ func TestSumPerKeyAddsNoiseFloat(t *testing.T) { p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 1, MinValue: 0.0, MaxValue: 1.0, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairIF64, got) @@ -666,8 +701,12 @@ func TestSumPerKeyCrossPartitionContributionBoundingInt(t *testing.T) { // We have 10 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (24). epsilon, delta, k, l1Sensitivity := 50.0, 0.01, 24.0, 3.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 3, MinValue: 0, MaxValue: 1, NoiseKind: LaplaceNoise{}}) // With a max contribution of 3, 70% of the data should have be @@ -714,8 +753,8 @@ func TestSumPerKeyWithPartitionsCrossPartitionContributionBoundingInt(t *testing // We have ε=50, δ=0.0 and l1Sensitivity=3. // We have 5 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 3.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 24.0, 3.0 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: 3, MinValue: 0, MaxValue: 1, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -750,7 +789,12 @@ func TestSumPerKeyCrossPartitionContributionBoundingFloat(t *testing.T) { // We have 10 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 50.0, 0.01, 24.0, 3.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 3, MinValue: 0.0, MaxValue: 1.0, NoiseKind: LaplaceNoise{}}) // With a max contribution of 3, 70% of the data should have be @@ -798,8 +842,8 @@ func TestSumPerKeyWithPartitionsCrossPartitionContributionBoundingFloat(t *testi // We have ε=50, δ=0.0 and l1Sensitivity=3. // We have 5 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 24.0, 3.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + epsilon, k, l1Sensitivity := 50.0, 24.0, 3.0 + pcol := MakePrivate(s, col, privacySpec(t, PrivacySpecParams{AggregationEpsilon: epsilon})) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) sumParams := SumParams{MaxPartitionsContributed: 3, MinValue: 0.0, MaxValue: 1.0, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} got := SumPerKey(s, pcol, sumParams) @@ -837,8 +881,12 @@ func TestSumPerKeyPerPartitionContributionBoundingInt(t *testing.T) { // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 60.0, 0.01, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MinValue: 2, MaxValue: 3, MaxPartitionsContributed: 2, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairII64ToKV, want) @@ -869,8 +917,12 @@ func TestSumPerKeyPerPartitionContributionBoundingFloat(t *testing.T) { // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). epsilon, delta, k, l1Sensitivity := 60.0, 0.01, 24.0, 6.0 - // ε is split by 2 for noise and for partition selection, so we use 2*ε to get a Laplace noise with ε. - pcol := MakePrivate(s, col, NewPrivacySpec(2*epsilon, delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: delta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MinValue: 2.0, MaxValue: 3.0, MaxPartitionsContributed: 2, NoiseKind: LaplaceNoise{}}) want = beam.ParDo(s, testutils.PairIF64ToKV, want) @@ -881,21 +933,22 @@ func TestSumPerKeyPerPartitionContributionBoundingFloat(t *testing.T) { } var sumPartitionSelectionTestCases = []struct { - name string - noiseKind NoiseKind - epsilon float64 - delta float64 - numPartitions int - entriesPerPartition int + name string + noiseKind NoiseKind + aggregationEpsilon float64 + aggregationDelta float64 + partitionSelectionEpsilon float64 + partitionSelectionDelta float64 + numPartitions int + entriesPerPartition int }{ { - name: "Gaussian", - noiseKind: GaussianNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in a ε=1, - // δ=0.3 partition selection budget. - epsilon: 2, - delta: 0.6, + name: "Gaussian", + noiseKind: GaussianNoise{}, + aggregationEpsilon: 1, + aggregationDelta: 0.3, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -904,14 +957,11 @@ var sumPartitionSelectionTestCases = []struct { numPartitions: 143, }, { - name: "Laplace", - noiseKind: LaplaceNoise{}, - // After splitting the (ε, δ) budget between the noise and partition - // selection portions of the privacy algorithm, this results in the - // partition selection portion of the budget being ε_selectPartition=1, - // δ_selectPartition=0.3. - epsilon: 2, - delta: 0.3, + name: "Laplace", + noiseKind: LaplaceNoise{}, + aggregationEpsilon: 1, + partitionSelectionEpsilon: 1, + partitionSelectionDelta: 0.3, // entriesPerPartition=1 yields a 30% chance of emitting any particular partition // (since δ_emit=0.3). entriesPerPartition: 1, @@ -948,7 +998,13 @@ func TestSumPartitionSelectionInt(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) // Run SumPerKey on triples - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MinValue: 0, MaxValue: 1, NoiseKind: tc.noiseKind, MaxPartitionsContributed: 1}) got = beam.ParDo(s, testutils.KVToPairII64, got) @@ -991,7 +1047,13 @@ func TestSumPartitionSelectionFloat(t *testing.T) { col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) // Run SumPerKey on triples - pcol := MakePrivate(s, col, NewPrivacySpec(tc.epsilon, tc.delta)) + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: tc.aggregationEpsilon, + AggregationDelta: tc.aggregationDelta, + PartitionSelectionEpsilon: tc.partitionSelectionEpsilon, + PartitionSelectionDelta: tc.partitionSelectionDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) got := SumPerKey(s, pcol, SumParams{MinValue: 0.0, MaxValue: 1.0, MaxPartitionsContributed: 1, NoiseKind: tc.noiseKind}) got = beam.ParDo(s, testutils.KVToPairIF64, got) @@ -1066,10 +1128,16 @@ func TestSumPerKeyReturnsNonNegativeFloat64(t *testing.T) { } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - // Using a low epsilon, a high delta, and a high maxValue here to add a - // lot of noise while keeping partitions. - epsilon, delta, maxValue := 0.001, 0.999, 1e8 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + // Using a low epsilon & delta for noise, a high epsilon &delta for partition selection, and a high maxValue + // here to add a lot of noise while keeping partitions. + lowEps, lowDel, highEps, highDelta, maxValue := 0.001, 1e-10, 10.0, 0.999, 1e8 + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: lowEps, + AggregationDelta: lowDel, + PartitionSelectionEpsilon: highEps, + PartitionSelectionDelta: highDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) sums := SumPerKey(s, pcol, SumParams{MinValue: 0, MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}}) values := beam.DropKey(s, sums) @@ -1107,9 +1175,13 @@ func TestSumPerKeyWithPartitionsReturnsNonNegativeFloat64(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - // Using a low epsilon, a high delta, and a high maxValue. - epsilon, delta, maxValue := 0.001, 0.999, 1e8 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + // Using a low epsilon & delta and a high maxValue here to add a lot of noise. + eps, del, maxValue := 0.001, 1e-10, 1e8 + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: eps, + AggregationDelta: del, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) sumParams := SumParams{MinValue: 0, MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}, PublicPartitions: publicPartitions} sums := SumPerKey(s, pcol, sumParams) @@ -1129,10 +1201,16 @@ func TestSumPerKeyReturnsNonNegativeInt64(t *testing.T) { } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - // Using a low epsilon, a high delta, and a high maxValue here to add a - // lot of noise while keeping partitions. - epsilon, delta, maxValue := 0.001, 0.999, 1e8 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + // Using a low epsilon & delta for noise, a high epsilon &delta for partition selection, and a high maxValue + // here to add a lot of noise while keeping partitions. + lowEps, lowDel, highEps, highDelta, maxValue := 0.001, 1e-10, 10.0, 0.999, 1e8 + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: lowEps, + AggregationDelta: lowDel, + PartitionSelectionEpsilon: highEps, + PartitionSelectionDelta: highDelta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sums := SumPerKey(s, pcol, SumParams{MinValue: 0, MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}}) values := beam.DropKey(s, sums) @@ -1170,9 +1248,13 @@ func TestSumPerKeyWithPartitionsReturnsNonNegativeInt64(t *testing.T) { publicPartitions = beam.CreateList(s, publicPartitionsSlice) } - // Using a low epsilon, a high delta, and a high maxValue here. - epsilon, delta, maxValue := 0.001, 0.999, 1e8 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + // Using a low epsilon & delta and a high maxValue here to add a lot of noise. + eps, del, maxValue := 0.001, 1e-10, 1e8 + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: eps, + AggregationDelta: del, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sumParams := SumParams{MinValue: 0, MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}, PublicPartitions: publicPartitions} sums := SumPerKey(s, pcol, sumParams) @@ -1193,10 +1275,15 @@ func TestSumPerKeyNoClampingForNegativeMinValueFloat64(t *testing.T) { } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithFloatValue, col) - // Using `typical` privacy parameters with a high delta to keep - // partitions. - epsilon, delta, minValue, maxValue := 0.1, 0.999, -100.0, 100.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + // Using `typical` privacy parameters with a high delta to keep partitions. + epsilon, lowDelta, highDelta, minValue, maxValue := 0.1, 1e-10, 0.999, -100.0, 100.0 + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + AggregationDelta: lowDelta, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: highDelta, + })) pcol = ParDo(s, testutils.TripleWithFloatValueToKV, pcol) sums := SumPerKey(s, pcol, SumParams{MinValue: minValue, MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}}) values := beam.DropKey(s, sums) @@ -1223,10 +1310,15 @@ func TestSumPerKeyNoClampingForNegativeMinValueInt64(t *testing.T) { } p, s, col := ptest.CreateList(triples) col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - // Using `typical` privacy parameters with a high delta to keep - // partitions. - epsilon, delta, minValue, maxValue := 0.1, 0.999, -100.0, 100.0 - pcol := MakePrivate(s, col, NewPrivacySpec(epsilon, delta)) + // Using `typical` privacy parameters with a high delta to keep partitions. + epsilon, lowDelta, highDelta, minValue, maxValue := 0.1, 1e-10, 0.999, -100.0, 100.0 + pcol := MakePrivate(s, col, privacySpec(t, + PrivacySpecParams{ + AggregationEpsilon: epsilon, + AggregationDelta: lowDelta, + PartitionSelectionEpsilon: epsilon, + PartitionSelectionDelta: highDelta, + })) pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) sums := SumPerKey(s, pcol, SumParams{MinValue: minValue, MaxValue: maxValue, MaxPartitionsContributed: 1, NoiseKind: GaussianNoise{}}) values := beam.DropKey(s, sums) @@ -1622,161 +1714,18 @@ func TestCheckSumPerKeyParams(t *testing.T) { } } -// Checks that SumPerKey returns a correct answer with int values. The logic -// mirrors TestSumPerKeyNoNoiseInt, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestSumPerKeyNoNoiseIntTemp(t *testing.T) { - triples := testutils.ConcatenateTriplesWithIntValue( - testutils.MakeSampleTripleWithIntValue(7, 0), - testutils.MakeSampleTripleWithIntValue(31, 1), - testutils.MakeSampleTripleWithIntValue(99, 2)) - result := []testutils.PairII64{ - // The sum for partition 0 is 7, which is below the threshold of 31: so it should be dropped. - {1, 31}, - {2, 99}, - } - p, s, col, want := ptest.CreateList2(triples, result) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - - // ε=50, δ=10⁻²⁰⁰ and l0Sensitivity=3 gives a threshold of ≈31. - // We have 3 partitions. So, to get an overall flakiness of 10⁻²³, - // we need to have each partition pass with 1-10⁻²⁴ probability (k=24). - epsilon, delta, k, l1Sensitivity := 50.0, 1e-200, 24.0, 3.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ - AggregationEpsilon: epsilon, - AggregationDelta: 0, - PartitionSelectionEpsilon: epsilon, - PartitionSelectionDelta: delta, - }) - if err != nil { - t.Fatalf("TestSumPerKeyNoNoiseIntTemp: %v", err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - got := SumPerKey(s, pcol, SumParams{MaxPartitionsContributed: 3, MinValue: 0.0, MaxValue: 1, NoiseKind: LaplaceNoise{}}) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestSumPerKeyNoNoiseIntTemp: SumPerKey(%v) = %v, expected %v: %v", col, got, want, err) - } -} - -// Checks that SumPerKey with partitions returns a correct answer with int values. The logic -// mirrors TestSumPerKeyWithPartitionsNoNoiseInt, but with the new privacy budget API where -// clients specify aggregation budget and partition selection budget separately. -func TestSumPerKeyWithPartitionsNoNoiseIntTemp(t *testing.T) { - for _, tc := range []struct { - minValue float64 - maxValue float64 - lInfSensitivity float64 - inMemory bool - }{ - { - minValue: 1.0, - maxValue: 3.0, - lInfSensitivity: 3.0, - inMemory: false, - }, - { - minValue: 1.0, - maxValue: 3.0, - lInfSensitivity: 3.0, - inMemory: true, - }, - { - minValue: 0.0, - maxValue: 2.0, - lInfSensitivity: 2.0, - inMemory: false, - }, - { - minValue: 0.0, - maxValue: 2.0, - lInfSensitivity: 2.0, - inMemory: true, - }, - { - minValue: -10.0, - maxValue: 10.0, - lInfSensitivity: 10.0, - inMemory: false, - }, - { - minValue: -10.0, - maxValue: 10.0, - lInfSensitivity: 10.0, - inMemory: true, - }, - } { - // ID:1 contributes to 8 partitions, only 3 of which are public partitions. So none - // should be dropped with maxPartitionsContributed=3. - // Tests that cross-partition contribution bounding happens after non-public partitions are dropped. - triples := testutils.ConcatenateTriplesWithIntValue( - testutils.MakeSampleTripleWithIntValue(7, 0), - testutils.MakeSampleTripleWithIntValue(58, 1), - testutils.MakeSampleTripleWithIntValue(99, 2), - testutils.MakeSampleTripleWithIntValue(1, 5), - testutils.MakeSampleTripleWithIntValue(1, 6), - testutils.MakeSampleTripleWithIntValue(1, 7), - testutils.MakeSampleTripleWithIntValue(1, 8), - testutils.MakeSampleTripleWithIntValue(1, 9)) - - publicPartitionsSlice := []int{0, 2, 5, 10, 11} - // Keep partitions 0, 2 and 5. - // drop partition 6 to 9. - // Add partitions 10 and 11. - result := []testutils.PairII64{ - {0, 7}, - {2, 99}, - {5, 1}, - {10, 0}, - {11, 0}, - } - - p, s, col, want := ptest.CreateList2(triples, result) - col = beam.ParDo(s, testutils.ExtractIDFromTripleWithIntValue, col) - - var publicPartitions any - if tc.inMemory { - publicPartitions = publicPartitionsSlice - } else { - publicPartitions = beam.CreateList(s, publicPartitionsSlice) - } - - // We have ε=50, δ=0, and l1Sensitivity=3*lInfSensitivity, to scale the noise with different MinValues and MaxValues. - epsilon, delta, k, l1Sensitivity := 50.0, 0.0, 25.0, 3.0*tc.lInfSensitivity - spec, err := NewPrivacySpecTemp(PrivacySpecParams{AggregationEpsilon: epsilon, AggregationDelta: delta}) - if err != nil { - t.Fatalf("TestSumPerKeyWithPartitionsNoNoiseIntTemp test case=+%v: %v", tc, err) - } - pcol := MakePrivate(s, col, spec) - pcol = ParDo(s, testutils.TripleWithIntValueToKV, pcol) - sumParams := SumParams{MaxPartitionsContributed: 3, MinValue: tc.minValue, MaxValue: tc.maxValue, NoiseKind: LaplaceNoise{}, PublicPartitions: publicPartitions} - got := SumPerKey(s, pcol, sumParams) - want = beam.ParDo(s, testutils.PairII64ToKV, want) - testutils.ApproxEqualsKVInt64(t, s, got, want, testutils.RoundedLaplaceTolerance(k, l1Sensitivity, epsilon)) - if err := ptest.Run(p); err != nil { - t.Errorf("TestSumPerKeyWithPartitionsNoNoiseIntTemp test case=+%v: SumPerKey(%v) = %v, expected %v: %v", tc, col, got, want, err) - } - } -} - func TestSumPerKeyPreThresholdingInt(t *testing.T) { // Arrange // ε=10⁹, δ≈1 and l0Sensitivity=1 means partitions meeting the preThreshold should be kept. // We have 1 partition. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 1e9, dpagg.LargestRepresentableDelta, 23.0, 1.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ + spec := privacySpec(t, PrivacySpecParams{ AggregationEpsilon: epsilon, - AggregationDelta: 0, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta, PreThreshold: 10, }) - if err != nil { - t.Fatalf("TestSumPerKeyPreThresholdingInt: %v", err) - } triples := testutils.ConcatenateTriplesWithIntValue( testutils.MakeSampleTripleWithIntValue(9, 0), testutils.MakeTripleWithIntValueStartingFromKey(9, 10, 1, 1)) @@ -1807,16 +1756,12 @@ func TestSumPerKeyPreThresholdingFloat(t *testing.T) { // We have 1 partition. So, to get an overall flakiness of 10⁻²³, // we need to have each partition pass with 1-10⁻²³ probability (k=23). epsilon, delta, k, l1Sensitivity := 1e9, dpagg.LargestRepresentableDelta, 23.0, 1.0 - spec, err := NewPrivacySpecTemp(PrivacySpecParams{ + spec := privacySpec(t, PrivacySpecParams{ AggregationEpsilon: epsilon, - AggregationDelta: 0, PartitionSelectionEpsilon: epsilon, PartitionSelectionDelta: delta, PreThreshold: 10, }) - if err != nil { - t.Fatalf("TestSumPerKeyPreThresholdingFloat: %v", err) - } triples := testutils.ConcatenateTriplesWithFloatValue( testutils.MakeSampleTripleWithFloatValue(9, 0), testutils.MakeTripleWithFloatValueStartingFromKey(9, 10, 1, 1))