Skip to content

Commit

Permalink
Add wrapper to get catalog for ZetaSQL, improvements in accounting.
Browse files Browse the repository at this point in the history
ZetaSQL:
- Add wrapper to get catalog for ZetaSQL

DP Accounting:
- Enable the RepeatAndSelect tests for the RDP accountant, which were accidentally disabled because they were not defined inside the test class
- Add checks for invalid sampling rate and noise multiplier in RDP accountant

Change-Id: If3418d1dd2a8474c35e4f9ef999fe5d0e27f52db
GitOrigin-RevId: 3413fff081f0f496ed987fc44e8ab7f6c7cda05f
  • Loading branch information
Differential Privacy Team authored and miracvbasaran committed Aug 20, 2024
1 parent 05cd41f commit 156c8fb
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 71 deletions.
1 change: 1 addition & 0 deletions examples/zetasql/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ cc_binary(
"@com_google_cc_differential_privacy//base:status",
"@com_google_protobuf//:protobuf",
"@com_google_zetasql//zetasql/public:analyzer_options",
"@com_google_zetasql//zetasql/public:builtin_function_options",
"@com_google_zetasql//zetasql/public:catalog",
"@com_google_zetasql//zetasql/public:language_options",
"@com_google_zetasql//zetasql/public:options_cc_proto",
Expand Down
19 changes: 11 additions & 8 deletions examples/zetasql/execute_query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include "google/protobuf/descriptor.h"
#include "zetasql/public/analyzer_options.h"
#include "zetasql/public/builtin_function_options.h"
#include "zetasql/public/catalog.h"
#include "zetasql/public/language_options.h"
#include "zetasql/public/options.pb.h"
Expand Down Expand Up @@ -177,15 +178,19 @@ static std::string GetCSVFileNameFromPath(const std::string_view file_path) {
return std::string(file_name);
}

static absl::Status InitializeExecuteQueryConfig(
// Returns a mutable reference to the catalog exposed by
// `config.mutable_catalog()`. Call sites that register tables or functions go
// through this helper instead of calling `mutable_catalog()` directly.
static zetasql::SimpleCatalog& GetCatalogForConfig(
    zetasql::ExecuteQueryConfig& config) {
  zetasql::SimpleCatalog& catalog = config.mutable_catalog();
  return catalog;
}

static absl::Status InitExecuteQueryConfig(
zetasql::ExecuteQueryConfig& config) {
config.set_examine_resolved_ast_callback(
[](const zetasql::ResolvedNode* node) -> absl::Status {
auto visitor = VerifyAnonymizationParametersVisitor();
return node->Accept(&visitor);
});
config.mutable_catalog().SetDescriptorPool(
google::protobuf::DescriptorPool::generated_pool());

RETURN_IF_ERROR(SetToolModeFromFlags(config));

Expand All @@ -194,18 +199,16 @@ static absl::Status InitializeExecuteQueryConfig(

ASSIGN_OR_RETURN(std::unique_ptr<zetasql::SimpleTable> table,
zetasql::MakeTableFromCsvFile(table_name, file_path));

const std::string userid_col = absl::GetFlag(FLAGS_userid_col);
RETURN_IF_ERROR(table->SetAnonymizationInfo({userid_col}));
config.mutable_analyzer_options().set_enabled_rewrites(
{zetasql::REWRITE_ANONYMIZATION});

config.mutable_catalog().AddOwnedTable(std::move(table));
GetCatalogForConfig(config).AddOwnedTable(std::move(table));

config.mutable_analyzer_options()
.mutable_language()
->EnableMaximumLanguageFeaturesForDevelopment();
config.mutable_catalog().AddZetaSQLFunctions(
GetCatalogForConfig(config).AddZetaSQLFunctions(
config.analyzer_options().language());
return absl::OkStatus();
}
Expand All @@ -221,7 +224,7 @@ int main(int argc, char* argv[]) {
const std::string sql = absl::StrJoin(remaining_args.begin() + 1,
remaining_args.end(), " ");
zetasql::ExecuteQueryConfig config;
absl::Status status = InitializeExecuteQueryConfig(config);
absl::Status status = InitExecuteQueryConfig(config);
if (!status.ok()) {
std::cout << "ERROR: " << status << std::endl;
return 1;
Expand Down
12 changes: 11 additions & 1 deletion python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,17 @@ def _compute_rdp_poisson_subsampled_gaussian(
Returns:
The RDPs at all orders. Can be `np.inf`.
Raises:
ValueError: If q is not in [0, 1] or noise_multiplier is negative.
"""
if not 0 <= q <= 1:
raise ValueError(f'Sampling rate must be in [0, 1]. Found {q}.')

if noise_multiplier < 0:
raise ValueError(
f'Noise multiplier must be non-negative: {noise_multiplier}'
)

def compute_one_order(q, alpha):
if q == 0:
Expand Down Expand Up @@ -524,7 +534,7 @@ def _effective_gaussian_noise_multiplier(
sigma = _effective_gaussian_noise_multiplier(event.event)
if not isinstance(sigma, float):
return sigma
return (event.count * sigma**-2)**-0.5
return sigma * event.count**-0.5
else:
return event

Expand Down
106 changes: 44 additions & 62 deletions python/dp_accounting/dp_accounting/rdp/rdp_privacy_accountant_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,69 +686,51 @@ def test_repeat_and_select_pure_poisson(self, eps, mean):
self.assertAlmostEqual(accountant._orders[0], alpha)
self.assertAlmostEqual(accountant._rdp[0] / ans, 1, places=3)

@parameterized.named_parameters(('q_neg', -0.1), ('q_large', 1.1))
def test_raises_on_invalid_sampling_rate(self, q):
  """Composing a Poisson-sampled Gaussian event with q outside [0, 1] raises."""
  with self.assertRaisesRegex(ValueError, 'Sampling rate'):
    # Everything stays inside the context manager so that a ValueError from
    # any step (construction or composition) is what the assertion observes.
    accountant = rdp_privacy_accountant.RdpAccountant()
    sampled_event = dp_event.PoissonSampledDpEvent(
        q, dp_event.GaussianDpEvent(1)
    )
    accountant.compose(sampled_event)

def test_raises_on_invalid_noise_multiplier(self):
  """Composing a Gaussian event built with -1 raises a ValueError."""
  with self.assertRaisesRegex(ValueError, 'Noise multiplier'):
    # Built inside the context manager so a ValueError from any step is the
    # one matched against the 'Noise multiplier' pattern.
    accountant = rdp_privacy_accountant.RdpAccountant()
    negative_event = dp_event.GaussianDpEvent(-1)
    accountant.compose(negative_event)

# Checks the accountant's per-order RDP for a Gaussian event wrapped in
# RepeatAndSelectDpEvent(event, mean, np.inf) against reference values
# computed inline below.
# NOTE(review): indentation is not visible in this view; confirm this function
# sits inside the test class so the test runner actually collects it.
# Each case is (test name, sigma, mean).
@parameterized.named_parameters(
('small_small', 0.001, 1),
('small_med', 0.001, 1000),
('small_large', 0.001, 1e9),
('med_small', 1, 1),
('med_med', 1, 1000),
('med_large', 1, 1e9),
('large_small', 1000, 1),
('large_med', 1000, 1000),
('large_large', 1000, 1e9)
)
def test_repeat_and_select_gaussian_poisson(self, sigma, mean):
# Wrap a Gaussian event in a repeat-and-select event and compose it.
event = dp_event.GaussianDpEvent(sigma)
event = dp_event.RepeatAndSelectDpEvent(event, mean, np.inf)
accountant = rdp_privacy_accountant.RdpAccountant()
accountant.compose(event)
orders = accountant._orders
# rdp[k] is the reference value for orders[k]; np.inf where order <= 1.
rdp = []
for order in orders:
if order <= 1: # Avoid division by zero.
rdp.append(np.inf)
continue
# eps = log(1 + 1/(order - 1)).
eps = math.log1p(1/(order-1))
# NOTE(review): x, y and delta look like the delta(eps) curve of a Gaussian
# mechanism with noise multiplier sigma -- confirm against the reference.
x = (eps * sigma - 0.5/sigma)/math.sqrt(2)
y = (eps * sigma + 0.5/sigma)/math.sqrt(2)
delta = math.erfc(x)/2-math.exp(eps)*math.erfc(y)/2
rdp.append(order*0.5/(sigma**2) + mean*delta + math.log(mean)/(order - 1))
# For every order, the accountant's RDP must be at least the smallest
# reference value taken over all orders >= that order.
for i in range(len(orders)):
lb = min(rdp[j] for j in range(len(orders)) if orders[j] >= orders[i])
self.assertLessEqual(lb, accountant._rdp[i])


# Checks that a RepeatAndSelectDpEvent (mean 1, given shape) over a Gaussian
# event composes additively with optional Gaussian events before and after it.
# NOTE(review): indentation is not visible in this view; confirm this function
# sits inside the test class so the test runner actually collects it.
# Each case is (test name, sigma, sigma1, sigma2, shape); a sigma1 or sigma2 of
# 0 means the corresponding pre/post event is not composed.
@parameterized.named_parameters(
('all_0', 1, 1, 1, 0), # Compose before and after.
('all_1', 2, 3, 4, 1),
('all_2', 0.1, 0.2, 0.3, 2),
('all_inf', 1.1, 1.2, 2.1, np.inf),
('pre_0', 1, 2, 0, 0), # Compose before, but not after.
('pre_1', 1, 0.5, 0, 1),
('pre_2', 2, 1, 0, 2),
('pre_inf', 10, 0.1, 0, np.inf),
('post_0', 1, 0, 2, 0), # Compose after, but not before.
('post_1', 10, 0, 2, 1),
('post_half', 0.1, 0, 12, 0.5),
('post_inf', 6, 0, 0.2, np.inf)
)
def test_repeat_and_select_composition(self, sigma, sigma1, sigma2, shape):
pre_event = dp_event.GaussianDpEvent(sigma1)
post_event = dp_event.GaussianDpEvent(sigma2)
event = dp_event.GaussianDpEvent(sigma)
event = dp_event.RepeatAndSelectDpEvent(event, 1, shape)
accountant = rdp_privacy_accountant.RdpAccountant()
# rho accumulates 0.5 / sigma**2 for every Gaussian event that gets composed;
# the final assertion expects RDP at each order to equal order * rho.
rho = 0.5 / (sigma**2)
# Optional event composed before the repeat-and-select event.
if sigma1 > 0:
rho += 0.5 / (sigma1**2)
accountant.compose(pre_event)
accountant.compose(event)
# Optional event composed after the repeat-and-select event.
if sigma2 > 0:
rho += 0.5 / (sigma2**2)
accountant.compose(post_event)
for i in range(len(accountant._orders)):
self.assertAlmostEqual(accountant._rdp[i], accountant._orders[i] * rho)
# Checks the accountant's per-order RDP for a Gaussian event wrapped in
# RepeatAndSelectDpEvent(event, mean, np.inf) against reference values
# computed inline below.
# Each case is (test name, sigma, mean).
@parameterized.named_parameters(
('small_small', 0.001, 1),
('small_med', 0.001, 1000),
('small_large', 0.001, 1e9),
('med_small', 1, 1),
('med_med', 1, 1000),
('med_large', 1, 1e9),
('large_small', 1000, 1),
('large_med', 1000, 1000),
('large_large', 1000, 1e9),
)
def test_repeat_and_select_gaussian_poisson(self, sigma, mean):
# Wrap a Gaussian event in a repeat-and-select event and compose it.
event = dp_event.GaussianDpEvent(sigma)
event = dp_event.RepeatAndSelectDpEvent(event, mean, np.inf)
accountant = rdp_privacy_accountant.RdpAccountant()
accountant.compose(event)
orders = accountant._orders
# rdp[k] is the reference value for orders[k]; np.inf where order <= 1.
rdp = []
for order in orders:
if order <= 1: # Avoid division by zero.
rdp.append(np.inf)
continue
# eps = log(1 + 1/(order - 1)).
eps = math.log1p(1 / (order - 1))
# NOTE(review): x, y and delta look like the delta(eps) curve of a Gaussian
# mechanism with noise multiplier sigma -- confirm against the reference.
x = (eps * sigma - 0.5 / sigma) / math.sqrt(2)
y = (eps * sigma + 0.5 / sigma) / math.sqrt(2)
delta = math.erfc(x) / 2 - math.exp(eps) * math.erfc(y) / 2
rdp.append(
order * 0.5 / (sigma**2) + mean * delta + math.log(mean) / (order - 1)
)
# For every order, the accountant's RDP must be at least the smallest
# reference value taken over all orders >= that order.
for order, accountant_rdp in zip(orders, accountant._rdp):
lb = min(rdp[j] for j in range(len(orders)) if orders[j] >= order)
self.assertLessEqual(lb, accountant_rdp)


if __name__ == '__main__':
Expand Down

0 comments on commit 156c8fb

Please sign in to comment.