From 571b47badb3e33633ae7d2ab709920b03c8058ca Mon Sep 17 00:00:00 2001
From: elsapet
Date: Mon, 3 Jun 2024 09:56:01 +0200
Subject: [PATCH] feat(python): add Google BigQuery third party rule (CWE-201)

---
 rules/python/third_parties/bigquery.yml       | 63 +++++++++++++++++++
 tests/python/third_parties/bigquery/test.js   | 20 ++++++
 .../third_parties/bigquery/testdata/main.py   | 20 ++++++
 3 files changed, 103 insertions(+)
 create mode 100644 rules/python/third_parties/bigquery.yml
 create mode 100644 tests/python/third_parties/bigquery/test.js
 create mode 100644 tests/python/third_parties/bigquery/testdata/main.py

diff --git a/rules/python/third_parties/bigquery.yml b/rules/python/third_parties/bigquery.yml
new file mode 100644
index 00000000..0d669ad7
--- /dev/null
+++ b/rules/python/third_parties/bigquery.yml
@@ -0,0 +1,63 @@
+imports:
+  - python_shared_lang_datatype
+  - python_shared_lang_instance
+  - python_shared_lang_import3
+patterns:
+  - pattern: |
+      $<CLIENT>.$<METHOD>($<...>$<DATA_TYPE>$<...>)
+    filters:
+      - variable: CLIENT
+        detection: python_third_parties_bigquery_client
+        scope: cursor
+      - variable: METHOD
+        values:
+          - insert_rows
+          - insert_rows_json
+      - variable: DATA_TYPE
+        detection: python_shared_lang_datatype
+        scope: result
+auxiliary:
+  - id: python_third_parties_bigquery_client
+    patterns:
+      - pattern: $<CLIENT>
+        filters:
+          - variable: CLIENT
+            detection: python_shared_lang_instance
+            scope: cursor
+            filters:
+              - variable: CLASS
+                detection: python_shared_lang_import3
+                scope: cursor
+                filters:
+                  - variable: MODULE1
+                    values: [google]
+                  - variable: MODULE2
+                    values: [cloud]
+                  - variable: MODULE3
+                    values: [bigquery]
+                  - variable: NAME
+                    values: [Client]
+languages:
+  - python
+severity: medium
+skip_data_types:
+  - Unique Identifier
+metadata:
+  description: Leakage of sensitive data to BigQuery
+  remediation_message: |
+    ## Description
+
+    Leaking sensitive data to third-party data tools like BigQuery is a common cause of data leaks and can lead to data breaches.
+
+    ## Remediations
+
+    - **Do** ensure all sensitive data is removed when sending data to third-party services like BigQuery.
+
+    ## References
+    - [Python Client for Google BigQuery](https://github.com/googleapis/python-bigquery)
+    - [BigQuery docs](https://cloud.google.com/python/docs/reference/bigquery/latest)
+  cwe_id:
+    - 201
+  associated_recipe: Google Cloud BigQuery
+  id: python_third_parties_bigquery
+  documentation_url: https://docs.bearer.com/reference/rules/python_third_parties_bigquery
diff --git a/tests/python/third_parties/bigquery/test.js b/tests/python/third_parties/bigquery/test.js
new file mode 100644
index 00000000..84168897
--- /dev/null
+++ b/tests/python/third_parties/bigquery/test.js
@@ -0,0 +1,20 @@
+const {
+  createNewInvoker,
+  getEnvironment,
+} = require("../../../helper.js")
+const { ruleId, ruleFile, testBase } = getEnvironment(__dirname)
+
+describe(ruleId, () => {
+  const invoke = createNewInvoker(ruleId, ruleFile, testBase)
+
+  test("bigquery", () => {
+    const testCase = "main.py"
+
+    const results = invoke(testCase)
+
+    expect(results).toEqual({
+      Missing: [],
+      Extra: []
+    })
+  })
+})
\ No newline at end of file
diff --git a/tests/python/third_parties/bigquery/testdata/main.py b/tests/python/third_parties/bigquery/testdata/main.py
new file mode 100644
index 00000000..eaa14926
--- /dev/null
+++ b/tests/python/third_parties/bigquery/testdata/main.py
@@ -0,0 +1,20 @@
+from google.cloud import bigquery
+
+def insert_user():
+    client = bigquery.Client()
+    dataset_ref = client.dataset("my_dataset")
+    table_ref = dataset_ref.table("my_table")
+
+    schema = [
+        bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
+        bigquery.SchemaField("username", "INTEGER", mode="REQUIRED"),
+        bigquery.SchemaField("email", "INTEGER", mode="REQUIRED"),
+    ]
+
+    rows = [
+        { "id": user.id, "username": user.username, "email": user.email}
+    ]
+
+    # bearer:expected python_third_parties_bigquery
+    errors = client.insert_rows(table_ref, rows, selected_fields=schema)
+    print("Insert errors: ", errors)