From 96fb2db9791f9141e21396620cd04838d1109452 Mon Sep 17 00:00:00 2001
From: elsapet
Date: Mon, 3 Jun 2024 15:45:40 +0200
Subject: [PATCH] feat(python): rule for Google Dataflow

---
 rules/python/third_parties/google_dataflow.yml     | 31 ++++++++++++++++++
 tests/python/third_parties/google_dataflow/test.js | 20 ++++++++++++
 .../third_parties/google_dataflow/testdata/main.py | 30 ++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 rules/python/third_parties/google_dataflow.yml
 create mode 100644 tests/python/third_parties/google_dataflow/test.js
 create mode 100644 tests/python/third_parties/google_dataflow/testdata/main.py

diff --git a/rules/python/third_parties/google_dataflow.yml b/rules/python/third_parties/google_dataflow.yml
new file mode 100644
index 00000000..c02b6579
--- /dev/null
+++ b/rules/python/third_parties/google_dataflow.yml
@@ -0,0 +1,31 @@
+imports:
+  - python_shared_lang_datatype
+patterns:
+  - pattern: beam.Create($<...>$<DATA_TYPE>$<...>)
+    filters:
+      - variable: DATA_TYPE
+        detection: python_shared_lang_datatype
+        scope: result
+languages:
+  - python
+severity: medium
+skip_data_types:
+  - Unique Identifier
+metadata:
+  description: Leakage of sensitive data to Google Dataflow
+  remediation_message: |
+    ## Description
+
+    Leaking sensitive data to a third-party service is a common cause of data leaks and can lead to data breaches.
+
+    ## Remediations
+
+    - **Do** ensure all sensitive data is removed when sending data to third-party services like Google Dataflow.
+
+    ## References
+    - [Google Dataflow Docs](https://cloud.google.com/dataflow/docs/overview)
+    - [Apache Beam Python SDK](https://beam.apache.org/documentation/sdks/python/)
+  cwe_id:
+    - 201
+  id: python_third_parties_google_dataflow
+  documentation_url: https://docs.bearer.com/reference/rules/python_third_parties_google_dataflow
diff --git a/tests/python/third_parties/google_dataflow/test.js b/tests/python/third_parties/google_dataflow/test.js
new file mode 100644
index 00000000..0b670b81
--- /dev/null
+++ b/tests/python/third_parties/google_dataflow/test.js
@@ -0,0 +1,20 @@
+const {
+  createNewInvoker,
+  getEnvironment,
+} = require("../../../helper.js")
+const { ruleId, ruleFile, testBase } = getEnvironment(__dirname)
+
+describe(ruleId, () => {
+  const invoke = createNewInvoker(ruleId, ruleFile, testBase)
+
+  test("google_dataflow", () => {
+    const testCase = "main.py"
+
+    const results = invoke(testCase)
+
+    expect(results).toEqual({
+      Missing: [],
+      Extra: []
+    })
+  })
+})
diff --git a/tests/python/third_parties/google_dataflow/testdata/main.py b/tests/python/third_parties/google_dataflow/testdata/main.py
new file mode 100644
index 00000000..67fcdee0
--- /dev/null
+++ b/tests/python/third_parties/google_dataflow/testdata/main.py
@@ -0,0 +1,30 @@
+# Use Apache Beam to create Dataflow pipeline into Google Cloud
+import apache_beam as beam
+from apache_beam.options.pipeline_options import PipelineOptions
+
+class bad():
+    def run():
+        beam_options = PipelineOptions(
+            runner='DataflowRunner',
+            project='my-project-id',
+            job_name='unique-job-name',
+            temp_location='gs://my-bucket/temp',
+        )
+        with beam.Pipeline(options=beam_options) as pipeline:
+            # bearer:expected python_third_parties_google_dataflow
+            (pipeline | "Create elements" >> beam.Create([user.firstname, user.lastname])
+                      | "Print elements" >> beam.Map(print))
+        # run() is called automatically
+
+class ok():
+    def run():
+        beam_options = PipelineOptions(
+            runner='DataflowRunner',
+            project='my-project-id',
+            job_name='unique-job-name',
+            temp_location='gs://my-bucket/temp',
+        )
+        with beam.Pipeline(options=beam_options) as pipeline:
+            (pipeline | "Create elements" >> beam.Create([user.uuid])
+                      | "Print elements" >> beam.Map(print))
+        # run() is called automatically