From c48faae575c43d7a66914677e20789755cd68586 Mon Sep 17 00:00:00 2001
From: elsapet
Date: Thu, 30 May 2024 10:40:11 +0200
Subject: [PATCH] feat(python): deserialization with user input (CWE-502)

---
 .../lang/deserialization_of_user_input.yml    | 178 ++++++++++++++++++
 .../deserialization_of_user_input/test.js     |  20 ++
 .../testdata/main.py                          |  74 ++++++++
 3 files changed, 272 insertions(+)
 create mode 100644 rules/python/lang/deserialization_of_user_input.yml
 create mode 100644 tests/python/lang/deserialization_of_user_input/test.js
 create mode 100644 tests/python/lang/deserialization_of_user_input/testdata/main.py

diff --git a/rules/python/lang/deserialization_of_user_input.yml b/rules/python/lang/deserialization_of_user_input.yml
new file mode 100644
index 00000000..5be3e883
--- /dev/null
+++ b/rules/python/lang/deserialization_of_user_input.yml
@@ -0,0 +1,178 @@
+imports:
+  - python_shared_common_user_input
+  - python_shared_lang_import1
+  - python_shared_lang_import2
+patterns:
+  - pattern: $<PICKLE>($<USER_INPUT>$<...>)
+    filters:
+      - variable: PICKLE
+        detection: python_shared_lang_import1
+        scope: cursor
+        filters:
+          - variable: MODULE1
+            values:
+              - pickle
+              - _pickle
+              - cPickle
+          - variable: NAME
+            values:
+              - load
+              - loads
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+  - pattern: $<SHELVE>($<USER_INPUT>$<...>)
+    filters:
+      - variable: SHELVE
+        detection: python_shared_lang_import1
+        scope: cursor
+        filters:
+          - variable: MODULE1
+            values: [shelve]
+          - variable: NAME
+            values:
+              - open
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+  - pattern: $<PY_YAML>($<USER_INPUT>$<...>)
+    filters:
+      - variable: PY_YAML
+        detection: python_shared_lang_import1
+        scope: cursor
+        filters:
+          - variable: MODULE1
+            values: [yaml]
+          - variable: NAME
+            values:
+              - load
+              - load_all
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+  - pattern: $<DILL>($<USER_INPUT>$<...>)
+    filters:
+      - variable: DILL
+        detection: python_shared_lang_import1
+        scope: cursor
+        filters:
+          - variable: MODULE1
+            values: [dill]
+          - variable: NAME
+            values:
+              - load
+              - loads
+              - load_module
+              - load_session
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+  - pattern: $<MARSHAL>($<USER_INPUT>$<...>)
+    filters:
+      - variable: MARSHAL
+        detection: python_shared_lang_import1
+        scope: cursor
+        filters:
+          - variable: MODULE1
+            values: [marshal]
+          - variable: NAME
+            values:
+              - load
+              - loads
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+  - pattern: $<JSON_PICKLE>($<USER_INPUT>$<...>)
+    filters:
+      - variable: JSON_PICKLE
+        detection: python_shared_lang_import1
+        scope: cursor
+        filters:
+          - variable: MODULE1
+            values: [jsonpickle]
+          - variable: NAME
+            values:
+              - decode
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+  - pattern: $<UNPICKLER>.$<METHOD>()
+    filters:
+      - variable: UNPICKLER
+        detection: python_lang_deserialization_of_user_input_unpickler_with_user_input
+        scope: cursor
+      - variable: METHOD
+        values:
+          - load
+          - persistent_load
+  - pattern: $<UNSAFE_RUAMEL_YAML>.load($<USER_INPUT>$<...>)
+    filters:
+      - variable: UNSAFE_RUAMEL_YAML
+        detection: python_lang_deserialization_of_user_input_unsafe_ruamel_yaml
+        scope: cursor
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: result
+auxiliary:
+  - id: python_lang_deserialization_of_user_input_unpickler_with_user_input
+    patterns:
+      - pattern: $<UNPICKLER>($<USER_INPUT>$<...>)
+        filters:
+          - variable: UNPICKLER
+            detection: python_shared_lang_import1
+            scope: cursor
+            filters:
+              - variable: MODULE1
+                values:
+                  - pickle
+                  - _pickle
+                  - cPickle
+                  - dill
+              - variable: NAME
+                values: [Unpickler]
+          - variable: USER_INPUT
+            detection: python_shared_common_user_input
+            scope: result
+  - id: python_lang_deserialization_of_user_input_unsafe_ruamel_yaml
+    patterns:
+      - pattern: $<RUAMEL_YAML>($<...>typ=$<UNSAFE_TYPE>$<...>)
+        filters:
+          - variable: RUAMEL_YAML
+            detection: python_shared_lang_import2
+            scope: cursor
+            filters:
+              - variable: MODULE1
+                values: [ruamel]
+              - variable: MODULE2
+                values: [yaml]
+              - variable: NAME
+                values: [YAML]
+          - variable: UNSAFE_TYPE
+            string_regex: \A(unsafe|base)\z
+languages:
+  - python
+severity: critical
+metadata:
+  description: Unsanitized user input in deserialization method
+  remediation_message: |-
+    ## Description
+
+    Deserializing data from untrusted sources, like user inputs or request parameters, without proper verification is a security risk. Attackers can embed malicious code or payloads within serialized data. When your application deserializes this data without checks, it becomes vulnerable to attacks.
+
+    ## Remediations
+
+    - **Do not** deserialize data from untrusted sources directly. This can lead to security vulnerabilities.
+    - **Do** validate and sanitize all data before deserializing it. Ensure that the data is coming from a trusted source and is in the expected format.
+    - **Do** use data formats that are purely data-oriented and not tied to a specific programming language, such as JSON or XML, for serialization and deserialization. This approach minimizes the risk of executing malicious code during deserialization.
+    - **Do** use recommended safer alternatives wherever possible. For example, instead of PyYAML's `load` function, use the `safe_load` function which limits results to simple Python objects like integers or lists.
+      ```python
+      yaml.safe_load(some_input) # safer alternative to PyYAML load
+      ```
+
+    ## References
+
+    - [OWASP Deserialization cheat sheet](https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html)
+  cwe_id:
+    - 502
+  id: python_lang_deserialization_of_user_input
+  documentation_url: https://docs.bearer.com/reference/rules/python_lang_deserialization_of_user_input
diff --git a/tests/python/lang/deserialization_of_user_input/test.js b/tests/python/lang/deserialization_of_user_input/test.js
new file mode 100644
index 00000000..9cea744b
--- /dev/null
+++ b/tests/python/lang/deserialization_of_user_input/test.js
@@ -0,0 +1,20 @@
+const {
+  createNewInvoker,
+  getEnvironment,
+} = require("../../../helper.js")
+const { ruleId, ruleFile, testBase } = getEnvironment(__dirname)
+
+describe(ruleId, () => {
+  const invoke = createNewInvoker(ruleId, ruleFile, testBase)
+
+  test("deserialization_of_user_input", () => {
+    const testCase = "main.py"
+
+    const results = invoke(testCase)
+
+    expect(results).toEqual({
+      Missing: [],
+      Extra: []
+    })
+  })
+})
\ No newline at end of file
diff --git a/tests/python/lang/deserialization_of_user_input/testdata/main.py b/tests/python/lang/deserialization_of_user_input/testdata/main.py
new file mode 100644
index 00000000..eb0937fd
--- /dev/null
+++ b/tests/python/lang/deserialization_of_user_input/testdata/main.py
@@ -0,0 +1,74 @@
+import pickle
+
+unsafe_file = request.FILES["user_file"]
+# bearer:expected python_lang_deserialization_of_user_input
+pickle.load(unsafe_file)
+
+unsafe_bytes = input().encode()
+# bearer:expected python_lang_deserialization_of_user_input
+pickle.loads(unsafe_bytes)
+
+unpickler = pickle.Unpickler(unsafe_file)
+# bearer:expected python_lang_deserialization_of_user_input
+unpickler.load()
+# bearer:expected python_lang_deserialization_of_user_input
+unpickler.persistent_load()
+
+import _pickle as P
+# bearer:expected python_lang_deserialization_of_user_input
+P.load(unsafe_file)
+
+from yaml import load, Loader
+# bearer:expected python_lang_deserialization_of_user_input
+data = load(unsafe_file, Loader=Loader)
+
+unsafe_filepath = request.GET.get("ext_filepath")
+stream = file(unsafe_filepath)
+# bearer:expected python_lang_deserialization_of_user_input
+data = load(stream)
+
+import jsonpickle
+unsafe_string = request.GET.get("external_json_tr")
+# bearer:expected python_lang_deserialization_of_user_input
+danger = jsonpickle.decode(unsafe_string, safe=True)
+
+import dill
+# bearer:expected python_lang_deserialization_of_user_input
+dill.load(unsafe_file)
+
+import shelve
+# bearer:expected python_lang_deserialization_of_user_input
+shelve.open(unsafe_file, flag="c")
+
+from ruamel.yaml import YAML
+yaml = YAML(typ="base")
+# bearer:expected python_lang_deserialization_of_user_input
+yaml.load(unsafe_file)
+
+import marshal
+# bearer:expected python_lang_deserialization_of_user_input
+res = marshal.load(unsafe_file)
+# bearer:expected python_lang_deserialization_of_user_input
+res2 = marshal.loads(unsafe_bytes)
+
+# ok
+safe_file = "/some/known/file.txt"
+pickle.load(safe_file)
+safe_bytes = "hello".encode()
+pickle.loads(safe_bytes)
+
+unpickler = pickle.Unpickler(safe_file)
+unpickler.load()
+unpickler.persistent_load()
+
+data = load(safe_file, Loader=Loader)
+
+jsonpickle.decode("hello")
+
+yaml = YAML(typ="safe")
+yaml.load(unsafe_file) # fine - type is safe
+
+yaml2 = YAML()
+yaml2.load(unsafe_file) # fine - default is rt (round-trip)
+
+marshal.load(safe_file)