Skip to content

Commit

Permalink
feat(python): deserialization with user input (CWE-502)
Browse files Browse the repository at this point in the history
  • Loading branch information
elsapet committed May 30, 2024
1 parent 867150a commit c48faae
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 0 deletions.
178 changes: 178 additions & 0 deletions rules/python/lang/deserialization_of_user_input.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
imports:
- python_shared_common_user_input
- python_shared_lang_import1
- python_shared_lang_import2
# Every entry below matches a call into a deserializer that can execute
# attacker-controlled content. Unless noted otherwise, the first argument is
# captured as $<USER_INPUT> and must be matched by
# python_shared_common_user_input; $<...> stands for the remaining arguments.
patterns:
  # pickle family: load / loads from pickle, _pickle or cPickle.
  - pattern: $<PICKLE>($<USER_INPUT>$<...>)
    filters:
      - variable: PICKLE
        detection: python_shared_lang_import1
        scope: cursor
        filters:
          - variable: MODULE1
            values:
              - pickle
              - _pickle
              - cPickle
          - variable: NAME
            values:
              - load
              - loads
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
  # shelve.open with a user-controlled first argument.
  - pattern: $<SHELVE>($<USER_INPUT>$<...>)
    filters:
      - variable: SHELVE
        detection: python_shared_lang_import1
        scope: cursor
        filters:
          - variable: MODULE1
            values: [shelve]
          - variable: NAME
            values:
              - open
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
  # PyYAML: yaml.load / yaml.load_all.
  - pattern: $<PY_YAML>($<USER_INPUT>$<...>)
    filters:
      - variable: PY_YAML
        detection: python_shared_lang_import1
        scope: cursor
        filters:
          - variable: MODULE1
            values: [yaml]
          - variable: NAME
            values:
              - load
              - load_all
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
  # dill: load, loads, load_module and load_session.
  - pattern: $<DILL>($<USER_INPUT>$<...>)
    filters:
      - variable: DILL
        detection: python_shared_lang_import1
        scope: cursor
        filters:
          - variable: MODULE1
            values: [dill]
          - variable: NAME
            values:
              - load
              - loads
              - load_module
              - load_session
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
  # marshal: load / loads.
  - pattern: $<MARSHAL>($<USER_INPUT>$<...>)
    filters:
      - variable: MARSHAL
        detection: python_shared_lang_import1
        scope: cursor
        filters:
          - variable: MODULE1
            values: [marshal]
          - variable: NAME
            values:
              - load
              - loads
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
  # jsonpickle.decode.
  - pattern: $<JSON_PICKLE>($<USER_INPUT>$<...>)
    filters:
      - variable: JSON_PICKLE
        detection: python_shared_lang_import1
        scope: cursor
        filters:
          - variable: MODULE1
            values: [jsonpickle]
          - variable: NAME
            values:
              - decode
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
  # Argument-less load() / persistent_load() on a receiver matched by the
  # auxiliary "unpickler_with_user_input" detection; the user input is checked
  # there (on the Unpickler constructor call), not on this call.
  - pattern: $<UNPICKLER>.$<METHOD>()
    filters:
      - variable: UNPICKLER
        detection: python_lang_deserialization_of_user_input_unpickler_with_user_input
        scope: cursor
      - variable: METHOD
        values:
          - load
          - persistent_load
  # .load(...) on a receiver matched by the auxiliary "unsafe_ruamel_yaml"
  # detection. The first argument must be captured as USER_INPUT: the filter
  # below is keyed on that name, so any other metavariable name would leave
  # the argument unconstrained by the user-input detection.
  - pattern: $<UNSAFE_RUAMEL_YAML>.load($<USER_INPUT>$<...>)
    filters:
      - variable: UNSAFE_RUAMEL_YAML
        detection: python_lang_deserialization_of_user_input_unsafe_ruamel_yaml
        scope: cursor
      - variable: USER_INPUT
        detection: python_shared_common_user_input
        scope: result
# Helper detections referenced by id from the main patterns of this rule.
auxiliary:
  # Construction of an Unpickler (from pickle, _pickle, cPickle or dill)
  # whose first argument is matched by python_shared_common_user_input.
  - id: python_lang_deserialization_of_user_input_unpickler_with_user_input
    patterns:
      - pattern: $<UNPICKLER>($<USER_INPUT>$<...>)
        filters:
          - variable: UNPICKLER
            detection: python_shared_lang_import1
            scope: cursor
            filters:
              - variable: MODULE1
                values: [pickle, _pickle, cPickle, dill]
              - variable: NAME
                values:
                  - Unpickler
          - variable: USER_INPUT
            detection: python_shared_common_user_input
            scope: result
  # Construction of ruamel.yaml's YAML with a `typ=` keyword argument whose
  # string value is exactly "unsafe" or "base".
  - id: python_lang_deserialization_of_user_input_unsafe_ruamel_yaml
    patterns:
      - pattern: $<RUAMEL_YAML>($<...>typ=$<UNSAFE_TYPE>$<...>)
        filters:
          - variable: RUAMEL_YAML
            detection: python_shared_lang_import2
            scope: cursor
            filters:
              - variable: MODULE1
                values:
                  - ruamel
              - variable: MODULE2
                values:
                  - yaml
              - variable: NAME
                values:
                  - YAML
          - variable: UNSAFE_TYPE
            string_regex: \A(unsafe|base)\z
# Rule applicability and reporting metadata.
languages: [python]
severity: critical
metadata:
  description: Unsanitized user input in deserialization method
  # Markdown text attached to findings of this rule.
  remediation_message: |-
    ## Description
    Deserializing data from untrusted sources, like user inputs or request parameters, without proper verification is a security risk. Attackers can embed malicious code or payloads within serialized data. When your application deserializes this data without checks, it becomes vulnerable to attacks.
    ## Remediations
    - **Do not** deserialize data from untrusted sources directly. This can lead to security vulnerabilities.
    - **Do** validate and sanitize all data before deserializing it. Ensure that the data is coming from a trusted source and is in the expected format.
    - **Do** use data formats that are purely data-oriented and not tied to a specific programming language, such as JSON or XML, for serialization and deserialization. This approach minimizes the risk of executing malicious code during deserialization.
    - **Do** use recommended safer alternatives wherever possible. For example, instead of PyYAML's `load` function, use the `safe_load` function which limits results to simple Python objects like integers or lists.
    ```python
    yaml.safe_load(some_input) # safer alternative to PyYAML load
    ```
    ## References
    - [OWASP Deserialization cheat sheet](https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html)
  # CWE-502, as named in the commit title for this rule.
  cwe_id: [502]
  id: python_lang_deserialization_of_user_input
  documentation_url: https://docs.bearer.com/reference/rules/python_lang_deserialization_of_user_input
20 changes: 20 additions & 0 deletions tests/python/lang/deserialization_of_user_input/test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
const {
createNewInvoker,
getEnvironment,
} = require("../../../helper.js")
// getEnvironment is given this test's directory and supplies the rule id,
// the rule file and the base path used to build the invoker below.
const environment = getEnvironment(__dirname)
const { ruleId, ruleFile, testBase } = environment

describe(ruleId, () => {
  // A single invoker, bound to this rule, is shared by the suite.
  const invoke = createNewInvoker(ruleId, ruleFile, testBase)

  test("deserialization_of_user_input", () => {
    // Presumably Missing/Extra list the differences between reported and
    // expected findings for the fixture; both must be empty to pass.
    const expectedDiff = {
      Missing: [],
      Extra: [],
    }

    expect(invoke("main.py")).toEqual(expectedDiff)
  })
})
74 changes: 74 additions & 0 deletions tests/python/lang/deserialization_of_user_input/testdata/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Fixture for the python_lang_deserialization_of_user_input rule.
# A `# bearer:expected <rule id>` marker sits directly above each statement
# that is presumably expected to be reported; statements after the `# ok`
# marker carry no such marker.
# NOTE(review): `request` is never defined or imported here; presumably it is
# recognised as a framework request object (user input source) - confirm.
import pickle

unsafe_file = request.FILES["user_file"]
# bearer:expected python_lang_deserialization_of_user_input
pickle.load(unsafe_file)

# Bytes derived from input()
unsafe_bytes = input().encode()
# bearer:expected python_lang_deserialization_of_user_input
pickle.loads(unsafe_bytes)

# Unpickler built from user input: both argument-less calls on it are marked
unpickler = pickle.Unpickler(unsafe_file)
# bearer:expected python_lang_deserialization_of_user_input
unpickler.load()
# bearer:expected python_lang_deserialization_of_user_input
unpickler.persistent_load()

# Aliased module import
import _pickle as P
# bearer:expected python_lang_deserialization_of_user_input
P.load(unsafe_file)

# PyYAML `load` imported by name
from yaml import load, Loader
# bearer:expected python_lang_deserialization_of_user_input
data = load(unsafe_file, Loader=Loader)

# User-controlled path passed through file() before reaching load()
# NOTE(review): `file()` is a Python 2 builtin; Python 3 code would use open()
unsafe_filepath = request.GET.get("ext_filepath")
stream = file(unsafe_filepath)
# bearer:expected python_lang_deserialization_of_user_input
data = load(stream)

# jsonpickle.decode is marked even though safe=True is passed
import jsonpickle
unsafe_string = request.GET.get("external_json_tr")
# bearer:expected python_lang_deserialization_of_user_input
danger = jsonpickle.decode(unsafe_string, safe=True)

import dill
# bearer:expected python_lang_deserialization_of_user_input
dill.load(unsafe_file)

import shelve
# bearer:expected python_lang_deserialization_of_user_input
shelve.open(unsafe_file, flag="c")

# ruamel.yaml instance created with typ="base"
from ruamel.yaml import YAML
yaml = YAML(typ="base")
# bearer:expected python_lang_deserialization_of_user_input
yaml.load(unsafe_file)

import marshal
# bearer:expected python_lang_deserialization_of_user_input
res = marshal.load(unsafe_file)
# bearer:expected python_lang_deserialization_of_user_input
res2 = marshal.loads(unsafe_bytes)

# ok
# Same calls as above, but with literal (non-user) arguments
safe_file = "/some/known/file.txt"
pickle.load(safe_file)
safe_bytes = "hello".encode()
pickle.loads(safe_bytes)

unpickler = pickle.Unpickler(safe_file)
unpickler.load()
unpickler.persistent_load()

data = load(safe_file, Loader=Loader)

jsonpickle.decode("hello")

# ruamel.yaml with user input, but a typ other than "unsafe"/"base"
yaml = YAML(typ="safe")
yaml.load(unsafe_file) # fine - type is safe

yaml2 = YAML()
yaml2.load(unsafe_file) # fine - default is rt (round-trip)

marshal.load(safe_file)

0 comments on commit c48faae

Please sign in to comment.