diff --git a/rules/python/lang/regex_using_user_input.yml b/rules/python/lang/regex_using_user_input.yml new file mode 100644 index 00000000..803145fd --- /dev/null +++ b/rules/python/lang/regex_using_user_input.yml @@ -0,0 +1,90 @@ +imports: + - python_shared_common_user_input + - python_shared_lang_import1 +patterns: + - pattern: $<RE>($<USER_INPUT>$<...>) + filters: + - variable: RE + detection: python_shared_lang_import1 + filters: + - variable: MODULE1 + values: + - re + - re2 + - regex + - variable: NAME + values: + - compile + - findall + - finditer + - fullmatch + - match + - search + - split + - splititer + - sub + - subn + - variable: USER_INPUT + detection: python_lang_regex_using_user_input_unsanitized_input + scope: result + - pattern: $<SRE_PARSE>($<USER_INPUT>) + filters: + - variable: SRE_PARSE + detection: python_shared_lang_import1 + filters: + - variable: MODULE1 + values: [sre_parse] + - variable: NAME + values: [parse] + - variable: USER_INPUT + detection: python_lang_regex_using_user_input_unsanitized_input + scope: result +auxiliary: + - id: python_lang_regex_using_user_input_unsanitized_input + sanitizer: python_lang_regex_using_user_input_sanitizer + patterns: + - pattern: $<USER_INPUT> + filters: + - variable: USER_INPUT + detection: python_shared_common_user_input + scope: cursor + - id: python_lang_regex_using_user_input_sanitizer + patterns: + - pattern: $<RE_ESCAPE>($<...>$<!>$<_>$<...>) + filters: + - variable: RE_ESCAPE + detection: python_shared_lang_import1 + scope: cursor + filters: + - variable: MODULE1 + values: + - re + - re2 + - regex + - variable: NAME + values: [escape] +languages: + - python +severity: medium +metadata: + description: Unsanitized user input in regular expression + remediation_message: |- + ## Description + + Creating regular expressions from user input can lead to a vulnerability known as Regular Expression Denial of Service (ReDoS). This issue arises because some regular expressions can be processed with exponential time complexity. 
When attackers exploit this, it can significantly drain CPU resources, effectively causing a denial of service. + + ## Remediations + + - **Do not** construct regular expressions directly from user inputs. This can make your application susceptible to ReDoS attacks. + - **Do** consider using the `regex` module for regular expressions in Python. Unlike the `re` module, `regex` allows you to set a timeout for regular expressions to prevent excessive CPU usage. This can help mitigate the impact of potential ReDoS attacks. + ```python + regex.match(pattern, string, timeout=0.1) + ``` + + ## References + + - [OWASP ReDoS attacks explained](https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS) + cwe_id: + - 1287 + id: python_lang_regex_using_user_input + documentation_url: https://docs.bearer.com/reference/rules/python_lang_regex_using_user_input diff --git a/tests/python/lang/regex_using_user_input/test.js b/tests/python/lang/regex_using_user_input/test.js new file mode 100644 index 00000000..5e34c3f1 --- /dev/null +++ b/tests/python/lang/regex_using_user_input/test.js @@ -0,0 +1,20 @@ +const { + createNewInvoker, + getEnvironment, +} = require("../../../helper.js") +const { ruleId, ruleFile, testBase } = getEnvironment(__dirname) + +describe(ruleId, () => { + const invoke = createNewInvoker(ruleId, ruleFile, testBase) + + test("regex_using_user_input", () => { + const testCase = "main.py" + + const results = invoke(testCase) + + expect(results).toEqual({ + Missing: [], + Extra: [] + }) + }) +}) \ No newline at end of file diff --git a/tests/python/lang/regex_using_user_input/testdata/main.py b/tests/python/lang/regex_using_user_input/testdata/main.py new file mode 100644 index 00000000..293aefa5 --- /dev/null +++ b/tests/python/lang/regex_using_user_input/testdata/main.py @@ -0,0 +1,37 @@ +import re +import re2 as some_regex +import regex +from sre_parse import parse as my_sre_parse + +def bad(): + user_regex = input("enter pattern:") + 
+ pattern = fr"^\w{user_regex}$" + # bearer:expected python_lang_regex_using_user_input + re.compile(pattern).match("hello world") + + pattern2 = r"^\w+" + user_regex + r"$" + # bearer:expected python_lang_regex_using_user_input + some_regex.match(pattern2, "hello world") + + pattern3 = r"^\w{0}$".format(user_regex) + # bearer:expected python_lang_regex_using_user_input + regex.compile(pattern3).match("hello world") + + # bearer:expected python_lang_regex_using_user_input + my_sre_parse(pattern3).dump() + +def ok(): + user_regex = input("enter pattern: ") + user_string = input("enter string: ") + + pattern = r"^\w+" + re.escape(user_regex) + r"$" + pattern2 = fr"^\w{re.escape(user_regex)}$" + pattern3 = r"^\w{}$".format(re.escape(user_regex)) + + re.compile(pattern).match("hello world") + regex.match(pattern2, user_string) + + some_regex.compile(pattern3).match("hello world") + + my_sre_parse(pattern3).dump() \ No newline at end of file