feat(python): add cwe-79 xss rules (#406)

Bearer · May 22, 2024 · 0a31e5c · 0a31e5c
1 parent 38cf721
commit 0a31e5c
Show file tree

Hide file tree

Showing 21 changed files with 634 additions and 3 deletions.
diff --git a/.github/workflows/canary_integration_tests.yml b/.github/workflows/canary_integration_tests.yml
@@ -36,6 +36,7 @@ jobs:
             "php/lang",
             "php/symfony",
             "php/third_parties",
+            "python/django",
             "python/lang",
             "go/gorilla",
             "go/gosec",

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
@@ -42,6 +42,7 @@ jobs:
             "php/lang",
             "php/symfony",
             "php/third_parties",
+            "python/django",
             "python/lang",
             "go/lang",
             "go/gosec",

diff --git a/rules/python/django/response_using_user_input.yml b/rules/python/django/response_using_user_input.yml
@@ -0,0 +1,73 @@
+imports:
+  - python_shared_common_html_user_input
+  - python_shared_django_http_response
+  - python_shared_lang_import2
+patterns:  # three sinks: response construction, .content assignment, write()/writelines()
+  - pattern: $<CLASS>($<USER_INPUT>$<...>)  # call with USER_INPUT leading the argument list
+    filters:
+      - variable: CLASS
+        detection: python_shared_lang_import2
+        scope: cursor
+        filters:  # presumably resolves django.http.HttpResponse; confirm against python_shared_lang_import2
+          - variable: MODULE1
+            values: [django]
+          - variable: MODULE2
+            values: [http]
+          - variable: NAME
+            values: [HttpResponse]
+      - variable: USER_INPUT
+        detection: python_shared_common_html_user_input
+        scope: result
+  - pattern: $<RESPONSE>.content = $<USER_INPUT>  # assignment to the content attribute of a response
+    filters:
+      - variable: RESPONSE
+        detection: python_shared_django_http_response
+        scope: cursor
+      - variable: USER_INPUT
+        detection: python_shared_common_html_user_input
+        scope: result
+  - pattern: $<RESPONSE>.$<METHOD>($<USER_INPUT>)  # method call on a response; METHOD is restricted below
+    filters:
+      - variable: RESPONSE
+        detection: python_shared_django_http_response
+        scope: cursor
+      - variable: METHOD
+        values:  # only these two method names are treated as sinks
+          - write
+          - writelines
+      - variable: USER_INPUT
+        detection: python_shared_common_html_user_input
+        scope: result
+languages:
+  - python
+metadata:
+  description: "Unsanitized user input in output stream (XSS)"
+  remediation_message: |-
+    ## Description
+
+    Cross-site scripting (XSS) vulnerabilities occur when unsanitized user input is included in web page content. This flaw can lead to malicious scripts being executed in the context of the user's browser, compromising the security of user data and interactions with the application.
+
+    ## Remediations
+
+    - **Do** use templating engines that automatically encode data based on its context.
+    - **Do** use an encoder to handle user input before incorporating it into the output stream. This step helps minimize the risk of XSS attacks by converting potentially dangerous characters into a safe format.
+      ```python
+      user_input = request.GET["user"]
+      encoded_user_input = django.utils.html.escape(user_input)
+      response = HttpResponse(encoded_user_input)
+      ```
+    - **Do** sanitize user input to remove or neutralize unwanted scripts. Sanitization goes beyond encoding by actively removing harmful content from user input before it is used in the output.
+      ```python
+      user_input = request.GET["user"]
+      sanitized_user_input = django.utils.html.strip_tags(user_input)
+      response = HttpResponse(sanitized_user_input)
+      ```
+
+    ## References
+
+    - [OWASP XSS Prevention Cheatsheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html)
+  cwe_id:
+    - 79
+  id: "python_django_response_using_user_input"
+  documentation_url: https://docs.bearer.com/reference/rules/python_django_response_using_user_input
+severity: high
diff --git a/rules/python/django/template_injection.yml b/rules/python/django/template_injection.yml
@@ -0,0 +1,50 @@
+imports:
+  - python_shared_common_user_input
+  - python_shared_lang_import2
+patterns:  # single sink: building a Template directly from user input
+  - pattern: $<CLASS>($<USER_INPUT>)  # call whose only written argument is USER_INPUT
+    filters:
+      - variable: CLASS
+        detection: python_shared_lang_import2
+        scope: cursor
+        filters:  # presumably resolves django.template.Template; confirm against python_shared_lang_import2
+          - variable: MODULE1
+            values: [django]
+          - variable: MODULE2
+            values: [template]
+          - variable: NAME
+            values: [Template]
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input  # plain user-input detector, not the HTML-sanitizer-aware one
+        scope: result
+languages:
+  - python
+metadata:
+  description: "Unsanitized user input in web page generation (XSS)"
+  remediation_message: |-
+    ## Description
+
+    Cross-Site Scripting (XSS) is a vulnerability that allows attackers to run malicious scripts in the context of a trusted web application. This can happen when an application includes untrusted data without proper validation or escaping. There are several contexts where XSS can occur, each requiring specific encoding strategies to mitigate the risk.
+
+    ## Remediations
+
+    - **Do** encode user input based on the context it is used in, such as HTML content, HTML attributes, JavaScript, and CSS contexts. This helps prevent malicious scripts from being executed.
+      ```python
+      django.utils.html.escape(user_input)
+      ```
+    - **Do** use templating engines like Django that automatically encode data based on its context.
+    - **Do** sanitize data using libraries or functions specifically designed for this purpose, especially when inserting content into a web page.
+    - **Do** separate data from code by avoiding inline scripting and event handlers. Use separate JavaScript files for event handling to minimize script injection risks.
+    - **Do not** mix server-side and client-side templating systems, as server-side systems may not escape output safely for client-side use.
+    - **Do not** encode user input before storing it in a database. Any encoding should be applied when the data is output, not before storage, to ensure that it is encoded appropriately for its context.
+
+    ## References
+
+    - [OWASP XSS Prevention Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html)
+    - [Django templates Documentation](https://docs.djangoproject.com/en/5.0/ref/templates/)
+
+  cwe_id:
+    - 79
+  id: python_django_template_injection
+  documentation_url: https://docs.bearer.com/reference/rules/python_django_template_injection
+severity: high
diff --git a/rules/python/lang/http_response_splitting.yml b/rules/python/lang/http_response_splitting.yml
@@ -0,0 +1,65 @@
+imports:
+  - python_shared_common_user_input
+  - python_shared_lang_instance
+  - python_shared_lang_import2
+patterns:  # sink: send_header() receiving user input
+  - pattern: $<HANDLER>.send_header($<...>$<USER_INPUT>$<...>)  # USER_INPUT may sit between other arguments
+    filters:
+      - variable: HANDLER
+        detection: python_shared_lang_instance
+        scope: cursor
+        filters:  # presumably an instance of http.server.BaseHTTPRequestHandler; confirm detector semantics
+          - variable: CLASS
+            detection: python_shared_lang_import2
+            scope: cursor
+            filters:
+              - variable: MODULE1
+                values: [http]
+              - variable: MODULE2
+                values: [server]
+              - variable: NAME
+                values: [BaseHTTPRequestHandler]
+      - variable: USER_INPUT
+        detection: python_lang_http_response_splitting_user_input  # auxiliary detector declared in this rule file
+        scope: result
+auxiliary:  # helper detectors referenced by id from this rule
+  - id: python_lang_http_response_splitting_user_input
+    sanitizer: python_lang_http_response_splitting_sanitizer  # attaches the CRLF-removal detector below
+    patterns:
+      - pattern: $<UNSANITIZED_USER_INPUT>
+        filters:
+          - variable: UNSANITIZED_USER_INPUT
+            detection: python_shared_common_user_input
+            scope: cursor
+  - id: python_lang_http_response_splitting_sanitizer
+    patterns:
+      - pattern: $<_>.replace($<SOURCE>, $<REPLACEMENT>)  # a .replace() call with a search and a replacement argument
+        filters:
+          - variable: SOURCE
+            string_regex: "\\r\\n|\\\\r\\\\n"  # decodes to regex \r\n|\\r\\n: a real CR+LF or the literal text \r\n
+          - not:  # the replacement must not match the same CRLF regex
+              variable: REPLACEMENT
+              string_regex: "\\r\\n|\\\\r\\\\n"
+languages:
+  - python
+severity: high
+metadata:
+  description: "Unsanitized user input in HTTP response (XSS)"
+  remediation_message: |-
+    ## Description
+
+    Including unsanitized user input in an HTTP response could allow an attacker to inject Carriage Return Line Feed (CRLF) characters into the response. An entirely attacker-controlled response can then be returned, creating a cross-site scripting (XSS) vulnerability.
+
+    ## Remediations
+
+    - **Do not** include user input in cookies or other HTTP headers without proper sanitization. This can prevent attackers from exploiting the input to manipulate the response.
+    - **Do** remove CRLF sequences from user input to mitigate the risk of response splitting and XSS attacks. Use the following code snippet as a reference for sanitizing input in Python:
+      ```python
+      user_input = query_params["data"]
+      sanitized = user_input.replace("\r\n", "")
+      self.send_header("X-Data", sanitized)
+      ```
+  cwe_id:
+    - 79
+  id: python_lang_http_response_splitting
+  documentation_url: https://docs.bearer.com/reference/rules/python_lang_http_response_splitting
diff --git a/rules/python/lang/manual_html_sanitization.yml b/rules/python/lang/manual_html_sanitization.yml
@@ -0,0 +1,43 @@
+patterns:  # flags two-argument .replace() calls whose replacement text contains an HTML entity
+  - pattern: |
+      $<_>.$<METHOD>($<_>, $<REPLACEMENT>)
+    filters:
+      - variable: METHOD
+        values:  # only the method name "replace" is considered
+          - replace
+      - variable: REPLACEMENT
+        string_regex: "&(lt|gt|apos|quot|amp);"  # matches &lt; &gt; &apos; &quot; or &amp;
+languages:
+  - python
+severity: high
+metadata:
+  description: "Usage of manual HTML sanitization (XSS)"
+  remediation_message: |-
+    ## Description
+
+    Manually sanitizing HTML is prone to mistakes and can lead to Cross-Site Scripting (XSS) vulnerabilities. This occurs when user input is not properly sanitized, allowing attackers to inject malicious scripts into web pages viewed by other users.
+
+    ## Remediations
+
+    - **Do not** manually escape HTML to sanitize user input. This method is unreliable and can easily miss certain exploits.
+      ```python
+      sanitized_value = user_input.replace('<', '&lt;').replace('>', '&gt;') # unsafe
+      html = f"<strong>{sanitized_value}</strong>"
+      ```
+    - **Do** use a trusted HTML sanitization library to handle user input safely. Libraries designed for sanitization are more reliable as they cover a wide range of XSS attack vectors.
+      ```python
+      from html_sanitizer import Sanitizer
+
+      sanitizer = Sanitizer()
+      sanitized_value = sanitizer.sanitize(user_input)
+
+      html = f"<strong>{sanitized_value}</strong>"
+      ```
+
+    ## References
+
+    - [OWASP XSS explained](https://owasp.org/www-community/attacks/xss/)
+  cwe_id:
+    - 79
+  id: python_lang_manual_html_sanitization
+  documentation_url: https://docs.bearer.com/reference/rules/python_lang_manual_html_sanitization
diff --git a/rules/python/lang/raw_html_using_user_input.yml b/rules/python/lang/raw_html_using_user_input.yml
@@ -0,0 +1,43 @@
+imports:
+  - python_shared_common_html_user_input
+patterns:  # a string that looks like HTML markup and is built from user input
+  - pattern: $<STRING>
+    filters:
+      - variable: STRING
+        string_regex: '<\w+(\s[^>]*)?>'  # an opening tag such as <h1> or <a href="...">
+      - variable: STRING
+        detection: python_shared_common_html_user_input  # same variable must also contain user input
+        scope: result
+languages:
+  - python
+severity: high
+metadata:
+  description: "Unsanitized user input in raw HTML strings (XSS)"
+  remediation_message: |-
+    ## Description
+
+    Including unsanitized user input in HTML exposes your application to cross-site scripting (XSS) attacks. This vulnerability allows attackers to inject malicious scripts into web pages viewed by other users.
+
+    ## Remediations
+
+    - **Do not** include user input directly in HTML strings. This practice can lead to XSS vulnerabilities.
+      ```python
+      html = f"<h1>{user_input}</h1>" # unsafe
+      ```
+    - **Do** use a framework or templating language that automatically handles the encoding and sanitization of user input when constructing HTML. This approach minimizes the risk of XSS attacks.
+    - **Do** sanitize user input if you must use HTML strings directly. Utilize libraries designed for input sanitization to ensure that user input does not contain malicious content.
+      ```python
+      from html_sanitizer import Sanitizer
+
+      sanitizer = Sanitizer()
+      sanitized_value = sanitizer.sanitize(user_input)
+      html = f"<h1>{sanitized_value}</h1>"
+      ```
+
+    ## References
+
+    - [OWASP Cross-Site Scripting (XSS) Cheatsheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html)
+  cwe_id:
+    - 79
+  id: python_lang_raw_html_using_user_input
+  documentation_url: https://docs.bearer.com/reference/rules/python_lang_raw_html_using_user_input
diff --git a/rules/python/shared/common/html_user_input.yaml b/rules/python/shared/common/html_user_input.yaml
@@ -0,0 +1,40 @@
+type: shared
+languages:
+  - python
+imports:
+  - python_shared_lang_import1
+  - python_shared_lang_instance
+  - python_shared_common_user_input
+  - python_shared_django_html_sanitizer
+sanitizer: python_shared_common_html_user_input_sanitizer  # auxiliary detector declared at the bottom of this file
+patterns:
+  - pattern: $<USER_INPUT>  # any expression the shared user-input detector matches
+    filters:
+      - variable: USER_INPUT
+        detection: python_shared_common_user_input
+        scope: cursor
+auxiliary:
+  - id: python_shared_common_html_user_input_sanitizer
+    patterns:  # two sanitizer shapes: the Django helper detector, or Sanitizer().sanitize(...)
+      - pattern: $<DJANGO_SANITIZER>
+        filters:
+          - variable: DJANGO_SANITIZER
+            detection: python_shared_django_html_sanitizer
+            scope: cursor_strict
+      - pattern: $<SANITIZER>.sanitize($<_>)  # .sanitize() called on a sanitizer instance
+        filters:
+          - variable: SANITIZER
+            detection: python_shared_lang_instance
+            scope: cursor
+            filters:  # presumably an instance of html_sanitizer.Sanitizer; confirm detector semantics
+              - variable: CLASS
+                detection: python_shared_lang_import1
+                scope: cursor
+                filters:
+                  - variable: MODULE1
+                    values: [html_sanitizer]
+                  - variable: NAME
+                    values: [Sanitizer]
+metadata:
+  description: "Python HTML user input."
+  id: python_shared_common_html_user_input
diff --git a/rules/python/shared/django/html_sanitizer.yml b/rules/python/shared/django/html_sanitizer.yml
@@ -0,0 +1,25 @@
+type: shared
+languages:
+  - python
+imports:
+  - python_shared_lang_import3
+patterns:  # recognizes calls to Django's HTML escaping helpers so their output counts as sanitized
+  - pattern: $<FUNCTION>($<_>)  # helper call with a single argument placeholder
+    filters:
+      - variable: FUNCTION
+        detection: python_shared_lang_import3
+        scope: cursor
+        filters:  # three module segments plus the imported name: django.utils.html.<NAME>
+          - variable: MODULE1
+            values: [django]
+          - variable: MODULE2
+            values: [utils]
+          - variable: MODULE3
+            values: [html]  # escape and strip_tags are defined in django.utils.html, not django.utils.http
+          - variable: NAME
+            values:
+              - escape
+              - strip_tags
+metadata:
+  description: "Django HTML sanitizer."
+  id: python_shared_django_html_sanitizer