diff --git a/.envrc.example b/.envrc.example index 68e4c35d6..faa359982 100644 --- a/.envrc.example +++ b/.envrc.example @@ -7,9 +7,11 @@ export GOOGLE_MAX_ATTEMPT="5" export BEARER_EXECUTABLE_PATH="./bearer" export GITHUB_WORKSPACE="/path/to/bearer/project" export SCAN_DIR=/Users/username/OWASP +export BEARER_DISABLE_VERSION_CHECK=true export BEARER_DISABLE_DEFAULT_RULES=true export BEARER_EXTERNAL_RULE_DIR=$PWD/../bearer-rules/rules export BEARER_FORCE=true +export BEARER_IGNORE_GIT=true export BEARER_PHP_ENABLED=true export BEARER_GOLANG_ENABLED=true -export BEARER_IGNORE_GIT=true +export BEARER_PYTHON_ENABLED=true diff --git a/internal/commands/process/settings/rules.go b/internal/commands/process/settings/rules.go index a827f5190..bd266f2cc 100644 --- a/internal/commands/process/settings/rules.go +++ b/internal/commands/process/settings/rules.go @@ -33,8 +33,10 @@ var ( func GetSupportedRuleLanguages() map[string]bool { phpSupported := os.Getenv("BEARER_PHP_ENABLED") == "true" goSupported := os.Getenv("BEARER_GOLANG_ENABLED") == "true" + pythonSupported := os.Getenv("BEARER_PYTHON_ENABLED") == "true" return map[string]bool{ + "python": pythonSupported, "php": phpSupported, "go": goSupported, "java": true, diff --git a/internal/commands/process/settings/settings.go b/internal/commands/process/settings/settings.go index 3d3d24ae2..74c2eec91 100644 --- a/internal/commands/process/settings/settings.go +++ b/internal/commands/process/settings/settings.go @@ -298,6 +298,8 @@ func (rule *Rule) Language() string { return "Go" case "php": return "PHP" + case "python": + return "Python" default: return rule.Languages[0] } diff --git a/internal/languages/python/.snapshots/TestScope--scope.yml b/internal/languages/python/.snapshots/TestScope--scope.yml new file mode 100644 index 000000000..8174cba47 --- /dev/null +++ b/internal/languages/python/.snapshots/TestScope--scope.yml @@ -0,0 +1,350 @@ +high: + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 1 + full_filename: scope.py + filename: scope.py + source: + location: + start: 1 + end: 1 + column: + start: 1 + end: 37 + sink: + location: + start: 1 + end: 1 + column: + start: 1 + end: 37 + content: scopeCursor(request.GET.get('oops')) + parent_line_number: 1 + snippet: scopeCursor(request.GET.get('oops')) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_0 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_0 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 3 + full_filename: scope.py + filename: scope.py + source: + location: + start: 3 + end: 3 + column: + start: 1 + end: 49 + sink: + location: + start: 3 + end: 3 + column: + start: 1 + end: 49 + content: scopeCursor(request.GET.get('oops') if x else y) + parent_line_number: 3 + snippet: scopeCursor(request.GET.get('oops') if x else y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_1 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_1 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 5 + full_filename: scope.py + filename: scope.py + source: + location: + start: 5 + end: 5 + column: + start: 1 + end: 42 + sink: + location: + start: 5 + end: 5 + column: + start: 1 + end: 42 + content: scopeCursor(request.GET.get('oops') or y) + parent_line_number: 5 + snippet: scopeCursor(request.GET.get('oops') or y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_2 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_2 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 7 + full_filename: scope.py + filename: scope.py + source: + location: + start: 7 + end: 7 + column: + start: 1 + end: 37 + sink: + location: + start: 7 + end: 7 + column: + start: 1 + end: 37 + content: scopeNested(request.GET.get('oops')) + parent_line_number: 7 + snippet: scopeNested(request.GET.get('oops')) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_3 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_3 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 8 + full_filename: scope.py + filename: scope.py + source: + location: + start: 8 + end: 8 + column: + start: 1 + end: 41 + sink: + location: + start: 8 + end: 8 + column: + start: 1 + end: 41 + content: scopeNested(x + request.GET.get('oops')) + parent_line_number: 8 + snippet: scopeNested(x + request.GET.get('oops')) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_4 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_4 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 9 + full_filename: scope.py + filename: scope.py + source: + location: + start: 9 + end: 9 + column: + start: 1 + end: 49 + sink: + location: + start: 9 + end: 9 + column: + start: 1 + end: 49 + content: scopeNested(request.GET.get('oops') if x else y) + parent_line_number: 9 + snippet: scopeNested(request.GET.get('oops') if x else y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_5 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_5 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 10 + full_filename: scope.py + filename: scope.py + source: + location: + start: 10 + end: 10 + column: + start: 1 + end: 49 + sink: + location: + start: 10 + end: 10 + column: + start: 1 + end: 49 + content: scopeNested(x if request.GET.get('oops') else y) + parent_line_number: 10 + snippet: scopeNested(x if request.GET.get('oops') else y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_6 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_6 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 11 + full_filename: scope.py + filename: scope.py + source: + location: + start: 11 + end: 11 + column: + start: 1 + end: 42 + sink: + location: + start: 11 + end: 11 + column: + start: 1 + end: 42 + content: scopeNested(request.GET.get('oops') or y) + parent_line_number: 11 + snippet: scopeNested(request.GET.get('oops') or y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_7 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_7 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 13 + full_filename: scope.py + filename: scope.py + source: + location: + start: 13 + end: 13 + column: + start: 1 + end: 37 + sink: + location: + start: 13 + end: 13 + column: + start: 1 + end: 37 + content: scopeResult(request.GET.get('oops')) + parent_line_number: 13 + snippet: scopeResult(request.GET.get('oops')) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_8 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_8 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 14 + full_filename: scope.py + filename: scope.py + source: + location: + start: 14 + end: 14 + column: + start: 1 + end: 41 + sink: + location: + start: 14 + end: 14 + column: + start: 1 + end: 41 + content: scopeResult(x + request.GET.get('oops')) + parent_line_number: 14 + snippet: scopeResult(x + request.GET.get('oops')) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_9 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_9 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 15 + full_filename: scope.py + filename: scope.py + source: + location: + start: 15 + end: 15 + column: + start: 1 + end: 49 + sink: + location: + start: 15 + end: 15 + column: + start: 1 + end: 49 + content: scopeResult(request.GET.get('oops') if x else y) + parent_line_number: 15 + snippet: scopeResult(request.GET.get('oops') if x else y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_10 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_10 + - rule: + cwe_ids: + - "42" + id: scope_test + title: Test detection filter scopes + description: Test detection filter scopes + documentation_url: "" + line_number: 17 + full_filename: scope.py + filename: scope.py + source: + location: + start: 17 + end: 17 + column: + start: 1 + end: 42 + sink: + location: + start: 17 + end: 17 + column: + start: 1 + end: 42 + content: scopeResult(request.GET.get('oops') or y) + parent_line_number: 17 + snippet: scopeResult(request.GET.get('oops') or y) + fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_11 + old_fingerprint: bf75ffc19e7352a46ad95a1ad74cedb9_11 + diff --git a/internal/languages/python/.snapshots/flow/TestFlow--different-line.yml b/internal/languages/python/.snapshots/flow/TestFlow--different-line.yml new file mode 100644 index 000000000..4b058e325 --- /dev/null +++ b/internal/languages/python/.snapshots/flow/TestFlow--different-line.yml @@ -0,0 +1,36 @@ +high: + - rule: + cwe_ids: [] + id: rule_logger_test + title: "" + description: "" + documentation_url: "" + line_number: 2 + full_filename: different-line.py + filename: different-line.py + data_type: + category_uuid: 14124881-6b92-4fc5-8005-ea7c1c09592e + name: Fullname + category_groups: + - PII + - Personal Data + source: + location: + start: 2 + end: 2 + column: + start: 8 + end: 17 + sink: + location: + start: 3 + end: 3 + column: + start: 1 + end: 20 + content: logging.error(name) + parent_line_number: 3 + snippet: logging.error(name) + fingerprint: c94602447d6771c00b72425485a6cf6c_0 + old_fingerprint: c94602447d6771c00b72425485a6cf6c_0 + diff --git a/internal/languages/python/.snapshots/flow/TestFlow--same-line.yml b/internal/languages/python/.snapshots/flow/TestFlow--same-line.yml new file mode 100644 index 000000000..33657183f --- /dev/null +++ b/internal/languages/python/.snapshots/flow/TestFlow--same-line.yml @@ -0,0 +1,70 @@ +high: + - rule: + cwe_ids: [] + id: rule_logger_test + title: "" + description: "" + documentation_url: "" + line_number: 1 + full_filename: same-line.py + filename: same-line.py + data_type: + category_uuid: 14124881-6b92-4fc5-8005-ea7c1c09592e + name: Fullname + category_groups: + - PII + - Personal Data + source: + location: + start: 1 + end: 1 + column: + start: 15 + end: 24 + sink: + location: + start: 1 + end: 1 + column: + start: 1 + end: 25 + content: logging.error(user.name) + parent_line_number: 1 + snippet: logging.error(user.name) + fingerprint: 4b26059938bf9c55dcda8d08bcf6a4bd_0 + old_fingerprint: 4b26059938bf9c55dcda8d08bcf6a4bd_0 + - rule: + cwe_ids: [] + id: rule_logger_test + title: "" + description: "" + documentation_url: "" + line_number: 2 + full_filename: same-line.py + filename: same-line.py + data_type: + category_uuid: 14124881-6b92-4fc5-8005-ea7c1c09592e + name: Fullname + category_groups: + - PII + - Personal Data + source: + location: + start: 2 + end: 2 + column: + start: 15 + end: 24 + sink: + location: + start: 2 + end: 2 + column: + start: 1 + end: 27 + content: logging.error(user.name()) + parent_line_number: 2 + snippet: logging.error(user.name()) + fingerprint: 4b26059938bf9c55dcda8d08bcf6a4bd_1 + old_fingerprint: 4b26059938bf9c55dcda8d08bcf6a4bd_1 + diff --git a/internal/languages/python/analyzer/analyzer.go b/internal/languages/python/analyzer/analyzer.go new file mode 100644 index 000000000..6037e21ad --- /dev/null +++ b/internal/languages/python/analyzer/analyzer.go @@ -0,0 +1,181 @@ +package analyzer + +import ( + sitter "github.com/smacker/go-tree-sitter" + + "github.com/bearer/bearer/internal/scanner/ast/tree" + "github.com/bearer/bearer/internal/scanner/language" +) + +type analyzer struct { + builder *tree.Builder + scope *language.Scope +} + +func New(builder *tree.Builder) language.Analyzer { + return &analyzer{ + builder: builder, + scope: language.NewScope(nil), + } +} + +func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) error { + switch node.Type() { + case "class_definition", "block", "function_definition": + return analyzer.withScope(language.NewScope(analyzer.scope), func() error { + return visitChildren() + }) + case "augmented_assignment": + return analyzer.analyzeAugmentedAssignment(node, visitChildren) + case "assignment": + return analyzer.analyzeAssignment(node, visitChildren) + case "attribute": + return analyzer.analyzeAttribute(node, visitChildren) + case "subscript": + return analyzer.analyzeSubscript(node, visitChildren) + case "call": + return analyzer.analyzeCall(node, visitChildren) + case "argument_list": + return analyzer.analyzeGenericOperation(node, visitChildren) + case "expression_statement": + return analyzer.analyzeGenericOperation(node, visitChildren) + case "while_statement", "try_statement", "if_statement": // statements don't have results + return visitChildren() + case "conditional_expression": + return analyzer.analyzeConditional(node, visitChildren) + case "boolean_operator": + return analyzer.analyzeBoolean(node, visitChildren) + case "identifier": + return visitChildren() + default: + analyzer.builder.Dataflow(node, analyzer.builder.ChildrenFor(node)...) + return visitChildren() + } +} + +// foo += a +func (analyzer *analyzer) analyzeAugmentedAssignment(node *sitter.Node, visitChildren func() error) error { + left := node.ChildByFieldName("left") + right := node.ChildByFieldName("right") + analyzer.builder.Dataflow(node, left, right) + analyzer.lookupVariable(left) + analyzer.lookupVariable(right) + + err := visitChildren() + + if left.Type() == "identifier" { + analyzer.scope.Assign(analyzer.builder.ContentFor(left), node) + } + + return err +} + +// foo = a +func (analyzer *analyzer) analyzeAssignment(node *sitter.Node, visitChildren func() error) error { + left := node.ChildByFieldName("left") + right := node.ChildByFieldName("right") + analyzer.builder.Alias(node, right) + analyzer.lookupVariable(right) + + err := visitChildren() + + if left.Type() == "identifier" { + analyzer.scope.Assign(analyzer.builder.ContentFor(left), node) + } + + return err +} + +// foo.bar(a, b) +func (analyzer *analyzer) analyzeCall(node *sitter.Node, visitChildren func() error) error { + if receiver := node.ChildByFieldName("function"); receiver != nil { + analyzer.lookupVariable(receiver) + + analyzer.builder.Dataflow(node, receiver) + } + + if argumentsNode := node.ChildByFieldName("arguments"); argumentsNode != nil { + analyzer.builder.Dataflow(node, argumentsNode) + } + + return visitChildren() +} + +// foo.bar +func (analyzer *analyzer) analyzeAttribute(node *sitter.Node, visitChildren func() error) error { + if receiver := node.ChildByFieldName("object"); receiver != nil { + analyzer.lookupVariable(receiver) + analyzer.builder.Dataflow(node, receiver) + } + + return visitChildren() +} + +// foo["bar"] +func (analyzer *analyzer) analyzeSubscript(node *sitter.Node, visitChildren func() error) error { + objectNode := node.ChildByFieldName("value") + analyzer.builder.Dataflow(node, objectNode) + analyzer.lookupVariable(objectNode) + + return visitChildren() +} + +// x if foo else y +func (analyzer *analyzer) analyzeConditional(node *sitter.Node, visitChildren func() error) error { + condition := node.NamedChild(1) + consequence := node.NamedChild(0) + alternative := node.NamedChild(2) + + analyzer.lookupVariable(condition) + analyzer.lookupVariable(consequence) + analyzer.lookupVariable(alternative) + + analyzer.builder.Alias(node, consequence, alternative) + + return visitChildren() +} + +// a or b +func (analyzer *analyzer) analyzeBoolean(node *sitter.Node, visitChildren func() error) error { + left := node.ChildByFieldName("left") + right := node.ChildByFieldName("right") + + analyzer.lookupVariable(left) + analyzer.lookupVariable(right) + + analyzer.builder.Alias(node, left, right) + + return visitChildren() +} + +// default analysis, where the children are assumed to be data sources +func (analyzer *analyzer) analyzeGenericOperation(node *sitter.Node, visitChildren func() error) error { + children := analyzer.builder.ChildrenFor(node) + analyzer.builder.Dataflow(node, children...) + + for _, child := range children { + analyzer.lookupVariable(child) + } + + return visitChildren() +} + +func (analyzer *analyzer) withScope(newScope *language.Scope, body func() error) error { + oldScope := analyzer.scope + + analyzer.scope = newScope + err := body() + analyzer.scope = oldScope + + return err +} + +func (analyzer *analyzer) lookupVariable(node *sitter.Node) { + if node == nil || node.Type() != "identifier" { + return + } + + if pointsToNode := analyzer.scope.Lookup(analyzer.builder.ContentFor(node)); pointsToNode != nil { + analyzer.builder.Alias(node, pointsToNode) + } +} diff --git a/internal/languages/python/detectors/.snapshots/TestPythonObjects-object_class b/internal/languages/python/detectors/.snapshots/TestPythonObjects-object_class new file mode 100644 index 000000000..88c9ae923 --- /dev/null +++ b/internal/languages/python/detectors/.snapshots/TestPythonObjects-object_class @@ -0,0 +1,563 @@ +type: module +id: 0 +range: 1:1 - 8:33 +dataflow_sources: + - 1 +children: + - type: class_definition + id: 1 + range: 1:1 - 8:33 + queries: + - 2 + children: + - type: '"class"' + id: 2 + range: 1:1 - 1:6 + - type: identifier + id: 3 + range: 1:7 - 1:11 + content: User + - type: '":"' + id: 4 + range: 1:11 - 1:12 + - type: block + id: 5 + range: 2:5 - 8:33 + children: + - type: function_definition + id: 6 + range: 2:5 - 4:27 + children: + - type: '"def"' + id: 7 + range: 2:5 - 2:8 + - type: identifier + id: 8 + range: 2:9 - 2:17 + content: __init__ + - type: parameters + id: 9 + range: 2:17 - 2:39 + dataflow_sources: + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + - 21 + children: + - type: '"("' + id: 10 + range: 2:17 - 2:18 + - type: identifier + id: 11 + range: 2:18 - 2:22 + content: self + - type: '","' + id: 12 + range: 2:22 - 2:23 + - type: identifier + id: 13 + range: 2:24 - 2:28 + content: name + - type: '","' + id: 14 + range: 2:28 - 2:29 + - type: default_parameter + id: 15 + range: 2:30 - 2:38 + dataflow_sources: + - 16 + - 17 + - 18 + children: + - type: identifier + id: 16 + range: 2:30 - 2:35 + content: email + - type: '"="' + id: 17 + range: 2:35 - 2:36 + - type: string + id: 18 + range: 2:36 - 2:38 + dataflow_sources: + - 19 + - 20 + children: + - type: '"""' + id: 19 + range: 2:36 - 2:37 + - type: '"""' + id: 20 + range: 2:37 - 2:38 + - type: '")"' + id: 21 + range: 2:38 - 2:39 + - type: '":"' + id: 22 + range: 2:39 - 2:40 + - type: block + id: 23 + range: 3:9 - 4:27 + children: + - type: expression_statement + id: 24 + range: 3:9 - 3:25 + dataflow_sources: + - 25 + children: + - type: assignment + id: 25 + range: 3:9 - 3:25 + alias_of: + - 31 + children: + - type: attribute + id: 26 + range: 3:9 - 3:18 + dataflow_sources: + - 27 + queries: + - 3 + children: + - type: identifier + id: 27 + range: 3:9 - 3:13 + content: self + - type: '"."' + id: 28 + range: 3:13 - 3:14 + - type: identifier + id: 29 + range: 3:14 - 3:18 + content: name + - type: '"="' + id: 30 + range: 3:19 - 3:20 + - type: identifier + id: 31 + range: 3:21 - 3:25 + content: name + - type: expression_statement + id: 32 + range: 4:9 - 4:27 + dataflow_sources: + - 33 + children: + - type: assignment + id: 33 + range: 4:9 - 4:27 + alias_of: + - 39 + children: + - type: attribute + id: 34 + range: 4:9 - 4:19 + dataflow_sources: + - 35 + queries: + - 3 + children: + - type: identifier + id: 35 + range: 4:9 - 4:13 + content: self + - type: '"."' + id: 36 + range: 4:13 - 4:14 + - type: identifier + id: 37 + range: 4:14 - 4:19 + content: email + - type: '"="' + id: 38 + range: 4:20 - 4:21 + - type: identifier + id: 39 + range: 4:22 - 4:27 + content: email + - type: function_definition + id: 40 + range: 6:5 - 8:33 + children: + - type: '"def"' + id: 41 + range: 6:5 - 6:8 + - type: identifier + id: 42 + range: 6:9 - 6:23 + content: lowercase_name + - type: parameters + id: 43 + range: 6:23 - 6:29 + dataflow_sources: + - 44 + - 45 + - 46 + children: + - type: '"("' + id: 44 + range: 6:23 - 6:24 + - type: identifier + id: 45 + range: 6:24 - 6:28 + content: self + - type: '")"' + id: 46 + range: 6:28 - 6:29 + - type: '":"' + id: 47 + range: 6:29 - 6:30 + - type: block + id: 48 + range: 7:9 - 8:33 + children: + - type: expression_statement + id: 49 + range: 7:9 - 7:33 + dataflow_sources: + - 50 + children: + - type: call + id: 50 + range: 7:9 - 7:33 + dataflow_sources: + - 51 + - 55 + children: + - type: attribute + id: 51 + range: 7:9 - 7:22 + dataflow_sources: + - 52 + queries: + - 3 + children: + - type: identifier + id: 52 + range: 7:9 - 7:16 + content: logging + - type: '"."' + id: 53 + range: 7:16 - 7:17 + - type: identifier + id: 54 + range: 7:17 - 7:22 + content: error + - type: argument_list + id: 55 + range: 7:22 - 7:33 + dataflow_sources: + - 56 + - 57 + - 61 + children: + - type: '"("' + id: 56 + range: 7:22 - 7:23 + - type: attribute + id: 57 + range: 7:23 - 7:32 + dataflow_sources: + - 58 + queries: + - 3 + children: + - type: identifier + id: 58 + range: 7:23 - 7:27 + content: self + - type: '"."' + id: 59 + range: 7:27 - 7:28 + - type: identifier + id: 60 + range: 7:28 - 7:32 + content: name + - type: '")"' + id: 61 + range: 7:32 - 7:33 + - type: expression_statement + id: 62 + range: 8:9 - 8:33 + dataflow_sources: + - 63 + children: + - type: call + id: 63 + range: 8:9 - 8:33 + dataflow_sources: + - 64 + - 65 + children: + - type: identifier + id: 64 + range: 8:9 - 8:14 + content: print + - type: argument_list + id: 65 + range: 8:14 - 8:33 + dataflow_sources: + - 66 + - 67 + - 78 + children: + - type: '"("' + id: 66 + range: 8:14 - 8:15 + - type: call + id: 67 + range: 8:15 - 8:32 + dataflow_sources: + - 68 + - 75 + children: + - type: attribute + id: 68 + range: 8:15 - 8:30 + dataflow_sources: + - 69 + queries: + - 3 + children: + - type: attribute + id: 69 + range: 8:15 - 8:24 + dataflow_sources: + - 70 + queries: + - 3 + children: + - type: identifier + id: 70 + range: 8:15 - 8:19 + content: self + - type: '"."' + id: 71 + range: 8:19 - 8:20 + - type: identifier + id: 72 + range: 8:20 - 8:24 + content: name + - type: '"."' + id: 73 + range: 8:24 - 8:25 + - type: identifier + id: 74 + range: 8:25 - 8:30 + content: lower + - type: argument_list + id: 75 + range: 8:30 - 8:32 + dataflow_sources: + - 76 + - 77 + children: + - type: '"("' + id: 76 + range: 8:30 - 8:31 + - type: '")"' + id: 77 + range: 8:31 - 8:32 + - type: '")"' + id: 78 + range: 8:32 - 8:33 + +- node: 1 + content: |- + class User: + def __init__(self, name, email=""): + self.name = name + self.email = email + + def lowercase_name(self): + logging.error(self.name) + print(self.name.lower()) + data: + properties: + - name: User + node: null + object: + ruleid: object + matchnode: + id: 1 + typeid: 1 + contentstart: + byte: 0 + line: 1 + column: 1 + contentend: + byte: 200 + line: 8 + column: 33 + executingdetectors: [] + data: + properties: + - name: name + node: + id: 13 + typeid: 3 + contentstart: + byte: 35 + line: 2 + column: 24 + contentend: + byte: 39 + line: 2 + column: 28 + executingdetectors: [] + object: null + - name: email + node: + id: 16 + typeid: 3 + contentstart: + byte: 41 + line: 2 + column: 30 + contentend: + byte: 46 + line: 2 + column: 35 + executingdetectors: [] + object: null + isvirtual: false + isvirtual: false +- node: 26 + content: self.name + data: + properties: + - name: self + node: null + object: + ruleid: object + matchnode: + id: 26 + typeid: 18 + contentstart: + byte: 60 + line: 3 + column: 9 + contentend: + byte: 69 + line: 3 + column: 18 + executingdetectors: [] + data: + properties: + - name: name + node: null + object: null + isvirtual: true + isvirtual: true +- node: 34 + content: self.email + data: + properties: + - name: self + node: null + object: + ruleid: object + matchnode: + id: 34 + typeid: 18 + contentstart: + byte: 85 + line: 4 + column: 9 + contentend: + byte: 95 + line: 4 + column: 19 + executingdetectors: [] + data: + properties: + - name: email + node: null + object: null + isvirtual: true + isvirtual: true +- node: 51 + content: logging.error + data: + properties: + - name: logging + node: null + object: + ruleid: object + matchnode: + id: 51 + typeid: 18 + contentstart: + byte: 143 + line: 7 + column: 9 + contentend: + byte: 156 + line: 7 + column: 22 + executingdetectors: [] + data: + properties: + - name: error + node: null + object: null + isvirtual: true + isvirtual: true +- node: 57 + content: self.name + data: + properties: + - name: self + node: null + object: + ruleid: object + matchnode: + id: 57 + typeid: 18 + contentstart: + byte: 157 + line: 7 + column: 23 + contentend: + byte: 166 + line: 7 + column: 32 + executingdetectors: [] + data: + properties: + - name: name + node: null + object: null + isvirtual: true + isvirtual: true +- node: 69 + content: self.name + data: + properties: + - name: self + node: null + object: + ruleid: object + matchnode: + id: 69 + typeid: 18 + contentstart: + byte: 182 + line: 8 + column: 15 + contentend: + byte: 191 + line: 8 + column: 24 + executingdetectors: [] + data: + properties: + - name: name + node: null + object: null + isvirtual: true + isvirtual: true + diff --git a/internal/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class b/internal/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class new file mode 100644 index 000000000..b353d8e5b --- /dev/null +++ b/internal/languages/python/detectors/.snapshots/TestPythonObjects-object_no_class @@ -0,0 +1,80 @@ +type: module +id: 0 +range: 1:1 - 1:12 +dataflow_sources: + - 1 +children: + - type: expression_statement + id: 1 + range: 1:1 - 1:12 + dataflow_sources: + - 2 + children: + - type: call + id: 2 + range: 1:1 - 1:12 + dataflow_sources: + - 3 + - 7 + children: + - type: attribute + id: 3 + range: 1:1 - 1:10 + dataflow_sources: + - 4 + queries: + - 3 + children: + - type: identifier + id: 4 + range: 1:1 - 1:5 + content: user + - type: '"."' + id: 5 + range: 1:5 - 1:6 + - type: identifier + id: 6 + range: 1:6 - 1:10 + content: name + - type: argument_list + id: 7 + range: 1:10 - 1:12 + dataflow_sources: + - 8 + - 9 + children: + - type: '"("' + id: 8 + range: 1:10 - 1:11 + - type: '")"' + id: 9 + range: 1:11 - 1:12 + +- node: 3 + content: user.name + data: + properties: + - name: user + node: null + object: + ruleid: object + matchnode: + id: 3 + typeid: 3 + contentstart: + byte: 0 + line: 1 + column: 1 + contentend: + byte: 9 + line: 1 + column: 10 + executingdetectors: [] + data: + properties: + - name: name + node: null + object: null + isvirtual: true + isvirtual: true + diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string b/internal/languages/python/detectors/.snapshots/TestPythonString-string new file mode 100644 index 000000000..87b70b09a --- /dev/null +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string @@ -0,0 +1,413 @@ +type: module +id: 0 +range: 1:1 - 12:31 +dataflow_sources: + - 1 +children: + - type: class_definition + id: 1 + range: 1:1 - 12:31 + queries: + - 2 + children: + - type: '"class"' + id: 2 + range: 1:1 - 1:6 + - type: identifier + id: 3 + range: 1:7 - 1:12 + content: Greet + - type: '":"' + id: 4 + range: 1:12 - 1:13 + - type: block + id: 5 + range: 2:5 - 12:31 + children: + - type: expression_statement + id: 6 + range: 2:5 - 2:29 + dataflow_sources: + - 7 + children: + - type: assignment + id: 7 + range: 2:5 - 2:29 + alias_of: + - 10 + queries: + - 1 + children: + - type: identifier + id: 8 + range: 2:5 - 2:13 + content: Greeting + - type: '"="' + id: 9 + range: 2:14 - 2:15 + - type: string + id: 10 + range: 2:16 - 2:29 + dataflow_sources: + - 11 + - 12 + children: + - type: '"""' + id: 11 + range: 2:16 - 2:17 + - type: '"""' + id: 12 + range: 2:28 - 2:29 + - type: function_definition + id: 13 + range: 4:5 - 12:31 + children: + - type: '"def"' + id: 14 + range: 4:5 - 4:8 + - type: identifier + id: 15 + range: 4:9 - 4:13 + content: main + - type: parameters + id: 16 + range: 4:13 - 4:19 + dataflow_sources: + - 17 + - 18 + - 19 + children: + - type: '"("' + id: 17 + range: 4:13 - 4:14 + - type: identifier + id: 18 + range: 4:14 - 4:18 + content: args + - type: '")"' + id: 19 + range: 4:18 - 4:19 + - type: '":"' + id: 20 + range: 4:19 - 4:20 + - type: block + id: 21 + range: 5:9 - 12:31 + children: + - type: expression_statement + id: 22 + range: 5:9 - 5:33 + dataflow_sources: + - 23 + children: + - type: assignment + id: 23 + range: 5:9 - 5:33 + alias_of: + - 26 + queries: + - 1 + children: + - type: identifier + id: 24 + range: 5:9 - 5:10 + content: s + - type: '"="' + id: 25 + range: 5:11 - 5:12 + - type: binary_operator + id: 26 + range: 5:13 - 5:33 + dataflow_sources: + - 27 + - 31 + - 32 + children: + - type: attribute + id: 27 + range: 5:13 - 5:27 + dataflow_sources: + - 28 + queries: + - 3 + children: + - type: identifier + id: 28 + range: 5:13 - 5:18 + content: Greet + - type: '"."' + id: 29 + range: 5:18 - 5:19 + - type: identifier + id: 30 + range: 5:19 - 5:27 + content: Greeting + - type: '"+"' + id: 31 + range: 5:28 - 5:29 + - type: string + id: 32 + range: 5:30 - 5:33 + dataflow_sources: + - 33 + - 34 + children: + - type: '"""' + id: 33 + range: 5:30 - 5:31 + - type: '"""' + id: 34 + range: 5:32 - 5:33 + - type: expression_statement + id: 35 + range: 6:9 - 6:18 + dataflow_sources: + - 36 + children: + - type: augmented_assignment + id: 36 + range: 6:9 - 6:18 + dataflow_sources: + - 37 + - 39 + children: + - type: identifier + id: 37 + range: 6:9 - 6:10 + content: s + alias_of: + - 23 + - type: '"+="' + id: 38 + range: 6:11 - 6:13 + - type: string + id: 39 + range: 6:14 - 6:18 + dataflow_sources: + - 40 + - 41 + children: + - type: '"""' + id: 40 + range: 6:14 - 6:15 + - type: '"""' + id: 41 + range: 6:17 - 6:18 + - type: expression_statement + id: 42 + range: 8:9 - 8:20 + dataflow_sources: + - 43 + children: + - type: assignment + id: 43 + range: 8:9 - 8:20 + alias_of: + - 46 + queries: + - 1 + children: + - type: identifier + id: 44 + range: 8:9 - 8:11 + content: s2 + - type: '"="' + id: 45 + range: 8:12 - 8:13 + - type: string + id: 46 + range: 8:14 - 8:20 + dataflow_sources: + - 47 + - 48 + children: + - type: '"""' + id: 47 + range: 8:14 - 8:15 + - type: '"""' + id: 48 + range: 8:19 - 8:20 + - type: expression_statement + id: 49 + range: 9:9 - 9:22 + dataflow_sources: + - 50 + children: + - type: augmented_assignment + id: 50 + range: 9:9 - 9:22 + dataflow_sources: + - 51 + - 53 + children: + - type: identifier + id: 51 + range: 9:9 - 9:11 + content: s2 + alias_of: + - 43 + - type: '"+="' + id: 52 + range: 9:12 - 9:14 + - type: subscript + id: 53 + range: 9:15 - 9:22 + dataflow_sources: + - 54 + queries: + - 4 + children: + - type: identifier + id: 54 + range: 9:15 - 9:19 + content: args + - type: '"["' + id: 55 + range: 9:19 - 9:20 + - type: integer + id: 56 + range: 9:20 - 9:21 + content: "0" + - type: '"]"' + id: 57 + range: 9:21 - 9:22 + - type: expression_statement + id: 58 + range: 10:9 - 10:23 + dataflow_sources: + - 59 + children: + - type: augmented_assignment + id: 59 + range: 10:9 - 10:23 + dataflow_sources: + - 60 + - 62 + children: + - type: identifier + id: 60 + range: 10:9 - 10:11 + content: s2 + alias_of: + - 50 + - type: '"+="' + id: 61 + range: 10:12 - 10:14 + - type: string + id: 62 + range: 10:15 - 10:23 + dataflow_sources: + - 63 + - 64 + children: + - type: '"""' + id: 63 + range: 10:15 - 10:16 + - type: '"""' + id: 64 + range: 10:22 - 10:23 + - type: expression_statement + id: 65 + range: 12:9 - 12:31 + dataflow_sources: + - 66 + children: + - type: assignment + id: 66 + range: 12:9 - 12:31 + alias_of: + - 69 + queries: + - 1 + children: + - type: identifier + id: 67 + range: 12:9 - 12:11 + content: s3 + - type: '"="' + id: 68 + range: 12:12 - 12:13 + - type: string + id: 69 + range: 12:14 - 12:31 + dataflow_sources: + - 70 + - 71 + - 75 + children: + - type: '"""' + id: 70 + range: 12:14 - 12:16 + - type: interpolation + id: 71 + range: 12:21 - 12:25 + dataflow_sources: + - 72 + - 73 + - 74 + children: + - type: '"{"' + id: 72 + range: 12:21 - 12:22 + - type: identifier + id: 73 + range: 12:22 - 12:24 + content: s2 + - type: '"}"' + id: 74 + range: 12:24 - 12:25 + - type: '"""' + id: 75 + range: 12:30 - 12:31 + +- node: 10 + content: '"Hello World"' + data: + value: Hello World + isliteral: true +- node: 36 + content: s += "!!" + data: + value: �!!! + isliteral: false +- node: 50 + content: s2 += args[0] + data: + value: hey � + isliteral: false +- node: 59 + content: s2 += " there" + data: + value: hey � there + isliteral: false +- node: 26 + content: Greet.Greeting + "!" + data: + value: �! + isliteral: false +- node: 39 + content: '"!!"' + data: + value: '!!' + isliteral: true +- node: 46 + content: '"hey "' + data: + value: 'hey ' + isliteral: true +- node: 62 + content: '" there"' + data: + value: ' there' + isliteral: true +- node: 69 + content: f"foo '{s2}' bar" + data: + value: foo '�' bar + isliteral: false +- node: 32 + content: '"!"' + data: + value: '!' + isliteral: true + diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal new file mode 100644 index 000000000..0da162673 --- /dev/null +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal @@ -0,0 +1,231 @@ +type: module +id: 0 +range: 1:1 - 7:7 +dataflow_sources: + - 1 + - 5 + - 9 + - 17 + - 21 + - 25 + - 33 +children: + - type: expression_statement + id: 1 + range: 1:1 - 1:4 + dataflow_sources: + - 2 + children: + - type: string + id: 2 + range: 1:1 - 1:4 + dataflow_sources: + - 3 + - 4 + children: + - type: '"""' + id: 3 + range: 1:1 - 1:2 + - type: '"""' + id: 4 + range: 1:3 - 1:4 + - type: expression_statement + id: 5 + range: 2:1 - 2:4 + dataflow_sources: + - 6 + children: + - type: string + id: 6 + range: 2:1 - 2:4 + dataflow_sources: + - 7 + - 8 + children: + - type: '"""' + id: 7 + range: 2:1 - 2:2 + - type: '"""' + id: 8 + range: 2:3 - 2:4 + - type: expression_statement + id: 9 + range: 3:1 - 3:8 + dataflow_sources: + - 10 + children: + - type: concatenated_string + id: 10 + range: 3:1 - 3:8 + dataflow_sources: + - 11 + - 14 + children: + - type: string + id: 11 + range: 3:1 - 3:4 + dataflow_sources: + - 12 + - 13 + children: + - type: '"""' + id: 12 + range: 3:1 - 3:2 + - type: '"""' + id: 13 + range: 3:3 - 3:4 + - type: string + id: 14 + range: 3:5 - 3:8 + dataflow_sources: + - 15 + - 16 + children: + - type: '"""' + id: 15 + range: 3:5 - 3:6 + - type: '"""' + id: 16 + range: 3:7 - 3:8 + - type: expression_statement + id: 17 + range: 4:1 - 4:8 + dataflow_sources: + - 18 + children: + - type: string + id: 18 + range: 4:1 - 4:8 + dataflow_sources: + - 19 + - 20 + children: + - type: '"""' + id: 19 + range: 4:1 - 4:4 + - type: '"""' + id: 20 + range: 4:5 - 4:8 + - type: expression_statement + id: 21 + range: 5:1 - 5:8 + dataflow_sources: + - 22 + children: + - type: string + id: 22 + range: 5:1 - 5:8 + dataflow_sources: + - 23 + - 24 + children: + - type: '"""' + id: 23 + range: 5:1 - 5:4 + - type: '"""' + id: 24 + range: 5:5 - 5:8 + - type: expression_statement + id: 25 + range: 6:1 - 6:11 + dataflow_sources: + - 26 + children: + - type: string + id: 26 + range: 6:1 - 6:11 + dataflow_sources: + - 27 + - 28 + - 32 + children: + - type: '"""' + id: 27 + range: 6:1 - 6:3 + - type: interpolation + id: 28 + range: 6:3 - 6:8 + dataflow_sources: + - 29 + - 30 + - 31 + children: + - type: '"{"' + id: 29 + range: 6:3 - 6:4 + - type: identifier + id: 30 + range: 6:4 - 6:7 + content: foo + - type: '"}"' + id: 31 + range: 6:7 - 6:8 + - type: '"""' + id: 32 + range: 6:10 - 6:11 + - type: expression_statement + id: 33 + range: 7:1 - 7:7 + dataflow_sources: + - 34 + children: + - type: string + id: 34 + range: 7:1 - 7:7 + dataflow_sources: + - 35 + - 36 + children: + - type: '"""' + id: 35 + range: 7:1 - 7:3 + - type: '"""' + id: 36 + range: 7:6 - 7:7 + +- node: 2 + content: '''a''' + data: + value: a + isliteral: true +- node: 6 + content: '"a"' + data: + value: a + isliteral: true +- node: 10 + content: '"a" "b"' + data: + value: ab + isliteral: true +- node: 18 + content: '"""a"""' + data: + value: a + isliteral: true +- node: 22 + content: '''''''a''''''' + data: + value: a + isliteral: true +- node: 26 + content: f'{foo} a' + data: + value: � a + isliteral: false +- node: 34 + content: r'a\n' + data: + value: a\n + isliteral: true +- node: 11 + content: '"a"' + data: + value: a + isliteral: true +- node: 14 + content: '"b"' + data: + value: b + isliteral: true + diff --git a/internal/languages/python/detectors/detectors_test.go b/internal/languages/python/detectors/detectors_test.go new file mode 100644 index 000000000..d2dc62d48 --- /dev/null +++ b/internal/languages/python/detectors/detectors_test.go @@ -0,0 +1,22 @@ +package detectors_test + +import ( + "testing" + + "github.com/bearer/bearer/internal/languages/python" + "github.com/bearer/bearer/internal/scanner/detectors/testhelper" +) + +func TestPythonObjects(t *testing.T) { + runTest(t, "object_class", "object", "testdata/class.py") + runTest(t, "object_no_class", "object", "testdata/no_class.py") +} + +func TestPythonString(t *testing.T) { + runTest(t, "string", "string", "testdata/string.py") + runTest(t, "string_literal", "string", "testdata/string_literal.py") +} + +func runTest(t *testing.T, name, detectorType, fileName string) { + testhelper.RunTest(t, name, python.Get(), detectorType, fileName) +} diff --git a/internal/languages/python/detectors/object/object.go b/internal/languages/python/detectors/object/object.go new file mode 100644 index 000000000..9fc8016f6 --- /dev/null +++ b/internal/languages/python/detectors/object/object.go @@ -0,0 +1,217 @@ +package object + +import ( + "github.com/bearer/bearer/internal/scanner/ast/query" + "github.com/bearer/bearer/internal/scanner/ast/traversalstrategy" + "github.com/bearer/bearer/internal/scanner/ast/tree" + + "github.com/bearer/bearer/internal/scanner/detectors/common" + "github.com/bearer/bearer/internal/scanner/detectors/types" + "github.com/bearer/bearer/internal/scanner/ruleset" +) + +type objectDetector struct { + types.DetectorBase + // Base + hashPairQuery *query.Query + classQuery *query.Query + // arrayCreationQuery *query.Query + // Naming + assignmentQuery *query.Query + // Projection + fieldAccessQuery *query.Query + subscriptQuery *query.Query +} + +func New(querySet *query.Set) types.Detector { + // { "foo": "bar" } + hashPairQuery := querySet.Add(`(dictionary (pair key: (_) @key value: (_) @value) @pair) @root`) + + // user = + assignmentQuery := querySet.Add(`[ + (assignment left: (identifier) @name right: (_) @value) @root + ]`) + + // class User: + // def __init__(self, name='', gender=''): + // self.name = name + // self.gender = gender + classQuery := querySet.Add(` + ( + class_definition + name: (identifier) @class_name + body: (block (function_definition + name: (identifier) @method.name + parameters: ( + parameters [ + (identifier) @name + (default_parameter (identifier) @name) + ] + ) + ) + ) + ) @root + `) + + // user.name + // user.name() + fieldAccessQuery := querySet.Add(`[ + (attribute object: (_) @object attribute: (identifier) @field) @root + ]`) + + // user["uuid"] + subscriptQuery := querySet.Add(` + (subscript value: (_) @object subscript: (_) @key) @root + `) + + return &objectDetector{ + hashPairQuery: hashPairQuery, + classQuery: classQuery, + // arrayCreationQuery: arrayCreationQuery, + assignmentQuery: assignmentQuery, + fieldAccessQuery: fieldAccessQuery, + subscriptQuery: subscriptQuery, + } +} + +func (detector *objectDetector) Rule() *ruleset.Rule { + return ruleset.BuiltinObjectRule +} + +func (detector *objectDetector) DetectAt( + node *tree.Node, + detectorContext types.Context, +) ([]interface{}, error) { + detections, err := detector.getHash(node, detectorContext) + if len(detections) != 0 || err != nil { + return detections, err + } + + detections, err = detector.getAssignment(node, detectorContext) + if len(detections) != 0 || err != nil { + return detections, err + } + + detections, err = detector.getClass(node) + if len(detections) != 0 || err != nil { + return detections, err + } + + return detector.getProjections(node, detectorContext) +} + +func (detector *objectDetector) getHash( + node *tree.Node, + detectorContext types.Context, +) ([]interface{}, error) { + results := detector.hashPairQuery.MatchAt(node) + if len(results) == 0 { + return nil, nil + } + + var properties []common.Property + for _, result := range results { + pairNode := result["pair"] + + name := result["key"].Content() + if name == "" { + continue + } + + propertyObjects, err := detectorContext.Scan(result["value"], ruleset.BuiltinObjectRule, traversalstrategy.Cursor) + if err != nil { + return nil, err + } + + if len(propertyObjects) == 0 { + properties = append(properties, common.Property{ + Name: name, + Node: pairNode, + }) + + continue + } + + for _, propertyObject := range propertyObjects { + properties = append(properties, common.Property{ + Name: name, + Node: pairNode, + Object: propertyObject, + }) + } + } + + return []interface{}{common.Object{Properties: properties}}, nil +} + +func (detector *objectDetector) getAssignment( + node *tree.Node, + detectorContext types.Context, +) ([]interface{}, error) { + result, err := detector.assignmentQuery.MatchOnceAt(node) + + if result == nil || err != nil { + return nil, err + } + + rightObjects, err := common.GetNonVirtualObjects( + detectorContext, + result["value"], + ) + if err != nil { + return nil, err + } + + var objects []interface{} + for _, object := range rightObjects { + objects = append(objects, common.Object{ + IsVirtual: true, + Properties: []common.Property{{ + Name: result["name"].Content(), + Node: node, + Object: object, + }}, + }) + } + + return objects, nil +} + +func (detector *objectDetector) getClass(node *tree.Node) ([]interface{}, error) { + results := detector.classQuery.MatchAt(node) + if len(results) == 0 { + return nil, nil + } + + className := results[0]["class_name"].Content() + var properties []common.Property + for _, result := range results { + nameNode := result["name"] + + if result["method.name"].Content() != "__init__" { + continue + } + + if result["name"].Content() == "self" { + continue + } + + properties = append(properties, common.Property{ + Name: nameNode.Content(), + Node: nameNode, + }) + } + + return []interface{}{common.Object{ + Properties: []common.Property{{ + Name: className, + Object: &types.Detection{ + RuleID: ruleset.BuiltinObjectRule.ID(), + MatchNode: node, + Data: common.Object{ + Properties: properties, + }, + }, + }}, + }}, nil +} diff --git a/internal/languages/python/detectors/object/projection.go b/internal/languages/python/detectors/object/projection.go new file mode 100644 index 000000000..54738b761 --- /dev/null +++ b/internal/languages/python/detectors/object/projection.go @@ -0,0 +1,73 @@ +package object + +import ( + "github.com/bearer/bearer/internal/scanner/ast/tree" + "github.com/bearer/bearer/internal/scanner/detectors/common" + "github.com/bearer/bearer/internal/scanner/detectors/types" +) + +func (detector *objectDetector) getProjections( + node *tree.Node, + detectorContext types.Context, +) ([]interface{}, error) { + result, err := detector.fieldAccessQuery.MatchOnceAt(node) + if err != nil { + return nil, err + } + + if result != nil { + objectNode := result["object"] + objects, err := common.ProjectObject( + node, + detectorContext, + objectNode, + getObjectName(objectNode), + result["field"].Content(), + true, + ) + if err != nil { + return nil, err + } + + return objects, nil + } + + result, err = detector.subscriptQuery.MatchOnceAt(node) + if err != nil { + return nil, err + } + + if result != nil { + objectNode := result["object"] + propertyName := result["key"].Content() + if propertyName == "" { + return nil, nil + } + + objects, err := common.ProjectObject( + node, + detectorContext, + objectNode, + getObjectName(objectNode), + propertyName, + false, + ) + if err != nil { + return nil, err + } + + return objects, nil + } + + return nil, nil +} + +func getObjectName(objectNode *tree.Node) string { + // user->name() + // user->name + if objectNode.Type() == "identifier" { + return objectNode.Content() + } + + return "" +} diff --git a/internal/languages/python/detectors/string/string.go b/internal/languages/python/detectors/string/string.go new file mode 100644 index 000000000..5cbe74738 --- /dev/null +++ b/internal/languages/python/detectors/string/string.go @@ -0,0 +1,92 @@ +package string + +import ( + "regexp" + + "github.com/bearer/bearer/internal/scanner/ast/query" + "github.com/bearer/bearer/internal/scanner/ast/tree" + "github.com/bearer/bearer/internal/scanner/ruleset" + + "github.com/bearer/bearer/internal/scanner/detectors/common" + "github.com/bearer/bearer/internal/scanner/detectors/types" +) + +var stringRegex = regexp.MustCompile(`\A\w?['"]{1,3}(.*?)['"]{1,3}\z`) + +type stringDetector struct { + types.DetectorBase +} + +func New(querySet *query.Set) types.Detector { + return &stringDetector{} +} + +func (detector *stringDetector) Rule() *ruleset.Rule { + return ruleset.BuiltinStringRule +} + +func (detector *stringDetector) DetectAt( + node *tree.Node, + detectorContext types.Context, +) ([]interface{}, error) { + switch node.Type() { + case "string": + return handleTemplateString(node, detectorContext) + case "concatenated_string": + return common.ConcatenateChildStrings(node, detectorContext) + case "binary_operator": + if node.Children()[1].Content() == "+" { + return common.ConcatenateChildStrings(node, detectorContext) + } + case "augmented_assignment": + if node.Children()[1].Content() == "+=" { + return common.ConcatenateAssignEquals(node, detectorContext) + } + } + + return nil, nil +} + +func handleTemplateString(node *tree.Node, detectorContext types.Context) ([]interface{}, error) { + text := "" + isLiteral := true + + err := node.EachContentPart(func(partText string) error { + text += partText + return nil + }, func(child *tree.Node) error { + var childValue string + var childIsLiteral bool + namedChildren := child.NamedChildren() + + if len(namedChildren) == 0 { + childValue = "" + childIsLiteral = true + } else { + var err error + childValue, childIsLiteral, err = common.GetStringValue(namedChildren[0], detectorContext) + if err != nil { + return err + } + } + + if childValue == "" && !childIsLiteral { + childValue = common.NonLiteralValue + } + + text += childValue + + if !childIsLiteral { + isLiteral = false + } + + return nil + }) + + text = stringRegex.ReplaceAllString(text, `$1`) + + return []interface{}{common.String{ + Value: text, + IsLiteral: isLiteral, + }}, err +} diff --git a/internal/languages/python/detectors/testdata/class.py b/internal/languages/python/detectors/testdata/class.py new file mode 100644 index 000000000..86fd72302 --- /dev/null +++ b/internal/languages/python/detectors/testdata/class.py @@ -0,0 +1,8 @@ +class User: + def __init__(self, name, email=""): + self.name = name + self.email = email + + def lowercase_name(self): + logging.error(self.name) + print(self.name.lower()) \ No newline at end of file diff --git a/internal/languages/python/detectors/testdata/no_class.py b/internal/languages/python/detectors/testdata/no_class.py new file mode 100644 index 000000000..9149a816f --- /dev/null +++ b/internal/languages/python/detectors/testdata/no_class.py @@ -0,0 +1 @@ +user.name() \ No newline at end of file diff --git a/internal/languages/python/detectors/testdata/string.py b/internal/languages/python/detectors/testdata/string.py new file mode 100644 index 000000000..a6539d00a --- /dev/null +++ b/internal/languages/python/detectors/testdata/string.py @@ -0,0 +1,12 @@ +class Greet: + Greeting = "Hello World" + + def main(args): + s = Greet.Greeting + "!" + s += "!!" + + s2 = "hey " + s2 += args[0] + s2 += " there" + + s3 = f"foo '{s2}' bar" \ No newline at end of file diff --git a/internal/languages/python/detectors/testdata/string_literal.py b/internal/languages/python/detectors/testdata/string_literal.py new file mode 100644 index 000000000..e9e5eed0d --- /dev/null +++ b/internal/languages/python/detectors/testdata/string_literal.py @@ -0,0 +1,7 @@ +'a' +"a" +"a" "b" +"""a""" +'''a''' +f'{foo} a' +r'a\n' \ No newline at end of file diff --git a/internal/languages/python/pattern/pattern.go b/internal/languages/python/pattern/pattern.go new file mode 100644 index 000000000..0ecaed06f --- /dev/null +++ b/internal/languages/python/pattern/pattern.go @@ -0,0 +1,161 @@ +package pattern + +import ( + "fmt" + "regexp" + "slices" + "strings" + + "github.com/bearer/bearer/internal/scanner/ast/tree" + "github.com/bearer/bearer/internal/scanner/language" + "github.com/bearer/bearer/internal/util/regex" +) + +var ( + // $ or $ or $ + patternQueryVariableRegex = regexp.MustCompile(`\$<(?P[^>:!\.]+)(?::(?P[^>]+))?>`) + matchNodeRegex = regexp.MustCompile(`\$`) + ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) + unanchoredPatternNodeTypes = []string{} + patternMatchNodeContainerTypes = []string{} + + allowedPatternQueryTypes = []string{"_"} +) + +type Pattern struct { + language.PatternBase +} + +func (*Pattern) FixupVariableDummyValue(input []byte, node *tree.Node, dummyValue string) string { + return dummyValue +} + +func (*Pattern) ExtractVariables(input string) (string, []language.PatternVariable, error) { + nameIndex := patternQueryVariableRegex.SubexpIndex("name") + typesIndex := patternQueryVariableRegex.SubexpIndex("types") + i := 0 + + var params []language.PatternVariable + + replaced, err := regex.ReplaceAllWithSubmatches(patternQueryVariableRegex, input, func(submatches []string) (string, error) { + nodeTypes := strings.Split(submatches[typesIndex], "|") + if nodeTypes[0] == "" { + nodeTypes = []string{"_"} + } + + for _, nodeType := range nodeTypes { + if !slices.Contains(allowedPatternQueryTypes, nodeType) { + return "", fmt.Errorf("invalid node type '%s' in pattern query", nodeType) + } + } + + dummyValue := produceDummyValue(i, nodeTypes[0]) + + params = append(params, language.PatternVariable{ + Name: submatches[nameIndex], + NodeTypes: nodeTypes, + DummyValue: dummyValue, + }) + + i += 1 + + return dummyValue, nil + }) + + if err != nil { + return "", nil, err + } + + return replaced, params, nil +} + +func produceDummyValue(i int, nodeType string) string { + return "BearerVar" + fmt.Sprint(i) +} + +func (*Pattern) FindMatchNode(input []byte) [][]int { + return matchNodeRegex.FindAllIndex(input, -1) +} + +func (*Pattern) FindUnanchoredPoints(input []byte) [][]int { + return ellipsisRegex.FindAllIndex(input, -1) +} + +func (*Pattern) IsLeaf(node *tree.Node) bool { + return false +} + +func (*Pattern) LeafContentTypes() []string { + return []string{ + "string", + "identifier", + "true", + "false", + "float", + "integer", + "none", + } +} + +func (*Pattern) IsAnchored(node *tree.Node) (bool, bool) { + if slices.Contains(unanchoredPatternNodeTypes, node.Type()) { + return false, false + } + + parent := node.Parent() + if parent == nil { + return true, true + } + + if parent.Type() == "method_declaration" { + // visibility + if node == parent.ChildByFieldName("name") { + return false, true + } + + // type + if node == parent.ChildByFieldName("parameters") { + return true, false + } + + return false, false + } + + // Associative array elements are unanchored + // eg. array("foo" => 42) + if parent.Type() == "array_creation_expression" && + node.Type() == "array_element_initializer" && + len(node.NamedChildren()) == 2 { + return false, false + } + + // Class body declaration_list + // function/block compound_statement + unAnchored := []string{} + + isUnanchored := !slices.Contains(unAnchored, parent.Type()) + return isUnanchored, isUnanchored +} + +func (*Pattern) IsRoot(node *tree.Node) bool { + return !slices.Contains([]string{"module", "expression_statement"}, node.Type()) && !node.IsMissing() +} + +func (patternLanguage *Pattern) NodeTypes(node *tree.Node) []string { + return []string{node.Type()} +} + +// func (*Pattern) TranslateContent(fromNodeType, toNodeType, content string) string { +// if fromNodeType == "string" && toNodeType == "encapsed_string" { +// return fmt.Sprintf(`"%s"`, content[1:len(content)-1]) +// } +// if fromNodeType == "encapsed_string" && toNodeType == "string" { +// return fmt.Sprintf("'%s'", content[1:len(content)-1]) +// } + +// return content +// } + +func (*Pattern) ContainerTypes() []string { + return patternMatchNodeContainerTypes +} diff --git a/internal/languages/python/python.go b/internal/languages/python/python.go new file mode 100644 index 000000000..9de500ca8 --- /dev/null +++ b/internal/languages/python/python.go @@ -0,0 +1,59 @@ +package python + +import ( + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/python" + + "github.com/bearer/bearer/internal/classification/schema" + "github.com/bearer/bearer/internal/report/detectors" + "github.com/bearer/bearer/internal/scanner/ast/query" + "github.com/bearer/bearer/internal/scanner/ast/tree" + detectortypes "github.com/bearer/bearer/internal/scanner/detectors/types" + + "github.com/bearer/bearer/internal/languages/python/analyzer" + "github.com/bearer/bearer/internal/languages/python/detectors/object" + stringdetector "github.com/bearer/bearer/internal/languages/python/detectors/string" + "github.com/bearer/bearer/internal/languages/python/pattern" + "github.com/bearer/bearer/internal/scanner/detectors/datatype" + "github.com/bearer/bearer/internal/scanner/detectors/insecureurl" + "github.com/bearer/bearer/internal/scanner/detectors/stringliteral" + "github.com/bearer/bearer/internal/scanner/language" +) + +type implementation struct { + pattern pattern.Pattern +} + +func Get() language.Language { + return &implementation{} +} + +func (*implementation) ID() string { + return "python" +} + +func (*implementation) EnryLanguages() []string { + return []string{"Python"} +} + +func (*implementation) NewBuiltInDetectors(schemaClassifier *schema.Classifier, querySet *query.Set) []detectortypes.Detector { + return []detectortypes.Detector{ + object.New(querySet), + datatype.New(detectors.DetectorPython, schemaClassifier), + stringdetector.New(querySet), + stringliteral.New(querySet), + insecureurl.New(querySet), + } +} + +func (*implementation) SitterLanguage() *sitter.Language { + return python.GetLanguage() +} + +func (language *implementation) Pattern() language.Pattern { + return &language.pattern +} + +func (*implementation) NewAnalyzer(builder *tree.Builder) language.Analyzer { + return analyzer.New(builder) +} diff --git a/internal/languages/python/python_test.go b/internal/languages/python/python_test.go new file mode 100644 index 000000000..af95751cf --- /dev/null +++ b/internal/languages/python/python_test.go @@ -0,0 +1,22 @@ +package python_test + +import ( + _ "embed" + "testing" + + "github.com/bearer/bearer/internal/languages/testhelper" +) + +//go:embed testdata/logger.yml +var loggerRule []byte + +//go:embed testdata/scope_rule.yml +var scopeRule []byte + +func TestFlow(t *testing.T) { + testhelper.GetRunner(t, loggerRule, "python").RunTest(t, "./testdata/testcases/flow", ".snapshots/flow/") +} + +func TestScope(t *testing.T) { + testhelper.GetRunner(t, scopeRule, "python").RunTest(t, "./testdata/scope", ".snapshots/") +} diff --git a/internal/languages/python/testdata/logger.yml b/internal/languages/python/testdata/logger.yml new file mode 100644 index 000000000..53a3c2ffc --- /dev/null +++ b/internal/languages/python/testdata/logger.yml @@ -0,0 +1,13 @@ +type: risk +languages: + - python +patterns: + - pattern: logging.$($) + filters: + - variable: METHOD + values: + - error + - variable: DATA_TYPE + detection: datatype +metadata: + id: rule_logger_test diff --git a/internal/languages/python/testdata/scope/scope.py b/internal/languages/python/testdata/scope/scope.py new file mode 100644 index 000000000..09b6b0fbe --- /dev/null +++ b/internal/languages/python/testdata/scope/scope.py @@ -0,0 +1,17 @@ +scopeCursor(request.GET.get('oops')) +scopeCursor(x + request.GET.get("ok")) +scopeCursor(request.GET.get('oops') if x else y) +scopeCursor(x if request.GET.get('ok') else y) +scopeCursor(request.GET.get('oops') or y) # wrong + +scopeNested(request.GET.get('oops')) +scopeNested(x + request.GET.get('oops')) +scopeNested(request.GET.get('oops') if x else y) +scopeNested(x if request.GET.get('oops') else y) +scopeNested(request.GET.get('oops') or y) + +scopeResult(request.GET.get('oops')) +scopeResult(x + request.GET.get('oops')) +scopeResult(request.GET.get('oops') if x else y) +scopeResult(x if request.GET.get('ok') else y) +scopeResult(request.GET.get('oops') or y) \ No newline at end of file diff --git a/internal/languages/python/testdata/scope_rule.yml b/internal/languages/python/testdata/scope_rule.yml new file mode 100644 index 000000000..f312dd447 --- /dev/null +++ b/internal/languages/python/testdata/scope_rule.yml @@ -0,0 +1,30 @@ +languages: + - python +patterns: + - pattern: scopeCursor($) + filters: + - variable: USER_INPUT + detection: scope_test_user_input + scope: cursor + - pattern: scopeNested($) + filters: + - variable: USER_INPUT + detection: scope_test_user_input + scope: nested + - pattern: scopeResult($) + filters: + - variable: USER_INPUT + detection: scope_test_user_input + scope: result +auxiliary: + - id: scope_test_user_input + patterns: + - request.GET.get() + - request.POST.get() +severity: high +metadata: + description: Test detection filter scopes + remediation_message: Test detection filter scopes + cwe_id: + - 42 + id: scope_test diff --git a/internal/languages/python/testdata/testcases/flow/different-line.py b/internal/languages/python/testdata/testcases/flow/different-line.py new file mode 100644 index 000000000..8d69eca5d --- /dev/null +++ b/internal/languages/python/testdata/testcases/flow/different-line.py @@ -0,0 +1,3 @@ +user = User("Foo", "foo@example.com") +name = user.name +logging.error(name) \ No newline at end of file diff --git a/internal/languages/python/testdata/testcases/flow/same-line.py b/internal/languages/python/testdata/testcases/flow/same-line.py new file mode 100644 index 000000000..daf0a215c --- /dev/null +++ b/internal/languages/python/testdata/testcases/flow/same-line.py @@ -0,0 +1,2 @@ +logging.error(user.name) +logging.error(user.name()) \ No newline at end of file diff --git a/internal/parser/parser.go b/internal/parser/parser.go index df31fff80..4d0b015cc 100644 --- a/internal/parser/parser.go +++ b/internal/parser/parser.go @@ -440,6 +440,8 @@ func (node *Node) Query(query *sitter.Query, onMatch func(captures Captures) err break } + // match = cursor.FilterPredicates(match, node.tree.input) + captures := make(Captures) for _, capture := range match.Captures { captures[query.CaptureNameForId(capture.Index)] = node.tree.wrap(capture.Node) diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 8f2db79e3..5f44551e0 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -11,6 +11,7 @@ import ( "github.com/bearer/bearer/internal/languages/java" "github.com/bearer/bearer/internal/languages/javascript" "github.com/bearer/bearer/internal/languages/php" + "github.com/bearer/bearer/internal/languages/python" "github.com/bearer/bearer/internal/languages/ruby" "github.com/bearer/bearer/internal/report" reportdetections "github.com/bearer/bearer/internal/report/detections" @@ -39,6 +40,7 @@ func New(schemaClassifier *schemaclassifier.Classifier, rules map[string]*settin ruby.Get(), php.Get(), golang.Get(), + python.Get(), } languageScanners := make([]*languagescanner.Scanner, len(languages))