From b50949886ce010dc4aa15d24ab960b08363db5f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Fabianski?= Date: Mon, 16 Oct 2023 15:58:18 +0200 Subject: [PATCH] chore(python): improve string detections --- .../languages/python/analyzer/analyzer.go | 19 ++ .../.snapshots/TestPythonString-string | 34 ++- .../TestPythonString-string_literal | 231 ++++++++++++++++++ .../python/detectors/detectors_test.go | 1 + .../python/detectors/string/string.go | 56 ++++- .../detectors/testdata/string_literal.py | 7 + 6 files changed, 330 insertions(+), 18 deletions(-) create mode 100644 internal/languages/python/detectors/.snapshots/TestPythonString-string_literal create mode 100644 internal/languages/python/detectors/testdata/string_literal.py diff --git a/internal/languages/python/analyzer/analyzer.go b/internal/languages/python/analyzer/analyzer.go index 844c8aba9..6037e21ad 100644 --- a/internal/languages/python/analyzer/analyzer.go +++ b/internal/languages/python/analyzer/analyzer.go @@ -25,6 +25,8 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.withScope(language.NewScope(analyzer.scope), func() error { return visitChildren() }) + case "augmented_assignment": + return analyzer.analyzeAugmentedAssignment(node, visitChildren) case "assignment": return analyzer.analyzeAssignment(node, visitChildren) case "attribute": @@ -51,6 +53,23 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) } } +// foo += a +func (analyzer *analyzer) analyzeAugmentedAssignment(node *sitter.Node, visitChildren func() error) error { + left := node.ChildByFieldName("left") + right := node.ChildByFieldName("right") + analyzer.builder.Dataflow(node, left, right) + analyzer.lookupVariable(left) + analyzer.lookupVariable(right) + + err := visitChildren() + + if left.Type() == "identifier" { + analyzer.scope.Assign(analyzer.builder.ContentFor(left), node) + } + + return err +} + // foo = a func (analyzer *analyzer) analyzeAssignment(node *sitter.Node, visitChildren func() error) error { left := node.ChildByFieldName("left") diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string b/internal/languages/python/detectors/.snapshots/TestPythonString-string index 9fdffeb9c..87b70b09a 100644 --- a/internal/languages/python/detectors/.snapshots/TestPythonString-string +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string @@ -169,13 +169,14 @@ children: range: 6:9 - 6:18 dataflow_sources: - 37 - - 38 - 39 children: - type: identifier id: 37 range: 6:9 - 6:10 content: s + alias_of: + - 23 - type: '"+="' id: 38 range: 6:11 - 6:13 @@ -237,13 +238,14 @@ children: range: 9:9 - 9:22 dataflow_sources: - 51 - - 52 - 53 children: - type: identifier id: 51 range: 9:9 - 9:11 content: s2 + alias_of: + - 43 - type: '"+="' id: 52 range: 9:12 - 9:14 @@ -280,13 +282,14 @@ children: range: 10:9 - 10:23 dataflow_sources: - 60 - - 61 - 62 children: - type: identifier id: 60 range: 10:9 - 10:11 content: s2 + alias_of: + - 50 - type: '"+="' id: 61 range: 10:12 - 10:14 @@ -360,46 +363,51 @@ children: - node: 10 content: '"Hello World"' data: - value: '"Hello World"' + value: Hello World isliteral: true - node: 36 content: s += "!!" data: - value: �"!!" + value: �!!! + isliteral: false +- node: 50 + content: s2 += args[0] + data: + value: hey � isliteral: false - node: 59 content: s2 += " there" data: - value: �" there" + value: hey � there isliteral: false - node: 26 content: Greet.Greeting + "!" data: - value: �"!" + value: �! isliteral: false - node: 39 content: '"!!"' data: - value: '"!!"' + value: '!!' isliteral: true - node: 46 content: '"hey "' data: - value: '"hey "' + value: 'hey ' isliteral: true - node: 62 content: '" there"' data: - value: '" there"' + value: ' there' isliteral: true - node: 69 content: f"foo '{s2}' bar" data: - value: f"foo '{s2}' bar" - isliteral: true + value: foo '�' bar + isliteral: false - node: 32 content: '"!"' data: - value: '"!"' + value: '!' isliteral: true diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal new file mode 100644 index 000000000..0da162673 --- /dev/null +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal @@ -0,0 +1,231 @@ +type: module +id: 0 +range: 1:1 - 7:7 +dataflow_sources: + - 1 + - 5 + - 9 + - 17 + - 21 + - 25 + - 33 +children: + - type: expression_statement + id: 1 + range: 1:1 - 1:4 + dataflow_sources: + - 2 + children: + - type: string + id: 2 + range: 1:1 - 1:4 + dataflow_sources: + - 3 + - 4 + children: + - type: '"""' + id: 3 + range: 1:1 - 1:2 + - type: '"""' + id: 4 + range: 1:3 - 1:4 + - type: expression_statement + id: 5 + range: 2:1 - 2:4 + dataflow_sources: + - 6 + children: + - type: string + id: 6 + range: 2:1 - 2:4 + dataflow_sources: + - 7 + - 8 + children: + - type: '"""' + id: 7 + range: 2:1 - 2:2 + - type: '"""' + id: 8 + range: 2:3 - 2:4 + - type: expression_statement + id: 9 + range: 3:1 - 3:8 + dataflow_sources: + - 10 + children: + - type: concatenated_string + id: 10 + range: 3:1 - 3:8 + dataflow_sources: + - 11 + - 14 + children: + - type: string + id: 11 + range: 3:1 - 3:4 + dataflow_sources: + - 12 + - 13 + children: + - type: '"""' + id: 12 + range: 3:1 - 3:2 + - type: '"""' + id: 13 + range: 3:3 - 3:4 + - type: string + id: 14 + range: 3:5 - 3:8 + dataflow_sources: + - 15 + - 16 + children: + - type: '"""' + id: 15 + range: 3:5 - 3:6 + - type: '"""' + id: 16 + range: 3:7 - 3:8 + - type: expression_statement + id: 17 + range: 4:1 - 4:8 + dataflow_sources: + - 18 + children: + - type: string + id: 18 + range: 4:1 - 4:8 + dataflow_sources: + - 19 + - 20 + children: + - type: '"""' + id: 19 + range: 4:1 - 4:4 + - type: '"""' + id: 20 + range: 4:5 - 4:8 + - type: expression_statement + id: 21 + range: 5:1 - 5:8 + dataflow_sources: + - 22 + children: + - type: string + id: 22 + range: 5:1 - 5:8 + dataflow_sources: + - 23 + - 24 + children: + - type: '"""' + id: 23 + range: 5:1 - 5:4 + - type: '"""' + id: 24 + range: 5:5 - 5:8 + - type: expression_statement + id: 25 + range: 6:1 - 6:11 + dataflow_sources: + - 26 + children: + - type: string + id: 26 + range: 6:1 - 6:11 + dataflow_sources: + - 27 + - 28 + - 32 + children: + - type: '"""' + id: 27 + range: 6:1 - 6:3 + - type: interpolation + id: 28 + range: 6:3 - 6:8 + dataflow_sources: + - 29 + - 30 + - 31 + children: + - type: '"{"' + id: 29 + range: 6:3 - 6:4 + - type: identifier + id: 30 + range: 6:4 - 6:7 + content: foo + - type: '"}"' + id: 31 + range: 6:7 - 6:8 + - type: '"""' + id: 32 + range: 6:10 - 6:11 + - type: expression_statement + id: 33 + range: 7:1 - 7:7 + dataflow_sources: + - 34 + children: + - type: string + id: 34 + range: 7:1 - 7:7 + dataflow_sources: + - 35 + - 36 + children: + - type: '"""' + id: 35 + range: 7:1 - 7:3 + - type: '"""' + id: 36 + range: 7:6 - 7:7 + +- node: 2 + content: '''a''' + data: + value: a + isliteral: true +- node: 6 + content: '"a"' + data: + value: a + isliteral: true +- node: 10 + content: '"a" "b"' + data: + value: ab + isliteral: true +- node: 18 + content: '"""a"""' + data: + value: a + isliteral: true +- node: 22 + content: '''''''a''''''' + data: + value: a + isliteral: true +- node: 26 + content: f'{foo} a' + data: + value: � a + isliteral: false +- node: 34 + content: r'a\n' + data: + value: a\n + isliteral: true +- node: 11 + content: '"a"' + data: + value: a + isliteral: true +- node: 14 + content: '"b"' + data: + value: b + isliteral: true + diff --git a/internal/languages/python/detectors/detectors_test.go b/internal/languages/python/detectors/detectors_test.go index 341902c93..d2dc62d48 100644 --- a/internal/languages/python/detectors/detectors_test.go +++ b/internal/languages/python/detectors/detectors_test.go @@ -14,6 +14,7 @@ func TestPythonObjects(t *testing.T) { func TestPythonString(t *testing.T) { runTest(t, "string", "string", "testdata/string.py") + runTest(t, "string_literal", "string", "testdata/string_literal.py") } func runTest(t *testing.T, name, detectorType, fileName string) { diff --git a/internal/languages/python/detectors/string/string.go b/internal/languages/python/detectors/string/string.go index 19f4dc0e2..5cbe74738 100644 --- a/internal/languages/python/detectors/string/string.go +++ b/internal/languages/python/detectors/string/string.go @@ -1,6 +1,8 @@ package string import ( + "regexp" + "github.com/bearer/bearer/internal/scanner/ast/query" "github.com/bearer/bearer/internal/scanner/ast/tree" "github.com/bearer/bearer/internal/scanner/ruleset" @@ -9,6 +11,8 @@ import ( "github.com/bearer/bearer/internal/scanner/detectors/types" ) +var stringRegex = regexp.MustCompile(`\A\w?['"]{1,3}(.*?)['"]{1,3}\z`) + type stringDetector struct { types.DetectorBase } @@ -27,11 +31,9 @@ func (detector *stringDetector) DetectAt( ) ([]interface{}, error) { switch node.Type() { case "string": - // return common.ConcatenateChildStrings(node, detectorContext) - return []interface{}{common.String{ - Value: node.Content(), - IsLiteral: true, - }}, nil + return handleTemplateString(node, detectorContext) + case "concatenated_string": + return common.ConcatenateChildStrings(node, detectorContext) case "binary_operator": if node.Children()[1].Content() == "+" { return common.ConcatenateChildStrings(node, detectorContext) @@ -44,3 +46,47 @@ func (detector *stringDetector) DetectAt( return nil, nil } + +func handleTemplateString(node *tree.Node, detectorContext types.Context) ([]interface{}, error) { + text := "" + isLiteral := true + + err := node.EachContentPart(func(partText string) error { + text += partText + return nil + }, func(child *tree.Node) error { + var childValue string + var childIsLiteral bool + namedChildren := child.NamedChildren() + + if len(namedChildren) == 0 { + childValue = "" + childIsLiteral = true + } else { + var err error + childValue, childIsLiteral, err = common.GetStringValue(namedChildren[0], detectorContext) + if err != nil { + return err + } + } + + if childValue == "" && !childIsLiteral { + childValue = common.NonLiteralValue + } + + text += childValue + + if !childIsLiteral { + isLiteral = false + } + + return nil + }) + + text = stringRegex.ReplaceAllString(text, `$1`) + + return []interface{}{common.String{ + Value: text, + IsLiteral: isLiteral, + }}, err +} diff --git a/internal/languages/python/detectors/testdata/string_literal.py b/internal/languages/python/detectors/testdata/string_literal.py new file mode 100644 index 000000000..e9e5eed0d --- /dev/null +++ b/internal/languages/python/detectors/testdata/string_literal.py @@ -0,0 +1,7 @@ +'a' +"a" +"a" "b" +"""a""" +'''a''' +f'{foo} a' +r'a\n' \ No newline at end of file