From c4d147c5d36b4e2b8c74e56322eab9617a3fef3e Mon Sep 17 00:00:00 2001 From: David Roe Date: Tue, 9 Jan 2024 10:43:49 +0000 Subject: [PATCH] fix: support binary OR for strings --- ...estJavascriptStringDetector-string_literal | 137 +++++++++++++- .../javascript/detectors/string/string.go | 15 +- .../detectors/testdata/string_literal.js | 3 + .../.snapshots/TestPythonString-string | 2 +- .../TestPythonString-string_literal | 109 ++++++++++- .../python/detectors/string/string.go | 14 ++ .../python/detectors/testdata/string.py | 2 +- .../detectors/testdata/string_literal.py | 5 +- .../TestRubyStringDetector-string_literal | 178 +++++++++++++++++- .../languages/ruby/detectors/string/string.go | 15 +- .../ruby/detectors/testdata/string_literal.rb | 4 + internal/scanner/detectors/common/string.go | 14 ++ 12 files changed, 490 insertions(+), 8 deletions(-) diff --git a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal index 09d0d623b..4991e08f0 100644 --- a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal +++ b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal @@ -1,12 +1,14 @@ type: program id: 0 -range: 1:1 - 9:1 +range: 1:1 - 12:1 dataflow_sources: - 1 - 6 - 17 - 25 - 33 + - 41 + - 49 children: - type: expression_statement id: 1 @@ -193,6 +195,94 @@ children: - type: '"""' id: 40 range: 8:18 - 8:19 + - type: expression_statement + id: 41 + range: 10:1 - 10:15 + children: + - type: binary_expression + id: 42 + range: 10:1 - 10:15 + dataflow_sources: + - 43 + - 44 + - 45 + children: + - type: "false" + id: 43 + range: 10:1 - 10:6 + content: "false" + - type: '"||"' + id: 44 + range: 10:7 - 10:9 + - type: string + id: 45 + range: 10:10 - 10:15 + dataflow_sources: + - 46 + - 47 + - 48 + children: + - type: '"""' + id: 46 + range: 10:10 - 10:11 + - type: string_fragment + id: 47 + range: 10:11 - 10:14 + content: foo + - type: '"""' + id: 48 + range: 10:14 - 10:15 + - type: expression_statement + id: 49 + range: 11:1 - 11:15 + children: + - type: binary_expression + id: 50 + range: 11:1 - 11:15 + dataflow_sources: + - 51 + - 55 + - 56 + children: + - type: string + id: 51 + range: 11:1 - 11:6 + dataflow_sources: + - 52 + - 53 + - 54 + children: + - type: '"""' + id: 52 + range: 11:1 - 11:2 + - type: string_fragment + id: 53 + range: 11:2 - 11:5 + content: hey + - type: '"""' + id: 54 + range: 11:5 - 11:6 + - type: '"||"' + id: 55 + range: 11:7 - 11:9 + - type: string + id: 56 + range: 11:10 - 11:15 + dataflow_sources: + - 57 + - 58 + - 59 + children: + - type: '"""' + id: 57 + range: 11:10 - 11:11 + - type: string_fragment + id: 58 + range: 11:11 - 11:14 + content: foo + - type: '"""' + id: 59 + range: 11:14 - 11:15 - node: 2 content: '"ab"' @@ -214,6 +304,21 @@ children: data: value: hello world\ isliteral: true +- node: 42 + content: false || "foo" + data: + value: foo + isliteral: true +- node: 50 + content: '"hey" || "foo"' + data: + value: hey + isliteral: true +- node: 50 + content: '"hey" || "foo"' + data: + value: foo + isliteral: true - node: 4 content: ab data: @@ -254,6 +359,21 @@ children: data: value: \ isliteral: true +- node: 45 + content: '"foo"' + data: + value: foo + isliteral: true +- node: 51 + content: '"hey"' + data: + value: hey + isliteral: true +- node: 56 + content: '"foo"' + data: + value: foo + isliteral: true - node: 10 content: a data: @@ -269,4 +389,19 @@ children: data: value: a isliteral: true +- node: 47 + content: foo + data: + value: foo + isliteral: true +- node: 53 + content: hey + data: + value: hey + isliteral: true +- node: 58 + content: foo + data: + value: foo + isliteral: true diff --git a/internal/languages/javascript/detectors/string/string.go b/internal/languages/javascript/detectors/string/string.go index de099adaf..53022b17b 100644 --- a/internal/languages/javascript/detectors/string/string.go +++ b/internal/languages/javascript/detectors/string/string.go @@ -49,8 +49,21 @@ func (detector *stringDetector) DetectAt( case "template_string": return handleTemplateString(node, detectorContext) case "binary_expression": - if node.Children()[1].Content() == "+" { + switch node.Children()[1].Content() { + case "+": return common.ConcatenateChildStrings(node, detectorContext) + case "||": + leftData, err := common.GetStringData(node.ChildByFieldName("left"), detectorContext) + if err != nil { + return nil, err + } + + rightData, err := common.GetStringData(node.ChildByFieldName("right"), detectorContext) + if err != nil { + return nil, err + } + + return append(leftData, rightData...), nil } case "augmented_assignment_expression": if node.Children()[1].Content() == "+=" { diff --git a/internal/languages/javascript/detectors/testdata/string_literal.js b/internal/languages/javascript/detectors/testdata/string_literal.js index fc4e54d57..f530442f7 100644 --- a/internal/languages/javascript/detectors/testdata/string_literal.js +++ b/internal/languages/javascript/detectors/testdata/string_literal.js @@ -6,3 +6,6 @@ x = "a" `${x} b` "hello\x20world\\" + +false || "foo" +"hey" || "foo" diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string b/internal/languages/python/detectors/.snapshots/TestPythonString-string index 87b70b09a..fd3c84d34 100644 --- a/internal/languages/python/detectors/.snapshots/TestPythonString-string +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string @@ -1,6 +1,6 @@ type: module id: 0 -range: 1:1 - 12:31 +range: 1:1 - 13:1 dataflow_sources: - 1 children: diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal index 0da162673..b033538ee 100644 --- a/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal @@ -1,6 +1,6 @@ type: module id: 0 -range: 1:1 - 7:7 +range: 1:1 - 11:1 dataflow_sources: - 1 - 5 @@ -9,6 +9,8 @@ dataflow_sources: - 21 - 25 - 33 + - 37 + - 44 children: - type: expression_statement id: 1 @@ -182,6 +184,81 @@ children: - type: '"""' id: 36 range: 7:6 - 7:7 + - type: expression_statement + id: 37 + range: 9:1 - 9:15 + dataflow_sources: + - 38 + children: + - type: boolean_operator + id: 38 + range: 9:1 - 9:15 + alias_of: + - 39 + - 41 + children: + - type: "false" + id: 39 + range: 9:1 - 9:6 + content: "False" + - type: '"or"' + id: 40 + range: 9:7 - 9:9 + - type: string + id: 41 + range: 9:10 - 9:15 + dataflow_sources: + - 42 + - 43 + children: + - type: '"""' + id: 42 + range: 9:10 - 9:11 + - type: '"""' + id: 43 + range: 9:14 - 9:15 + - type: expression_statement + id: 44 + range: 10:1 - 10:15 + dataflow_sources: + - 45 + children: + - type: boolean_operator + id: 45 + range: 10:1 - 10:15 + alias_of: + - 46 + - 50 + children: + - type: string + id: 46 + range: 10:1 - 10:6 + dataflow_sources: + - 47 + - 48 + children: + - type: '"""' + id: 47 + range: 10:1 - 10:2 + - type: '"""' + id: 48 + range: 10:5 - 10:6 + - type: '"or"' + id: 49 + range: 10:7 - 10:9 + - type: string + id: 50 + range: 10:10 - 10:15 + dataflow_sources: + - 51 + - 52 + children: + - type: '"""' + id: 51 + range: 10:10 - 10:11 + - type: '"""' + id: 52 + range: 10:14 - 10:15 - node: 2 content: '''a''' @@ -218,6 +295,21 @@ children: data: value: a\n isliteral: true +- node: 38 + content: False or "foo" + data: + value: foo + isliteral: true +- node: 45 + content: '"hey" or "foo"' + data: + value: hey + isliteral: true +- node: 45 + content: '"hey" or "foo"' + data: + value: foo + isliteral: true - node: 11 content: '"a"' data: @@ -228,4 +320,19 @@ children: data: value: b isliteral: true +- node: 41 + content: '"foo"' + data: + value: foo + isliteral: true +- node: 46 + content: '"hey"' + data: + value: hey + isliteral: true +- node: 50 + content: '"foo"' + data: + value: foo + isliteral: true diff --git a/internal/languages/python/detectors/string/string.go b/internal/languages/python/detectors/string/string.go index 5cbe74738..29aba8217 100644 --- a/internal/languages/python/detectors/string/string.go +++ b/internal/languages/python/detectors/string/string.go @@ -38,6 +38,20 @@ func (detector *stringDetector) DetectAt( if node.Children()[1].Content() == "+" { return common.ConcatenateChildStrings(node, detectorContext) } + case "boolean_operator": + if node.Children()[1].Content() == "or" { + leftData, err := common.GetStringData(node.ChildByFieldName("left"), detectorContext) + if err != nil { + return nil, err + } + + rightData, err := common.GetStringData(node.ChildByFieldName("right"), detectorContext) + if err != nil { + return nil, err + } + + return append(leftData, rightData...), nil + } case "augmented_assignment": if node.Children()[1].Content() == "+=" { return common.ConcatenateAssignEquals(node, detectorContext) diff --git a/internal/languages/python/detectors/testdata/string.py b/internal/languages/python/detectors/testdata/string.py index a6539d00a..5075a1fea 100644 --- a/internal/languages/python/detectors/testdata/string.py +++ b/internal/languages/python/detectors/testdata/string.py @@ -9,4 +9,4 @@ def main(args): s2 += args[0] s2 += " there" - s3 = f"foo '{s2}' bar" \ No newline at end of file + s3 = f"foo '{s2}' bar" diff --git a/internal/languages/python/detectors/testdata/string_literal.py b/internal/languages/python/detectors/testdata/string_literal.py index e9e5eed0d..32f26f184 100644 --- a/internal/languages/python/detectors/testdata/string_literal.py +++ b/internal/languages/python/detectors/testdata/string_literal.py @@ -4,4 +4,7 @@ """a""" '''a''' f'{foo} a' -r'a\n' \ No newline at end of file +r'a\n' + +False or "foo" +"hey" or "foo" diff --git a/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_literal b/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_literal index d6015d185..984d09928 100644 --- a/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_literal +++ b/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_literal @@ -1,11 +1,14 @@ type: program id: 0 -range: 1:1 - 7:1 +range: 1:1 - 11:1 dataflow_sources: - 1 - 5 - 15 - 22 + - 30 + - 37 + - 44 children: - type: string id: 1 @@ -144,6 +147,119 @@ children: - type: '"""' id: 29 range: 6:8 - 6:9 + - type: binary + id: 30 + range: 8:1 - 8:15 + dataflow_sources: + - 31 + - 32 + - 33 + children: + - type: "false" + id: 31 + range: 8:1 - 8:6 + content: "false" + - type: '"||"' + id: 32 + range: 8:7 - 8:9 + - type: string + id: 33 + range: 8:10 - 8:15 + dataflow_sources: + - 34 + - 35 + - 36 + children: + - type: '"""' + id: 34 + range: 8:10 - 8:11 + - type: string_content + id: 35 + range: 8:11 - 8:14 + content: foo + - type: '"""' + id: 36 + range: 8:14 - 8:15 + - type: binary + id: 37 + range: 9:1 - 9:15 + dataflow_sources: + - 38 + - 39 + - 40 + children: + - type: "false" + id: 38 + range: 9:1 - 9:6 + content: "false" + - type: '"or"' + id: 39 + range: 9:7 - 9:9 + - type: string + id: 40 + range: 9:10 - 9:15 + dataflow_sources: + - 41 + - 42 + - 43 + children: + - type: '"""' + id: 41 + range: 9:10 - 9:11 + - type: string_content + id: 42 + range: 9:11 - 9:14 + content: foo + - type: '"""' + id: 43 + range: 9:14 - 9:15 + - type: binary + id: 44 + range: 10:1 - 10:15 + dataflow_sources: + - 45 + - 49 + - 50 + children: + - type: string + id: 45 + range: 10:1 - 10:6 + dataflow_sources: + - 46 + - 47 + - 48 + children: + - type: '"""' + id: 46 + range: 10:1 - 10:2 + - type: string_content + id: 47 + range: 10:2 - 10:5 + content: hey + - type: '"""' + id: 48 + range: 10:5 - 10:6 + - type: '"||"' + id: 49 + range: 10:7 - 10:9 + - type: string + id: 50 + range: 10:10 - 10:15 + dataflow_sources: + - 51 + - 52 + - 53 + children: + - type: '"""' + id: 51 + range: 10:10 - 10:11 + - type: string_content + id: 52 + range: 10:11 - 10:14 + content: foo + - type: '"""' + id: 53 + range: 10:14 - 10:15 - node: 1 content: '"ab"' @@ -160,6 +276,26 @@ children: data: value: a b isliteral: true +- node: 30 + content: false || "foo" + data: + value: foo + isliteral: true +- node: 37 + content: false or "foo" + data: + value: foo + isliteral: true +- node: 44 + content: '"hey" || "foo"' + data: + value: hey + isliteral: true +- node: 44 + content: '"hey" || "foo"' + data: + value: foo + isliteral: true - node: 3 content: ab data: @@ -190,6 +326,26 @@ children: data: value: ' b' isliteral: true +- node: 33 + content: '"foo"' + data: + value: foo + isliteral: true +- node: 40 + content: '"foo"' + data: + value: foo + isliteral: true +- node: 45 + content: '"hey"' + data: + value: hey + isliteral: true +- node: 50 + content: '"foo"' + data: + value: foo + isliteral: true - node: 8 content: a data: @@ -205,4 +361,24 @@ children: data: value: a isliteral: true +- node: 35 + content: foo + data: + value: foo + isliteral: true +- node: 42 + content: foo + data: + value: foo + isliteral: true +- node: 47 + content: hey + data: + value: hey + isliteral: true +- node: 52 + content: foo + data: + value: foo + isliteral: true diff --git a/internal/languages/ruby/detectors/string/string.go b/internal/languages/ruby/detectors/string/string.go index 972d62e34..85c6e1db4 100644 --- a/internal/languages/ruby/detectors/string/string.go +++ b/internal/languages/ruby/detectors/string/string.go @@ -34,8 +34,21 @@ func (detector *stringDetector) DetectAt( case "interpolation", "string": return common.ConcatenateChildStrings(node, detectorContext) case "binary": - if node.Children()[1].Content() == "+" { + switch node.Children()[1].Content() { + case "+": return common.ConcatenateChildStrings(node, detectorContext) + case "||", "or": + leftData, err := common.GetStringData(node.ChildByFieldName("left"), detectorContext) + if err != nil { + return nil, err + } + + rightData, err := common.GetStringData(node.ChildByFieldName("right"), detectorContext) + if err != nil { + return nil, err + } + + return append(leftData, rightData...), nil } case "operator_assignment": if node.Children()[1].Content() == "+=" { diff --git a/internal/languages/ruby/detectors/testdata/string_literal.rb b/internal/languages/ruby/detectors/testdata/string_literal.rb index 8f440c4a6..c24f48001 100644 --- a/internal/languages/ruby/detectors/testdata/string_literal.rb +++ b/internal/languages/ruby/detectors/testdata/string_literal.rb @@ -4,3 +4,7 @@ x = "a" "#{x} b" + +false || "foo" +false or "foo" +"hey" || "foo" diff --git a/internal/scanner/detectors/common/string.go b/internal/scanner/detectors/common/string.go index 6bdd57a78..420a2b02c 100644 --- a/internal/scanner/detectors/common/string.go +++ b/internal/scanner/detectors/common/string.go @@ -15,6 +15,20 @@ type String struct { IsLiteral bool } +func GetStringData(node *tree.Node, detectorContext types.Context) ([]interface{}, error) { + detections, err := detectorContext.Scan(node, ruleset.BuiltinStringRule, traversalstrategy.Cursor) + if err != nil { + return nil, err + } + + result := make([]interface{}, len(detections)) + for i, detection := range detections { + result[i] = detection.Data + } + + return result, nil +} + func GetStringValue(node *tree.Node, detectorContext types.Context) (string, bool, error) { detections, err := detectorContext.Scan(node, ruleset.BuiltinStringRule, traversalstrategy.Cursor) if err != nil {