From d1b835799f85ba4fcdce2b2c980df03b48b056c5 Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 23 May 2024 12:21:14 +0100 Subject: [PATCH] fix(python): handle string line continuation (#1610) --- .../TestPythonString-string_literal | 35 ++++++++++++++++++- .../python/detectors/string/string.go | 14 +++++--- .../detectors/testdata/string_literal.py | 3 ++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal index c10474f24..6996b96ac 100644 --- a/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal +++ b/internal/languages/python/detectors/.snapshots/TestPythonString-string_literal @@ -1,6 +1,6 @@ type: module id: 0 -range: 1:1 - 12:1 +range: 1:1 - 14:3 dataflow_sources: - 1 - 5 @@ -12,6 +12,7 @@ dataflow_sources: - 37 - 42 - 49 + - 58 children: - type: expression_statement id: 1 @@ -284,6 +285,31 @@ children: - type: '"""' id: 57 range: 11:14 - 11:15 + - type: expression_statement + id: 58 + range: 13:1 - 14:3 + dataflow_sources: + - 59 + children: + - type: string + id: 59 + range: 13:1 - 14:3 + dataflow_sources: + - 60 + - 61 + - 62 + children: + - type: '"""' + id: 60 + range: 13:1 - 13:2 + - type: escape_sequence + id: 61 + range: 13:3 - 14:1 + content: | + \ + - type: '"""' + id: 62 + range: 14:2 - 14:3 - node: 2 content: '''a''' @@ -342,6 +368,13 @@ children: data: value: foo isliteral: true +- node: 59 + content: |- + "a\ + b" + data: + value: ab + isliteral: true - node: 11 content: '"a"' data: diff --git a/internal/languages/python/detectors/string/string.go b/internal/languages/python/detectors/string/string.go index 95b90425d..b3b65b896 100644 --- a/internal/languages/python/detectors/string/string.go +++ b/internal/languages/python/detectors/string/string.go @@ -77,12 +77,18 @@ func handleTemplateString(node *tree.Node, detectorContext types.Context) ([]int switch { case child.Type() == "escape_sequence": - value, err := stringutil.Unescape(child.Content()) - if err != nil { - return fmt.Errorf("failed to decode escape sequence: %w", err) + // tree sitter parser doesn't handle line continuation inside a string + if child.Content() == "\\\n" || child.Content() == "\\\r\n" { + childValue = "" + } else { + value, err := stringutil.Unescape(child.Content()) + if err != nil { + return fmt.Errorf("failed to decode escape sequence '%s': %w", child.Content(), err) + } + + childValue = value } - childValue = value childIsLiteral = true case len(namedChildren) == 0: childValue = "" diff --git a/internal/languages/python/detectors/testdata/string_literal.py b/internal/languages/python/detectors/testdata/string_literal.py index 058723bc2..1a4cc4522 100644 --- a/internal/languages/python/detectors/testdata/string_literal.py +++ b/internal/languages/python/detectors/testdata/string_literal.py @@ -9,3 +9,6 @@ False or "foo" "hey" or "foo" + +"a\ +b" \ No newline at end of file