From 09a4984bd03cab9d9fd7aae859b35533e618a83c Mon Sep 17 00:00:00 2001 From: David Roe Date: Tue, 3 Oct 2023 14:02:39 +0100 Subject: [PATCH 1/2] fix(java): add missing pattern container types --- ...estPattern-catch_types_is_a_container_type | 21 ++++++++++++++ ...tPattern-method_params_is_a_container_type | 18 ++++++++++++ internal/languages/java/java_test.go | 29 +++++++++++++++++++ internal/languages/java/pattern/pattern.go | 22 +++++++++----- 4 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 internal/languages/java/.snapshots/TestPattern-catch_types_is_a_container_type create mode 100644 internal/languages/java/.snapshots/TestPattern-method_params_is_a_container_type diff --git a/internal/languages/java/.snapshots/TestPattern-catch_types_is_a_container_type b/internal/languages/java/.snapshots/TestPattern-catch_types_is_a_container_type new file mode 100644 index 000000000..93c80aa84 --- /dev/null +++ b/internal/languages/java/.snapshots/TestPattern-catch_types_is_a_container_type @@ -0,0 +1,21 @@ +(*builder.Result)({ + Query: (string) (len=328) "([(class_declaration . (_) . [(class_body [(method_declaration . [ (void_type )] @param1 . [ (identifier )] @param2 . [ (formal_parameters )] . [(block [(try_statement [ (block )] [(catch_clause . [(catch_formal_parameter . [(catch_type (_) @match)] . [ (identifier )] @param3 .)] . [ (block )] .)])] )] .)] )] .)] @root)", + VariableNames: ([]string) (len=1) { + (string) (len=1) "_" + }, + ParamToVariable: (map[string]string) { + }, + EqualParams: ([][]string) , + ParamToContent: (map[string]map[string]string) (len=3) { + (string) (len=6) "param1": (map[string]string) (len=1) { + (string) (len=9) "void_type": (string) (len=4) "void" + }, + (string) (len=6) "param2": (map[string]string) (len=1) { + (string) (len=10) "identifier": (string) (len=4) "main" + }, + (string) (len=6) "param3": (map[string]string) (len=1) { + (string) (len=10) "identifier": (string) (len=1) "e" + } + }, + RootVariable: (*language.PatternVariable)() +}) diff --git a/internal/languages/java/.snapshots/TestPattern-method_params_is_a_container_type b/internal/languages/java/.snapshots/TestPattern-method_params_is_a_container_type new file mode 100644 index 000000000..647de0550 --- /dev/null +++ b/internal/languages/java/.snapshots/TestPattern-method_params_is_a_container_type @@ -0,0 +1,18 @@ +(*builder.Result)({ + Query: (string) (len=190) "([(class_declaration . (_) . [(class_body [(method_declaration . [ (void_type )] @param1 . [ (identifier )] @param2 . [(formal_parameters . (_) @match . )] . [ (block )] .)] )] .)] @root)", + VariableNames: ([]string) (len=1) { + (string) (len=1) "_" + }, + ParamToVariable: (map[string]string) { + }, + EqualParams: ([][]string) , + ParamToContent: (map[string]map[string]string) (len=2) { + (string) (len=6) "param1": (map[string]string) (len=1) { + (string) (len=9) "void_type": (string) (len=4) "void" + }, + (string) (len=6) "param2": (map[string]string) (len=1) { + (string) (len=10) "identifier": (string) (len=4) "main" + } + }, + RootVariable: (*language.PatternVariable)() +}) diff --git a/internal/languages/java/java_test.go b/internal/languages/java/java_test.go index d3d8ee6fe..15ea1bed2 100644 --- a/internal/languages/java/java_test.go +++ b/internal/languages/java/java_test.go @@ -4,7 +4,10 @@ import ( _ "embed" "testing" + "github.com/bearer/bearer/internal/languages/java" "github.com/bearer/bearer/internal/languages/testhelper" + patternquerybuilder "github.com/bearer/bearer/internal/scanner/detectors/customrule/patternquery/builder" + "github.com/bradleyjkemp/cupaloy" ) //go:embed testdata/logger.yml @@ -20,3 +23,29 @@ func TestFlow(t *testing.T) { func TestScope(t *testing.T) { testhelper.GetRunner(t, scopeRule, "Java").RunTest(t, "./testdata/scope", ".snapshots/") } + +func TestPattern(t *testing.T) { + for _, test := range []struct{ name, pattern string }{ + {"method params is a container type", ` + class $<_> { + void main($$<_>) {} + } + `}, + {"catch types is a container type", ` + class $<_> { + void main() { + try {} catch ($$<_> e) {} + } + } + `}, + } { + t.Run(test.name, func(tt *testing.T) { + result, err := patternquerybuilder.Build(java.Get(), test.pattern, "") + if err != nil { + tt.Fatalf("failed to build pattern: %s", err) + } + + cupaloy.SnapshotT(tt, result) + }) + } +} diff --git a/internal/languages/java/pattern/pattern.go b/internal/languages/java/pattern/pattern.go index cd85bf7db..ecabb591d 100644 --- a/internal/languages/java/pattern/pattern.go +++ b/internal/languages/java/pattern/pattern.go @@ -13,12 +13,14 @@ import ( var ( // $ or $ or $ - patternQueryVariableRegex = regexp.MustCompile(`\$<(?P[^>:!\.]+)(?::(?P[^>]+))?>`) - matchNodeRegex = regexp.MustCompile(`\$`) - ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) + queryVariableRegex = regexp.MustCompile(`\$<(?P[^>:!\.]+)(?::(?P[^>]+))?>`) + matchNodeRegex = regexp.MustCompile(`\$`) + ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) + + matchNodeContainerTypes = []string{"catch_formal_parameter", "catch_type", "formal_parameters"} // todo: see if it is ok to replace typescripts `member_expression` with javas `field_access` and `method_invocation` - allowedPatternQueryTypes = []string{"identifier", "type_identifier", "_", "field_access", "method_invocation", "string_literal"} + allowedQueryTypes = []string{"identifier", "type_identifier", "_", "field_access", "method_invocation", "string_literal"} ) type Pattern struct { @@ -26,20 +28,20 @@ type Pattern struct { } func (*Pattern) ExtractVariables(input string) (string, []language.PatternVariable, error) { - nameIndex := patternQueryVariableRegex.SubexpIndex("name") - typesIndex := patternQueryVariableRegex.SubexpIndex("types") + nameIndex := queryVariableRegex.SubexpIndex("name") + typesIndex := queryVariableRegex.SubexpIndex("types") i := 0 var params []language.PatternVariable - replaced, err := regex.ReplaceAllWithSubmatches(patternQueryVariableRegex, input, func(submatches []string) (string, error) { + replaced, err := regex.ReplaceAllWithSubmatches(queryVariableRegex, input, func(submatches []string) (string, error) { nodeTypes := strings.Split(submatches[typesIndex], "|") if nodeTypes[0] == "" { nodeTypes = []string{"_"} } for _, nodeType := range nodeTypes { - if !slices.Contains(allowedPatternQueryTypes, nodeType) { + if !slices.Contains(allowedQueryTypes, nodeType) { return "", fmt.Errorf("invalid node type '%s' in pattern query", nodeType) } } @@ -128,3 +130,7 @@ func (*Pattern) NodeTypes(node *tree.Node) []string { return []string{node.Type()} } + +func (*Pattern) ContainerTypes() []string { + return matchNodeContainerTypes +} From b1ce25ec675f30d2825b9de270cd98828203a910 Mon Sep 17 00:00:00 2001 From: David Roe Date: Tue, 3 Oct 2023 15:51:55 +0100 Subject: [PATCH 2/2] fix(javascript): match strings correctly --- ...tJavascriptStringDetector-string_assign_eq | 15 ++++ ...estJavascriptStringDetector-string_literal | 85 ++++++++++++++++++- ...avascriptStringDetector-string_non_literal | 5 ++ .../javascript/detectors/string/string.go | 18 +++- .../detectors/testdata/string_literal.js | 2 + .../languages/javascript/pattern/pattern.go | 4 - 6 files changed, 122 insertions(+), 7 deletions(-) diff --git a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq index 46d3f41ba..c4a0732cf 100644 --- a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq +++ b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq @@ -211,4 +211,19 @@ children: data: value: c isliteral: true +- node: 8 + content: a + data: + value: a + isliteral: true +- node: 16 + content: b + data: + value: b + isliteral: true +- node: 35 + content: c + data: + value: c + isliteral: true diff --git a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal index 9bd77fcf8..09d0d623b 100644 --- a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal +++ b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_literal @@ -1,11 +1,12 @@ type: program id: 0 -range: 1:1 - 7:1 +range: 1:1 - 9:1 dataflow_sources: - 1 - 6 - 17 - 25 + - 33 children: - type: expression_statement id: 1 @@ -155,6 +156,43 @@ children: - type: '"`"' id: 32 range: 6:8 - 6:9 + - type: expression_statement + id: 33 + range: 8:1 - 8:19 + children: + - type: string + id: 34 + range: 8:1 - 8:19 + dataflow_sources: + - 35 + - 36 + - 37 + - 38 + - 39 + - 40 + children: + - type: '"""' + id: 35 + range: 8:1 - 8:2 + - type: string_fragment + id: 36 + range: 8:2 - 8:7 + content: hello + - type: escape_sequence + id: 37 + range: 8:7 - 8:11 + content: \x20 + - type: string_fragment + id: 38 + range: 8:11 - 8:16 + content: world + - type: escape_sequence + id: 39 + range: 8:16 - 8:18 + content: \\ + - type: '"""' + id: 40 + range: 8:18 - 8:19 - node: 2 content: '"ab"' @@ -171,6 +209,16 @@ children: data: value: a b isliteral: true +- node: 34 + content: '"hello\x20world\\"' + data: + value: hello world\ + isliteral: true +- node: 4 + content: ab + data: + value: ab + isliteral: true - node: 8 content: '"a"' data: @@ -186,4 +234,39 @@ children: data: value: a isliteral: true +- node: 36 + content: hello + data: + value: hello + isliteral: true +- node: 37 + content: \x20 + data: + value: ' ' + isliteral: true +- node: 38 + content: world + data: + value: world + isliteral: true +- node: 39 + content: \\ + data: + value: \ + isliteral: true +- node: 10 + content: a + data: + value: a + isliteral: true +- node: 15 + content: b + data: + value: b + isliteral: true +- node: 23 + content: a + data: + value: a + isliteral: true diff --git a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_non_literal b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_non_literal index 99ec52f20..b61a6319b 100644 --- a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_non_literal +++ b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_non_literal @@ -94,4 +94,9 @@ children: data: value: a isliteral: true +- node: 5 + content: a + data: + value: a + isliteral: true diff --git a/internal/languages/javascript/detectors/string/string.go b/internal/languages/javascript/detectors/string/string.go index 3d4441458..920c74a11 100644 --- a/internal/languages/javascript/detectors/string/string.go +++ b/internal/languages/javascript/detectors/string/string.go @@ -1,10 +1,12 @@ package string import ( + "fmt" + "strconv" + "github.com/bearer/bearer/internal/scanner/ast/query" "github.com/bearer/bearer/internal/scanner/ast/tree" "github.com/bearer/bearer/internal/scanner/ruleset" - "github.com/bearer/bearer/internal/util/stringutil" "github.com/bearer/bearer/internal/scanner/detectors/common" "github.com/bearer/bearer/internal/scanner/detectors/types" @@ -28,8 +30,20 @@ func (detector *stringDetector) DetectAt( ) ([]interface{}, error) { switch node.Type() { case "string": + return common.ConcatenateChildStrings(node, detectorContext) + case "string_fragment": + return []interface{}{common.String{ + Value: node.Content(), + IsLiteral: true, + }}, nil + case "escape_sequence": + value, err := strconv.Unquote(fmt.Sprintf(`"%s"`, node.Content())) + if err != nil { + return nil, fmt.Errorf("failed to decode escape sequence: %w", err) + } + return []interface{}{common.String{ - Value: stringutil.StripQuotes(node.Content()), + Value: value, IsLiteral: true, }}, nil case "template_string": diff --git a/internal/languages/javascript/detectors/testdata/string_literal.js b/internal/languages/javascript/detectors/testdata/string_literal.js index d264d6ad9..fc4e54d57 100644 --- a/internal/languages/javascript/detectors/testdata/string_literal.js +++ b/internal/languages/javascript/detectors/testdata/string_literal.js @@ -4,3 +4,5 @@ x = "a" `${x} b` + +"hello\x20world\\" diff --git a/internal/languages/javascript/pattern/pattern.go b/internal/languages/javascript/pattern/pattern.go index f6c723858..2ce0d8154 100644 --- a/internal/languages/javascript/pattern/pattern.go +++ b/internal/languages/javascript/pattern/pattern.go @@ -26,10 +26,6 @@ type Pattern struct { language.PatternBase } -func (*Pattern) IsLeaf(node *tree.Node) bool { - return node.Type() == "string" -} - func (*Pattern) ExtractVariables(input string) (string, []language.PatternVariable, error) { nameIndex := patternQueryVariableRegex.SubexpIndex("name") typesIndex := patternQueryVariableRegex.SubexpIndex("types")