From ce2e4efee42ba4bc5f91f551da33689a35dce279 Mon Sep 17 00:00:00 2001 From: David Roe Date: Mon, 25 Sep 2023 12:27:16 +0100 Subject: [PATCH 01/13] fix: make associative array elements unanchored --- internal/languages/php/pattern/pattern.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/internal/languages/php/pattern/pattern.go b/internal/languages/php/pattern/pattern.go index e06a50998..20bdac254 100644 --- a/internal/languages/php/pattern/pattern.go +++ b/internal/languages/php/pattern/pattern.go @@ -104,12 +104,17 @@ func (*Pattern) IsAnchored(node *tree.Node) (bool, bool) { return true, true } - // Class body class_body - // function block - // lambda () -> {} block - // try {} catch () {} - // unAnchored := []string{"class_body", "block", "try_statement", "catch_type", "resource_specification"} - unAnchored := []string{""} + // Associative array elements are unanchored + // eg. array("foo" => 42) + if parent.Type() == "array_creation_expression" && + node.Type() == "array_element_initializer" && + len(node.NamedChildren()) == 2 { + return false, false + } + + // Class body declaration_list + // function/block compound_statement + unAnchored := []string{"declaration_list", "compound_statement"} isUnanchored := !slices.Contains(unAnchored, parent.Type()) return isUnanchored, isUnanchored From 51f66ba769d84af7bc688ebed2d951dfbfa261e2 Mon Sep 17 00:00:00 2001 From: David Roe Date: Mon, 25 Sep 2023 12:27:42 +0100 Subject: [PATCH 02/13] fix: improve variable shape error message --- internal/scanner/variableshape/variableshape.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scanner/variableshape/variableshape.go b/internal/scanner/variableshape/variableshape.go index c8eb722dc..d47cfba6f 100644 --- a/internal/scanner/variableshape/variableshape.go +++ b/internal/scanner/variableshape/variableshape.go @@ -143,7 +143,7 @@ func NewSet(language language.Language, ruleSet *ruleset.Set) (*Set, error) { for _, rule := range ruleSet.Rules() { if err := set.add(language, rule); err != nil { - return nil, err + return nil, fmt.Errorf("error adding %s: %w", rule.ID(), err) } } From 11161efa15b4cd69e61c712e0003989769709048 Mon Sep 17 00:00:00 2001 From: David Roe Date: Mon, 25 Sep 2023 13:16:10 +0100 Subject: [PATCH 03/13] fix: handle both kinds of strings in patterns --- internal/languages/php/pattern/pattern.go | 36 ++++++++++++++++++- .../patternquery/builder/builder.go | 2 +- .../languagescanner/languagescanner.go | 2 +- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/internal/languages/php/pattern/pattern.go b/internal/languages/php/pattern/pattern.go index 20bdac254..50e7b2d41 100644 --- a/internal/languages/php/pattern/pattern.go +++ b/internal/languages/php/pattern/pattern.go @@ -87,8 +87,21 @@ func (*Pattern) FindUnanchoredPoints(input []byte) [][]int { return ellipsisRegex.FindAllIndex(input, -1) } +func (*Pattern) IsLeaf(node *tree.Node) bool { + // Encapsed string literal + if node.Type() == "encapsed_string" { + namedChildren := node.NamedChildren() + if len(namedChildren) == 1 && namedChildren[0].Type() == "string" { + return true + } + } + + return false +} + func (*Pattern) LeafContentTypes() []string { return []string{ + "encapsed_string", "string", "name", "integer", @@ -124,6 +137,27 @@ func (*Pattern) IsRoot(node *tree.Node) bool { return !slices.Contains([]string{"expression_statement", "php_tag", "program"}, node.Type()) && !node.IsMissing() } -func (*Pattern) NodeTypes(node *tree.Node) []string { +func (patternLanguage *Pattern) NodeTypes(node *tree.Node) []string { + parent := node.Parent() + if parent == nil { + return []string{node.Type()} + } + + if (node.Type() == "string" && parent.Type() != "encapsed_string") || + (node.Type() == "encapsed_string" && patternLanguage.IsLeaf(node)) { + return []string{"encapsed_string", "string"} + } + return []string{node.Type()} } + +func (*Pattern) TranslateContent(fromNodeType, toNodeType, content string) string { + if fromNodeType == "string" && toNodeType == "encapsed_string" { + return fmt.Sprintf(`"%s"`, content[1:len(content)-1]) + } + if fromNodeType == "encapsed_string" && toNodeType == "string" { + return fmt.Sprintf("'%s'", content[1:len(content)-1]) + } + + return content +} diff --git a/internal/scanner/detectors/customrule/patternquery/builder/builder.go b/internal/scanner/detectors/customrule/patternquery/builder/builder.go index 102e6eba4..92f7cc1b4 100644 --- a/internal/scanner/detectors/customrule/patternquery/builder/builder.go +++ b/internal/scanner/detectors/customrule/patternquery/builder/builder.go @@ -257,7 +257,7 @@ func (builder *builder) compileNode(node *asttree.Node, isRoot bool, isLastChild builder.compileVariableNode(variable) } else if !node.IsNamed() { builder.compileAnonymousNode(node) - } else if len(node.NamedChildren()) == 0 { + } else if len(node.NamedChildren()) == 0 || builder.patternLanguage.IsLeaf(node) { builder.compileLeafNode(node) } else if err := builder.compileNodeWithChildren(node); err != nil { return err diff --git a/internal/scanner/languagescanner/languagescanner.go b/internal/scanner/languagescanner/languagescanner.go index 574d36776..3f41cce99 100644 --- a/internal/scanner/languagescanner/languagescanner.go +++ b/internal/scanner/languagescanner/languagescanner.go @@ -93,7 +93,7 @@ func (scanner *Scanner) Scan( } if log.Trace().Enabled() { - log.Trace().Msgf("tree (%d nodes):\n%s", tree.NodeCount(), tree.RootNode().SitterNode().String()) + log.Trace().Msgf("tree (%d nodes):\n%s", tree.NodeCount(), tree.RootNode().Dump()) } sharedCache := cache.NewShared(scanner.ruleSet.Rules()) From e5549dda29ad9e521cada0ebe6e9f9098ab0441b Mon Sep 17 00:00:00 2001 From: David Roe Date: Mon, 25 Sep 2023 13:29:32 +0100 Subject: [PATCH 04/13] fix: lookup variables in array element initializers --- internal/languages/php/analyzer/analyzer.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/languages/php/analyzer/analyzer.go b/internal/languages/php/analyzer/analyzer.go index 13eeb2e68..2beb9f41a 100644 --- a/internal/languages/php/analyzer/analyzer.go +++ b/internal/languages/php/analyzer/analyzer.go @@ -50,7 +50,12 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.analyzeGenericConstruct(node, visitChildren) case "switch_label": return visitChildren() - case "binary_expression", "unary_op_expression", "argument", "encapsed_string", "sequence_expression": + case "binary_expression", + "unary_op_expression", + "argument", + "encapsed_string", + "sequence_expression", + "array_element_initializer": return analyzer.analyzeGenericOperation(node, visitChildren) case "while_statement", "do_statement", "if_statement", "expression_statement", "compound_statement": // statements don't have results return visitChildren() From 4b96b3cb357490a74086e3649104960b8235aff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Fabianski?= Date: Wed, 27 Sep 2023 11:53:53 +0200 Subject: [PATCH 05/13] fix: adjust input offsets when fixing up input --- internal/languages/php/analyzer/analyzer.go | 40 +++-- internal/languages/php/pattern/pattern.go | 49 ++++++- .../patternquery/builder/builder.go | 72 ++++----- .../builder/bytereplacer/bytereplacer.go | 91 ++++++++++++ .../bytereplacer/bytereplacer_suite_test.go | 13 ++ .../builder/bytereplacer/bytereplacer_test.go | 138 ++++++++++++++++++ 6 files changed, 339 insertions(+), 64 deletions(-) create mode 100644 internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer.go create mode 100644 internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_suite_test.go create mode 100644 internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go diff --git a/internal/languages/php/analyzer/analyzer.go b/internal/languages/php/analyzer/analyzer.go index 2beb9f41a..b5287d046 100644 --- a/internal/languages/php/analyzer/analyzer.go +++ b/internal/languages/php/analyzer/analyzer.go @@ -1,6 +1,7 @@ package analyzer import ( + "github.com/rs/zerolog/log" sitter "github.com/smacker/go-tree-sitter" "github.com/bearer/bearer/internal/scanner/ast/tree" @@ -21,15 +22,12 @@ func New(builder *tree.Builder) language.Analyzer { func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) error { switch node.Type() { - case "declaration_list", - "class_declaration", - "method_declaration", - "anonymous_function_creation_expression", - "for_statement", - "block": + case "declaration_list", "class_declaration", "anonymous_function_creation_expression", "for_statement", "block", "method_declaration": return analyzer.withScope(language.NewScope(analyzer.scope), func() error { return visitChildren() }) + // case "formal_parameters": + // return analyzer.analyzeParameterList(node, visitChildren) case "augmented_assignment_expression": return analyzer.analyzeAugmentedAssignment(node, visitChildren) case "assignment_expression": @@ -50,12 +48,7 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.analyzeGenericConstruct(node, visitChildren) case "switch_label": return visitChildren() - case "binary_expression", - "unary_op_expression", - "argument", - "encapsed_string", - "sequence_expression", - "array_element_initializer": + case "binary_expression", "unary_op_expression", "argument", "encapsed_string", "sequence_expression", "array_element_initializer", "formal_parameters": return analyzer.analyzeGenericOperation(node, visitChildren) case "while_statement", "do_statement", "if_statement", "expression_statement", "compound_statement": // statements don't have results return visitChildren() @@ -103,6 +96,26 @@ func (analyzer *analyzer) analyzeAugmentedAssignment(node *sitter.Node, visitChi return err } +// // I think there is still an issue with the linking between the parameter for functions??? +// // all parameter definitions for a method +// // function m($a, int $b) +// func (analyzer *analyzer) analyzeParameterList(node *sitter.Node, visitChildren func() error) error { +// children := analyzer.builder.ChildrenFor(node) +// analyzer.builder.Dataflow(node, children...) + +// for _, child := range children { +// log.Error().Msgf("analyzerParameterList[%s]", child.Type()) +// if child.Type() == "simple_parameter" { +// name := child.ChildByFieldName("name") +// log.Error().Msgf("analyzerParameterList -> %s", analyzer.builder.ContentFor(name)) +// analyzer.builder.Alias(node, name) +// analyzer.scope.Declare(analyzer.builder.ContentFor(name), name) +// } +// } + +// return visitChildren() +// } + func (analyzer *analyzer) analyzeParentheses(node *sitter.Node, visitChildren func() error) error { analyzer.builder.Alias(node, node.NamedChild(0)) analyzer.lookupVariable(node.NamedChild(0)) @@ -155,8 +168,10 @@ func (analyzer *analyzer) analyzeFieldAccess(node *sitter.Node, visitChildren fu // fn($x = 42) => $x; // fn($x, ...$rest) => $rest; func (analyzer *analyzer) analyzeParameter(node *sitter.Node, visitChildren func() error) error { + log.Error().Msgf("analyzeParameter[%s] %s", node.Type(), analyzer.builder.ContentFor(node)) name := node.ChildByFieldName("name") analyzer.builder.Alias(node, name) + analyzer.scope.Declare(analyzer.builder.ContentFor(name), name) return visitChildren() } @@ -202,6 +217,7 @@ func (analyzer *analyzer) lookupVariable(node *sitter.Node) { } if pointsToNode := analyzer.scope.Lookup(analyzer.builder.ContentFor(node)); pointsToNode != nil { + log.Error().Msgf("lookupVariable[%s] %s", node.Type(), analyzer.builder.ContentFor(node)) analyzer.builder.Alias(node, pointsToNode) } } diff --git a/internal/languages/php/pattern/pattern.go b/internal/languages/php/pattern/pattern.go index 50e7b2d41..98c8853d8 100644 --- a/internal/languages/php/pattern/pattern.go +++ b/internal/languages/php/pattern/pattern.go @@ -13,9 +13,12 @@ import ( var ( // $ or $ or $ - patternQueryVariableRegex = regexp.MustCompile(`\$<(?P[^>:!\.]+)(?::(?P[^>]+))?>`) - matchNodeRegex = regexp.MustCompile(`\$`) - ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) + patternQueryVariableRegex = regexp.MustCompile(`\$<(?P[^>:!\.]+)(?::(?P[^>]+))?>`) + matchNodeRegex = regexp.MustCompile(`\$`) + ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) + unanchoredPatternNodeTypes = []string{} + patternMatchNodeContainerTypes = []string{"formal_parameters"} + // unanchoredPatternNodeTypes = []string{"simple_parameter"} allowedPatternQueryTypes = []string{"_"} ) @@ -36,6 +39,14 @@ func (*Pattern) FixupMissing(node *tree.Node) string { return ";" } +func (*Pattern) FixupVariableDummyValue(input []byte, node *tree.Node, dummyValue string) string { + if slices.Contains([]string{"named_type"}, node.Parent().Type()) { + return "$" + dummyValue + } + + return dummyValue +} + func (*Pattern) ExtractVariables(input string) (string, []language.PatternVariable, error) { nameIndex := patternQueryVariableRegex.SubexpIndex("name") typesIndex := patternQueryVariableRegex.SubexpIndex("types") @@ -89,13 +100,13 @@ func (*Pattern) FindUnanchoredPoints(input []byte) [][]int { func (*Pattern) IsLeaf(node *tree.Node) bool { // Encapsed string literal - if node.Type() == "encapsed_string" { + switch node.Type() { + case "encapsed_string": namedChildren := node.NamedChildren() if len(namedChildren) == 1 && namedChildren[0].Type() == "string" { return true } } - return false } @@ -110,13 +121,30 @@ func (*Pattern) LeafContentTypes() []string { } } -// ToDo: func (*Pattern) IsAnchored(node *tree.Node) (bool, bool) { + if slices.Contains(unanchoredPatternNodeTypes, node.Type()) { + return false, false + } + parent := node.Parent() if parent == nil { return true, true } + if parent.Type() == "method_declaration" { + // visibility + if node == parent.ChildByFieldName("name") { + return false, true + } + + // type + if node == parent.ChildByFieldName("parameters") { + return true, false + } + + return false, false + } + // Associative array elements are unanchored // eg. array("foo" => 42) if parent.Type() == "array_creation_expression" && @@ -127,7 +155,10 @@ func (*Pattern) IsAnchored(node *tree.Node) (bool, bool) { // Class body declaration_list // function/block compound_statement - unAnchored := []string{"declaration_list", "compound_statement"} + unAnchored := []string{ + "declaration_list", + "compound_statement", + } isUnanchored := !slices.Contains(unAnchored, parent.Type()) return isUnanchored, isUnanchored @@ -161,3 +192,7 @@ func (*Pattern) TranslateContent(fromNodeType, toNodeType, content string) strin return content } + +func (*Pattern) ContainerTypes() []string { + return patternMatchNodeContainerTypes +} diff --git a/internal/scanner/detectors/customrule/patternquery/builder/builder.go b/internal/scanner/detectors/customrule/patternquery/builder/builder.go index 92f7cc1b4..e60a93bb8 100644 --- a/internal/scanner/detectors/customrule/patternquery/builder/builder.go +++ b/internal/scanner/detectors/customrule/patternquery/builder/builder.go @@ -12,6 +12,7 @@ import ( "github.com/bearer/bearer/internal/parser/nodeid" "github.com/bearer/bearer/internal/scanner/ast" asttree "github.com/bearer/bearer/internal/scanner/ast/tree" + "github.com/bearer/bearer/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer" "github.com/bearer/bearer/internal/scanner/language" ) @@ -57,16 +58,30 @@ func Build( return nil, err } - if fixedInput, fixed := fixupInput( + fixupResult, err := fixupInput( patternLanguage, processedInput, inputParams.Variables, tree.RootNode(), - ); fixed { - tree, err = ast.Parse(context.TODO(), language, fixedInput) + ) + if err != nil { + return nil, err + } + + if fixupResult.Changed() { + if log.Trace().Enabled() { + log.Trace().Msgf("fixedInput -> %s", fixupResult.Value()) + } + + tree, err = ast.Parse(context.TODO(), language, fixupResult.Value()) if err != nil { return nil, err } + + inputParams.MatchNodeOffset = fixupResult.Translate(inputParams.MatchNodeOffset) + for i := range inputParams.UnanchoredOffsets { + inputParams.UnanchoredOffsets[i] = fixupResult.Translate(inputParams.UnanchoredOffsets[i]) + } } root := tree.RootNode() @@ -118,13 +133,9 @@ func fixupInput( byteInput []byte, variables []language.PatternVariable, rootNode *asttree.Node, -) ([]byte, bool) { +) (*bytereplacer.Result, error) { + replacer := bytereplacer.New(byteInput) insideError := false - inputOffset := 0 - - newInput := make([]byte, len(byteInput)) - copy(newInput, byteInput) - fixed := false err := rootNode.Walk(func(node *asttree.Node, visitChildren func() error) error { oldInsideError := insideError @@ -141,7 +152,6 @@ func fixupInput( } var newValue string - var originalValue string if insideError { variable := getVariableFor(node, patternLanguage, variables) @@ -158,7 +168,6 @@ func fixupInput( return nil } variable.DummyValue = newValue - originalValue = variable.DummyValue } else { if log.Trace().Enabled() { log.Trace().Msgf("attempting pattern fixup (missing node). node: %s", node.Debug()) @@ -170,42 +179,10 @@ func fixupInput( } } - fixed = true - valueOffset := len(newValue) - len(originalValue) - - prefix := newInput[:node.ContentStart.Byte+inputOffset] - suffix := newInput[node.ContentEnd.Byte+inputOffset:] - // FIXME: We need to append suffix before - // newInput seems to be sharing memory in some circumstances - // suffix before and after the first append are not equal - appendedInput := appendByte([]byte(newValue), suffix...) - newInput = appendByte(prefix, appendedInput...) - - inputOffset += valueOffset - - return nil + return replacer.Replace(node.ContentStart.Byte, node.ContentEnd.Byte, []byte(newValue)) }) - // walk errors are only ones we produce, and we don't make any - if err != nil { - panic(err) - } - - return newInput, fixed -} - -func appendByte(slice []byte, data ...byte) []byte { - m := len(slice) - n := m + len(data) - if n > cap(slice) { // if necessary, reallocate - // allocate double what's needed, for future growth. - newSlice := make([]byte, (n+1)*2) - copy(newSlice, slice) - slice = newSlice - } - slice = slice[0:n] - copy(slice[m:n], data) - return slice + return replacer.Done(), err } func (builder *builder) build(rootNode *asttree.Node) (*Result, error) { @@ -224,6 +201,9 @@ func (builder *builder) build(rootNode *asttree.Node) (*Result, error) { builder.write(" @root") builder.write(")") + log.Error().Msgf("build %s", rootNode.SitterNode().String()) + log.Error().Msgf("string builder %s", builder.stringBuilder.String()) + log.Error().Msgf("input params builder %s", builder.stringBuilder.String()) paramToVariable, equalParams := builder.processVariableToParams() @@ -271,6 +251,8 @@ func (builder *builder) compileNode(node *asttree.Node, isRoot bool, isLastChild builder.write(" .") } + log.Error().Msgf("stringBuilder %s", builder.stringBuilder.String()) + return nil } diff --git a/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer.go b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer.go new file mode 100644 index 000000000..e949124a4 --- /dev/null +++ b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer.go @@ -0,0 +1,91 @@ +package bytereplacer + +import ( + "bytes" + "fmt" +) + +type Replacer struct { + offsetDelta + result *Result +} + +type offsetDelta struct { + originalOffset, + delta int +} + +type Result struct { + value []byte + offsetDeltas []offsetDelta + changed bool +} + +func New(original []byte) *Replacer { + value := make([]byte, len(original)) + copy(value, original) + + return &Replacer{result: &Result{value: value}} +} + +func (replacer *Replacer) Replace(originalStart, originalEnd int, newValue []byte) error { + if originalStart < replacer.originalOffset { + return fmt.Errorf( + "replacements must be made in sequential order. last offset %d, replacement start %d", + replacer.originalOffset, + originalStart, + ) + } + + if bytes.Equal(replacer.result.value[originalStart:originalEnd], newValue) { + return nil + } + + replacer.result.changed = true + + currentLength := len(replacer.result.value) + suffix := replacer.result.value[replacer.delta+originalEnd : currentLength : currentLength] + + replacer.result.value = append( + replacer.result.value[:replacer.delta+originalStart], + append(newValue, suffix...)..., + ) + + delta := len(newValue) - originalEnd + originalStart + + replacer.originalOffset = originalEnd + replacer.delta += delta + + replacer.result.offsetDeltas = append(replacer.result.offsetDeltas, offsetDelta{ + originalOffset: originalEnd, + delta: delta, + }) + + return nil +} + +func (replacer *Replacer) Done() *Result { + return replacer.result +} + +func (result *Result) Changed() bool { + return result.changed +} + +func (result *Result) Value() []byte { + return result.value +} + +func (result *Result) Translate(originalOffset int) int { + delta := 0 + + for _, offsetDelta := range result.offsetDeltas { + if offsetDelta.originalOffset > originalOffset { + break + } + + delta += offsetDelta.delta + } + + return delta + originalOffset +} diff --git a/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_suite_test.go b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_suite_test.go new file mode 100644 index 000000000..76e366da4 --- /dev/null +++ b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_suite_test.go @@ -0,0 +1,13 @@ +package bytereplacer_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestFilters(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "ByteReplacer Suite") +} diff --git a/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go new file mode 100644 index 000000000..08bc86b32 --- /dev/null +++ b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go @@ -0,0 +1,138 @@ +package bytereplacer_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/bearer/bearer/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer" +) + +var _ = Describe("Replacer", func() { + var replacer *bytereplacer.Replacer + + BeforeEach(func(ctx SpecContext) { + replacer = bytereplacer.New([]byte("hello world")) + }) + + Describe("Replace", func() { + When("replacements are made in sequential order", func() { + BeforeEach(func(ctx SpecContext) { + Expect(replacer.Replace(0, 1, nil)).To(Succeed()) + }) + + It("does not fail", func(ctx SpecContext) { + Expect(replacer.Replace(1, 2, []byte("foo"))).To(Succeed()) + }) + }) + + When("replacements are made out of order", func() { + BeforeEach(func(ctx SpecContext) { + Expect(replacer.Replace(0, 2, nil)).To(Succeed()) + }) + + It("returns an error", func(ctx SpecContext) { + Expect(replacer.Replace(1, 2, []byte("foo"))).To( + MatchError(ContainSubstring("replacements must be made in sequential order")), + ) + }) + }) + }) +}) + +var _ = Describe("Result", func() { + var replacer *bytereplacer.Replacer + original := []byte("hello world") + + BeforeEach(func(ctx SpecContext) { + replacer = bytereplacer.New(original) + }) + + When("no replacements are made", func() { + var result *bytereplacer.Result + + BeforeEach(func(ctx SpecContext) { + result = replacer.Done() + }) + + Describe("Changed", func() { + It("returns false", func(ctx SpecContext) { + Expect(result.Changed()).To(BeFalse()) + }) + }) + + Describe("Value", func() { + It("returns the original value", func(ctx SpecContext) { + Expect(result.Value()).To(Equal(original)) + }) + }) + + Describe("Translate", func() { + It("returns the original offset", func(ctx SpecContext) { + Expect(result.Translate(0)).To(Equal(0)) + Expect(result.Translate(5)).To(Equal(5)) + Expect(result.Translate(10)).To(Equal(10)) + }) + }) + }) + + When("noop replacements are made", func() { + var result *bytereplacer.Result + + BeforeEach(func(ctx SpecContext) { + replacer.Replace(0, 5, []byte("hello")) + replacer.Replace(6, 6, nil) + result = replacer.Done() + }) + + Describe("Changed", func() { + It("returns false", func(ctx SpecContext) { + Expect(result.Changed()).To(BeFalse()) + }) + }) + + Describe("Value", func() { + It("returns the original value", func(ctx SpecContext) { + Expect(result.Value()).To(Equal(original)) + }) + }) + + Describe("Translate", func() { + It("returns the original offset", func(ctx SpecContext) { + Expect(result.Translate(0)).To(Equal(0)) + Expect(result.Translate(5)).To(Equal(5)) + Expect(result.Translate(10)).To(Equal(10)) + }) + }) + }) + + When("replacements are made", func() { + var result *bytereplacer.Result + + BeforeEach(func(ctx SpecContext) { + replacer.Replace(0, 5, []byte("hi")) + replacer.Replace(5, 5, []byte("!")) + replacer.Replace(6, 11, []byte("testing123")) + result = replacer.Done() + }) + + Describe("Changed", func() { + It("returns true", func(ctx SpecContext) { + Expect(result.Changed()).To(BeTrue()) + }) + }) + + Describe("Value", func() { + It("returns the updated value", func(ctx SpecContext) { + Expect(result.Value()).To(Equal([]byte("hi! testing123"))) + }) + }) + + Describe("Translate", func() { + It("returns the new offset", func(ctx SpecContext) { + Expect(result.Translate(0)).To(Equal(0)) + Expect(result.Translate(5)).To(Equal(3)) + Expect(result.Translate(11)).To(Equal(14)) + }) + }) + }) +}) From 65bf5710c5e5bb009ca2a9ad67721a1657a08f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Fabianski?= Date: Wed, 27 Sep 2023 15:09:42 +0200 Subject: [PATCH 06/13] fix: use container types to discriminate variables --- internal/languages/php/pattern/pattern.go | 3 +-- .../detectors/customrule/patternquery/builder/builder.go | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/languages/php/pattern/pattern.go b/internal/languages/php/pattern/pattern.go index 98c8853d8..90915f7bb 100644 --- a/internal/languages/php/pattern/pattern.go +++ b/internal/languages/php/pattern/pattern.go @@ -17,8 +17,7 @@ var ( matchNodeRegex = regexp.MustCompile(`\$`) ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) unanchoredPatternNodeTypes = []string{} - patternMatchNodeContainerTypes = []string{"formal_parameters"} - // unanchoredPatternNodeTypes = []string{"simple_parameter"} + patternMatchNodeContainerTypes = []string{"formal_parameters", "simple_parameter"} allowedPatternQueryTypes = []string{"_"} ) diff --git a/internal/scanner/detectors/customrule/patternquery/builder/builder.go b/internal/scanner/detectors/customrule/patternquery/builder/builder.go index e60a93bb8..ea3cca07c 100644 --- a/internal/scanner/detectors/customrule/patternquery/builder/builder.go +++ b/internal/scanner/detectors/customrule/patternquery/builder/builder.go @@ -201,9 +201,6 @@ func (builder *builder) build(rootNode *asttree.Node) (*Result, error) { builder.write(" @root") builder.write(")") - log.Error().Msgf("build %s", rootNode.SitterNode().String()) - log.Error().Msgf("string builder %s", builder.stringBuilder.String()) - log.Error().Msgf("input params builder %s", builder.stringBuilder.String()) paramToVariable, equalParams := builder.processVariableToParams() @@ -382,8 +379,12 @@ func getVariableFor( patternLanguage language.Pattern, variables []language.PatternVariable, ) *language.PatternVariable { + if slices.Contains(patternLanguage.ContainerTypes(), node.Type()) { + return nil + } + for i, variable := range variables { - if (len(node.NamedChildren()) == 0 || patternLanguage.IsLeaf(node)) && node.Content() == variable.DummyValue { + if node.Content() == variable.DummyValue { return &variables[i] } } From eb877220c8e0908d7dacc85ff5666db2977aec9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Fabianski?= Date: Wed, 27 Sep 2023 15:12:26 +0200 Subject: [PATCH 07/13] chore: clean up --- internal/languages/php/analyzer/analyzer.go | 25 --------------------- 1 file changed, 25 deletions(-) diff --git a/internal/languages/php/analyzer/analyzer.go b/internal/languages/php/analyzer/analyzer.go index b5287d046..c23cb26f5 100644 --- a/internal/languages/php/analyzer/analyzer.go +++ b/internal/languages/php/analyzer/analyzer.go @@ -1,7 +1,6 @@ package analyzer import ( - "github.com/rs/zerolog/log" sitter "github.com/smacker/go-tree-sitter" "github.com/bearer/bearer/internal/scanner/ast/tree" @@ -26,8 +25,6 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.withScope(language.NewScope(analyzer.scope), func() error { return visitChildren() }) - // case "formal_parameters": - // return analyzer.analyzeParameterList(node, visitChildren) case "augmented_assignment_expression": return analyzer.analyzeAugmentedAssignment(node, visitChildren) case "assignment_expression": @@ -96,26 +93,6 @@ func (analyzer *analyzer) analyzeAugmentedAssignment(node *sitter.Node, visitChi return err } -// // I think there is still an issue with the linking between the parameter for functions??? -// // all parameter definitions for a method -// // function m($a, int $b) -// func (analyzer *analyzer) analyzeParameterList(node *sitter.Node, visitChildren func() error) error { -// children := analyzer.builder.ChildrenFor(node) -// analyzer.builder.Dataflow(node, children...) - -// for _, child := range children { -// log.Error().Msgf("analyzerParameterList[%s]", child.Type()) -// if child.Type() == "simple_parameter" { -// name := child.ChildByFieldName("name") -// log.Error().Msgf("analyzerParameterList -> %s", analyzer.builder.ContentFor(name)) -// analyzer.builder.Alias(node, name) -// analyzer.scope.Declare(analyzer.builder.ContentFor(name), name) -// } -// } - -// return visitChildren() -// } - func (analyzer *analyzer) analyzeParentheses(node *sitter.Node, visitChildren func() error) error { analyzer.builder.Alias(node, node.NamedChild(0)) analyzer.lookupVariable(node.NamedChild(0)) @@ -168,7 +145,6 @@ func (analyzer *analyzer) analyzeFieldAccess(node *sitter.Node, visitChildren fu // fn($x = 42) => $x; // fn($x, ...$rest) => $rest; func (analyzer *analyzer) analyzeParameter(node *sitter.Node, visitChildren func() error) error { - log.Error().Msgf("analyzeParameter[%s] %s", node.Type(), analyzer.builder.ContentFor(node)) name := node.ChildByFieldName("name") analyzer.builder.Alias(node, name) analyzer.scope.Declare(analyzer.builder.ContentFor(name), name) @@ -217,7 +193,6 @@ func (analyzer *analyzer) lookupVariable(node *sitter.Node) { } if pointsToNode := analyzer.scope.Lookup(analyzer.builder.ContentFor(node)); pointsToNode != nil { - log.Error().Msgf("lookupVariable[%s] %s", node.Type(), analyzer.builder.ContentFor(node)) analyzer.builder.Alias(node, pointsToNode) } } From 3fa607d3a953994b74ee05e9314c2a994457c5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Fabianski?= Date: Wed, 27 Sep 2023 15:17:30 +0200 Subject: [PATCH 08/13] ci: appease linter --- .../customrule/patternquery/builder/builder.go | 2 -- .../builder/bytereplacer/bytereplacer_test.go | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/internal/scanner/detectors/customrule/patternquery/builder/builder.go b/internal/scanner/detectors/customrule/patternquery/builder/builder.go index ea3cca07c..7d674eeb6 100644 --- a/internal/scanner/detectors/customrule/patternquery/builder/builder.go +++ b/internal/scanner/detectors/customrule/patternquery/builder/builder.go @@ -248,8 +248,6 @@ func (builder *builder) compileNode(node *asttree.Node, isRoot bool, isLastChild builder.write(" .") } - log.Error().Msgf("stringBuilder %s", builder.stringBuilder.String()) - return nil } diff --git a/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go index 08bc86b32..a5bf9190c 100644 --- a/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go +++ b/internal/scanner/detectors/customrule/patternquery/builder/bytereplacer/bytereplacer_test.go @@ -79,8 +79,8 @@ var _ = Describe("Result", func() { var result *bytereplacer.Result BeforeEach(func(ctx SpecContext) { - replacer.Replace(0, 5, []byte("hello")) - replacer.Replace(6, 6, nil) + replacer.Replace(0, 5, []byte("hello")) // nolint:errcheck + replacer.Replace(6, 6, nil) // nolint:errcheck result = replacer.Done() }) @@ -109,9 +109,9 @@ var _ = Describe("Result", func() { var result *bytereplacer.Result BeforeEach(func(ctx SpecContext) { - replacer.Replace(0, 5, []byte("hi")) - replacer.Replace(5, 5, []byte("!")) - replacer.Replace(6, 11, []byte("testing123")) + replacer.Replace(0, 5, []byte("hi")) // nolint:errcheck + replacer.Replace(5, 5, []byte("!")) // nolint:errcheck + replacer.Replace(6, 11, []byte("testing123")) // nolint:errcheck result = replacer.Done() }) From f7c38ded97f48826cc800bd397d1bc0d3b47bbd6 Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 28 Sep 2023 11:59:38 +0100 Subject: [PATCH 09/13] fix: add argument to pattern container types --- internal/languages/php/pattern/pattern.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/languages/php/pattern/pattern.go b/internal/languages/php/pattern/pattern.go index 90915f7bb..14d0d95be 100644 --- a/internal/languages/php/pattern/pattern.go +++ b/internal/languages/php/pattern/pattern.go @@ -17,7 +17,7 @@ var ( matchNodeRegex = regexp.MustCompile(`\$`) ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) unanchoredPatternNodeTypes = []string{} - patternMatchNodeContainerTypes = []string{"formal_parameters", "simple_parameter"} + patternMatchNodeContainerTypes = []string{"formal_parameters", "simple_parameter", "argument"} allowedPatternQueryTypes = []string{"_"} ) From 1e90e2b48208e9afc51f2d68811195a1be89c3ce Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 28 Sep 2023 14:34:29 +0100 Subject: [PATCH 10/13] fix: string handling --- .../.snapshots/TestJavaString-string | 15 ++ ...tJavascriptStringDetector-string_assign_eq | 15 ++ .../detectors/.snapshots/TestPHPString-string | 161 ++++++++++++++++-- .../languages/php/detectors/string/string.go | 9 +- .../php/detectors/testdata/string.php | 4 +- .../TestRubyStringDetector-string_assign_eq | 15 ++ internal/scanner/detectors/common/string.go | 12 +- 7 files changed, 202 insertions(+), 29 deletions(-) diff --git a/internal/languages/java/detectors/.snapshots/TestJavaString-string b/internal/languages/java/detectors/.snapshots/TestJavaString-string index bc4c22bf9..cedcbab2c 100644 --- a/internal/languages/java/detectors/.snapshots/TestJavaString-string +++ b/internal/languages/java/detectors/.snapshots/TestJavaString-string @@ -363,6 +363,21 @@ children: data: value: Hello World isliteral: true +- node: 44 + content: s += "!!" + data: + value: Hello World!!! + isliteral: true +- node: 57 + content: s2 += args[0] + data: + value: hey * + isliteral: false +- node: 67 + content: s2 += " there" + data: + value: hey * there + isliteral: false - node: 38 content: Greeting + "!" data: diff --git a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq index e9818bd76..46d3f41ba 100644 --- a/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq +++ b/internal/languages/javascript/detectors/.snapshots/TestJavascriptStringDetector-string_assign_eq @@ -181,6 +181,21 @@ children: id: 36 range: 6:8 - 6:9 +- node: 11 + content: x += "b" + data: + value: ab + isliteral: true +- node: 19 + content: x += name + data: + value: ab* + isliteral: false +- node: 30 + content: y += "c" + data: + value: '*c' + isliteral: false - node: 6 content: '"a"' data: diff --git a/internal/languages/php/detectors/.snapshots/TestPHPString-string b/internal/languages/php/detectors/.snapshots/TestPHPString-string index d8cd6c926..720960508 100644 --- a/internal/languages/php/detectors/.snapshots/TestPHPString-string +++ b/internal/languages/php/detectors/.snapshots/TestPHPString-string @@ -1,10 +1,10 @@ type: program id: 0 -range: 1:1 - 15:3 +range: 1:1 - 18:1 dataflow_sources: - 1 - 2 - - 100 + - 117 children: - type: php_tag id: 1 @@ -12,7 +12,7 @@ children: content: "' - id: 101 - range: 15:1 - 15:3 + id: 118 + range: 17:1 - 17:3 +- node: 12 + content: '"Hello World"' + data: + value: Hello World + isliteral: true - node: 14 content: Hello World data: value: Hello World isliteral: true +- node: 52 + content: $s .= "!!" + data: + value: '*!!!' + isliteral: false +- node: 74 + content: $s2 .= $args[0] + data: + value: hey * + isliteral: false +- node: 88 + content: $s2 .= " there" + data: + value: hey * there + isliteral: false - node: 39 content: self::Greeting . "!" data: - value: '**' + value: '*!' + isliteral: false +- node: 57 + content: '"!!"' + data: + value: '!!' + isliteral: true +- node: 68 + content: '"hey "' + data: + value: 'hey ' + isliteral: true +- node: 93 + content: '" there"' + data: + value: ' there' + isliteral: true +- node: 104 + content: '"foo ''{$s2}'' bar"' + data: + value: foo 'hey * there' bar isliteral: false +- node: 46 + content: '"!"' + data: + value: '!' + isliteral: true - node: 59 content: '!!' data: @@ -472,6 +591,16 @@ children: data: value: ' there' isliteral: true +- node: 106 + content: foo ' + data: + value: foo ' + isliteral: true +- node: 112 + content: ''' bar' + data: + value: ''' bar' + isliteral: true - node: 48 content: '!' data: diff --git a/internal/languages/php/detectors/string/string.go b/internal/languages/php/detectors/string/string.go index 1b77938ab..1993b87dd 100644 --- a/internal/languages/php/detectors/string/string.go +++ b/internal/languages/php/detectors/string/string.go @@ -28,10 +28,17 @@ func (detector *stringDetector) DetectAt( ) ([]interface{}, error) { switch node.Type() { case "string": + value := node.Content() + if node.Parent() != nil && node.Parent().Type() != "encapsed_string" { + value = stringutil.StripQuotes(value) + } + return []interface{}{common.String{ - Value: stringutil.StripQuotes(node.Content()), + Value: value, IsLiteral: true, }}, nil + case "encapsed_string": + return common.ConcatenateChildStrings(node, detectorContext) case "binary_expression": if node.Children()[1].Content() == "." { return common.ConcatenateChildStrings(node, detectorContext) diff --git a/internal/languages/php/detectors/testdata/string.php b/internal/languages/php/detectors/testdata/string.php index 6ddb251ac..395d88ac2 100644 --- a/internal/languages/php/detectors/testdata/string.php +++ b/internal/languages/php/detectors/testdata/string.php @@ -10,6 +10,8 @@ public static function main($args) $s2 = "hey "; $s2 .= $args[0]; $s2 .= " there"; + + $s3 = "foo '{$s2}' bar"; } } -?> \ No newline at end of file +?> diff --git a/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_assign_eq b/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_assign_eq index f4346c113..da9671ae1 100644 --- a/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_assign_eq +++ b/internal/languages/ruby/detectors/.snapshots/TestRubyStringDetector-string_assign_eq @@ -149,6 +149,21 @@ children: id: 29 range: 6:8 - 6:9 +- node: 8 + content: x += "b" + data: + value: ab + isliteral: true +- node: 15 + content: x += name + data: + value: ab* + isliteral: false +- node: 23 + content: y += "c" + data: + value: '*c' + isliteral: false - node: 4 content: '"a"' data: diff --git a/internal/scanner/detectors/common/string.go b/internal/scanner/detectors/common/string.go index 442256ba8..1b00c83e9 100644 --- a/internal/scanner/detectors/common/string.go +++ b/internal/scanner/detectors/common/string.go @@ -1,8 +1,6 @@ package common import ( - "fmt" - "github.com/bearer/bearer/internal/scanner/ast/traversalstrategy" "github.com/bearer/bearer/internal/scanner/ast/tree" "github.com/bearer/bearer/internal/scanner/ruleset" @@ -77,15 +75,7 @@ func ConcatenateChildStrings(node *tree.Node, detectorContext types.Context) ([] } func ConcatenateAssignEquals(node *tree.Node, detectorContext types.Context) ([]interface{}, error) { - dataflowSources := node.ChildByFieldName("left").DataflowSources() - if len(dataflowSources) == 0 { - return nil, nil - } - if len(dataflowSources) != 1 { - return nil, fmt.Errorf("expected exactly one data source for `+=` node but got %d", len(dataflowSources)) - } - - left, leftIsLiteral, err := GetStringValue(dataflowSources[0], detectorContext) + left, leftIsLiteral, err := GetStringValue(node.ChildByFieldName("left"), detectorContext) if err != nil { return nil, err } From d45b869109ddabcedb275f4c7f24802506fc6421 Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 28 Sep 2023 16:17:18 +0100 Subject: [PATCH 11/13] fix: lookup variables in require/include --- internal/languages/php/analyzer/analyzer.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/internal/languages/php/analyzer/analyzer.go b/internal/languages/php/analyzer/analyzer.go index c23cb26f5..7949b3cdd 100644 --- a/internal/languages/php/analyzer/analyzer.go +++ b/internal/languages/php/analyzer/analyzer.go @@ -45,7 +45,17 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.analyzeGenericConstruct(node, visitChildren) case "switch_label": return visitChildren() - case "binary_expression", "unary_op_expression", "argument", "encapsed_string", "sequence_expression", "array_element_initializer", "formal_parameters": + case "binary_expression", + "unary_op_expression", + "argument", + "encapsed_string", + "sequence_expression", + "array_element_initializer", + "formal_parameters", + "include_expression", + "include_once_expression", + "require_expression", + "require_once_expression": return analyzer.analyzeGenericOperation(node, visitChildren) case "while_statement", "do_statement", "if_statement", "expression_statement", "compound_statement": // statements don't have results return visitChildren() From b0067d7202eadc2bb6ec7a01167fe40aae5ffed2 Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 28 Sep 2023 16:25:00 +0100 Subject: [PATCH 12/13] fix: lookup variables in dynamic variables --- internal/languages/php/analyzer/analyzer.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/internal/languages/php/analyzer/analyzer.go b/internal/languages/php/analyzer/analyzer.go index 7949b3cdd..95af2e610 100644 --- a/internal/languages/php/analyzer/analyzer.go +++ b/internal/languages/php/analyzer/analyzer.go @@ -45,6 +45,8 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.analyzeGenericConstruct(node, visitChildren) case "switch_label": return visitChildren() + case "dynamic_variable_name": + return analyzer.analyzeDynamicVariableName(node, visitChildren) case "binary_expression", "unary_op_expression", "argument", @@ -168,6 +170,12 @@ func (analyzer *analyzer) analyzeSwitch(node *sitter.Node, visitChildren func() return visitChildren() } +func (analyzer *analyzer) analyzeDynamicVariableName(node *sitter.Node, visitChildren func() error) error { + analyzer.lookupVariable(node.NamedChild(0)) + + return visitChildren() +} + // default analysis, where the children are assumed to be aliases func (analyzer *analyzer) analyzeGenericConstruct(node *sitter.Node, visitChildren func() error) error { analyzer.builder.Alias(node, analyzer.builder.ChildrenFor(node)...) From 8127e492ff8545e7b7dc96981aba5c7de127b080 Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 28 Sep 2023 16:30:41 +0100 Subject: [PATCH 13/13] fix: lookup variables in function names --- internal/languages/php/analyzer/analyzer.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/languages/php/analyzer/analyzer.go b/internal/languages/php/analyzer/analyzer.go index 95af2e610..6669c5056 100644 --- a/internal/languages/php/analyzer/analyzer.go +++ b/internal/languages/php/analyzer/analyzer.go @@ -133,9 +133,11 @@ func (analyzer *analyzer) analyzeConditional(node *sitter.Node, visitChildren fu return visitChildren() } +// foo(1, 2); // foo->bar(1, 2); func (analyzer *analyzer) analyzeMethodInvocation(node *sitter.Node, visitChildren func() error) error { - analyzer.lookupVariable(node.ChildByFieldName("object")) + analyzer.lookupVariable(node.ChildByFieldName("object")) // method + analyzer.lookupVariable(node.ChildByFieldName("function")) // function if arguments := node.ChildByFieldName("arguments"); arguments != nil { analyzer.builder.Dataflow(node, arguments)