From a684dccb39a75b14ec026fcb6296837ac32431b2 Mon Sep 17 00:00:00 2001 From: David Roe Date: Thu, 21 Sep 2023 11:58:37 +0100 Subject: [PATCH] fix: disabling of rules using comments (#1284) * test: add test for disabled rules * fix: disabling of rules with comments * perf: use bitset for disabled rules * perf: store transitive disabled rules * test: improve test coverage --- ...DisabledRules-testdata-data-disabled_rules | 35 ++++ e2e/rules/rules_test.go | 4 + .../testdata/data/disabled_rules/main.rb | 8 + e2e/rules/testdata/rules/match_sink.yml | 9 + .../scanner/ast/.snapshots/TestDisabledRules | 175 ++++++++++++++++++ internal/scanner/ast/ast.go | 9 +- internal/scanner/ast/ast_test.go | 77 ++++++++ internal/scanner/ast/tree/builder.go | 24 ++- internal/scanner/ast/tree/tree.go | 28 ++- internal/scanner/ast/tree/tree_test.go | 2 +- internal/scanner/rulescanner/rulescanner.go | 13 +- 11 files changed, 357 insertions(+), 27 deletions(-) create mode 100644 e2e/rules/.snapshots/TestDisabledRules-testdata-data-disabled_rules create mode 100644 e2e/rules/testdata/data/disabled_rules/main.rb create mode 100644 e2e/rules/testdata/rules/match_sink.yml create mode 100644 internal/scanner/ast/.snapshots/TestDisabledRules create mode 100644 internal/scanner/ast/ast_test.go diff --git a/e2e/rules/.snapshots/TestDisabledRules-testdata-data-disabled_rules b/e2e/rules/.snapshots/TestDisabledRules-testdata-data-disabled_rules new file mode 100644 index 000000000..f819f28ff --- /dev/null +++ b/e2e/rules/.snapshots/TestDisabledRules-testdata-data-disabled_rules @@ -0,0 +1,35 @@ +low: + - rule: + cwe_ids: + - "319" + id: match_sink + title: "" + description: "" + documentation_url: "" + line_number: 7 + full_filename: e2e/rules/testdata/data/disabled_rules/main.rb + filename: main.rb + source: + location: + start: 7 + end: 7 + column: + start: 3 + end: 7 + sink: + location: + start: 7 + end: 7 + column: + start: 3 + end: 7 + content: sink + parent_line_number: 7 + snippet: sink + fingerprint: eb59f129d5424fb58e3bfcb5bfa83159_0 + old_fingerprint: e94b7fee5e58e735f107aa1cb3cfb75b_0 + code_extract: ' sink' + + +-- + diff --git a/e2e/rules/rules_test.go b/e2e/rules/rules_test.go index d5ca26375..b81364bda 100644 --- a/e2e/rules/rules_test.go +++ b/e2e/rules/rules_test.go @@ -31,6 +31,10 @@ func TestAuxilary(t *testing.T) { runRulesTest("auxilary", "javascript_third_parties_datadog_test", t) } +func TestDisabledRules(t *testing.T) { + runRulesTest("disabled_rules", "match_sink", t) +} + func TestReferenceFilters(t *testing.T) { runRulesTest("reference_filters", "reference_filters_test", t) } diff --git a/e2e/rules/testdata/data/disabled_rules/main.rb b/e2e/rules/testdata/data/disabled_rules/main.rb new file mode 100644 index 000000000..e63c74297 --- /dev/null +++ b/e2e/rules/testdata/data/disabled_rules/main.rb @@ -0,0 +1,8 @@ +# bearer:disable match_sink +def m + sink +end + +def n + sink +end diff --git a/e2e/rules/testdata/rules/match_sink.yml b/e2e/rules/testdata/rules/match_sink.yml new file mode 100644 index 000000000..9bc86b602 --- /dev/null +++ b/e2e/rules/testdata/rules/match_sink.yml @@ -0,0 +1,9 @@ +patterns: + - sink +languages: + - ruby +severity: low +metadata: + cwe_id: + - 319 + id: match_sink diff --git a/internal/scanner/ast/.snapshots/TestDisabledRules b/internal/scanner/ast/.snapshots/TestDisabledRules new file mode 100644 index 000000000..a25f33134 --- /dev/null +++ b/internal/scanner/ast/.snapshots/TestDisabledRules @@ -0,0 +1,175 @@ +([]ast_test.ruleInfo) (len=4) { + (ast_test.ruleInfo) { + ID: (string) (len=5) "rule1", + Index: (int) 5 + }, + (ast_test.ruleInfo) { + ID: (string) (len=5) "rule2", + Index: (int) 6 + }, + (ast_test.ruleInfo) { + ID: (string) (len=5) "rule3", + Index: (int) 7 + }, + (ast_test.ruleInfo) { + ID: (string) (len=5) "rule4", + Index: (int) 8 + } +} +type: program +id: 0 +range: 2:3 - 9:2 +dataflow_sources: + - 1 + - 2 + - 3 +children: + - type: comment + id: 1 + range: 2:3 - 2:31 + content: '# bearer:disable rule1,rule2' + - type: comment + id: 2 + range: 3:3 - 3:25 + content: '# bearer:disable rule3' + - type: method + id: 3 + range: 4:3 - 8:6 + disabledrules: + - 5 + - 6 + - 7 + children: + - type: '"def"' + id: 4 + range: 4:3 - 4:6 + disabledrules: + - 5 + - 6 + - 7 + - type: identifier + id: 5 + range: 4:7 - 4:8 + content: m + disabledrules: + - 5 + - 6 + - 7 + - type: method_parameters + id: 6 + range: 4:8 - 4:11 + dataflow_sources: + - 7 + - 8 + - 9 + disabledrules: + - 5 + - 6 + - 7 + children: + - type: '"("' + id: 7 + range: 4:8 - 4:9 + disabledrules: + - 5 + - 6 + - 7 + - type: identifier + id: 8 + range: 4:9 - 4:10 + content: a + disabledrules: + - 5 + - 6 + - 7 + - type: '")"' + id: 9 + range: 4:10 - 4:11 + disabledrules: + - 5 + - 6 + - 7 + - type: comment + id: 10 + range: 5:4 - 5:26 + content: '# bearer:disable rule4' + disabledrules: + - 5 + - 6 + - 7 + - type: call + id: 11 + range: 6:4 - 6:9 + disabledrules: + - 5 + - 6 + - 7 + - 8 + children: + - type: identifier + id: 12 + range: 6:4 - 6:5 + content: a + alias_of: + - 8 + disabledrules: + - 5 + - 6 + - 7 + - 8 + - type: '"."' + id: 13 + range: 6:5 - 6:6 + disabledrules: + - 5 + - 6 + - 7 + - 8 + - type: identifier + id: 14 + range: 6:6 - 6:9 + content: foo + disabledrules: + - 5 + - 6 + - 7 + - 8 + - type: call + id: 15 + range: 7:4 - 7:9 + disabledrules: + - 5 + - 6 + - 7 + children: + - type: identifier + id: 16 + range: 7:4 - 7:5 + content: b + disabledrules: + - 5 + - 6 + - 7 + - type: '"."' + id: 17 + range: 7:5 - 7:6 + disabledrules: + - 5 + - 6 + - 7 + - type: identifier + id: 18 + range: 7:6 - 7:9 + content: bar + disabledrules: + - 5 + - 6 + - 7 + - type: '"end"' + id: 19 + range: 8:3 - 8:6 + disabledrules: + - 5 + - 6 + - 7 + diff --git a/internal/scanner/ast/ast.go b/internal/scanner/ast/ast.go index 8c3eda7c6..bde69d0f3 100644 --- a/internal/scanner/ast/ast.go +++ b/internal/scanner/ast/ast.go @@ -20,7 +20,7 @@ func Parse( language language.Language, contentBytes []byte, ) (*tree.Tree, error) { - builder, err := parseBuilder(ctx, language, contentBytes) + builder, err := parseBuilder(ctx, language, contentBytes, 0) if err != nil { return nil, err } @@ -35,7 +35,7 @@ func ParseAndAnalyze( querySet *query.Set, contentBytes []byte, ) (*tree.Tree, error) { - builder, err := parseBuilder(ctx, language, contentBytes) + builder, err := parseBuilder(ctx, language, contentBytes, len(ruleSet.Rules())) if err != nil { return nil, err } @@ -56,6 +56,7 @@ func parseBuilder( ctx context.Context, language language.Language, contentBytes []byte, + ruleCount int, ) (*tree.Builder, error) { parser := sitter.NewParser() defer parser.Close() @@ -67,7 +68,7 @@ func parseBuilder( return nil, err } - return tree.NewBuilder(contentBytes, sitterTree.RootNode()), nil + return tree.NewBuilder(contentBytes, sitterTree.RootNode(), ruleCount), nil } func analyzeNode( @@ -91,7 +92,7 @@ func analyzeNode( continue } - disabledRules = addDisabledRules(ruleSet, builder, disabledRules, node) + disabledRules = addDisabledRules(ruleSet, builder, disabledRules, child) if err := analyzeNode(ctx, ruleSet, builder, analyzer, child); err != nil { return err } diff --git a/internal/scanner/ast/ast_test.go b/internal/scanner/ast/ast_test.go new file mode 100644 index 000000000..e5c7e2d85 --- /dev/null +++ b/internal/scanner/ast/ast_test.go @@ -0,0 +1,77 @@ +package ast_test + +import ( + "context" + "testing" + + "github.com/bradleyjkemp/cupaloy" + + "github.com/bearer/bearer/internal/commands/process/settings" + "github.com/bearer/bearer/internal/languages/ruby" + "github.com/bearer/bearer/internal/scanner/ast" + "github.com/bearer/bearer/internal/scanner/ast/query" + "github.com/bearer/bearer/internal/scanner/ruleset" +) + +type ruleInfo struct { + ID string + Index int +} + +func TestDisabledRules(t *testing.T) { + content := ` + # bearer:disable rule1,rule2 + # bearer:disable rule3 + def m(a) + # bearer:disable rule4 + a.foo + b.bar + end + ` + + language := ruby.Get() + languageIDs := []string{language.ID()} + + ruleSet, err := ruleset.New( + language.ID(), + map[string]*settings.Rule{ + "rule1": {Id: "rule1", Languages: languageIDs}, + "rule2": {Id: "rule2", Languages: languageIDs}, + "rule3": {Id: "rule3", Languages: languageIDs}, + "rule4": {Id: "rule4", Languages: languageIDs}, + }, + ) + if err != nil { + t.Fatalf("failed to create rule set: %s", err) + } + + var ruleDump []ruleInfo + for _, rule := range ruleSet.Rules() { + if rule.Type() != ruleset.RuleTypeBuiltin { + ruleDump = append(ruleDump, ruleInfo{ID: rule.ID(), Index: rule.Index()}) + } + } + + querySet := query.NewSet(language.ID(), language.SitterLanguage()) + if err := querySet.Compile(); err != nil { + t.Fatalf("failed to compile query set: %s", err) + } + + tree, err := ast.ParseAndAnalyze( + context.Background(), + language, + ruleSet, + querySet, + []byte(content), + ) + + if err != nil { + t.Fatalf("failed to parse and analyze input: %s", err) + } + + cupaloy.SnapshotT( + t, + ruleDump, + tree.RootNode().Dump(), + ) +} diff --git a/internal/scanner/ast/tree/builder.go b/internal/scanner/ast/tree/builder.go index 5c1657181..c75d6cf5e 100644 --- a/internal/scanner/ast/tree/builder.go +++ b/internal/scanner/ast/tree/builder.go @@ -4,6 +4,7 @@ import ( "slices" "github.com/bearer/bearer/internal/scanner/ruleset" + "github.com/bits-and-blooms/bitset" sitter "github.com/smacker/go-tree-sitter" ) @@ -17,9 +18,10 @@ type Builder struct { aliasOf map[int][]int sitterRootNode *sitter.Node sitterToNodeID map[*sitter.Node]int + ruleCount int } -func NewBuilder(contentBytes []byte, sitterRootNode *sitter.Node) *Builder { +func NewBuilder(contentBytes []byte, sitterRootNode *sitter.Node, ruleCount int) *Builder { builder := &Builder{ contentBytes: contentBytes, nodes: make([]Node, 0, 1000), @@ -28,6 +30,7 @@ func NewBuilder(contentBytes []byte, sitterRootNode *sitter.Node) *Builder { aliasOf: make(map[int][]int), sitterRootNode: sitterRootNode, sitterToNodeID: make(map[*sitter.Node]int), + ruleCount: ruleCount, } builder.rootNodeID = builder.addNode(sitterRootNode) @@ -94,10 +97,25 @@ func (builder *Builder) Alias(toNode *sitter.Node, fromNodes ...*sitter.Node) { } func (builder *Builder) AddDisabledRules(sitterNode *sitter.Node, rules []*ruleset.Rule) { - node := &builder.nodes[builder.sitterToNodeID[sitterNode]] + if len(rules) == 0 { + return + } + + builder.addDisabledRulesForNode(builder.sitterToNodeID[sitterNode], rules) +} + +func (builder *Builder) addDisabledRulesForNode(nodeID int, rules []*ruleset.Rule) { + node := &builder.nodes[nodeID] + if node.disabledRuleIndices == nil { + node.disabledRuleIndices = bitset.New(uint(builder.ruleCount)) + } for _, rule := range rules { - node.disabledRuleIndices = append(node.disabledRuleIndices, rule.Index()) + node.disabledRuleIndices.Set(uint(rule.Index())) + } + + for _, childID := range builder.children[nodeID] { + builder.addDisabledRulesForNode(childID, rules) } } diff --git a/internal/scanner/ast/tree/tree.go b/internal/scanner/ast/tree/tree.go index 9485c0e95..af2a9bdd9 100644 --- a/internal/scanner/ast/tree/tree.go +++ b/internal/scanner/ast/tree/tree.go @@ -3,7 +3,9 @@ package tree import ( "fmt" + "github.com/bits-and-blooms/bitset" sitter "github.com/smacker/go-tree-sitter" + "golang.org/x/exp/maps" "golang.org/x/exp/slices" "gopkg.in/yaml.v3" ) @@ -28,7 +30,7 @@ type Node struct { children, dataflowSources, aliasOf []*Node - disabledRuleIndices []int + disabledRuleIndices *bitset.BitSet // FIXME: remove the need for this sitterNode *sitter.Node queryResults map[int][]QueryResult @@ -123,8 +125,12 @@ func (node *Node) AliasOf() []*Node { return node.aliasOf } -func (node *Node) DisabledRuleIndices() []int { - return node.disabledRuleIndices +func (node *Node) RuleDisabled(index int) bool { + if node.disabledRuleIndices == nil { + return false + } + + return node.disabledRuleIndices.Test(uint(index)) } func (node *Node) QueryResults(queryID int) []QueryResult { @@ -143,6 +149,7 @@ type nodeDump struct { DataflowSources []int `yaml:"dataflow_sources,omitempty"` AliasOf []int `yaml:"alias_of,omitempty"` Queries []int `yaml:",omitempty"` + DisabledRules []int `yaml:",omitempty"` Children []nodeDump `yaml:",omitempty"` } @@ -162,12 +169,18 @@ func (node *Node) dumpValue() nodeDump { childDump[i] = child.dumpValue() } - var queries []int - for queryID := range node.queryResults { - queries = append(queries, queryID) - } + queries := maps.Keys(node.queryResults) slices.Sort(queries) + var disabledRules []int + if node.disabledRuleIndices != nil { + for i := 0; i < int(node.disabledRuleIndices.Len()); i++ { + if node.disabledRuleIndices.Test(uint(i)) { + disabledRules = append(disabledRules, i) + } + } + } + contentRange := fmt.Sprintf( "%d:%d - %d:%d", node.ContentStart.Line, @@ -190,6 +203,7 @@ func (node *Node) dumpValue() nodeDump { AliasOf: nodeListToID(node.aliasOf), Children: childDump, Queries: queries, + DisabledRules: disabledRules, } } diff --git a/internal/scanner/ast/tree/tree_test.go b/internal/scanner/ast/tree/tree_test.go index b1970ce55..23583e148 100644 --- a/internal/scanner/ast/tree/tree_test.go +++ b/internal/scanner/ast/tree/tree_test.go @@ -20,7 +20,7 @@ func parseTree(t *testing.T, content string) *tree.Tree { t.Fatalf("failed to parse input: %s", err) } - return tree.NewBuilder(contentBytes, sitterRootNode).Build() + return tree.NewBuilder(contentBytes, sitterRootNode, 0).Build() } func TestTree(t *testing.T) { diff --git a/internal/scanner/rulescanner/rulescanner.go b/internal/scanner/rulescanner/rulescanner.go index 8fce2be2e..f0fb88b10 100644 --- a/internal/scanner/rulescanner/rulescanner.go +++ b/internal/scanner/rulescanner/rulescanner.go @@ -3,7 +3,6 @@ package rulescanner import ( "context" "fmt" - "slices" "time" "github.com/rs/zerolog/log" @@ -115,7 +114,7 @@ func (scanner *Scanner) detectAtNode(rule *ruleset.Rule, node *tree.Node) (*dete return result, nil } - if scanner.ruleDisabledForNode(rule, node) { + if node.RuleDisabled(rule.Index()) { if log.Trace().Enabled() { log.Trace().Msgf( "detect at node end: %s at %s: rule disabled", @@ -146,16 +145,6 @@ func (scanner *Scanner) detectAtNode(rule *ruleset.Rule, node *tree.Node) (*dete return result, nil } -func (scanner *Scanner) ruleDisabledForNode(rule *ruleset.Rule, node *tree.Node) bool { - for current := node; current != nil; current = current.Parent() { - if slices.Contains(node.DisabledRuleIndices(), rule.Index()) { - return true - } - } - - return false -} - func traceResultText(result *detectorset.Result) string { if result.Sanitized { return "sanitized"