From e6577502f3f4b9c38819e6566883d69249979768 Mon Sep 17 00:00:00 2001 From: Camden Cheek Date: Fri, 3 Nov 2023 19:20:08 -0600 Subject: [PATCH 1/5] determine if we are looking for an exact symbol --- matchtree.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/matchtree.go b/matchtree.go index fad840c0..ef08c1a0 100644 --- a/matchtree.go +++ b/matchtree.go @@ -239,6 +239,7 @@ func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map type symbolSubstrMatchTree struct { *substrMatchTree + exact bool patternSize uint32 fileEndRunes []uint32 fileEndSymbol []uint32 @@ -983,6 +984,26 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) optCopy := opt optCopy.DisableWordMatchOptimization = true + exact := false + switch e := s.Expr.(type) { + case *query.Regexp: + if e.Regexp.Op != syntax.OpConcat || len(e.Regexp.Sub) != 3 { + break + } + ops := e.Regexp.Sub + if !(ops[0].Op == syntax.OpBeginLine || ops[0].Op == syntax.OpBeginText) { + break + } + if ops[1].Op != syntax.OpLiteral { + break + } + if !(ops[2].Op == syntax.OpEndLine || ops[2].Op == syntax.OpEndText) { + break + } + exact = true + e.Regexp = ops[1] + } + subMT, err := d.newMatchTree(s.Expr, optCopy) if err != nil { return nil, err @@ -991,6 +1012,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) if substr, ok := subMT.(*substrMatchTree); ok { return &symbolSubstrMatchTree{ substrMatchTree: substr, + exact: exact, patternSize: uint32(utf8.RuneCountInString(substr.query.Pattern)), fileEndRunes: d.fileEndRunes, fileEndSymbol: d.fileEndSymbol, From e005a75cfcd99da6437151ee0c6c4e115bf590ce Mon Sep 17 00:00:00 2001 From: Camden Cheek Date: Fri, 3 Nov 2023 19:28:23 -0600 Subject: [PATCH 2/5] invert condition --- matchtree.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/matchtree.go b/matchtree.go index ef08c1a0..dbbf809d 100644 --- a/matchtree.go +++ b/matchtree.go @@ -293,13 +293,14 @@ func (t *symbolSubstrMatchTree) prepare(doc uint32) { continue } - if end <= sections[secIdx].End { - t.current[0].symbol = true - t.current[0].symbolIdx = uint32(secIdx) - trimmed = append(trimmed, t.current[0]) + if end > sections[secIdx].End { + t.current = t.current[1:] + continue } - t.current = t.current[1:] + t.current[0].symbol = true + t.current[0].symbolIdx = uint32(secIdx) + trimmed = append(trimmed, t.current[0]) } t.current = trimmed } From 5c4f6a0b51d56e0357994fcee627c1481376e127 Mon Sep 17 00:00:00 2001 From: Camden Cheek Date: Fri, 3 Nov 2023 19:56:39 -0600 Subject: [PATCH 3/5] working --- matchtree.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/matchtree.go b/matchtree.go index dbbf809d..49184f50 100644 --- a/matchtree.go +++ b/matchtree.go @@ -298,9 +298,15 @@ func (t *symbolSubstrMatchTree) prepare(doc uint32) { continue } + if t.exact && !(start == sections[secIdx].Start && end == sections[secIdx].End) { + t.current = t.current[1:] + continue + } + t.current[0].symbol = true t.current[0].symbolIdx = uint32(secIdx) trimmed = append(trimmed, t.current[0]) + t.current = t.current[1:] } t.current = trimmed } @@ -986,6 +992,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) optCopy.DisableWordMatchOptimization = true exact := false + expr := s.Expr switch e := s.Expr.(type) { case *query.Regexp: if e.Regexp.Op != syntax.OpConcat || len(e.Regexp.Sub) != 3 { @@ -1002,10 +1009,12 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) break } exact = true - e.Regexp = ops[1] + exprCopy := *e + exprCopy.Regexp = ops[1] + expr = &exprCopy } - subMT, err := d.newMatchTree(s.Expr, optCopy) + subMT, err := d.newMatchTree(expr, optCopy) if err != nil { return nil, err } From d7d9375e8ae0c0695be5b005c593b83a661c525d Mon Sep 17 00:00:00 2001 From: Camden Cheek Date: Mon, 6 Nov 2023 13:31:24 -0700 Subject: [PATCH 4/5] simplify --- matchtree.go | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/matchtree.go b/matchtree.go index 49184f50..d4904a0f 100644 --- a/matchtree.go +++ b/matchtree.go @@ -239,7 +239,7 @@ func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map type symbolSubstrMatchTree struct { *substrMatchTree - exact bool + anchored bool patternSize uint32 fileEndRunes []uint32 fileEndSymbol []uint32 @@ -298,7 +298,7 @@ func (t *symbolSubstrMatchTree) prepare(doc uint32) { continue } - if t.exact && !(start == sections[secIdx].Start && end == sections[secIdx].End) { + if t.anchored && !(start == sections[secIdx].Start && end == sections[secIdx].End) { t.current = t.current[1:] continue } @@ -991,27 +991,22 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) optCopy := opt optCopy.DisableWordMatchOptimization = true - exact := false + anchored := false expr := s.Expr switch e := s.Expr.(type) { case *query.Regexp: - if e.Regexp.Op != syntax.OpConcat || len(e.Regexp.Sub) != 3 { - break - } - ops := e.Regexp.Sub - if !(ops[0].Op == syntax.OpBeginLine || ops[0].Op == syntax.OpBeginText) { - break - } - if ops[1].Op != syntax.OpLiteral { - break - } - if !(ops[2].Op == syntax.OpEndLine || ops[2].Op == syntax.OpEndText) { - break + pattern := e.Regexp.String() + if strings.HasPrefix(pattern, "^") && strings.HasSuffix(pattern, "$") { + pattern = pattern[1 : len(pattern)-1] + parsedPattern, err := syntax.Parse(pattern, e.Regexp.Flags) + if err != nil { + return nil, err + } + eCopy := *e + eCopy.Regexp = parsedPattern + expr = &eCopy + anchored = true } - exact = true - exprCopy := *e - exprCopy.Regexp = ops[1] - expr = &exprCopy } subMT, err := d.newMatchTree(expr, optCopy) @@ -1022,7 +1017,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) if substr, ok := subMT.(*substrMatchTree); ok { return &symbolSubstrMatchTree{ substrMatchTree: substr, - exact: exact, + anchored: anchored, patternSize: uint32(utf8.RuneCountInString(substr.query.Pattern)), fileEndRunes: d.fileEndRunes, fileEndSymbol: d.fileEndSymbol, From 186704f68cb7205bb032452961be173e8c8746df Mon Sep 17 00:00:00 2001 From: Camden Cheek Date: Tue, 7 Nov 2023 15:44:34 -0700 Subject: [PATCH 5/5] use AST --- matchtree.go | 59 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/matchtree.go b/matchtree.go index d4904a0f..e74c3641 100644 --- a/matchtree.go +++ b/matchtree.go @@ -239,7 +239,7 @@ func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map type symbolSubstrMatchTree struct { *substrMatchTree - anchored bool + exact bool patternSize uint32 fileEndRunes []uint32 fileEndSymbol []uint32 @@ -298,7 +298,7 @@ func (t *symbolSubstrMatchTree) prepare(doc uint32) { continue } - if t.anchored && !(start == sections[secIdx].Start && end == sections[secIdx].End) { + if t.exact && !(start == sections[secIdx].Start && end == sections[secIdx].End) { t.current = t.current[1:] continue } @@ -991,22 +991,9 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) optCopy := opt optCopy.DisableWordMatchOptimization = true - anchored := false - expr := s.Expr - switch e := s.Expr.(type) { - case *query.Regexp: - pattern := e.Regexp.String() - if strings.HasPrefix(pattern, "^") && strings.HasSuffix(pattern, "$") { - pattern = pattern[1 : len(pattern)-1] - parsedPattern, err := syntax.Parse(pattern, e.Regexp.Flags) - if err != nil { - return nil, err - } - eCopy := *e - eCopy.Regexp = parsedPattern - expr = &eCopy - anchored = true - } + expr, wasAnchored := s.Expr, false + if regexpExpr, ok := expr.(*query.Regexp); ok { + expr, wasAnchored = stripAnchors(regexpExpr) } subMT, err := d.newMatchTree(expr, optCopy) @@ -1017,7 +1004,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) if substr, ok := subMT.(*substrMatchTree); ok { return &symbolSubstrMatchTree{ substrMatchTree: substr, - anchored: anchored, + exact: wasAnchored, patternSize: uint32(utf8.RuneCountInString(substr.query.Pattern)), fileEndRunes: d.fileEndRunes, fileEndSymbol: d.fileEndSymbol, @@ -1283,3 +1270,37 @@ func pruneMatchTree(mt matchTree) (matchTree, error) { } return mt, err } + +func stripAnchors(in *query.Regexp) (out *query.Regexp, stripped bool) { + stripRegexpAnchors := func(in *syntax.Regexp) (out *syntax.Regexp, stripped bool) { + if in.Op != syntax.OpConcat { + return out, false + } + + if len(in.Sub) < 3 { + return out, false + } + + firstOp, lastOp := in.Sub[0].Op, in.Sub[len(in.Sub)-1].Op + + if firstOp != syntax.OpBeginLine && firstOp != syntax.OpBeginText { + return out, false + } + if lastOp != syntax.OpEndLine && lastOp != syntax.OpEndText { + return out, false + } + + inCopy := *in + inCopy.Sub = in.Sub[1 : len(in.Sub)-1] // remove the first and last ops, which are the anchors + return &inCopy, true + } + + newRegexp, stripped := stripRegexpAnchors(in.Regexp) + if !stripped { + return in, false + } + + inCopy := *in + inCopy.Regexp = newRegexp + return &inCopy, true +}