From 3ad3296d45df0099d7cd4374dd85d4fdb045d10f Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith
Date: Tue, 23 May 2023 11:09:14 +0200
Subject: [PATCH] wip own search

---
Review notes (not part of the commit message):
- own.Load now rejects an owner file with an odd number of fields instead of
  panicking on fields[i+1]; read.go already logs the error and falls back to
  own.Empty.
- The blob id on the own/own.go "index" line has not been regenerated for
  this edit.

 indexdata.go        |  4 ++++
 matchtree.go        | 15 +++++++++++++++
 own/own.go          | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 query/parse.go      |  8 ++++++++
 query/parse_test.go |  3 +++
 query/query.go      | 20 ++++++++++++++++++++
 read.go             | 15 +++++++++++++++
 7 files changed, 119 insertions(+)
 create mode 100644 own/own.go

diff --git a/indexdata.go b/indexdata.go
index 0e1e7609f..0bf3eab73 100644
--- a/indexdata.go
+++ b/indexdata.go
@@ -22,6 +22,7 @@ import (
 	"math/bits"
 	"unicode/utf8"
 
+	"github.com/sourcegraph/zoekt/own"
 	"github.com/sourcegraph/zoekt/query"
 )
 
@@ -100,6 +101,9 @@ type indexData struct {
 
 	// rawConfigMasks contains the encoded RawConfig for each repository
 	rawConfigMasks []uint8
+
+	// own per repo
+	own []own.Own
 }
 
 type symbolData struct {
diff --git a/matchtree.go b/matchtree.go
index 141c1c0c1..e3c19e7bd 100644
--- a/matchtree.go
+++ b/matchtree.go
@@ -1022,6 +1022,21 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error)
 			},
 		}, nil
 
+	case *query.Own:
+		var compiled []func([]byte) bool
+		for _, o := range d.own {
+			compiled = append(compiled, o.CompileForAuthor(s.SearchTerm))
+		}
+
+		return &docMatchTree{
+			reason:  "Own",
+			numDocs: d.numDocs(),
+			predicate: func(docID uint32) bool {
+				fileName := d.fileName(docID)
+				return compiled[d.repos[docID]](fileName)
+			},
+		}, nil
+
 	case *query.BranchesRepos:
 		reposBranchesWant := make([]uint64, len(d.repoMetaData))
 		for repoIdx := range d.repoMetaData {
diff --git a/own/own.go b/own/own.go
new file mode 100644
index 000000000..cce81f4c8
--- /dev/null
+++ b/own/own.go
@@ -0,0 +1,54 @@
+package own
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+)
+
+// Load reads the ownership file at ownPath. The file is a whitespace
+// separated sequence of "author pattern" pairs.
+func Load(ownPath string) (Own, error) {
+	b, err := os.ReadFile(ownPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read owner file: %w", err)
+	}
+
+	fields := bytes.Fields(b)
+	if len(fields)%2 != 0 {
+		// A dangling author without a pattern would index past the end of
+		// fields in the loop below, so reject the file instead of panicking.
+		return nil, fmt.Errorf("malformed owner file %q: odd number of fields", ownPath)
+	}
+
+	owners := make(own, len(fields)/2)
+	for i := 0; i < len(fields); i += 2 {
+		owners[string(fields[i])] = fields[i+1]
+	}
+	return owners, nil
+}
+
+// Empty is an Own without any entries; it never matches a path.
+var Empty = make(own)
+
+// Own answers ownership questions for the files of a single repository.
+type Own interface {
+	// CompileForAuthor returns a predicate reporting whether path is owned by
+	// author. This is the way to go until we are proven it is too slow.
+	CompileForAuthor(author string) func(path []byte) bool
+}
+
+// own maps an author to the pattern which paths owned by them contain.
+type own map[string][]byte
+
+func (o own) CompileForAuthor(author string) func(path []byte) bool {
+	pattern, ok := o[author]
+	if ok {
+		return func(path []byte) bool {
+			return bytes.Contains(path, pattern)
+		}
+	}
+	return func(path []byte) bool {
+		return false
+	}
+}
diff --git a/query/parse.go b/query/parse.go
index 8602d7574..5d0c832cd 100644
--- a/query/parse.go
+++ b/query/parse.go
@@ -191,6 +191,11 @@ func parseExpr(in []byte) (Q, int, error) {
 		}
 
 		expr = &Symbol{q}
+
+	case tokOwn:
+		// TODO support any and none
+		expr = &Own{SearchTerm: text}
+
 	case tokParenClose:
 		// Caller must consume paren.
 		expr = nil
@@ -393,6 +398,7 @@ const (
 	tokArchived   = 15
 	tokPublic     = 16
 	tokFork       = 17
+	tokOwn        = 18
 )
 
 var tokNames = map[int]string{
@@ -404,6 +410,7 @@ var tokNames = map[int]string{
 	tokFork:       "Fork",
 	tokNegate:     "Negate",
 	tokOr:         "Or",
+	tokOwn:        "Own",
 	tokParenClose: "ParenClose",
 	tokParenOpen:  "ParenOpen",
 	tokPublic:     "Public",
@@ -425,6 +432,7 @@ var prefixes = map[string]int{
 	"f:":        tokFile,
 	"file:":     tokFile,
 	"fork:":     tokFork,
+	"own:":      tokOwn,
 	"public:":   tokPublic,
 	"r:":        tokRepo,
 	"regex:":    tokRegex,
diff --git a/query/parse_test.go b/query/parse_test.go
index 233a1545d..1986b8adc 100644
--- a/query/parse_test.go
+++ b/query/parse_test.go
@@ -114,6 +114,9 @@ func TestParseQuery(t *testing.T) {
 		{"type:file abc def", &Type{Type: TypeFileName, Child: NewAnd(&Substring{Pattern: "abc"}, &Substring{Pattern: "def"})}},
 		{"(type:repo abc) def", NewAnd(&Type{Type: TypeRepo, Child: &Substring{Pattern: "abc"}}, &Substring{Pattern: "def"})},
 
+		// own
+		{"foo own:cezary", NewAnd(&Substring{Pattern: "foo"}, &Own{SearchTerm: "cezary"})},
+
 		// errors.
 		{"--", nil},
 		{"\"abc", nil},
diff --git a/query/query.go b/query/query.go
index 6a8c5dd3a..87336fc23 100644
--- a/query/query.go
+++ b/query/query.go
@@ -498,6 +498,26 @@ func (q *GobCache) String() string {
 	return fmt.Sprintf("GobCache(%s)", q.Q)
 }
 
+// Own searches ownership. The fields are mutually exclusive.
+type Own struct {
+	SearchTerm string
+	NoOwner    bool
+	AnyOwner   bool
+}
+
+func (q *Own) String() string {
+	if q.SearchTerm != "" {
+		return fmt.Sprintf("(own %q)", q.SearchTerm)
+	}
+	if q.NoOwner {
+		return "(own none)"
+	}
+	if q.AnyOwner {
+		return "(own any)"
+	}
+	return "(own malformed)"
+}
+
 // Or is matched when any of its children is matched.
 type Or struct {
 	Children []Q
diff --git a/read.go b/read.go
index 9f49b3b6a..71eeea7d3 100644
--- a/read.go
+++ b/read.go
@@ -20,10 +20,13 @@ import (
 	"fmt"
 	"hash/crc64"
 	"log"
+	"net/url"
 	"os"
+	"path/filepath"
 	"sort"
 
 	"github.com/rs/xid"
+	"github.com/sourcegraph/zoekt/own"
 )
 
 // IndexFile is a file suitable for concurrent read access. For performance
@@ -397,6 +400,18 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) {
 		}
 	}
 
+	{
+		dir := filepath.Dir(r.r.Name())
+		for _, md := range d.repoMetaData {
+			o, err := own.Load(filepath.Join(dir, url.QueryEscape(md.Name)) + ".own")
+			if err != nil {
+				log.Printf("ignoring error for loading own for %s: %s", md.Name, err)
+				o = own.Empty
+			}
+			d.own = append(d.own, o)
+		}
+	}
+
 	if d.metaData.IndexFormatVersion >= 17 {
 		blob, err := d.readSectionBlob(toc.repos)
 		if err != nil {