From 0504a347d36dbff48b21f53ccfedb46f3803855e Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Fri, 2 Aug 2024 16:40:08 +0800 Subject: [PATCH] feat: Add utility function for binary search based on symbol names (#273) Also add clearer documentation related to guarantees around document canonicalization. --- bindings/go/scip/canonicalize.go | 10 +++++++++- bindings/go/scip/sort.go | 23 ++++++++++++++++++++- bindings/go/scip/sort_test.go | 34 ++++++++++++++++++++++++++++++++ go.mod | 1 + go.sum | 4 +++- 5 files changed, 69 insertions(+), 3 deletions(-) diff --git a/bindings/go/scip/canonicalize.go b/bindings/go/scip/canonicalize.go index bcd469c1..ae6877b0 100644 --- a/bindings/go/scip/canonicalize.go +++ b/bindings/go/scip/canonicalize.go @@ -1,6 +1,14 @@ package scip -// CanonicalizeDocument deterministically re-orders the fields of the given document. +// CanonicalizeDocument deterministically sorts and merges fields of the given document. +// +// Post-conditions: +// 1. The Occurrences field only contains those with well-formed ranges +// (length 3 or 4, potentially empty). +// 2. The Occurrences field is sorted in ascending order of ranges based on +// Range.CompareStrict +// 3. The Symbols field is sorted in ascending order based on the symbol name, +// and SymbolInformation values for the same name will have been merged. func CanonicalizeDocument(document *Document) *Document { document.Occurrences = CanonicalizeOccurrences(document.Occurrences) document.Symbols = CanonicalizeSymbols(document.Symbols) diff --git a/bindings/go/scip/sort.go b/bindings/go/scip/sort.go index e8d145d3..3e76ad2b 100644 --- a/bindings/go/scip/sort.go +++ b/bindings/go/scip/sort.go @@ -2,10 +2,12 @@ package scip import ( "sort" + + "golang.org/x/exp/slices" ) // FindSymbol returns the symbol with the given name in the given document. If there is no symbol by -// that name, this function returns nil. +// that name, this function returns nil. Prefer using FindSymbolBinarySearch over this function. func FindSymbol(document *Document, symbolName string) *SymbolInformation { for _, symbol := range document.Symbols { if symbol.Symbol == symbolName { @@ -16,6 +18,25 @@ func FindSymbol(document *Document, symbolName string) *SymbolInformation { return nil } +// FindSymbolBinarySearch attempts to find the SymbolInformation in the given document. +// +// Pre-condition: The symbols array must be sorted in ascending order based on the symbol name, +// and SymbolInformation values must be merged. This guarantee is upheld by CanonicalizeDocument. +func FindSymbolBinarySearch(canonicalizedDocument *Document, symbolName string) *SymbolInformation { + i, found := slices.BinarySearchFunc(canonicalizedDocument.Symbols, symbolName, func(sym *SymbolInformation, lookup string) int { + if sym.Symbol < lookup { + return -1 + } else if sym.Symbol == lookup { + return 0 + } + return 1 + }) + if found { + return canonicalizedDocument.Symbols[i] + } + return nil +} + // SortDocuments sorts the given documents slice (in-place) and returns it (for convenience). Documents // are sorted in ascending order of their relative path. func SortDocuments(documents []*Document) []*Document { diff --git a/bindings/go/scip/sort_test.go b/bindings/go/scip/sort_test.go index a783e3ab..e0b2c6e9 100644 --- a/bindings/go/scip/sort_test.go +++ b/bindings/go/scip/sort_test.go @@ -4,6 +4,9 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" + "golang.org/x/exp/slices" + "pgregory.net/rapid" ) func TestFindOccurrences(t *testing.T) { @@ -122,3 +125,34 @@ func TestSortRanges(t *testing.T) { t.Errorf("unexpected occurrence order (-want +got):\n%s", diff) } } + +func genSymbolInfo() *rapid.Generator[*SymbolInformation] { + return rapid.Custom(func(t *rapid.T) *SymbolInformation { + return &SymbolInformation{Symbol: rapid.String().Draw(t, "symbol")} + }) +} + +func TestFindSymbolBinarySearch(t *testing.T) { + rapid.Check(t, func(t *rapid.T) { + symbolInfoGen := genSymbolInfo() + symbolInfos := rapid.SliceOfN(symbolInfoGen, 0, 10).Draw(t, "symbolInfos") + doc := &Document{Symbols: symbolInfos} + canonicalDoc := CanonicalizeDocument(doc) + for _, symbolInfo := range symbolInfos { + got := FindSymbolBinarySearch(canonicalDoc, symbolInfo.Symbol) + require.NotNil(t, got) + require.Equal(t, symbolInfo.Symbol, got.Symbol) + } + other := rapid.String().Draw(t, "otherSymbol") + isInOriginalSlice := slices.ContainsFunc(symbolInfos, func(info *SymbolInformation) bool { + return info.Symbol == other + }) + got := FindSymbolBinarySearch(canonicalDoc, other) + if isInOriginalSlice { + require.NotNil(t, got) + require.Equal(t, other, got.Symbol) + } else { + require.Nil(t, got) + } + }) +} diff --git a/go.mod b/go.mod index 3f40f280..0bc031eb 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/sourcegraph/sourcegraph/lib v0.0.0-20220511160847-5a43d3ea24eb github.com/stretchr/testify v1.8.4 github.com/urfave/cli/v2 v2.25.7 + golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 golang.org/x/tools v0.12.0 google.golang.org/protobuf v1.31.0 pgregory.net/rapid v1.1.0 diff --git a/go.sum b/go.sum index 39db4695..2a8d1842 100644 --- a/go.sum +++ b/go.sum @@ -454,6 +454,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk= golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 h1:MGwJjxBy0HJshjDNfLsYO8xppfqWlA5ZT9OhtUUhTNw= +golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -661,4 +663,4 @@ mvdan.cc/gofumpt v0.4.0/go.mod h1:PljLOHDeZqgS8opHRKLzp2It2VBuSdteAgqUfzMTxlQ= mvdan.cc/gofumpt v0.5.0 h1:0EQ+Z56k8tXjj/6TQD25BFNKQXpCvT0rnansIc7Ug5E= mvdan.cc/gofumpt v0.5.0/go.mod h1:HBeVDtMKRZpXyxFciAirzdKklDlGu8aAy1wEbH5Y9js= pgregory.net/rapid v1.1.0 h1:CMa0sjHSru3puNx+J0MIAuiiEV4N0qj8/cMWGBBCsjw= -pgregory.net/rapid v1.1.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04= \ No newline at end of file +pgregory.net/rapid v1.1.0/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04=