Skip to content

Commit

Permalink
score: experimental extension novelty in sorting
Browse files Browse the repository at this point in the history
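Right now we boost the first file match whose extension does not appear
among the top two results into the 3rd position, provided its score is at
least 90% of the score of the result it displaces. This is gated by the
ZOEKT_NOVELTY environment variable. I want to explore whether there are
ways we can turn on this behaviour with the query language.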

Test Plan: ZOEKT_NOVELTY=1 go run ./cmd/zoekt foo
  • Loading branch information
keegancsmith committed Oct 19, 2023
1 parent 7cc2872 commit 6984e15
Showing 1 changed file with 48 additions and 0 deletions.
48 changes: 48 additions & 0 deletions contentprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@ import (
"bytes"
"fmt"
"log"
"os"
"path"
"sort"
"strings"
"unicode/utf8"

"golang.org/x/exp/slices"
)

var _ = log.Println
Expand Down Expand Up @@ -913,9 +917,53 @@ func sortChunkMatchesByScore(ms []ChunkMatch) {
sort.Sort(chunkMatchScoreSlice(ms))
}

// doNovelty gates the experimental extension-novelty boost applied by
// SortFiles. It is computed once, when the package is initialised, from the
// ZOEKT_NOVELTY environment variable: any non-empty value enables the boost.
var doNovelty = len(os.Getenv("ZOEKT_NOVELTY")) > 0

// SortFiles sorts file matches in place. The order depends on the match
// score, which includes both query-dependent signals like word overlap, and
// file-only signals like the file ranks (if file ranks are enabled).
//
// When the doNovelty flag is set, one additional experimental pass runs after
// the sort: boostNovelExtension may move a single match into the third
// position.
func SortFiles(ms []FileMatch) {
	sort.Sort(fileMatchesByScore(ms))

	if !doNovelty {
		return
	}

	// Experimental: try to place a match with a not-yet-seen file extension
	// at index 2 (the third result), as long as its score is at least 90% of
	// the score of the match currently sitting there.
	const (
		novelOffset   = 2
		novelMinRatio = 0.9
	)
	boostNovelExtension(ms, novelOffset, novelMinRatio)
}

// boostNovelExtension moves at most one element of ms to index boostOffset.
//
// The elements before boostOffset are left untouched; their file extensions
// (as reported by path.Ext on FileName) form the set of "already seen"
// extensions. The first element at or after boostOffset whose extension is
// not in that set, and whose Score is not below minScoreRatio times the Score
// of the element currently at boostOffset, is moved to boostOffset. The
// elements it jumps over each shift one position towards the end, keeping
// their relative order. If no element qualifies, ms is left unchanged.
//
// The function does nothing when fewer than two elements sit at or after
// boostOffset.
func boostNovelExtension(ms []FileMatch, boostOffset int, minScoreRatio float64) {
	if len(ms) <= boostOffset+1 {
		return
	}

	head, tail := ms[:boostOffset], ms[boostOffset:]

	// A replacement must not score significantly lower than the match it
	// displaces, so derive the cut-off from the current occupant of the slot.
	threshold := tail[0].Score * minScoreRatio

	// Extensions already represented in the untouched head of the list.
	seenExts := make([]string, 0, len(head))
	for j := range head {
		seenExts = append(seenExts, path.Ext(head[j].FileName))
	}

	for idx := range tail {
		// Every element is checked against the threshold instead of stopping
		// at the first low score: the input may be a subset that is not
		// sorted by score.
		if tail[idx].Score < threshold || slices.Contains(seenExts, path.Ext(tail[idx].FileName)) {
			continue
		}

		// Rotate the chosen match to the front of tail (that is,
		// ms[boostOffset]), sliding everything it passes down by one.
		novel := tail[idx]
		copy(tail[1:idx+1], tail[:idx])
		tail[0] = novel
		return
	}
}

0 comments on commit 6984e15

Please sign in to comment.