From f7e136aa01b808227d61f3e0b09898de0a22a3df Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Thu, 19 Oct 2023 18:14:34 +0200 Subject: [PATCH] score: remove repetition-boost The definition of how this is applied is very narrow and more often than not it works poorly. Originally I sent out a commit to dampen this using log, but Julie suggested just removing it, which sounds better. Test Plan: go test --- contentprovider.go | 1 - eval.go | 7 ------- 2 files changed, 8 deletions(-) diff --git a/contentprovider.go b/contentprovider.go index 5f27d2050..86e33cae2 100644 --- a/contentprovider.go +++ b/contentprovider.go @@ -461,7 +461,6 @@ const ( scoreSymbol = 7000.0 scorePartialSymbol = 4000.0 scoreKindMatch = 100.0 - scoreRepetitionFactor = 1.0 scoreFactorAtomMatch = 400.0 // File-only scoring signals. For now these are also bounded ~9000 to give them diff --git a/eval.go b/eval.go index c6eed6533..538de354d 100644 --- a/eval.go +++ b/eval.go @@ -415,13 +415,9 @@ func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, kn } maxFileScore := 0.0 - repetitions := 0 for i := range fileMatch.LineMatches { if maxFileScore < fileMatch.LineMatches[i].Score { maxFileScore = fileMatch.LineMatches[i].Score - repetitions = 0 - } else if maxFileScore == fileMatch.LineMatches[i].Score { - repetitions += 1 } // Order by ordering in file. @@ -442,9 +438,6 @@ func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, kn // the matches. fileMatch.addScore("fragment", maxFileScore, opts.DebugScore) - // Prefer docs with several top-scored matches. - fileMatch.addScore("repetition-boost", scoreRepetitionFactor*float64(repetitions), opts.DebugScore) - if opts.UseDocumentRanks && len(d.ranks) > int(doc) { weight := scoreFileRankFactor if opts.DocumentRanksWeight > 0.0 {