From 5212eea01155816fc9dcb5732085be519eb26ba8 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Thu, 9 Dec 2021 11:07:44 -0700 Subject: [PATCH] api: don't send IndexMetadata.LanguageMap (#221) This mapping from language names to per-shard language IDs is purely internal and useless for RPCs, but the IndexMetadata type is shared between the on-disk format (as JSON) and the RPC format (with every request). With the recent go-enry change, there are far more languages in the mapping, and we noticed an unwanted increase in RAM usage. This is especially wasteful as the client never even looks at the field. Gob doesn't support tagging fields to mark them as non-exported, and a custom encoder is weird, so instead when a shard is read from disk and indexData is built, we set IndexMetadata.LanguageMap to nil. This addresses sourcegraph/sourcegraph#28799 Change-Id: Ief24dfc7e40cf3abdb7b557a1d2b6f3fad5ed61f --- api.go | 2 +- eval.go | 4 ++-- indexdata.go | 5 ++++- matchtree.go | 2 +- merge.go | 2 +- read.go | 9 ++++++--- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/api.go b/api.go index 40f90f4e..2af58b60 100644 --- a/api.go +++ b/api.go @@ -381,7 +381,7 @@ type IndexMetadata struct { IndexMinReaderVersion int IndexTime time.Time PlainASCII bool - LanguageMap map[string]uint16 + LanguageMap map[string]uint16 // not exported in RPCs ZoektVersion string ID string } diff --git a/eval.go b/eval.go index 314076ac..8943a8e2 100644 --- a/eval.go +++ b/eval.go @@ -99,7 +99,7 @@ func (d *indexData) simplify(in query.Q) query.Q { return r.Set[repo.Name] }) case *query.Language: - _, has := d.metaData.LanguageMap[r.Language] + _, has := d.languageMap[r.Language] if !has && d.metaData.IndexFeatureVersion < 12 { // For index files that haven't been re-indexed by go-enry, // fall back to file-based matching and continue even if this @@ -270,7 +270,7 @@ nextFileMatch: RepositoryPriority: md.priority, FileName: string(d.fileName(nextDoc)), Checksum: d.getChecksum(nextDoc), 
- Language: d.languageMap[d.getLanguage(nextDoc)], + Language: d.languageMapRev[d.getLanguage(nextDoc)], } if s := d.subRepos[nextDoc]; s > 0 { diff --git a/indexdata.go b/indexdata.go index c115ffab..8b97175d 100644 --- a/indexdata.go +++ b/indexdata.go @@ -86,8 +86,11 @@ type indexData struct { // languages for all the files. languages []byte + // copy of LanguageMap from Metadata before wiping + languageMap map[string]uint16 + // inverse of LanguageMap in metaData - languageMap map[uint16]string + languageMapRev []string repoListEntry []RepoListEntry diff --git a/matchtree.go b/matchtree.go index 7b0f86f8..172efedd 100644 --- a/matchtree.go +++ b/matchtree.go @@ -870,7 +870,7 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) { return &noMatchTree{"const"}, nil } case *query.Language: - code, ok := d.metaData.LanguageMap[s.Language] + code, ok := d.languageMap[s.Language] if !ok { return &noMatchTree{"lang"}, nil } diff --git a/merge.go b/merge.go index b02e2833..83f91c24 100644 --- a/merge.go +++ b/merge.go @@ -124,7 +124,7 @@ func merge(ds ...*indexData) (*IndexBuilder, error) { // Content set below since it can return an error // Branches set below since it requires lookups SubRepositoryPath: d.subRepoPaths[repoID][d.subRepos[docID]], - Language: d.languageMap[d.getLanguage(docID)], + Language: d.languageMapRev[d.getLanguage(docID)], // SkipReason not set, will be part of content from original indexer. 
} diff --git a/read.go b/read.go index 39965b82..eb3d2bd3 100644 --- a/read.go +++ b/read.go @@ -370,10 +370,13 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { d.subRepoPaths = append(d.subRepoPaths, keys) } - d.languageMap = map[uint16]string{} - for k, v := range d.metaData.LanguageMap { - d.languageMap[v] = k + d.languageMap = d.metaData.LanguageMap + d.languageMapRev = make([]string, len(d.languageMap)) + for k, v := range d.languageMap { + d.languageMapRev[v] = k } + // LanguageMap is entirely useless for readers + d.metaData.LanguageMap = nil if err := d.verify(); err != nil { return nil, err