From d25ae1f3c242fe4044d6359eb8f16818c62dbe45 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 21 Aug 2024 20:51:07 -0700 Subject: [PATCH 1/4] Fix index too many file names bug --- modules/indexer/code/git.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index bc345f2325481..9b3c0c19904d5 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -113,7 +113,8 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio var changes internal.RepoChanges var err error updatedFilenames := make([]string, 0, 10) - for _, line := range strings.Split(stdout, "\n") { + lines := strings.Split(stdout, "\n") + for i, line := range lines { line = strings.TrimSpace(line) if len(line) == 0 { continue @@ -161,15 +162,23 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio default: log.Warn("Unrecognized status: %c (line=%s)", status, line) } - } - cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). - AddDashesAndList(updatedFilenames...) - lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) - if err != nil { - return nil, err + if (i%50 == 0 || i == len(lines)-1) && len(updatedFilenames) > 0 { + cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). + AddDashesAndList(updatedFilenames...) + lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + if err != nil { + return nil, err + } + + updates, err1 := parseGitLsTreeOutput(lsTreeStdout) + if err1 != nil { + return nil, err1 + } + changes.Updates = append(changes.Updates, updates...) + updatedFilenames = updatedFilenames[0:0] + } } - changes.Updates, err = parseGitLsTreeOutput(lsTreeStdout) return &changes, err } From ecdc522badea9988149306a4db90e06af8415778 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 25 Aug 2024 22:11:19 -0700 Subject: [PATCH 2/4] Use 30 instead of 50 --- modules/indexer/code/git.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 9b3c0c19904d5..f5695722ce4e2 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -163,7 +163,9 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio log.Warn("Unrecognized status: %c (line=%s)", status, line) } - if (i%50 == 0 || i == len(lines)-1) && len(updatedFilenames) > 0 { + // Accroding to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information + // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30 + if (i%30 == 0 || i == len(lines)-1) && len(updatedFilenames) > 0 { cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). AddDashesAndList(updatedFilenames...) lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) From 02d04bf80c266a9e194b94747ad0f76a02dee20e Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 25 Aug 2024 22:15:33 -0700 Subject: [PATCH 3/4] Fix lint --- modules/indexer/code/git.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index f5695722ce4e2..d590c05cee86e 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -163,7 +163,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio log.Warn("Unrecognized status: %c (line=%s)", status, line) } - // Accroding to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information + // According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30 if (i%30 == 0 || i == len(lines)-1) && len(updatedFilenames) > 0 { cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). From e16bb63be589f15bf32262080ae3fe1b0c574eaf Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 26 Aug 2024 11:22:38 -0700 Subject: [PATCH 4/4] Fix bugs if lines have been ignored --- modules/indexer/code/git.go | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index d590c05cee86e..df9783288b032 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -113,8 +113,24 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio var changes internal.RepoChanges var err error updatedFilenames := make([]string, 0, 10) + + updateChanges := func() error { + cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). + AddDashesAndList(updatedFilenames...) + lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) + if err != nil { + return err + } + + updates, err1 := parseGitLsTreeOutput(lsTreeStdout) + if err1 != nil { + return err1 + } + changes.Updates = append(changes.Updates, updates...) + return nil + } lines := strings.Split(stdout, "\n") - for i, line := range lines { + for _, line := range lines { line = strings.TrimSpace(line) if len(line) == 0 { continue @@ -165,22 +181,19 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio // According to https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation#more-information // the command line length should less than 8191 characters, assume filepath is 256, then 8191/256 = 31, so we use 30 - if (i%30 == 0 || i == len(lines)-1) && len(updatedFilenames) > 0 { - cmd := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l").AddDynamicArguments(revision). - AddDashesAndList(updatedFilenames...) - lsTreeStdout, _, err := cmd.RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) - if err != nil { + if len(updatedFilenames) >= 30 { + if err := updateChanges(); err != nil { return nil, err } - - updates, err1 := parseGitLsTreeOutput(lsTreeStdout) - if err1 != nil { - return nil, err1 - } - changes.Updates = append(changes.Updates, updates...) updatedFilenames = updatedFilenames[0:0] } } + if len(updatedFilenames) > 0 { + if err := updateChanges(); err != nil { + return nil, err + } + } + return &changes, err }