From 2655fb00dbaaa00344ed931aa05b96e6277a4e05 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Tue, 25 Jul 2023 20:58:22 -0400 Subject: [PATCH] Add global cache for pushed times (#612) After testing, I'm worried that listing statuses on busy repositories will cause too many API requests, even with HTTP caching. Since these values are safe to cache forever, add a simple in-memory LRU cache in each server instance to minimize API calls. This required addional changes to how the PushedAt function works because we want to cache the evaluation timestamp if we use it. This meant moving the batch checking in to the PushedAt function instead of handling it in the approval logic. The new version is closer to how things used to work when we loaded the pushedDate field from the API. Also add a test for the PushedAt context function that verifies the caching behavior. --- .github/workflows/build.yml | 1 - go.mod | 2 +- policy/approval/approve.go | 20 --- policy/approval/approve_test.go | 80 ++++++----- pull/context.go | 1 - pull/github.go | 126 +++++++++++++++-- pull/github_test.go | 130 +++++++++++++++--- pull/global_cache.go | 63 +++++++++ ...5fcae367230ee709313dd2720da527d178ce43.yml | 50 +++++++ .../testdata/responses/repo_statuses_none.yml | 3 + server/config.go | 7 + server/handler/base.go | 3 +- server/server.go | 27 +++- 13 files changed, 422 insertions(+), 91 deletions(-) create mode 100644 pull/global_cache.go create mode 100644 pull/testdata/responses/repo_statuses_e05fcae367230ee709313dd2720da527d178ce43.yml create mode 100644 pull/testdata/responses/repo_statuses_none.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 346096be..5a55d0b9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,6 @@ on: - '**' pull_request: - branches: [develop] release: types: [published] diff --git a/go.mod b/go.mod index d664d5cd..57ed0e4f 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/google/go-github/v53 v53.2.0 github.com/google/go-querystring v1.1.0 github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 + github.com/hashicorp/golang-lru v0.6.0 github.com/palantir/go-baseapp v0.5.1 github.com/palantir/go-githubapp v0.18.0 github.com/pkg/errors v0.9.1 @@ -33,7 +34,6 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/golang-jwt/jwt/v4 v4.5.0 // indirect github.com/golang/protobuf v1.5.3 // indirect - github.com/hashicorp/golang-lru v0.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/kr/text v0.2.0 // indirect github.com/mattn/go-colorable v0.1.12 // indirect diff --git a/policy/approval/approve.go b/policy/approval/approve.go index 5b1bf4bd..8b402027 100644 --- a/policy/approval/approve.go +++ b/policy/approval/approve.go @@ -315,26 +315,6 @@ func (r *Rule) filterInvalidCandidates(ctx context.Context, prctx pull.Context, return nil, nil, errors.Wrap(err, "failed to get last push timestamp") } - // The most recent filtered commit did not have a status. This can happen - // in two situations: - // - // The commit was pushed in a batch where the head commit is ignored... - if lastPushedAt.IsZero() && sha != prctx.HeadSHA() { - sha = prctx.HeadSHA() - lastPushedAt, err = prctx.PushedAt(sha) - if err != nil { - return nil, nil, errors.Wrap(err, "failed to get last push timestamp") - } - } - // ... or the commit (or the head commit) hasn't been evaluated yet - // - // In this case, we're the first evaluation for this commit, so set the - // push time to the curent time, which is guaranteed to be after the actual - // push due to webhook delivery and processing time. - if lastPushedAt.IsZero() { - lastPushedAt = prctx.EvaluationTimestamp() - } - var allowed []*common.Candidate var dismissed []*common.Dismissal for _, c := range candidates { diff --git a/policy/approval/approve_test.go b/policy/approval/approve_test.go index f5394997..27edb5bd 100644 --- a/policy/approval/approve_test.go +++ b/policy/approval/approve_test.go @@ -36,8 +36,7 @@ func TestIsApproved(t *testing.T) { now := time.Now() basePullContext := func() *pulltest.Context { return &pulltest.Context{ - EvaluationTimestampValue: now, - AuthorValue: "mhaypenny", + AuthorValue: "mhaypenny", BodyValue: &pull.Body{ Body: "/no-platform", CreatedAt: now.Add(10 * time.Second), @@ -386,32 +385,6 @@ func TestIsApproved(t *testing.T) { assertPending(t, prctx, r, "0/1 required approvals. Ignored 6 approvals from disqualified users") }) - t.Run("invalidateCommentOnPushFirstEvaluation", func(t *testing.T) { - prctx := basePullContext() - prctx.EvaluationTimestampValue = now.Add(25 * time.Second) - prctx.HeadSHAValue = "c6ade256ecfc755d8bc877ef22cc9e01745d46bb" - prctx.CommitsValue = []*pull.Commit{ - { - SHA: "c6ade256ecfc755d8bc877ef22cc9e01745d46bb", - Author: "mhaypenny", - Committer: "mhaypenny", - }, - } - - r := &Rule{ - Requires: common.Requires{ - Count: 1, - Actors: common.Actors{ - Users: []string{"comment-approver"}, - }, - }, - } - assertApproved(t, prctx, r, "Approved by comment-approver") - - r.Options.InvalidateOnPush = true - assertPending(t, prctx, r, "0/1 required approvals. Ignored 6 approvals from disqualified users") - }) - t.Run("invalidateReviewOnPush", func(t *testing.T) { prctx := basePullContext() prctx.PushedAtValue = map[string]time.Time{ @@ -442,9 +415,9 @@ func TestIsApproved(t *testing.T) { t.Run("ignoreUpdateMergeAfterReview", func(t *testing.T) { prctx := basePullContext() - prctx.EvaluationTimestampValue = now.Add(25 * time.Second) prctx.PushedAtValue = map[string]time.Time{ "c6ade256ecfc755d8bc877ef22cc9e01745d46bb": now, + "647c5078288f0ea9de27b5c280f25edaf2089045": now.Add(25 * time.Second), } prctx.HeadSHAValue = "647c5078288f0ea9de27b5c280f25edaf2089045" prctx.CommitsValue = append(prctx.CommitsValue[:1], &pull.Commit{ @@ -476,9 +449,6 @@ func TestIsApproved(t *testing.T) { t.Run("ignoreUpdateMergeContributor", func(t *testing.T) { prctx := basePullContext() - prctx.PushedAtValue = map[string]time.Time{ - "c6ade256ecfc755d8bc877ef22cc9e01745d46bb": now.Add(25 * time.Second), - } prctx.HeadSHAValue = "647c5078288f0ea9de27b5c280f25edaf2089045" prctx.CommitsValue = append(prctx.CommitsValue[:1], &pull.Commit{ SHA: "647c5078288f0ea9de27b5c280f25edaf2089045", @@ -584,6 +554,52 @@ func TestIsApproved(t *testing.T) { assertApproved(t, prctx, r, "Approved by comment-approver") }) + t.Run("ignoreCommitsInvalidateOnPushBatches", func(t *testing.T) { + prctx := basePullContext() + prctx.PushedAtValue = map[string]time.Time{ + "584b9232835ae85a2d8216fb55fd9cad8389092c": now.Add(25 * time.Second), + "7f4cd9d3999061605ce4cf261234e431b52e61ee": now, + } + prctx.HeadSHAValue = "584b9232835ae85a2d8216fb55fd9cad8389092c" + prctx.CommitsValue = []*pull.Commit{ + { + SHA: "584b9232835ae85a2d8216fb55fd9cad8389092c", + Author: "mhaypenny", + Committer: "mhaypenny", + Parents: []string{"7f4cd9d3999061605ce4cf261234e431b52e61ee"}, + }, + { + SHA: "7f4cd9d3999061605ce4cf261234e431b52e61ee", + Author: "mhaypenny", + Committer: "mhaypenny", + Parents: []string{"b2b73abd18bbad3ee3c6d0055697177cf476bf67"}, + }, + { + SHA: "b2b73abd18bbad3ee3c6d0055697177cf476bf67", + Author: "nstrawnickel", + Committer: "nstrawnickel", + }, + } + + r := &Rule{ + Requires: common.Requires{ + Count: 1, + Actors: common.Actors{ + Users: []string{"comment-approver"}, + }, + }, + } + assertApproved(t, prctx, r, "Approved by comment-approver") + + r.Options.InvalidateOnPush = true + assertPending(t, prctx, r, "0/1 required approvals. Ignored 6 approvals from disqualified users") + + r.Options.IgnoreCommitsBy = common.Actors{ + Users: []string{"mhaypenny"}, + } + assertApproved(t, prctx, r, "Approved by comment-approver") + }) + t.Run("ignoreEditedReviewComments", func(t *testing.T) { prctx := basePullContext() diff --git a/pull/context.go b/pull/context.go index be0b5597..1e3329dd 100644 --- a/pull/context.go +++ b/pull/context.go @@ -94,7 +94,6 @@ type Context interface { // PushedAt returns the time at which the commit with sha was pushed. The // returned time may be after the actual push time, but must not be before. - // PushedAt returns the zero time if the push time for is unknown. PushedAt(sha string) (time.Time, error) // Comments lists all comments on a Pull Request. The comment order is diff --git a/pull/github.go b/pull/github.go index 3a99c902..0e26764a 100644 --- a/pull/github.go +++ b/pull/github.go @@ -106,6 +106,7 @@ func (loc Locator) toV4(ctx context.Context, client *githubv4.Client) (*v4PullRe v4.HeadRepository.Name = loc.Value.GetHead().GetRepo().GetName() v4.HeadRepository.Owner.Login = loc.Value.GetHead().GetRepo().GetOwner().GetLogin() v4.BaseRefName = loc.Value.GetBase().GetRef() + v4.BaseRepository.DatabaseID = loc.Value.GetBase().GetRepo().GetID() v4.IsDraft = loc.Value.GetDraft() return &v4, nil } @@ -115,9 +116,10 @@ func (loc Locator) toV4(ctx context.Context, client *githubv4.Client) (*v4PullRe type GitHubContext struct { MembershipContext - ctx context.Context - client *github.Client - v4client *githubv4.Client + ctx context.Context + client *github.Client + v4client *githubv4.Client + globalCache GlobalCache evalTimestamp time.Time @@ -145,7 +147,14 @@ type GitHubContext struct { // obtain information. It caches responses for the lifetime of the context. The // pull request passed to the context must contain at least the base repository // and the number or the function panics. -func NewGitHubContext(ctx context.Context, mbrCtx MembershipContext, client *github.Client, v4client *githubv4.Client, loc Locator) (Context, error) { +func NewGitHubContext( + ctx context.Context, + mbrCtx MembershipContext, + globalCache GlobalCache, + client *github.Client, + v4client *githubv4.Client, + loc Locator, +) (Context, error) { if loc.Owner == "" || loc.Repo == "" || loc.Number == 0 { panic("pull request object does not contain full identifying information") } @@ -158,9 +167,10 @@ func NewGitHubContext(ctx context.Context, mbrCtx MembershipContext, client *git return &GitHubContext{ MembershipContext: mbrCtx, - ctx: ctx, - client: client, - v4client: v4client, + ctx: ctx, + client: client, + v4client: v4client, + globalCache: globalCache, evalTimestamp: time.Now(), @@ -328,20 +338,105 @@ func (ghc *GitHubContext) Commits() ([]*Commit, error) { } func (ghc *GitHubContext) PushedAt(sha string) (time.Time, error) { + repoID := ghc.pr.BaseRepository.DatabaseID + if ghc.pushedAt == nil { + ghc.pushedAt = make(map[string]time.Time) + } + + // This list contains the commits that may contain the push time. When + // commits are pushed in a batch, only the head commit of the batch has a + // recorded time in GitHub, but all commits in the batch share the same + // timestamp. This list lets us cache the times for all of the commits in + // the batch that we considered while looking for the head of the batch. + candidateSHAs := []string{sha} + + var pushedAt time.Time + var err error + for { + // Starting from the most recent SHA, search towards the head of the + // pull request until we find a commit that's either already in the + // cache or that has status checks. This commit is the head of the + // batch containing the initial commit. + sha := candidateSHAs[len(candidateSHAs)-1] + + pushedAt, err = ghc.tryPushedAt(repoID, sha) + if err != nil { + return time.Time{}, err + } + if !pushedAt.IsZero() { + break + } + + c, err := ghc.nextChildCommit(sha) + if err != nil { + return time.Time{}, err + } + if c == nil { + break + } + candidateSHAs = append(candidateSHAs, c.SHA) + } + + // After looking at all relevant commits, if we found no push time, default + // to the evaluation timestamp (i.e. now) + if pushedAt.IsZero() { + pushedAt = ghc.EvaluationTimestamp() + } + + for _, sha := range candidateSHAs { + ghc.pushedAt[sha] = pushedAt + if gc := ghc.globalCache; gc != nil { + gc.SetPushedAt(repoID, sha, pushedAt) + } + } + + return pushedAt, nil +} + +// tryPushedAt attempts to get the push time for a commit from the local cache, +// the global cache, or the GitHub API. It returns the zero time if it could +// not find a push time in any source. +// +// We prefer the local cache to the global cache because it helps when there is +// no global cache and avoids the possibility that the global cache evicts +// entries during the lifetime of the context. +// +// The local cache must be initialized before calling tryPushedAt. +func (ghc *GitHubContext) tryPushedAt(repoID int64, sha string) (time.Time, error) { if t, ok := ghc.pushedAt[sha]; ok { return t, nil } + if gc := ghc.globalCache; gc != nil { + if t, ok := gc.GetPushedAt(repoID, sha); ok { + ghc.pushedAt[sha] = t + return t, nil + } + } + return ghc.loadPushedAt(sha) +} + +// nextChildCommit returns the child commit for the given SHA or nil if the SHA +// is the head of the pull request. A child commit is a commit that has SHA as +// a parent. +func (ghc *GitHubContext) nextChildCommit(sha string) (*Commit, error) { + if sha == ghc.HeadSHA() { + // Optimization: exit early if asked about the head SHA + return nil, nil + } - pushedAt, err := ghc.loadPushedAt(sha) + commits, err := ghc.Commits() if err != nil { - return time.Time{}, err + return nil, err } - if ghc.pushedAt == nil { - ghc.pushedAt = make(map[string]time.Time) + for _, c := range commits { + for _, parentSHA := range c.Parents { + if sha == parentSHA { + return c, nil + } + } } - ghc.pushedAt[sha] = pushedAt - return pushedAt, nil + return nil, nil } func (ghc *GitHubContext) Comments() ([]*Comment, error) { @@ -916,7 +1011,10 @@ type v4PullRequest struct { Owner v4Actor } - BaseRefName string + BaseRefName string + BaseRepository struct { + DatabaseID int64 + } } type v4PageInfo struct { diff --git a/pull/github_test.go b/pull/github_test.go index ef7c9132..732fa901 100644 --- a/pull/github_test.go +++ b/pull/github_test.go @@ -16,6 +16,7 @@ package pull import ( "context" + "fmt" "net/http" "net/url" "sort" @@ -35,7 +36,7 @@ func TestChangedFiles(t *testing.T) { "testdata/responses/pull_files.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) files, err := ctx.ChangedFiles() require.NoError(t, err) @@ -77,7 +78,7 @@ func TestChangedFilesNoFiles(t *testing.T) { "testdata/responses/pull_no_files.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) files, err := ctx.ChangedFiles() require.NoError(t, err) @@ -100,7 +101,7 @@ func TestCommits(t *testing.T) { "testdata/responses/pull_commits.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) commits, err := ctx.Commits() require.NoError(t, err) @@ -143,7 +144,7 @@ func TestReviews(t *testing.T) { "testdata/responses/pull_reviews.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) reviews, err := ctx.Reviews() require.NoError(t, err) @@ -187,7 +188,7 @@ func TestNoReviews(t *testing.T) { "testdata/responses/pull_no_reviews.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) reviews, err := ctx.Reviews() require.NoError(t, err) @@ -208,7 +209,7 @@ func TestBody(t *testing.T) { "testdata/responses/pull_body.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) prBody, err := ctx.Body() require.NoError(t, err) @@ -229,7 +230,7 @@ func TestComments(t *testing.T) { "testdata/responses/pull_comments.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) comments, err := ctx.Comments() require.NoError(t, err) @@ -270,7 +271,7 @@ func TestNoComments(t *testing.T) { "testdata/responses/pull_no_comments.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) comments, err := ctx.Comments() require.NoError(t, err) @@ -303,7 +304,7 @@ func TestIsTeamMember(t *testing.T) { "testdata/responses/membership_team456_ttest.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) isMember, err := ctx.IsTeamMember("testorg/yes-team", "mhaypenny") require.NoError(t, err) @@ -350,7 +351,7 @@ func TestMixedReviewCommentPaging(t *testing.T) { "testdata/responses/pull_reviews_comments.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) comments, err := ctx.Comments() require.NoError(t, err) @@ -374,7 +375,7 @@ func TestIsOrgMember(t *testing.T) { "testdata/responses/membership_testorg_ttest.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) isMember, err := ctx.IsOrgMember("testorg", "mhaypenny") require.NoError(t, err) @@ -398,7 +399,7 @@ func TestIsOrgMember(t *testing.T) { func TestBranches(t *testing.T) { rp := &ResponsePlayer{} - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) base, head := ctx.Branches() assert.Equal(t, "develop", base, "base branch was not correctly set") @@ -418,7 +419,7 @@ func TestCrossRepoBranches(t *testing.T) { Name: github.String("testrepofork"), } - ctx := makeContext(t, rp, crossRepoPr) + ctx := makeContext(t, rp, crossRepoPr, nil) base, head := ctx.Branches() assert.Equal(t, "develop", base, "cross-repo base branch was not correctly set") @@ -432,7 +433,7 @@ func TestCollaboratorPermission(t *testing.T) { "testdata/responses/repo_collaborator_permission.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) p, err := ctx.CollaboratorPermission("direct-admin") require.NoError(t, err) @@ -475,7 +476,7 @@ func TestRepositoryCollaborators(t *testing.T) { "testdata/responses/repo_collaborators.yml", ) - ctx := makeContext(t, rp, nil) + ctx := makeContext(t, rp, nil, nil) collaborators, err := ctx.RepositoryCollaborators() require.NoError(t, err) @@ -534,7 +535,83 @@ func TestRepositoryCollaborators(t *testing.T) { }, c7.Permissions) } -func makeContext(t *testing.T, rp *ResponsePlayer, pr *github.PullRequest) Context { +func TestPushedAt(t *testing.T) { + rp := &ResponsePlayer{} + commitsRule := rp.AddRule( + GraphQLNodePrefixMatcher("repository.pullRequest.commits"), + "testdata/responses/pull_commits.yml", + ) + statusRuleA6F := rp.AddRule( + ExactPathMatcher("/repos/testorg/testrepo/commits/a6f3f69b64eaafece5a0d854eb4af11c0d64394c/statuses"), + "testdata/responses/repo_statuses_none.yml", + ) + statusRule1FC := rp.AddRule( + ExactPathMatcher("/repos/testorg/testrepo/commits/1fc89f1cedf8e3f3ce516ab75b5952295c8ea5e9/statuses"), + "testdata/responses/repo_statuses_none.yml", + ) + statusRuleE05 := rp.AddRule( + ExactPathMatcher("/repos/testorg/testrepo/commits/e05fcae367230ee709313dd2720da527d178ce43/statuses"), + "testdata/responses/repo_statuses_e05fcae367230ee709313dd2720da527d178ce43.yml", + ) + + expectedTime := time.Date(2020, 9, 30, 17, 30, 0, 0, time.UTC) + + gc := NewMockGlobalCache() + ctx := makeContext(t, rp, nil, gc) + + t.Run("fromStatus", func(t *testing.T) { + pushedAt, err := ctx.PushedAt("e05fcae367230ee709313dd2720da527d178ce43") + require.NoError(t, err) + + assert.Equal(t, expectedTime, pushedAt, "incorrect pushed at for commit") + assert.Equal(t, 3, statusRuleE05.Count, "incorrect http request count") + }) + + t.Run("fromCache", func(t *testing.T) { + pushedAt, err := ctx.PushedAt("e05fcae367230ee709313dd2720da527d178ce43") + require.NoError(t, err) + + assert.Equal(t, expectedTime, pushedAt, "incorrect pushed at for commit") + assert.Equal(t, 3, statusRuleE05.Count, "incorrect http request count") + + cachedPushedAt := gc.PushedAt["1234:e05fcae367230ee709313dd2720da527d178ce43"] + assert.Equal(t, expectedTime, cachedPushedAt, "incorrect value in global cache") + }) + + t.Run("fromBatch", func(t *testing.T) { + pushedAt, err := ctx.PushedAt("a6f3f69b64eaafece5a0d854eb4af11c0d64394c") + require.NoError(t, err) + + assert.Equal(t, expectedTime, pushedAt, "incorrect pushed at for commit") + assert.Equal(t, 2, commitsRule.Count, "incorrect http request count") + assert.Equal(t, 1, statusRuleA6F.Count, "incorrect http request count") + assert.Equal(t, 1, statusRule1FC.Count, "incorrect http request count") + assert.Equal(t, 3, statusRuleE05.Count, "incorrect http request count") + }) + + t.Run("fromBatchCache", func(t *testing.T) { + pushedAt, err := ctx.PushedAt("a6f3f69b64eaafece5a0d854eb4af11c0d64394c") + require.NoError(t, err) + + assert.Equal(t, expectedTime, pushedAt, "incorrect pushed at for commit") + assert.Equal(t, 2, commitsRule.Count, "incorrect http request count") + assert.Equal(t, 1, statusRuleA6F.Count, "incorrect http request count") + assert.Equal(t, 1, statusRule1FC.Count, "incorrect http request count") + assert.Equal(t, 3, statusRuleE05.Count, "incorrect http request count") + }) + + t.Run("fromGlobalCache", func(t *testing.T) { + ctx := makeContext(t, rp, nil, gc) + + pushedAt, err := ctx.PushedAt("e05fcae367230ee709313dd2720da527d178ce43") + require.NoError(t, err) + + assert.Equal(t, expectedTime, pushedAt, "incorrect pushed at for commit") + assert.Equal(t, 3, statusRuleE05.Count, "incorrect http request count") + }) +} + +func makeContext(t *testing.T, rp *ResponsePlayer, pr *github.PullRequest, gc GlobalCache) Context { ctx := context.Background() client := github.NewClient(&http.Client{Transport: rp}) v4client := githubv4.NewClient(&http.Client{Transport: rp}) @@ -547,7 +624,7 @@ func makeContext(t *testing.T, rp *ResponsePlayer, pr *github.PullRequest) Conte pr = defaultTestPR() } - prctx, err := NewGitHubContext(ctx, mbrCtx, client, v4client, Locator{ + prctx, err := NewGitHubContext(ctx, mbrCtx, gc, client, v4client, Locator{ Owner: pr.GetBase().GetRepo().GetOwner().GetLogin(), Repo: pr.GetBase().GetRepo().GetName(), Number: pr.GetNumber(), @@ -595,3 +672,22 @@ func defaultTestPR() *github.PullRequest { func newTime(t time.Time) *time.Time { return &t } + +type MockGlobalCache struct { + PushedAt map[string]time.Time +} + +func NewMockGlobalCache() *MockGlobalCache { + return &MockGlobalCache{ + PushedAt: make(map[string]time.Time), + } +} + +func (c *MockGlobalCache) GetPushedAt(repoID int64, sha string) (time.Time, bool) { + t, ok := c.PushedAt[fmt.Sprintf("%d:%s", repoID, sha)] + return t, ok +} + +func (c *MockGlobalCache) SetPushedAt(repoID int64, sha string, t time.Time) { + c.PushedAt[fmt.Sprintf("%d:%s", repoID, sha)] = t +} diff --git a/pull/global_cache.go b/pull/global_cache.go new file mode 100644 index 00000000..0eeb5a9b --- /dev/null +++ b/pull/global_cache.go @@ -0,0 +1,63 @@ +// Copyright 2023 Palantir Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pull + +import ( + "fmt" + "time" + + lru "github.com/hashicorp/golang-lru" +) + +// GlobalCache implementations provide a way to cache values that are safe to +// cache at the application level. Values in the global cache should not become +// stale due to external changes and should only expire to prevent the cache +// from becoming infinitely large. +type GlobalCache interface { + GetPushedAt(repoID int64, sha string) (time.Time, bool) + SetPushedAt(repoID int64, sha string, t time.Time) +} + +// LRUGlobalCache is a GlobalCache where each data type is stored in a separate +// LRU cache. This prevents frequently used data of one type from evicting less +// frequently used data of a different type. +type LRUGlobalCache struct { + pushedAt *lru.Cache +} + +func NewLRUGlobalCache(pushedAtSize int) (*LRUGlobalCache, error) { + pushedAt, err := lru.New(pushedAtSize) + if err != nil { + return nil, err + } + return &LRUGlobalCache{pushedAt: pushedAt}, nil +} + +func (c *LRUGlobalCache) GetPushedAt(repoID int64, sha string) (time.Time, bool) { + if val, ok := c.pushedAt.Get(pushedAtKey(repoID, sha)); ok { + if t, ok := val.(time.Time); ok { + return t, true + } + } + return time.Time{}, false +} + +func (c *LRUGlobalCache) SetPushedAt(repoID int64, sha string, t time.Time) { + c.pushedAt.Add(pushedAtKey(repoID, sha), t) +} + +func pushedAtKey(repoID int64, sha string) string { + return fmt.Sprintf("%d:%s", repoID, sha) +} diff --git a/pull/testdata/responses/repo_statuses_e05fcae367230ee709313dd2720da527d178ce43.yml b/pull/testdata/responses/repo_statuses_e05fcae367230ee709313dd2720da527d178ce43.yml new file mode 100644 index 00000000..26ad5dd7 --- /dev/null +++ b/pull/testdata/responses/repo_statuses_e05fcae367230ee709313dd2720da527d178ce43.yml @@ -0,0 +1,50 @@ +- status: 200 + headers: + Link: | + ; rel="next", + ; rel="last", + body: | + [ + { + "created_at": "2020-09-30T17:36:44Z" + }, + { + "created_at": "2020-09-30T17:35:01Z" + }, + { + "created_at": "2020-09-30T17:32:13Z" + } + ] +- status: 200 + headers: + Link: | + ; rel="first", + ; rel="prev", + ; rel="next", + ; rel="last", + body: | + [ + { + "created_at": "2020-09-30T17:32:00Z" + }, + { + "created_at": "2020-09-30T17:30:21Z" + }, + { + "created_at": "2020-09-30T17:30:11Z" + } + ] +- status: 200 + headers: + Link: | + ; rel="first", + ; rel="prev", + body: | + [ + { + "created_at": "2020-09-30T17:30:10Z" + }, + { + "created_at": "2020-09-30T17:30:00Z" + } + ] diff --git a/pull/testdata/responses/repo_statuses_none.yml b/pull/testdata/responses/repo_statuses_none.yml new file mode 100644 index 00000000..02965be8 --- /dev/null +++ b/pull/testdata/responses/repo_statuses_none.yml @@ -0,0 +1,3 @@ +- status: 200 + body: | + [] diff --git a/server/config.go b/server/config.go index e12d2437..0d7765a9 100644 --- a/server/config.go +++ b/server/config.go @@ -61,7 +61,14 @@ func (c *LoggingConfig) SetValuesFromEnv(prefix string) { } type CachingConfig struct { + // The maximum size of the the HTTP cache associated with each GitHub + // client. The total amount of memory used for caching is approximately + // this value multiplied by the total number of active GitHub clients. MaxSize datasize.ByteSize `yaml:"max_size"` + + // The size of the global cache for commit push times. Each entry uses + // roughly 100 bytes of memory. + PushedAtSize int `yaml:"pushed_at_size"` } type WorkerConfig struct { diff --git a/server/handler/base.go b/server/handler/base.go index 62e7fb62..34e34dbe 100644 --- a/server/handler/base.go +++ b/server/handler/base.go @@ -34,6 +34,7 @@ type Base struct { githubapp.ClientCreator Installations githubapp.InstallationsService + GlobalCache pull.GlobalCache ConfigFetcher *ConfigFetcher BaseConfig *baseapp.HTTPConfig PullOpts *PullEvaluationOptions @@ -69,7 +70,7 @@ func (b *Base) NewEvalContext(ctx context.Context, installationID int64, loc pul } mbrCtx := NewCrossOrgMembershipContext(ctx, client, loc.Owner, b.Installations, b.ClientCreator) - prctx, err := pull.NewGitHubContext(ctx, mbrCtx, client, v4client, loc) + prctx, err := pull.NewGitHubContext(ctx, mbrCtx, b.GlobalCache, client, v4client, loc) if err != nil { return nil, err } diff --git a/server/server.go b/server/server.go index 74e110fa..3b758ff0 100644 --- a/server/server.go +++ b/server/server.go @@ -32,6 +32,7 @@ import ( "github.com/palantir/go-githubapp/appconfig" "github.com/palantir/go-githubapp/githubapp" "github.com/palantir/go-githubapp/oauth2" + "github.com/palantir/policy-bot/pull" "github.com/palantir/policy-bot/server/handler" "github.com/palantir/policy-bot/version" "github.com/pkg/errors" @@ -42,6 +43,13 @@ import ( const ( DefaultSessionLifetime = 24 * time.Hour + DefaultGitHubTimeout = 10 * time.Second + + DefaultWebhookWorkers = 10 + DefaultWebhookQueueSize = 100 + + DefaultHTTPCacheSize = 50 * datasize.MB + DefaultPushedAtCacheSize = 100_000 ) type Server struct { @@ -85,14 +93,14 @@ func New(c *Config) (*Server, error) { return nil, errors.Wrap(err, "failed to initialize base server") } - maxSize := int64(50 * datasize.MB) + maxSize := int64(DefaultHTTPCacheSize) if c.Cache.MaxSize != 0 { maxSize = int64(c.Cache.MaxSize) } githubTimeout := c.Workers.GithubTimeout if githubTimeout == 0 { - githubTimeout = 10 * time.Second + githubTimeout = DefaultGitHubTimeout } v4URL, err := url.Parse(c.Github.V4APIURL) @@ -130,10 +138,21 @@ func New(c *Config) (*Server, error) { return nil, errors.Wrap(err, "failed to get configured GitHub app") } + pushedAtSize := c.Cache.PushedAtSize + if pushedAtSize == 0 { + pushedAtSize = DefaultPushedAtCacheSize + } + + globalCache, err := pull.NewLRUGlobalCache(pushedAtSize) + if err != nil { + return nil, errors.Wrap(err, "failed to initialize global cache") + } + basePolicyHandler := handler.Base{ ClientCreator: cc, BaseConfig: &c.Server, Installations: githubapp.NewInstallationsService(appClient), + GlobalCache: globalCache, PullOpts: &c.Options, ConfigFetcher: &handler.ConfigFetcher{ @@ -150,12 +169,12 @@ func New(c *Config) (*Server, error) { queueSize := c.Workers.QueueSize if queueSize < 1 { - queueSize = 100 + queueSize = DefaultWebhookQueueSize } workers := c.Workers.Workers if workers < 1 { - workers = 10 + workers = DefaultWebhookWorkers } dispatcher := githubapp.NewEventDispatcher(