Skip to content

Commit

Permalink
Add global cache for pushed times (#612)
Browse files Browse the repository at this point in the history
After testing, I'm worried that listing statuses on busy repositories
will cause too many API requests, even with HTTP caching. Since these
values are safe to cache forever, add a simple in-memory LRU cache in
each server instance to minimize API calls.

This required addional changes to how the PushedAt function works
because we want to cache the evaluation timestamp if we use it. This
meant moving the batch checking in to the PushedAt function instead of
handling it in the approval logic. The new version is closer to how
things used to work when we loaded the pushedDate field from the API.

Also add a test for the PushedAt context function that verifies the
caching behavior.
  • Loading branch information
bluekeyes authored Jul 26, 2023
1 parent 0b093d3 commit 2655fb0
Show file tree
Hide file tree
Showing 13 changed files with 422 additions and 91 deletions.
1 change: 0 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ on:
- '**'

pull_request:
branches: [develop]

release:
types: [published]
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/google/go-github/v53 v53.2.0
github.com/google/go-querystring v1.1.0
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79
github.com/hashicorp/golang-lru v0.6.0
github.com/palantir/go-baseapp v0.5.1
github.com/palantir/go-githubapp v0.18.0
github.com/pkg/errors v0.9.1
Expand All @@ -33,7 +34,6 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/hashicorp/golang-lru v0.6.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-colorable v0.1.12 // indirect
Expand Down
20 changes: 0 additions & 20 deletions policy/approval/approve.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,26 +315,6 @@ func (r *Rule) filterInvalidCandidates(ctx context.Context, prctx pull.Context,
return nil, nil, errors.Wrap(err, "failed to get last push timestamp")
}

// The most recent filtered commit did not have a status. This can happen
// in two situations:
//
// The commit was pushed in a batch where the head commit is ignored...
if lastPushedAt.IsZero() && sha != prctx.HeadSHA() {
sha = prctx.HeadSHA()
lastPushedAt, err = prctx.PushedAt(sha)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to get last push timestamp")
}
}
// ... or the commit (or the head commit) hasn't been evaluated yet
//
// In this case, we're the first evaluation for this commit, so set the
// push time to the curent time, which is guaranteed to be after the actual
// push due to webhook delivery and processing time.
if lastPushedAt.IsZero() {
lastPushedAt = prctx.EvaluationTimestamp()
}

var allowed []*common.Candidate
var dismissed []*common.Dismissal
for _, c := range candidates {
Expand Down
80 changes: 48 additions & 32 deletions policy/approval/approve_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ func TestIsApproved(t *testing.T) {
now := time.Now()
basePullContext := func() *pulltest.Context {
return &pulltest.Context{
EvaluationTimestampValue: now,
AuthorValue: "mhaypenny",
AuthorValue: "mhaypenny",
BodyValue: &pull.Body{
Body: "/no-platform",
CreatedAt: now.Add(10 * time.Second),
Expand Down Expand Up @@ -386,32 +385,6 @@ func TestIsApproved(t *testing.T) {
assertPending(t, prctx, r, "0/1 required approvals. Ignored 6 approvals from disqualified users")
})

t.Run("invalidateCommentOnPushFirstEvaluation", func(t *testing.T) {
prctx := basePullContext()
prctx.EvaluationTimestampValue = now.Add(25 * time.Second)
prctx.HeadSHAValue = "c6ade256ecfc755d8bc877ef22cc9e01745d46bb"
prctx.CommitsValue = []*pull.Commit{
{
SHA: "c6ade256ecfc755d8bc877ef22cc9e01745d46bb",
Author: "mhaypenny",
Committer: "mhaypenny",
},
}

r := &Rule{
Requires: common.Requires{
Count: 1,
Actors: common.Actors{
Users: []string{"comment-approver"},
},
},
}
assertApproved(t, prctx, r, "Approved by comment-approver")

r.Options.InvalidateOnPush = true
assertPending(t, prctx, r, "0/1 required approvals. Ignored 6 approvals from disqualified users")
})

t.Run("invalidateReviewOnPush", func(t *testing.T) {
prctx := basePullContext()
prctx.PushedAtValue = map[string]time.Time{
Expand Down Expand Up @@ -442,9 +415,9 @@ func TestIsApproved(t *testing.T) {

t.Run("ignoreUpdateMergeAfterReview", func(t *testing.T) {
prctx := basePullContext()
prctx.EvaluationTimestampValue = now.Add(25 * time.Second)
prctx.PushedAtValue = map[string]time.Time{
"c6ade256ecfc755d8bc877ef22cc9e01745d46bb": now,
"647c5078288f0ea9de27b5c280f25edaf2089045": now.Add(25 * time.Second),
}
prctx.HeadSHAValue = "647c5078288f0ea9de27b5c280f25edaf2089045"
prctx.CommitsValue = append(prctx.CommitsValue[:1], &pull.Commit{
Expand Down Expand Up @@ -476,9 +449,6 @@ func TestIsApproved(t *testing.T) {

t.Run("ignoreUpdateMergeContributor", func(t *testing.T) {
prctx := basePullContext()
prctx.PushedAtValue = map[string]time.Time{
"c6ade256ecfc755d8bc877ef22cc9e01745d46bb": now.Add(25 * time.Second),
}
prctx.HeadSHAValue = "647c5078288f0ea9de27b5c280f25edaf2089045"
prctx.CommitsValue = append(prctx.CommitsValue[:1], &pull.Commit{
SHA: "647c5078288f0ea9de27b5c280f25edaf2089045",
Expand Down Expand Up @@ -584,6 +554,52 @@ func TestIsApproved(t *testing.T) {
assertApproved(t, prctx, r, "Approved by comment-approver")
})

t.Run("ignoreCommitsInvalidateOnPushBatches", func(t *testing.T) {
prctx := basePullContext()
prctx.PushedAtValue = map[string]time.Time{
"584b9232835ae85a2d8216fb55fd9cad8389092c": now.Add(25 * time.Second),
"7f4cd9d3999061605ce4cf261234e431b52e61ee": now,
}
prctx.HeadSHAValue = "584b9232835ae85a2d8216fb55fd9cad8389092c"
prctx.CommitsValue = []*pull.Commit{
{
SHA: "584b9232835ae85a2d8216fb55fd9cad8389092c",
Author: "mhaypenny",
Committer: "mhaypenny",
Parents: []string{"7f4cd9d3999061605ce4cf261234e431b52e61ee"},
},
{
SHA: "7f4cd9d3999061605ce4cf261234e431b52e61ee",
Author: "mhaypenny",
Committer: "mhaypenny",
Parents: []string{"b2b73abd18bbad3ee3c6d0055697177cf476bf67"},
},
{
SHA: "b2b73abd18bbad3ee3c6d0055697177cf476bf67",
Author: "nstrawnickel",
Committer: "nstrawnickel",
},
}

r := &Rule{
Requires: common.Requires{
Count: 1,
Actors: common.Actors{
Users: []string{"comment-approver"},
},
},
}
assertApproved(t, prctx, r, "Approved by comment-approver")

r.Options.InvalidateOnPush = true
assertPending(t, prctx, r, "0/1 required approvals. Ignored 6 approvals from disqualified users")

r.Options.IgnoreCommitsBy = common.Actors{
Users: []string{"mhaypenny"},
}
assertApproved(t, prctx, r, "Approved by comment-approver")
})

t.Run("ignoreEditedReviewComments", func(t *testing.T) {
prctx := basePullContext()

Expand Down
1 change: 0 additions & 1 deletion pull/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ type Context interface {

// PushedAt returns the time at which the commit with sha was pushed. The
// returned time may be after the actual push time, but must not be before.
// PushedAt returns the zero time if the push time for is unknown.
PushedAt(sha string) (time.Time, error)

// Comments lists all comments on a Pull Request. The comment order is
Expand Down
126 changes: 112 additions & 14 deletions pull/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func (loc Locator) toV4(ctx context.Context, client *githubv4.Client) (*v4PullRe
v4.HeadRepository.Name = loc.Value.GetHead().GetRepo().GetName()
v4.HeadRepository.Owner.Login = loc.Value.GetHead().GetRepo().GetOwner().GetLogin()
v4.BaseRefName = loc.Value.GetBase().GetRef()
v4.BaseRepository.DatabaseID = loc.Value.GetBase().GetRepo().GetID()
v4.IsDraft = loc.Value.GetDraft()
return &v4, nil
}
Expand All @@ -115,9 +116,10 @@ func (loc Locator) toV4(ctx context.Context, client *githubv4.Client) (*v4PullRe
type GitHubContext struct {
MembershipContext

ctx context.Context
client *github.Client
v4client *githubv4.Client
ctx context.Context
client *github.Client
v4client *githubv4.Client
globalCache GlobalCache

evalTimestamp time.Time

Expand Down Expand Up @@ -145,7 +147,14 @@ type GitHubContext struct {
// obtain information. It caches responses for the lifetime of the context. The
// pull request passed to the context must contain at least the base repository
// and the number or the function panics.
func NewGitHubContext(ctx context.Context, mbrCtx MembershipContext, client *github.Client, v4client *githubv4.Client, loc Locator) (Context, error) {
func NewGitHubContext(
ctx context.Context,
mbrCtx MembershipContext,
globalCache GlobalCache,
client *github.Client,
v4client *githubv4.Client,
loc Locator,
) (Context, error) {
if loc.Owner == "" || loc.Repo == "" || loc.Number == 0 {
panic("pull request object does not contain full identifying information")
}
Expand All @@ -158,9 +167,10 @@ func NewGitHubContext(ctx context.Context, mbrCtx MembershipContext, client *git
return &GitHubContext{
MembershipContext: mbrCtx,

ctx: ctx,
client: client,
v4client: v4client,
ctx: ctx,
client: client,
v4client: v4client,
globalCache: globalCache,

evalTimestamp: time.Now(),

Expand Down Expand Up @@ -328,20 +338,105 @@ func (ghc *GitHubContext) Commits() ([]*Commit, error) {
}

func (ghc *GitHubContext) PushedAt(sha string) (time.Time, error) {
repoID := ghc.pr.BaseRepository.DatabaseID
if ghc.pushedAt == nil {
ghc.pushedAt = make(map[string]time.Time)
}

// This list contains the commits that may contain the push time. When
// commits are pushed in a batch, only the head commit of the batch has a
// recorded time in GitHub, but all commits in the batch share the same
// timestamp. This list lets us cache the times for all of the commits in
// the batch that we considered while looking for the head of the batch.
candidateSHAs := []string{sha}

var pushedAt time.Time
var err error
for {
// Starting from the most recent SHA, search towards the head of the
// pull request until we find a commit that's either already in the
// cache or that has status checks. This commit is the head of the
// batch containing the initial commit.
sha := candidateSHAs[len(candidateSHAs)-1]

pushedAt, err = ghc.tryPushedAt(repoID, sha)
if err != nil {
return time.Time{}, err
}
if !pushedAt.IsZero() {
break
}

c, err := ghc.nextChildCommit(sha)
if err != nil {
return time.Time{}, err
}
if c == nil {
break
}
candidateSHAs = append(candidateSHAs, c.SHA)
}

// After looking at all relevant commits, if we found no push time, default
// to the evaluation timestamp (i.e. now)
if pushedAt.IsZero() {
pushedAt = ghc.EvaluationTimestamp()
}

for _, sha := range candidateSHAs {
ghc.pushedAt[sha] = pushedAt
if gc := ghc.globalCache; gc != nil {
gc.SetPushedAt(repoID, sha, pushedAt)
}
}

return pushedAt, nil
}

// tryPushedAt attempts to get the push time for a commit from the local cache,
// the global cache, or the GitHub API. It returns the zero time if it could
// not find a push time in any source.
//
// We prefer the local cache to the global cache because it helps when there is
// no global cache and avoids the possibility that the global cache evicts
// entries during the lifetime of the context.
//
// The local cache must be initialized before calling tryPushedAt.
func (ghc *GitHubContext) tryPushedAt(repoID int64, sha string) (time.Time, error) {
if t, ok := ghc.pushedAt[sha]; ok {
return t, nil
}
if gc := ghc.globalCache; gc != nil {
if t, ok := gc.GetPushedAt(repoID, sha); ok {
ghc.pushedAt[sha] = t
return t, nil
}
}
return ghc.loadPushedAt(sha)
}

// nextChildCommit returns the child commit for the given SHA or nil if the SHA
// is the head of the pull request. A child commit is a commit that has SHA as
// a parent.
func (ghc *GitHubContext) nextChildCommit(sha string) (*Commit, error) {
if sha == ghc.HeadSHA() {
// Optimization: exit early if asked about the head SHA
return nil, nil
}

pushedAt, err := ghc.loadPushedAt(sha)
commits, err := ghc.Commits()
if err != nil {
return time.Time{}, err
return nil, err
}

if ghc.pushedAt == nil {
ghc.pushedAt = make(map[string]time.Time)
for _, c := range commits {
for _, parentSHA := range c.Parents {
if sha == parentSHA {
return c, nil
}
}
}
ghc.pushedAt[sha] = pushedAt
return pushedAt, nil
return nil, nil
}

func (ghc *GitHubContext) Comments() ([]*Comment, error) {
Expand Down Expand Up @@ -916,7 +1011,10 @@ type v4PullRequest struct {
Owner v4Actor
}

BaseRefName string
BaseRefName string
BaseRepository struct {
DatabaseID int64
}
}

type v4PageInfo struct {
Expand Down
Loading

0 comments on commit 2655fb0

Please sign in to comment.