Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Handle errors #44

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions examples/cmd/logtransport.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
package main

import (
"bytes"
"io/ioutil"
"net/http"
"time"

"gopkg.in/src-d/go-log.v1"

"github.com/src-d/metadata-retrieval/utils"
Comment on lines +8 to +9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to previous reviews by @lwsanty the metadata-retrieval variables should be in the middle. I am guessing it is better if we build a linter for that matter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. Will do.

)

func setLogTransport(client *http.Client, logger log.Logger) {
t := &logTransport{client.Transport, logger}
client.Transport = t
client.Transport = &logTransport{client.Transport, logger}
}

type logTransport struct {
Expand All @@ -22,15 +21,23 @@ type logTransport struct {
func (t *logTransport) RoundTrip(r *http.Request) (*http.Response, error) {
t0 := time.Now()

reqBody, _ := ioutil.ReadAll(r.Body)
r.Body = ioutil.NopCloser(bytes.NewBuffer(reqBody))

// Since the underlaying Transport.RoundTrip might consume the Request, and it's also needed later to log the process,
// it is needed to make an snapshot of the Request.Body in advance; otherwise, its will be no longer accessible.
bakupReqBody := utils.BackupRequestBody(r)
resp, err := t.T.RoundTrip(r)
restoreRequestBody(r, bakupReqBody)

// Since the outter Transport.RoundTrip calling this one might also consume the Request, and it is also needed here to log
// the process, it will be done over a snapshot of the Request.Body; otherwise, its will be no longer accessible by outer Transports.
reqBody, _ := utils.ReadRequestAndRestore(r)

if err != nil {
return resp, err
}

respBody, err := ioutil.ReadAll(resp.Body)
// Since the Response has been read, it is needed to restore it
// before reusing it; otherwise, its body will be unreadable again in other Transports.
respBody, err := utils.ReadResponseAndRestore(resp)
if err != nil {
return resp, err
}
Expand All @@ -39,7 +46,5 @@ func (t *logTransport) RoundTrip(r *http.Request) (*http.Response, error) {
log.Fields{"elapsed": time.Since(t0), "response-code": resp.StatusCode, "url": r.URL, "request-body": string(reqBody), "response-body": string(respBody)},
).Debugf("HTTP response")

resp.Body = ioutil.NopCloser(bytes.NewBuffer(respBody))

return resp, err
}
3 changes: 3 additions & 0 deletions examples/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ func (c *DownloaderCmd) buildDownloadersPool(logger log.Logger, db *sql.DB) (*Do
setLogTransport(client, logger)
}

github.SetRateLimitTransport(client, logger)
github.SetRetryTransport(client)

var d *github.Downloader
var err error
if db == nil {
Expand Down
10 changes: 0 additions & 10 deletions github/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,6 @@ type Downloader struct {
// in the given DB. The HTTP client is expected to have the proper
// authentication setup
func NewDownloader(httpClient *http.Client, db *sql.DB) (*Downloader, error) {
// TODO: is the ghsync rate limited client needed?

t := &retryTransport{httpClient.Transport}
httpClient.Transport = t

return &Downloader{
storer: &store.DB{DB: db},
client: githubv4.NewClient(httpClient),
Expand All @@ -73,11 +68,6 @@ func NewDownloader(httpClient *http.Client, db *sql.DB) (*Downloader, error) {
// metadata to stdout. The HTTP client is expected to have the proper
// authentication setup
func NewStdoutDownloader(httpClient *http.Client) (*Downloader, error) {
// TODO: is the ghsync rate limited client needed?

t := &retryTransport{httpClient.Transport}
httpClient.Transport = t

return &Downloader{
storer: &store.Stdout{},
client: githubv4.NewClient(httpClient),
Expand Down
219 changes: 219 additions & 0 deletions github/ratelimit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
package github

import (
"encoding/json"
"fmt"
"net/http"
"strconv"
"strings"
"sync"
"time"

"gopkg.in/src-d/go-log.v1"

"github.com/src-d/metadata-retrieval/utils"
)

var (
defaultAbuseRetryAfter = time.Minute
)

// RateLimitTransport implements GitHub GraphQL API v4 best practices for avoiding rate limits
// https://developer.github.com/v4/guides/resource-limitations/#rate-limit
// https://developer.github.com/v3/#abuse-rate-limits
// RateLimitTransport will process a Request, and if the response could not be fetched
// because of a RateLimit or an AbuseRateLimit, it will return an ErrorRateLimit
// and it no longer process any further Requests until the Limit has been expired.
// RateLimitTransport does not retry; that behaviour must be implemented by another Transport
// Each client (with its own token) should use its own RateLimitTransport
type RateLimitTransport struct {
transport http.RoundTripper
lockedUntil time.Time
mutex sync.Mutex
logger log.Logger
}

// SetRateLimitTransport wraps the passed client.Transport with a RateLimitTransport
func SetRateLimitTransport(client *http.Client, logger log.Logger) {
client.Transport = NewRateLimitTransport(client.Transport, logger)
}

// NewRateLimitTransport returns a new NewRateLimitTransport, who will call the passed
// http.RoundTripper to process the http.Request
// Each client (with its own token) should use its own RateLimitTransport
func NewRateLimitTransport(rt http.RoundTripper, logger log.Logger) *RateLimitTransport {
return &RateLimitTransport{
transport: rt,
logger: logger,
}
}

// RoundTrip executes a single HTTP transaction, returning a Response for the provided Request.
// If the request hitted an API RateLimit or Abuse, it will return an ErrorRateLimit
// and it no longer process any further Requests until the Limit has been expired.
func (rt *RateLimitTransport) RoundTrip(req *http.Request) (*http.Response, error) {
// Make Requests serially
rt.lock()
defer rt.unlock()

if time.Now().Before(rt.lockedUntil) {
rt.logger.Infof("rate limit reached, sleeping until %s", rt.lockedUntil)
time.Sleep(rt.lockedUntil.Sub(time.Now()))
}

resp, err := rt.transport.RoundTrip(req)
if err != nil {
return resp, err
}

if errRateLimit := checkResponseRateLimit(resp, rt.logger); errRateLimit != nil {
rt.lockedUntil = errRateLimit.when()
rt.logger.Errorf(errRateLimit, "locking transport for %s", errRateLimit.when().Sub(time.Now()))
return nil, errRateLimit
}

return resp, nil
}

func (rt *RateLimitTransport) lock() {
rt.mutex.Lock()
}

func (rt *RateLimitTransport) unlock() {
rt.mutex.Unlock()
}

// checkRateLimit checks the API response and returns a whener error if a rate limit was found:
// - *ErrRateLimit is returned when the request failed because of a RateLimit
// https://developer.github.com/v4/guides/resource-limitations/#rate-limit
// - *ErrAbuseRateLimit is returned when the request triggered a GitHub abuse detection mechanism
// https://developer.github.com/v3/#abuse-rate-limits
func checkResponseRateLimit(resp *http.Response, logger log.Logger) whener {
if err := asErrRateLimit(resp); err != nil {
return err
}

if err := asErrAbuseRateLimit(resp, logger); err != nil {
return err
}

return nil
}

// ErrRateLimit is returned when a request failed because of a RateLimit
// https://developer.github.com/v4/guides/resource-limitations/#rate-limit
type ErrRateLimit struct {
errRetryLater
}

func (e *ErrRateLimit) Error() string {
return fmt.Sprintf("API rate limit exceeded; %s", e.errRetryLater.Error())
}

// ErrAbuseRateLimit is returned when a request triggers any GitHub abuse detection mechanism
// https://developer.github.com/v3/#abuse-rate-limits
type ErrAbuseRateLimit struct {
errRetryLater
}

func (e *ErrAbuseRateLimit) Error() string {
return fmt.Sprintf("abuse detection mechanism triggered; %s", e.errRetryLater.Error())
}

type errRetryLater struct {
retryAfter time.Time
}

func (e *errRetryLater) Error() string {
return fmt.Sprintf("retry after %s", e.retryAfter)
}

func (e *errRetryLater) when() time.Time {
return e.retryAfter
}

type whener interface {
error
when() time.Time
}

/*
An apiErrorResponse reports one or more errors caused by an API request.
GitHub API docs: https://developer.github.com/v3/#client-errors
*/
type apiErrorResponse struct {
Message string `json:"message,omitempty"`
DocumentationURL string `json:"documentation_url,omitempty"`
Errors []struct {
Type string `json:"type"`
Message string `json:"message"`
} `json:"errors,omitempty"`
}

func (aer *apiErrorResponse) isAbuseRateLimit() bool {
return strings.Contains(aer.DocumentationURL, "abuse") ||
strings.Contains(aer.Message, "abuse detection")
}

// asErrAbuseRateLimit will return an ErrAbuseRateLimit if the status Response
// is 403 Forbidden and there is a valid 'Retry-After' Response header
// It will also return an ErrAbuseRateLimit with a default RetryAfter if the
// response.Body can be mapped as an apiErrorResponse, and its Message or Documentation
// mentions the abuse error.
func asErrAbuseRateLimit(resp *http.Response, logger log.Logger) *ErrAbuseRateLimit {
if resp.StatusCode != http.StatusForbidden {
return nil
}

retryInHeader := resp.Header.Get("Retry-After")
retryIn, err := strconv.Atoi(retryInHeader)
if err == nil {
return &ErrAbuseRateLimit{
errRetryLater{time.Now().Add(time.Duration(retryIn) * time.Second)},
}
}

errorResponse := &apiErrorResponse{}
err = readAPIErrorResponse(resp, errorResponse)
if err == nil && errorResponse.isAbuseRateLimit() {
logger.Warningf("error reading 'Retry-After=%s' header from the '403 Forbidden' response, using default '%s': %s",
retryInHeader,
defaultAbuseRetryAfter,
err,
)

return &ErrAbuseRateLimit{
errRetryLater{time.Now().Add(defaultAbuseRetryAfter)},
}
}

logger.Warningf("403 Forbidden response got, but could not be read as an Abuse Rate Limit response")
return nil
}

// asErrRateLimit will return an ErrRateLimit if the Response 'X-RateLimit-Remaining' header is 0,
// and the X-RateLimit-Reset' header contains a valid reset info
func asErrRateLimit(resp *http.Response) *ErrRateLimit {
rateLimitResetHeader := resp.Header.Get("X-RateLimit-Reset")
rateLimitReset, errReadingReset := strconv.Atoi(rateLimitResetHeader)
rateLimitRemainingHeader := resp.Header.Get("X-RateLimit-Remaining")
rateLimitRemaining, errReadingRemaining := strconv.Atoi(rateLimitRemainingHeader)

if errReadingReset == nil && errReadingRemaining == nil && rateLimitRemaining == 0 {
return &ErrRateLimit{
errRetryLater{time.Unix(int64(rateLimitReset)+1, 0)},
}
}

return nil
}

// readAPIErrorResponse reads the response.Body into the passed errorResponse
func readAPIErrorResponse(resp *http.Response, errorResponse *apiErrorResponse) error {
bodyContent, err := utils.ReadResponseAndRestore(resp)
if err != nil {
return err
}

return json.Unmarshal(bodyContent, errorResponse)
}
Loading