Initial implementation of key-value rate limits #6947

Merged 42 commits into main from rate-limits-v2 on Jul 21, 2023
Changes from 3 commits

Commits (42)
8e41eea
WIP
beautifulentropy Jun 14, 2023
67ed1e9
Flesh out the RateLimit methods
beautifulentropy Jun 15, 2023
759479f
Store limit prefixes as integers
beautifulentropy Jun 16, 2023
30e9b01
Improve errors.
beautifulentropy Jun 16, 2023
e834925
Add and test YAML loading
beautifulentropy Jun 16, 2023
5593b49
Refunds, Resets, and Initialization
beautifulentropy Jun 16, 2023
2d148b9
Moar coverage and some small refactors.
beautifulentropy Jun 17, 2023
32741c8
Refunds should return full decisions
beautifulentropy Jun 20, 2023
d44d42c
Initialization is hard.
beautifulentropy Jun 20, 2023
529bfba
Another round of coverage improvements.
beautifulentropy Jun 20, 2023
8178cf1
Avoid shadowing and fix lints.
beautifulentropy Jun 20, 2023
a5a0d15
Addressing comments and adding better checks for limit overrides
beautifulentropy Jun 21, 2023
fb78d4d
Typos
beautifulentropy Jun 21, 2023
5c8d04c
Add some cautionary panics inside of gcra
beautifulentropy Jun 21, 2023
a2844b8
Typos
beautifulentropy Jun 21, 2023
24673e4
Document enums to set stage for the last three id validators
beautifulentropy Jun 22, 2023
b1c2f35
Typo
beautifulentropy Jun 22, 2023
de93969
Addressing comments (WIP)
beautifulentropy Jun 23, 2023
b2f9081
Addressing comments.
beautifulentropy Jun 26, 2023
3b1b762
Lints.
beautifulentropy Jun 26, 2023
1a09f68
We cannot support certain overrides
beautifulentropy Jun 26, 2023
94ec876
Typo.
beautifulentropy Jun 26, 2023
95730f0
Revert changes to policy.
beautifulentropy Jun 26, 2023
47622b3
Address limit.go comments.
beautifulentropy Jun 27, 2023
c413abf
Changes to limit Names, validations, and tests.
beautifulentropy Jun 28, 2023
07699d4
More test cases and a README
beautifulentropy Jun 28, 2023
1ac9b0e
Indent bullets
beautifulentropy Jun 28, 2023
1e7a1ce
Small typos, etc.
beautifulentropy Jun 28, 2023
1c0301e
Informated????
beautifulentropy Jun 28, 2023
6ac0e5e
:woman_facepalming:
beautifulentropy Jun 28, 2023
ea75b65
Addressing comments, two still outstanding.
beautifulentropy Jul 13, 2023
2f69601
Addressed final comment.
beautifulentropy Jul 13, 2023
7442f18
Merge branch 'main' into rate-limits-v2
beautifulentropy Jul 13, 2023
603f187
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
c0289c5
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
897f754
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
5ef087a
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
ca04728
Address comemnts.
beautifulentropy Jul 19, 2023
d052e0c
Merge branch 'main' into rate-limits-v2
beautifulentropy Jul 19, 2023
9ec09f0
unnecessary conversion
beautifulentropy Jul 19, 2023
bb77c0b
Add fractional refund test.
beautifulentropy Jul 20, 2023
861161f
:woman_facepalming:
beautifulentropy Jul 20, 2023
2 changes: 1 addition & 1 deletion cmd/expiration-mailer/main.go
@@ -304,7 +304,7 @@ func (m *mailer) updateLastNagTimestampsChunk(ctx context.Context, certs []*x509
}

func (m *mailer) certIsRenewed(ctx context.Context, names []string, issued time.Time) (bool, error) {
namehash := sa.HashNames(names)
namehash := core.HashNames(names)

var present bool
err := m.dbMap.WithContext(ctx).SelectOne(
8 changes: 4 additions & 4 deletions cmd/expiration-mailer/main_test.go
@@ -351,7 +351,7 @@ func TestNoContactCertIsRenewed(t *testing.T) {
setupDBMap, err := sa.DBMapForTest(vars.DBConnSAFullPerms)
test.AssertNotError(t, err, "setting up DB")
err = setupDBMap.Insert(&core.FQDNSet{
SetHash: sa.HashNames(names),
SetHash: core.HashNames(names),
Serial: core.SerialToString(serial2),
Issued: testCtx.fc.Now().Add(time.Hour),
Expires: expires.Add(time.Hour),
@@ -576,13 +576,13 @@ func addExpiringCerts(t *testing.T, ctx *testCtx) []certDERWithRegID {
test.AssertNotError(t, err, "creating cert D")

fqdnStatusD := &core.FQDNSet{
SetHash: sa.HashNames(certDNames),
SetHash: core.HashNames(certDNames),
Serial: serial4String,
Issued: ctx.fc.Now().AddDate(0, 0, -87),
Expires: ctx.fc.Now().AddDate(0, 0, 3),
}
fqdnStatusDRenewed := &core.FQDNSet{
SetHash: sa.HashNames(certDNames),
SetHash: core.HashNames(certDNames),
Serial: serial5String,
Issued: ctx.fc.Now().AddDate(0, 0, -3),
Expires: ctx.fc.Now().AddDate(0, 0, 87),
@@ -743,7 +743,7 @@ func TestCertIsRenewed(t *testing.T) {
t.Fatal(err)
}
fqdnStatus := &core.FQDNSet{
SetHash: sa.HashNames(testData.DNS),
SetHash: core.HashNames(testData.DNS),
Serial: testData.stringSerial,
Issued: testData.NotBefore,
Expires: testData.NotAfter,
8 changes: 8 additions & 0 deletions core/util.go
@@ -242,6 +242,14 @@ func UniqueLowerNames(names []string) (unique []string) {
return
}

// HashNames returns a hash of the names requested. This is intended for use
// when interacting with the orderFqdnSets table and rate limiting.
func HashNames(names []string) []byte {
names = UniqueLowerNames(names)
hash := sha256.Sum256([]byte(strings.Join(names, ",")))
return hash[:]
}

// LoadCert loads a PEM certificate specified by filename or returns an error
func LoadCert(filename string) (*x509.Certificate, error) {
certPEM, err := os.ReadFile(filename)
28 changes: 28 additions & 0 deletions core/util_test.go
@@ -1,6 +1,7 @@
package core

import (
"bytes"
"encoding/json"
"fmt"
"math"
@@ -206,3 +207,30 @@ func TestRetryBackoff(t *testing.T) {
assertBetween(float64(backoff), float64(expected)*0.8, float64(expected)*1.2)

}

func TestHashNames(t *testing.T) {
// Test that it is deterministic
h1 := HashNames([]string{"a"})
h2 := HashNames([]string{"a"})
test.AssertByteEquals(t, h1, h2)

// Test that it differentiates
h1 = HashNames([]string{"a"})
h2 = HashNames([]string{"b"})
test.Assert(t, !bytes.Equal(h1, h2), "Should have been different")

// Test that it is not subject to ordering
h1 = HashNames([]string{"a", "b"})
h2 = HashNames([]string{"b", "a"})
test.AssertByteEquals(t, h1, h2)

// Test that it is not subject to case
h1 = HashNames([]string{"a", "b"})
h2 = HashNames([]string{"A", "B"})
test.AssertByteEquals(t, h1, h2)

// Test that it is not subject to duplication
h1 = HashNames([]string{"a", "a"})
h2 = HashNames([]string{"a"})
test.AssertByteEquals(t, h1, h2)
}
132 changes: 81 additions & 51 deletions ratelimits/README.md
@@ -1,12 +1,26 @@
# Configuring and Storing Key-Value Rate Limits

## Rate Limit Structure

All rate limits use a token-bucket model. The metaphor is that each limit is
represented by a bucket which holds tokens. Each request removes some number of
tokens from the bucket, or is denied if there aren't enough tokens to remove.
Over time, new tokens are added to the bucket at a steady rate, until the bucket
is full. The _burst_ parameter of a rate limit indicates the maximum capacity of
a bucket: how many tokens can it hold before new ones stop being added.
Therefore, this also indicates how many requests can be made in a single burst
before a full bucket is completely emptied. The _count_ and _period_ parameters
indicate the rate at which new tokens are added to a bucket: every period, count
tokens will be added. Therefore, these also indicate the steady-state rate at
which a client which has exhausted its quota can make requests: one token every
(period / count) duration.
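
The relationship between these parameters can be sketched with a toy token
bucket. The snippet below is illustrative only and assumes a hypothetical limit
of count=20, period=1s, burst=20; the limiter added in this PR tracks a
Theoretical Arrival Time (see "How Limits are Applied" below) rather than a
literal token count.

```go
// Toy sketch of the token-bucket metaphor, assuming a hypothetical
// limit of count=20, period=1s, burst=20. The real limiter does not
// store a token count; this is for intuition only.
package main

import (
	"fmt"
	"time"
)

const (
	burst  = 20.0
	count  = 20.0
	period = time.Second
)

type bucket struct {
	tokens float64   // current fill level, capped at burst
	last   time.Time // time of the last refill
}

// take refills the bucket at count tokens per period, then tries to
// remove one token to pay for the current request.
func (b *bucket) take(now time.Time) bool {
	b.tokens += now.Sub(b.last).Seconds() * (count / period.Seconds())
	if b.tokens > burst {
		b.tokens = burst
	}
	b.last = now
	if b.tokens < 1 {
		return false // bucket is empty: deny until it refills
	}
	b.tokens--
	return true
}

func main() {
	now := time.Now()
	b := &bucket{tokens: burst, last: now}
	for i := 1; i <= 21; i++ {
		fmt.Println(i, b.take(now)) // requests 1-20 allowed, 21 denied
	}
}
```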

## Default Limit Settings

Each root key directly corresponds to a `Name` enumeration as detailed in
`name.go`. The `Name` enum is used to identify the particular limit. The `count`
value is used to determine the maximum number of requests allowed, within the
given `period` of time. The `burst` value is used to determine the maximum
number of requests allowed, at any given time.
Each key directly corresponds to a `Name` enumeration as detailed in `name.go`.
The Name enum is used to identify the particular limit. The parameters of a
default limit are the values that will be used for all buckets that do not have
an explicit override (see below).

```yaml
NewRegistrationsPerIPAddress:
@@ -21,27 +35,31 @@ NewOrdersPerAccount:

## Override Limit Settings

Each root key represents a specific bucket, consisting of two elements: `name`
and `id`. The `name` here refers to the `Name` of the particular limit, while
the `id` is the client's identifier. The format of the `id` is dependent on the
limit. For example, the `id` for 'NewRegistrationsPerIPAddress' is a subscriber
IP address, while the `id` for 'NewOrdersPerAccount' is the subscriber's
registration ID.
Each override key represents a specific bucket, consisting of two elements:
_name_ and _id_. The name here refers to the Name of the particular limit, while
the id is a client identifier. The format of the id is dependent on the limit.
For example, the id for 'NewRegistrationsPerIPAddress' is a subscriber IP
address, while the id for 'NewOrdersPerAccount' is the subscriber's registration
ID.

```yaml
NewRegistrationsPerIPAddress:10.0.0.2:
burst: 40
burst: 20
count: 40
period: 1s
NewOrdersPerAccount:12345678
burst: 600
NewOrdersPerAccount:12345678:
burst: 300
count: 600
period: 180m
```

The above example overrides the default limits for specific subscribers. They
will be allowed to make twice as many requests as the default limits allow, but
will still be limited to the same burst as a regular subscriber.

### Id Formats in Limit Override Settings

Id formats vary based on the 'Name' enumeration. Below are examples for each
Id formats vary based on the Name enumeration. Below are examples for each
format:

#### ipAddress
@@ -52,7 +70,8 @@ Example: `NewRegistrationsPerIPAddress:10.0.0.1`

#### ipv6RangeCIDR

A valid IPv6 range in CIDR notation with a /48 mask.
A valid IPv6 range in CIDR notation with a /48 mask. A /48 range is typically
assigned to a single subscriber.

Example: `NewRegistrationsPerIPv6Range:2001:0db8:0000::/48`

@@ -79,7 +98,7 @@ Example: `CertificatesPerFQDNSetPerAccount:12345678:example.com,example.org`

Bucket keys are used to look up the bucket for a given limit and
subscriber. Bucket keys are formatted similarly to the overrides but with a
slight difference: the limit `Names` do not carry the string form of each limit.
slight difference: the limit Names do not carry the string form of each limit.
Instead, they apply the Name enum equivalent for every limit.

So, instead of:
@@ -101,6 +120,25 @@ default/override limit.

## How Limits are Applied

Although rate limit buckets are configured in terms of tokens, we do not
actually keep track of the number of tokens in each bucket. Instead, we track
the Theoretical Arrival Time (TAT) at which the bucket will be full again. If
the TAT is in the past, the bucket is full. If the TAT is in the future, some
number of tokens have been spent and the bucket is slowly refilling. If the TAT
is far enough in the future (specifically, more than `burst * (period / count)`
in the future), then the bucket is completely empty and requests will be denied.

Additional terminology:

- **burst offset** is the duration of time it takes for a bucket to go from
empty to full (`burst * (period / count)`).
- **emission interval** is the interval at which tokens are added to a bucket
(`period / count`). This is also the steady-state rate at which requests can
be made without being denied even once the burst has been exhausted.
- **cost** is the number of tokens removed from a bucket for a single request.
- **cost increment** is the duration of time the TAT is advanced to account
for the cost of the request (`cost * emission interval`).
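
As a rough illustration of how these terms relate, the snippet below derives
them for the hypothetical 20-requests-per-second, burst-20 limit used in the
walkthrough that follows. It is a sketch of the arithmetic only, not the
production gcra.go code.

```go
// Deriving the terms above for an assumed limit of burst=20, count=20,
// period=1s. Values are illustrative; this is not the gcra.go code.
package main

import (
	"fmt"
	"time"
)

func main() {
	burst, count := int64(20), int64(20)
	period := time.Second
	cost := int64(1) // one token per request

	emissionInterval := period / time.Duration(count)       // 50ms: one token added per interval
	burstOffset := time.Duration(burst) * emissionInterval  // 1s: time to go from empty to full
	costIncrement := time.Duration(cost) * emissionInterval // 50ms: TAT advance per request

	fmt.Println(emissionInterval, burstOffset, costIncrement) // 50ms 1s 50ms
}
```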

For the purposes of this example, subscribers originating from a specific IPv4
address are allowed 20 requests to the newFoo endpoint per second, with a
maximum burst of 20 requests at any point-in-time.
@@ -109,43 +147,35 @@ A subscriber calls the newFoo endpoint for the first time with an IP address of
172.23.45.22. Here's what happens:

1. The subscriber's IP address is used to generate a bucket key in the form of
`NewFoosPerIPAddress:172.23.45.22`. The Theoretical Arrival Time (TAT) for
this bucket is set to the current time.
'NewFoosPerIPAddress:172.23.45.22'.

2. The subscriber's bucket is initialized with 19 tokens, as 1 token is removed
to account for the current request. The request is approved, and the TAT is
updated. The TAT is set to the current time, plus the inter-request time
(which would be 1/20th of a second if we are limiting to 20 requests per
second).
2. The request is approved and the 'NewFoosPerIPAddress:172.23.45.22' bucket is
initialized with 19 tokens, as 1 token has been removed to account for the
cost of the current request. To accomplish this, the initial TAT is set to
the current time plus the _cost increment_ (which is 1/20th of a second if we
are limiting to 20 requests per second).

3. The subscriber is informed that their request was successful. Their bucket:
3. Bucket 'NewFoosPerIPAddress:172.23.45.22':
- will reset to full in 50ms (1/20th of a second),
- they can make another newFoo request immediately,
- they can make 19 more requests in the next 50ms,
- they do not need to wait between requests,
- if they make 19 requests in the next 50ms they will need to wait 50ms before
making another request and 1s to make 20 more requests,
- thus if they make 1 request every 50ms, they will never be denied.

Now, the subscriber makes another request immediately:

4. The TAT at bucket key `NewFoosPerIPAddress:172.23.45.22` is compared against
the current time and the burst offset. If the current time is less than the
TAT minus the burst offset, this implies the request would surpass the rate
limit and thus, it's rejected. If the current time is equal to or greater
than the TAT minus the burst offset, the request is allowed.

5. A token is deducted from the subscriber's bucket and the TAT is updated
similarly to the first request.

If the subscriber makes requests rapidly, causing the token count to hit 0
before 50ms has passed, here's what would happen during their next request:

6. The rate limiter checks the TAT. If the current time is less than (TAT -
burst offset), the request is rejected. Since the subscriber has already
exhausted their 20 requests in <50ms, the current time is indeed less than
(TAT - burst offset). Therefore, the request is rejected to maintain the rate
limit.
- will allow another newFoo request immediately,
- will allow between 1 and 19 more requests in the next 50ms,
- will reject the 20th request made in the next 50ms,
- and will allow 1 request every 50ms, indefinitely.

The subscriber makes another request 5ms later:

4. The TAT at bucket key 'NewFoosPerIPAddress:172.23.45.22' is compared against
the current time and the _burst offset_. The current time is greater than the
TAT minus the cost increment. Therefore, the request is approved.

5. The TAT at bucket key 'NewFoosPerIPAddress:172.23.45.22' is advanced by the
cost increment to account for the cost of the request.

The subscriber makes 18 more requests over the next 44ms, exhausting the burst,
and then immediately makes one more:

6. The current time is less than the TAT at bucket key
'NewFoosPerIPAddress:172.23.45.22' minus the burst offset, thus the request
is rejected.

This mechanism allows for bursts of traffic but also ensures that the average
rate of requests stays within the prescribed limits over time.
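
One way to sanity-check the walkthrough is to replay it with the TAT rule
sketched above. The snippet below is a hedged approximation: the names and the
helper function are illustrative and do not mirror the real Limiter API, but
the arithmetic follows the burst offset and cost increment definitions.

```go
// Replaying the walkthrough with the TAT rule: 20 requests per second,
// burst 20; one request at t=0, another at t=5ms, 18 more over the next
// 44ms, then one more inside the same 50ms window. Names are
// illustrative and do not match the real Limiter API.
package main

import (
	"fmt"
	"time"
)

func main() {
	emissionInterval := 50 * time.Millisecond // period / count
	burstOffset := 20 * emissionInterval      // burst * emission interval

	start := time.Now()
	tat := start // a bucket seen for the first time behaves as if full

	spend := func(now time.Time) bool {
		newTAT := tat.Add(emissionInterval) // cost increment for cost=1
		if newTAT.After(now.Add(burstOffset)) {
			return false // would exceed the burst, deny
		}
		tat = newTAT
		return true
	}

	fmt.Println(spend(start))                           // request 1: allowed
	fmt.Println(spend(start.Add(5 * time.Millisecond))) // request 2: allowed

	// Requests 3-20, spread over the next 44ms, are all allowed...
	for i := 0; i < 18; i++ {
		spend(start.Add(time.Duration(7+2*i) * time.Millisecond))
	}
	// ...but a 21st request inside the same 50ms window is denied.
	fmt.Println(spend(start.Add(49 * time.Millisecond))) // false
}
```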
52 changes: 25 additions & 27 deletions ratelimits/gcra.go
@@ -18,6 +18,9 @@ func divThenRound(x, y int64) int64 {
// TAT. The cost must be 0 or greater and <= the burst capacity of the limit.
func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision {
if cost < 0 || cost > rl.Burst {
// The condition above is the union of the conditions checked in Check
// and Spend methods of Limiter. If this panic is reached, it means that
// the caller has introduced a bug.
panic("invalid cost for maybeSpend")
}
nowUnix := clk.Now().UnixNano()
@@ -31,32 +34,19 @@ func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision
}

// Compute the cost increment.
emissionInterval := divThenRound(rl.Period.Nanoseconds(), rl.Count)
costIncrement := emissionInterval * cost
costIncrement := rl.emissionInterval * cost

// Deduct the cost to find the new TAT and residual capacity.
newTAT := tatUnix + costIncrement
burstOffset := emissionInterval * rl.Burst
difference := nowUnix - (newTAT - burstOffset)
residual := divThenRound(difference, emissionInterval)
difference := nowUnix - (newTAT - rl.burstOffset)

if costIncrement <= 0 && residual == 0 {
// Edge case: no cost to consume and no capacity to consume it from.
return &Decision{
Allowed: false,
Remaining: 0,
RetryIn: time.Duration(emissionInterval),
ResetIn: time.Duration(tatUnix - nowUnix),
newTAT: time.Unix(0, tatUnix).UTC(),
}
}

if residual < 0 {
var residual int64
if difference < 0 {
// Too little capacity to satisfy the cost, deny the request.
remaining := divThenRound(nowUnix-(tatUnix-burstOffset), emissionInterval)
residual = divThenRound(nowUnix-(tatUnix-rl.burstOffset), rl.emissionInterval)
return &Decision{
Allowed: false,
Remaining: int(remaining),
Remaining: int(residual),
RetryIn: -time.Duration(difference),
ResetIn: time.Duration(tatUnix - nowUnix),
newTAT: time.Unix(0, tatUnix).UTC(),
@@ -65,9 +55,10 @@ func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision

// There is enough capacity to satisfy the cost, allow the request.
var retryIn time.Duration
residual = divThenRound(difference, rl.emissionInterval)
if residual == 0 {
// This request will empty the bucket.
retryIn = time.Duration(emissionInterval)
retryIn = time.Duration(rl.emissionInterval)
}
return &Decision{
Allowed: true,
@@ -84,19 +75,27 @@ func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision
// limit. A partial refund is still considered successful.
func maybeRefund(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision {
if cost <= 0 || cost > rl.Burst {
// The condition above is checked in the Refund method of Limiter. If
// this panic is reached, it means that the caller has introduced a bug.
panic("invalid cost for maybeRefund")
}
nowUnix := clk.Now().UnixNano()
tatUnix := tat.UnixNano()

// If the TAT is in the past, use the current time as the starting point.
// The TAT must be in the future to refund capacity.
if nowUnix > tatUnix {
tatUnix = nowUnix
// The TAT is in the past, therefore the bucket is full.
return &Decision{
Allowed: false,
Remaining: int(rl.Burst),
RetryIn: time.Duration(0),
ResetIn: time.Duration(0),
newTAT: tat,
}
}

// Compute the refund increment.
emissionInterval := divThenRound(rl.Period.Nanoseconds(), rl.Count)
refundIncrement := emissionInterval * cost
refundIncrement := rl.emissionInterval * cost

// Subtract the refund increment from the TAT to find the new TAT.
newTAT := tatUnix - refundIncrement
@@ -107,9 +106,8 @@ func maybeRefund(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision
}

// Calculate the new capacity.
burstOffset := emissionInterval * rl.Burst
difference := nowUnix - (newTAT - burstOffset)
residual := divThenRound(difference, emissionInterval)
difference := nowUnix - (newTAT - rl.burstOffset)
residual := divThenRound(difference, rl.emissionInterval)

return &Decision{
Allowed: (newTAT != tatUnix),