Initial implementation of key-value rate limits #6947

Merged 42 commits into main from rate-limits-v2 on Jul 21, 2023
Changes from 3 commits

Commits (42)
8e41eea
WIP
beautifulentropy Jun 14, 2023
67ed1e9
Flesh out the RateLimit methods
beautifulentropy Jun 15, 2023
759479f
Store limit prefixes as integers
beautifulentropy Jun 16, 2023
30e9b01
Improve errors.
beautifulentropy Jun 16, 2023
e834925
Add and test YAML loading
beautifulentropy Jun 16, 2023
5593b49
Refunds, Resets, and Initialization
beautifulentropy Jun 16, 2023
2d148b9
Moar coverage and some small refactors.
beautifulentropy Jun 17, 2023
32741c8
Refunds should return full decisions
beautifulentropy Jun 20, 2023
d44d42c
Initialization is hard.
beautifulentropy Jun 20, 2023
529bfba
Another round of coverage improvements.
beautifulentropy Jun 20, 2023
8178cf1
Avoid shadowing and fix lints.
beautifulentropy Jun 20, 2023
a5a0d15
Addressing comments and adding better checks for limit overrides
beautifulentropy Jun 21, 2023
fb78d4d
Typos
beautifulentropy Jun 21, 2023
5c8d04c
Add some cautionary panics inside of gcra
beautifulentropy Jun 21, 2023
a2844b8
Typos
beautifulentropy Jun 21, 2023
24673e4
Document enums to set stage for the last three id validators
beautifulentropy Jun 22, 2023
b1c2f35
Typo
beautifulentropy Jun 22, 2023
de93969
Addressing comments (WIP)
beautifulentropy Jun 23, 2023
b2f9081
Addressing comments.
beautifulentropy Jun 26, 2023
3b1b762
Lints.
beautifulentropy Jun 26, 2023
1a09f68
We cannot support certain overrides
beautifulentropy Jun 26, 2023
94ec876
Typo.
beautifulentropy Jun 26, 2023
95730f0
Revert changes to policy.
beautifulentropy Jun 26, 2023
47622b3
Address limit.go comments.
beautifulentropy Jun 27, 2023
c413abf
Changes to limit Names, validations, and tests.
beautifulentropy Jun 28, 2023
07699d4
More test cases and a README
beautifulentropy Jun 28, 2023
1ac9b0e
Indent bullets
beautifulentropy Jun 28, 2023
1e7a1ce
Small typos, etc.
beautifulentropy Jun 28, 2023
1c0301e
Informated????
beautifulentropy Jun 28, 2023
6ac0e5e
:woman_facepalming:
beautifulentropy Jun 28, 2023
ea75b65
Addressing comments, two still outstanding.
beautifulentropy Jul 13, 2023
2f69601
Addressed final comment.
beautifulentropy Jul 13, 2023
7442f18
Merge branch 'main' into rate-limits-v2
beautifulentropy Jul 13, 2023
603f187
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
c0289c5
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
897f754
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
5ef087a
Update ratelimits/README.md
beautifulentropy Jul 17, 2023
ca04728
Address comemnts.
beautifulentropy Jul 19, 2023
d052e0c
Merge branch 'main' into rate-limits-v2
beautifulentropy Jul 19, 2023
9ec09f0
unnecessary conversion
beautifulentropy Jul 19, 2023
bb77c0b
Add fractional refund test.
beautifulentropy Jul 20, 2023
861161f
:woman_facepalming:
beautifulentropy Jul 20, 2023
2 changes: 1 addition & 1 deletion cmd/expiration-mailer/main.go
@@ -304,7 +304,7 @@ func (m *mailer) updateLastNagTimestampsChunk(ctx context.Context, certs []*x509
}

func (m *mailer) certIsRenewed(ctx context.Context, names []string, issued time.Time) (bool, error) {
namehash := sa.HashNames(names)
namehash := core.HashNames(names)

var present bool
err := m.dbMap.WithContext(ctx).SelectOne(
8 changes: 4 additions & 4 deletions cmd/expiration-mailer/main_test.go
@@ -351,7 +351,7 @@ func TestNoContactCertIsRenewed(t *testing.T) {
setupDBMap, err := sa.DBMapForTest(vars.DBConnSAFullPerms)
test.AssertNotError(t, err, "setting up DB")
err = setupDBMap.Insert(&core.FQDNSet{
SetHash: sa.HashNames(names),
SetHash: core.HashNames(names),
Serial: core.SerialToString(serial2),
Issued: testCtx.fc.Now().Add(time.Hour),
Expires: expires.Add(time.Hour),
@@ -576,13 +576,13 @@ func addExpiringCerts(t *testing.T, ctx *testCtx) []certDERWithRegID {
test.AssertNotError(t, err, "creating cert D")

fqdnStatusD := &core.FQDNSet{
SetHash: sa.HashNames(certDNames),
SetHash: core.HashNames(certDNames),
Serial: serial4String,
Issued: ctx.fc.Now().AddDate(0, 0, -87),
Expires: ctx.fc.Now().AddDate(0, 0, 3),
}
fqdnStatusDRenewed := &core.FQDNSet{
SetHash: sa.HashNames(certDNames),
SetHash: core.HashNames(certDNames),
Serial: serial5String,
Issued: ctx.fc.Now().AddDate(0, 0, -3),
Expires: ctx.fc.Now().AddDate(0, 0, 87),
@@ -743,7 +743,7 @@ func TestCertIsRenewed(t *testing.T) {
t.Fatal(err)
}
fqdnStatus := &core.FQDNSet{
SetHash: sa.HashNames(testData.DNS),
SetHash: core.HashNames(testData.DNS),
Serial: testData.stringSerial,
Issued: testData.NotBefore,
Expires: testData.NotAfter,
8 changes: 8 additions & 0 deletions core/util.go
@@ -242,6 +242,14 @@ func UniqueLowerNames(names []string) (unique []string) {
return
}

// HashNames returns a hash of the names requested. This is intended for use
// when interacting with the orderFqdnSets table and rate limiting.
func HashNames(names []string) []byte {
names = UniqueLowerNames(names)
hash := sha256.Sum256([]byte(strings.Join(names, ",")))
return hash[:]
}

// LoadCert loads a PEM certificate specified by filename or returns an error
func LoadCert(filename string) (*x509.Certificate, error) {
certPEM, err := os.ReadFile(filename)
28 changes: 28 additions & 0 deletions core/util_test.go
@@ -1,6 +1,7 @@
package core

import (
"bytes"
"encoding/json"
"fmt"
"math"
@@ -206,3 +207,30 @@ func TestRetryBackoff(t *testing.T) {
assertBetween(float64(backoff), float64(expected)*0.8, float64(expected)*1.2)

}

func TestHashNames(t *testing.T) {
// Test that it is deterministic
h1 := HashNames([]string{"a"})
h2 := HashNames([]string{"a"})
test.AssertByteEquals(t, h1, h2)

// Test that it differentiates
h1 = HashNames([]string{"a"})
h2 = HashNames([]string{"b"})
test.Assert(t, !bytes.Equal(h1, h2), "Should have been different")

// Test that it is not subject to ordering
h1 = HashNames([]string{"a", "b"})
h2 = HashNames([]string{"b", "a"})
test.AssertByteEquals(t, h1, h2)

// Test that it is not subject to case
h1 = HashNames([]string{"a", "b"})
h2 = HashNames([]string{"A", "B"})
test.AssertByteEquals(t, h1, h2)

// Test that it is not subject to duplication
h1 = HashNames([]string{"a", "a"})
h2 = HashNames([]string{"a"})
test.AssertByteEquals(t, h1, h2)
}
132 changes: 81 additions & 51 deletions ratelimits/README.md
@@ -1,12 +1,26 @@
# Configuring and Storing Key-Value Rate Limits

## Rate Limit Structure

All rate limits use a token-bucket model. The metaphor is that each limit is
represented by a bucket which holds tokens. Each request removes some number of
tokens from the bucket, or is denied if there aren't enough tokens to remove.
Over time, new tokens are added to the bucket at a steady rate, until the bucket
is full. The _burst_ parameter of a rate limit indicates the maximum capacity of
a bucket: how many tokens can it hold before new ones stop being added.
Therefore, this also indicates how many requests can be made in a single burst
before a full bucket is completely emptied. The _count_ and _period_ parameters
indicate the rate at which new tokens are added to a bucket: every period, count
tokens will be added. Therefore, these also indicate the steady-state rate at
which a client which has exhausted its quota can make requests: one token every
(period / count) duration.
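
The relationship between these parameters can be sketched with a toy token
bucket. The snippet below is illustrative only and assumes a hypothetical limit
of count=20, period=1s, burst=20; the limiter added in this PR tracks a
Theoretical Arrival Time (see "How Limits are Applied" below) rather than a
literal token count.

```go
// Toy sketch of the token-bucket metaphor, assuming a hypothetical
// limit of count=20, period=1s, burst=20. The real limiter does not
// store a token count; this is for intuition only.
package main

import (
	"fmt"
	"time"
)

const (
	burst  = 20.0
	count  = 20.0
	period = time.Second
)

type bucket struct {
	tokens float64   // current fill level, capped at burst
	last   time.Time // time of the last refill
}

// take refills the bucket at count tokens per period, then tries to
// remove one token to pay for the current request.
func (b *bucket) take(now time.Time) bool {
	b.tokens += now.Sub(b.last).Seconds() * (count / period.Seconds())
	if b.tokens > burst {
		b.tokens = burst
	}
	b.last = now
	if b.tokens < 1 {
		return false // bucket is empty: deny until it refills
	}
	b.tokens--
	return true
}

func main() {
	now := time.Now()
	b := &bucket{tokens: burst, last: now}
	for i := 1; i <= 21; i++ {
		fmt.Println(i, b.take(now)) // requests 1-20 allowed, 21 denied
	}
}
```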

## Default Limit Settings

Each root key directly corresponds to a `Name` enumeration as detailed in
`name.go`. The `Name` enum is used to identify the particular limit. The `count`
value is used to determine the maximum number of requests allowed, within the
given `period` of time. The `burst` value is used to determine the maximum
number of requests allowed, at any given time.
Each key directly corresponds to a `Name` enumeration as detailed in `name.go`.
The Name enum is used to identify the particular limit. The parameters of a
default limit are the values that will be used for all buckets that do not have
an explicit override (see below).

```yaml
NewRegistrationsPerIPAddress:
@@ -21,27 +35,31 @@ NewOrdersPerAccount:

## Override Limit Settings

Each root key represents a specific bucket, consisting of two elements: `name`
and `id`. The `name` here refers to the `Name` of the particular limit, while
the `id` is the client's identifier. The format of the `id` is dependent on the
limit. For example, the `id` for 'NewRegistrationsPerIPAddress' is a subscriber
IP address, while the `id` for 'NewOrdersPerAccount' is the subscriber's
registration ID.
Each override key represents a specific bucket, consisting of two elements:
_name_ and _id_. The name here refers to the Name of the particular limit, while
the id is a client identifier. The format of the id is dependent on the limit.
For example, the id for 'NewRegistrationsPerIPAddress' is a subscriber IP
address, while the id for 'NewOrdersPerAccount' is the subscriber's registration
ID.

```yaml
NewRegistrationsPerIPAddress:10.0.0.2:
burst: 40
burst: 20
count: 40
period: 1s
NewOrdersPerAccount:12345678
burst: 600
NewOrdersPerAccount:12345678:
burst: 300
count: 600
period: 180m
```

The above example overrides the default limits for specific subscribers. They
will be allowed to make twice as many requests as the default limits allow, but
will still be limited to the same burst as a regular subscriber.

### Id Formats in Limit Override Settings

Id formats vary based on the 'Name' enumeration. Below are examples for each
Id formats vary based on the Name enumeration. Below are examples for each
format:

#### ipAddress
@@ -52,7 +70,8 @@ Example: `NewRegistrationsPerIPAddress:10.0.0.1`

#### ipv6RangeCIDR

A valid IPv6 range in CIDR notation with a /48 mask.
A valid IPv6 range in CIDR notation with a /48 mask. A /48 range is typically
assigned to a single subscriber.

Example: `NewRegistrationsPerIPv6Range:2001:0db8:0000::/48`

@@ -79,7 +98,7 @@ Example: `CertificatesPerFQDNSetPerAccount:12345678:example.com,example.org`

Bucket keys are used to look up the bucket for a given limit and
subscriber. Bucket keys are formatted similarly to the overrides but with a
slight difference: the limit `Names` do not carry the string form of each limit.
slight difference: the limit Names do not carry the string form of each limit.
Instead, they apply the Name enum equivalent for every limit.

So, instead of:
@@ -101,6 +120,25 @@ default/override limit.

## How Limits are Applied

Although rate limit buckets are configured in terms of tokens, we do not
actually keep track of the number of tokens in each bucket. Instead, we track
the Theoretical Arrival Time (TAT) at which the bucket will be full again. If
the TAT is in the past, the bucket is full. If the TAT is in the future, some
number of tokens have been spent and the bucket is slowly refilling. If the TAT
is far enough in the future (specifically, more than `burst * (period / count)`
in the future), then the bucket is completely empty and requests will be denied.

Additional terminology:

- **burst offset** is the duration of time it takes for a bucket to go from
empty to full (`burst * (period / count)`).
- **emission interval** is the interval at which tokens are added to a bucket
(`period / count`). This is also the steady-state rate at which requests can
be made without being denied even once the burst has been exhausted.
- **cost** is the number of tokens removed from a bucket for a single request.
- **cost increment** is the duration of time the TAT is advanced to account
for the cost of the request (`cost * emission interval`).
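
As a rough illustration of how these terms relate, the snippet below derives
them for the hypothetical 20-requests-per-second, burst-20 limit used in the
walkthrough that follows. It is a sketch of the arithmetic only, not the
production gcra.go code.

```go
// Deriving the terms above for an assumed limit of burst=20, count=20,
// period=1s. Values are illustrative; this is not the gcra.go code.
package main

import (
	"fmt"
	"time"
)

func main() {
	burst, count := int64(20), int64(20)
	period := time.Second
	cost := int64(1) // one token per request

	emissionInterval := period / time.Duration(count)       // 50ms: one token added per interval
	burstOffset := time.Duration(burst) * emissionInterval  // 1s: time to go from empty to full
	costIncrement := time.Duration(cost) * emissionInterval // 50ms: TAT advance per request

	fmt.Println(emissionInterval, burstOffset, costIncrement) // 50ms 1s 50ms
}
```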

For the purposes of this example, subscribers originating from a specific IPv4
address are allowed 20 requests to the newFoo endpoint per second, with a
maximum burst of 20 requests at any point-in-time.
@@ -109,43 +147,35 @@ A subscriber calls the newFoo endpoint for the first time with an IP address of
172.23.45.22. Here's what happens:

1. The subscriber's IP address is used to generate a bucket key in the form of
`NewFoosPerIPAddress:172.23.45.22`. The Theoretical Arrival Time (TAT) for
this bucket is set to the current time.
'NewFoosPerIPAddress:172.23.45.22'.

2. The subscriber's bucket is initialized with 19 tokens, as 1 token is removed
to account for the current request. The request is approved, and the TAT is
updated. The TAT is set to the current time, plus the inter-request time
(which would be 1/20th of a second if we are limiting to 20 requests per
second).
2. The request is approved and the 'NewFoosPerIPAddress:172.23.45.22' bucket is
initialized with 19 tokens, as 1 token has been removed to account for the
cost of the current request. To accomplish this, the initial TAT is set to
the current time plus the _cost increment_ (which is 1/20th of a second if we
are limiting to 20 requests per second).

3. The subscriber is informed that their request was successful. Their bucket:
3. Bucket 'NewFoosPerIPAddress:172.23.45.22':
- will reset to full in 50ms (1/20th of a second),
- they can make another newFoo request immediately,
- they can make 19 more requests in the next 50ms,
- they do not need to wait between requests,
- if they make 19 requests in the next 50ms they will need to wait 50ms before
making another request and 1s to make 20 more requests,
- thus if they make 1 request every 50ms, they will never be denied.

Now, the subscriber makes another request immediately:

4. The TAT at bucket key `NewFoosPerIPAddress:172.23.45.22` is compared against
the current time and the burst offset. If the current time is less than the
TAT minus the burst offset, this implies the request would surpass the rate
limit and thus, it's rejected. If the current time is equal to or greater
than the TAT minus the burst offset, the request is allowed.

5. A token is deducted from the subscriber's bucket and the TAT is updated
similarly to the first request.

If the subscriber makes requests rapidly, causing the token count to hit 0
before 50ms has passed, here's what would happen during their next request:

6. The rate limiter checks the TAT. If the current time is less than (TAT -
burst offset), the request is rejected. Since the subscriber has already
exhausted their 20 requests in <50ms, the current time is indeed less than
(TAT - burst offset). Therefore, the request is rejected to maintain the rate
limit.
- will allow another newFoo request immediately,
- will allow between 1 and 19 more requests in the next 50ms,
- will reject the 20th request made in the next 50ms,
- and will allow 1 request every 50ms, indefinitely.

The subscriber makes another request 5ms later:

4. The TAT at bucket key 'NewFoosPerIPAddress:172.23.45.22' is compared against
the current time and the _burst offset_. The current time is greater than the
TAT minus the cost increment. Therefore, the request is approved.

5. The TAT at bucket key 'NewFoosPerIPAddress:172.23.45.22' is advanced by the
cost increment to account for the cost of the request.

The subscriber makes 18 more requests over the next 44ms, exhausting the burst,
and then immediately makes one more:

6. The current time is less than the TAT at bucket key
'NewFoosPerIPAddress:172.23.45.22' minus the burst offset, thus the request
is rejected.

This mechanism allows for bursts of traffic but also ensures that the average
rate of requests stays within the prescribed limits over time.
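
One way to sanity-check the walkthrough is to replay it with the TAT rule
sketched above. The snippet below is a hedged approximation: the names and the
helper function are illustrative and do not mirror the real Limiter API, but
the arithmetic follows the burst offset and cost increment definitions.

```go
// Replaying the walkthrough with the TAT rule: 20 requests per second,
// burst 20; one request at t=0, another at t=5ms, 18 more over the next
// 44ms, then one more inside the same 50ms window. Names are
// illustrative and do not match the real Limiter API.
package main

import (
	"fmt"
	"time"
)

func main() {
	emissionInterval := 50 * time.Millisecond // period / count
	burstOffset := 20 * emissionInterval      // burst * emission interval

	start := time.Now()
	tat := start // a bucket seen for the first time behaves as if full

	spend := func(now time.Time) bool {
		newTAT := tat.Add(emissionInterval) // cost increment for cost=1
		if newTAT.After(now.Add(burstOffset)) {
			return false // would exceed the burst, deny
		}
		tat = newTAT
		return true
	}

	fmt.Println(spend(start))                           // request 1: allowed
	fmt.Println(spend(start.Add(5 * time.Millisecond))) // request 2: allowed

	// Requests 3-20, spread over the next 44ms, are all allowed...
	for i := 0; i < 18; i++ {
		spend(start.Add(time.Duration(7+2*i) * time.Millisecond))
	}
	// ...but a 21st request inside the same 50ms window is denied.
	fmt.Println(spend(start.Add(49 * time.Millisecond))) // false
}
```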
52 changes: 25 additions & 27 deletions ratelimits/gcra.go
@@ -18,6 +18,9 @@ func divThenRound(x, y int64) int64 {
// TAT. The cost must be 0 or greater and <= the burst capacity of the limit.
func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision {
if cost < 0 || cost > rl.Burst {
// The condition above is the union of the conditions checked in Check
// and Spend methods of Limiter. If this panic is reached, it means that
// the caller has introduced a bug.
panic("invalid cost for maybeSpend")
}
nowUnix := clk.Now().UnixNano()
@@ -31,32 +34,19 @@ func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision
}

// Compute the cost increment.
emissionInterval := divThenRound(rl.Period.Nanoseconds(), rl.Count)
costIncrement := emissionInterval * cost
costIncrement := rl.emissionInterval * cost

// Deduct the cost to find the new TAT and residual capacity.
newTAT := tatUnix + costIncrement
burstOffset := emissionInterval * rl.Burst
difference := nowUnix - (newTAT - burstOffset)
residual := divThenRound(difference, emissionInterval)
difference := nowUnix - (newTAT - rl.burstOffset)

if costIncrement <= 0 && residual == 0 {
// Edge case: no cost to consume and no capacity to consume it from.
return &Decision{
Allowed: false,
Remaining: 0,
RetryIn: time.Duration(emissionInterval),
ResetIn: time.Duration(tatUnix - nowUnix),
newTAT: time.Unix(0, tatUnix).UTC(),
}
}

if residual < 0 {
var residual int64
if difference < 0 {
// Too little capacity to satisfy the cost, deny the request.
remaining := divThenRound(nowUnix-(tatUnix-burstOffset), emissionInterval)
residual = divThenRound(nowUnix-(tatUnix-rl.burstOffset), rl.emissionInterval)
return &Decision{
Allowed: false,
Remaining: int(remaining),
Remaining: int(residual),
RetryIn: -time.Duration(difference),
ResetIn: time.Duration(tatUnix - nowUnix),
newTAT: time.Unix(0, tatUnix).UTC(),
@@ -65,9 +55,10 @@ func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision

// There is enough capacity to satisfy the cost, allow the request.
var retryIn time.Duration
residual = divThenRound(difference, rl.emissionInterval)
if residual == 0 {
// This request will empty the bucket.
retryIn = time.Duration(emissionInterval)
retryIn = time.Duration(rl.emissionInterval)
}
return &Decision{
Allowed: true,
@@ -84,19 +75,27 @@ func maybeSpend(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision
// limit. A partial refund is still considered successful.
func maybeRefund(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision {
if cost <= 0 || cost > rl.Burst {
// The condition above is checked in the Refund method of Limiter. If
// this panic is reached, it means that the caller has introduced a bug.
panic("invalid cost for maybeRefund")
}
nowUnix := clk.Now().UnixNano()
tatUnix := tat.UnixNano()

// If the TAT is in the past, use the current time as the starting point.
// The TAT must be in the future to refund capacity.
if nowUnix > tatUnix {
tatUnix = nowUnix
// The TAT is in the past, therefore the bucket is full.
return &Decision{
Allowed: false,
Remaining: int(rl.Burst),
RetryIn: time.Duration(0),
ResetIn: time.Duration(0),
newTAT: tat,
}
}

// Compute the refund increment.
emissionInterval := divThenRound(rl.Period.Nanoseconds(), rl.Count)
refundIncrement := emissionInterval * cost
refundIncrement := rl.emissionInterval * cost

// Subtract the refund increment from the TAT to find the new TAT.
newTAT := tatUnix - refundIncrement
@@ -107,9 +106,8 @@ func maybeRefund(clk clock.Clock, rl limit, tat time.Time, cost int64) *Decision
}

// Calculate the new capacity.
burstOffset := emissionInterval * rl.Burst
difference := nowUnix - (newTAT - burstOffset)
residual := divThenRound(difference, emissionInterval)
difference := nowUnix - (newTAT - rl.burstOffset)
residual := divThenRound(difference, rl.emissionInterval)

return &Decision{
Allowed: (newTAT != tatUnix),