Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RA: Fix legacy override utilization metrics #7124

Merged
merged 6 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 48 additions & 29 deletions ra/ra.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,16 +394,15 @@ func (ra *RegistrationAuthorityImpl) checkRegistrationIPLimit(ctx context.Contex
}

threshold, overrideKey := limit.GetThreshold(ip.String(), noRegistrationID)
if count.Count >= threshold {
return berrors.RegistrationsPerIPError(0, "too many registrations for this IP")
}
if overrideKey != "" {
// We do not support overrides for the NewRegistrationsPerIPRange limit.
utilization := float64(count.Count) / float64(threshold)
utilization := float64(count.Count+1) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.RegistrationsPerIP, overrideKey).Set(utilization)
}

if count.Count >= threshold {
return berrors.RegistrationsPerIPError(0, "too many registrations for this IP")
}

return nil
}

Expand Down Expand Up @@ -600,14 +599,14 @@ func (ra *RegistrationAuthorityImpl) checkPendingAuthorizationLimit(ctx context.
if err != nil {
return err
}
if overrideKey != "" {
utilization := float64(countPB.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.PendingAuthorizationsPerAccount, overrideKey).Set(utilization)
}
if countPB.Count >= threshold {
ra.log.Infof("Rate limit exceeded, PendingAuthorizationsByRegID, regID: %d", regID)
return berrors.RateLimitError(0, "too many currently pending authorizations: %d", countPB.Count)
}
if overrideKey != "" {
utilization := float64(countPB.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.PendingAuthorizationsPerAccount, overrideKey).Set(utilization)
}
return nil
}

Expand Down Expand Up @@ -653,14 +652,14 @@ func (ra *RegistrationAuthorityImpl) checkInvalidAuthorizationLimit(ctx context.
// here.
noKey := ""
threshold, overrideKey := limit.GetThreshold(noKey, regID)
if overrideKey != "" {
utilization := float64(count.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.InvalidAuthorizationsPerAccount, overrideKey).Set(utilization)
}
if count.Count >= threshold {
ra.log.Infof("Rate limit exceeded, InvalidAuthorizationsByRegID, regID: %d", regID)
return berrors.FailedValidationError(0, "too many failed authorizations recently")
}
if overrideKey != "" {
utilization := float64(count.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.InvalidAuthorizationsPerAccount, overrideKey).Set(utilization)
}
return nil
}

Expand All @@ -684,14 +683,13 @@ func (ra *RegistrationAuthorityImpl) checkNewOrdersPerAccountLimit(ctx context.C
// There is no meaningful override key to use for this rate limit
noKey := ""
threshold, overrideKey := limit.GetThreshold(noKey, acctID)
if overrideKey != "" {
utilization := float64(count.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.NewOrdersPerAccount, overrideKey).Set(utilization)
}

if count.Count >= threshold {
return berrors.RateLimitError(0, "too many new orders recently")
}
if overrideKey != "" {
utilization := float64(count.Count+1) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.NewOrdersPerAccount, overrideKey).Set(utilization)
}
return nil
}

Expand Down Expand Up @@ -1423,18 +1421,34 @@ func (ra *RegistrationAuthorityImpl) enforceNameCounts(ctx context.Context, name
}

var badNames []string
var metricsData []struct {
overrideKey string
utilization float64
}

// Find the names that have counts at or over the threshold. Range
// over the names slice input to ensure the order of badNames will
// return the badNames in the same order they were input.
for _, name := range names {
threshold, overrideKey := limit.GetThreshold(name, regID)
if overrideKey != "" {
utilization := float64(response.Counts[name]) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, overrideKey).Set(utilization)
}
if response.Counts[name] >= threshold {
badNames = append(badNames, name)
}
if overrideKey != "" {
// Name is under threshold due to an override.
utilization := float64(response.Counts[name]+1) / float64(threshold)
metricsData = append(metricsData, struct {
overrideKey string
utilization float64
}{overrideKey, utilization})
}
}

if len(badNames) == 0 {
// All names were under the threshold, emit override utilization metrics.
for _, data := range metricsData {
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, data.overrideKey).Set(data.utilization)
}
}
return badNames, response.Earliest.AsTime(), nil
}
Expand Down Expand Up @@ -1498,18 +1512,19 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
return fmt.Errorf("checking duplicate certificate limit for %q: %s", names, err)
}

if overrideKey != "" {
utilization := float64(len(prevIssuances.TimestampsNS)) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
}

if int64(len(prevIssuances.TimestampsNS)) < threshold {
issuanceCount := int64(len(prevIssuances.TimestampsNS))
if issuanceCount < threshold {
// Issuance in window is below the threshold, no need to limit.
if overrideKey != "" {
utilization := float64(issuanceCount+1) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
}
return nil
} else {
beautifulentropy marked this conversation as resolved.
Show resolved Hide resolved
// Evaluate the rate limit using a leaky bucket algorithm. The bucket
// has a capacity of threshold and is refilled at a rate of 1 token per
// limit.Window/threshold from the time of each issuance timestamp.
// limit.Window/threshold from the time of each issuance timestamp. The
// timestamps start from the most recent issuance and go back in time.
now := ra.clk.Now()
nsPerToken := limit.Window.Nanoseconds() / threshold
for i, timestamp := range prevIssuances.TimestampsNS {
Expand All @@ -1518,6 +1533,10 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
// We know `i+1` tokens were generated since `tokenGeneratedSince`,
// and only `i` certificates were issued, so there's room to allow
// for an additional issuance.
if overrideKey != "" {
utilization := float64(issuanceCount) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
}
return nil
}
}
Expand Down
25 changes: 14 additions & 11 deletions ra/ra_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,12 +719,13 @@ func TestRegistrationsPerIPOverrideUsage(t *testing.T) {
}

// No error expected, the count of existing registrations for "4.5.6.7"
// should be 1 below the threshold.
// should be 1 below the override threshold.
err := ra.checkRegistrationIPLimit(ctx, rlp, regIP, mockCounterAlwaysTwo)
test.AssertNotError(t, err, "Unexpected error checking RegistrationsPerIPRange limit")

// Expecting ~66% of the override for "4.5.6.7" to be utilized.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.RegistrationsPerIP, "override_key": regIP.String()}, 0.6666666666666666)
// Accounting for the anticipated issuance, we expect "4.5.6.7" to be at
// 100% of their override threshold.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.RegistrationsPerIP, "override_key": regIP.String()}, 1)

mockCounterAlwaysThree := func(context.Context, *sapb.CountRegistrationsByIPRequest, ...grpc.CallOption) (*sapb.Count, error) {
return &sapb.Count{Count: 3}, nil
Expand Down Expand Up @@ -1223,30 +1224,32 @@ func TestCheckCertificatesPerNameLimit(t *testing.T) {
// Two base domains, one above threshold but with an override.
mockSA.nameCounts.Counts["example.com"] = 0
mockSA.nameCounts.Counts["bigissuer.com"] = 50
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, "bigissuer.com").Set(.5)
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
test.AssertNotError(t, err, "incorrectly rate limited bigissuer")
// "bigissuer.com" has an override of 100 and they've issued 50. We do
// not count issuance that has yet to occur, so we expect to see 50%
// utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, .5)
// "bigissuer.com" has an override of 100 and they've issued 50. Accounting
// for the anticipated issuance, we expect to see 51% utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, .51)

// Two base domains, one above its override
mockSA.nameCounts.Counts["example.com"] = 10
mockSA.nameCounts.Counts["bigissuer.com"] = 100
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, "bigissuer.com").Set(1)
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
test.AssertError(t, err, "incorrectly failed to rate limit bigissuer")
test.AssertErrorIs(t, err, berrors.RateLimit)
// "bigissuer.com" has an override of 100 and they've issued 100. So we
// expect to see 100% utilization.
// "bigissuer.com" has an override of 100 and they've issued 100. They're
// already at 100% utilization, so we expect to see 100% utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, 1)

// One base domain, above its override (which is below threshold)
mockSA.nameCounts.Counts["smallissuer.co.uk"] = 1
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, "smallissuer.co.uk").Set(1)
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.smallissuer.co.uk"}, rlp, 99)
test.AssertError(t, err, "incorrectly failed to rate limit smallissuer")
test.AssertErrorIs(t, err, berrors.RateLimit)
// "smallissuer.co.uk" has an override of 1 and they've issued 1. So we
// expect to see 100% utilization.
// "smallissuer.co.uk" has an override of 1 and they've issued 1. They're
// already at 100% utilization, so we expect to see 100% utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "smallissuer.co.uk"}, 1)
}

Expand Down
6 changes: 3 additions & 3 deletions ratelimit/rate-limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
const (
// CertificatesPerName is the name of the CertificatesPerName rate limit
// when referenced in metric labels.
CertificatesPerName = "certificates_per_domain_per_account"
CertificatesPerName = "certificates_per_domain"

// RegistrationsPerIP is the name of the RegistrationsPerIP rate limit when
// referenced in metric labels.
Expand All @@ -33,11 +33,11 @@ const (

// CertificatesPerFQDNSet is the name of the CertificatesPerFQDNSet rate
// limit when referenced in metric labels.
CertificatesPerFQDNSet = "certificates_per_fqdn_set_per_account"
CertificatesPerFQDNSet = "certificates_per_fqdn_set"

// CertificatesPerFQDNSetFast is the name of the CertificatesPerFQDNSetFast
// rate limit when referenced in metric labels.
CertificatesPerFQDNSetFast = "certificates_per_fqdn_set_per_account_fast"
CertificatesPerFQDNSetFast = "certificates_per_fqdn_set_fast"

// NewOrdersPerAccount is the name of the NewOrdersPerAccount rate limit
// when referenced in metric labels.
Expand Down