-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add sidekiq backoff service to handle OS Places outages
- When OS Places has a slowdown or outage, we can avoid our own alerting problems by backing off calls (since they're not time-critical).
- Add a service, initialised on startup (relevant mainly to worker processes), which adjusts the scheduled interval of PostcodeProcessWorker creation (currently once per second). We record OS Places API failures, and with each failure we double the interval, until we reach a max of 180s. When we record a successful call, we reduce the interval by 1s, so the schedule backs off quickly if many errors occur and slowly creeps back to full speed once the errors are over.
- Loading branch information
Showing
4 changed files
with
149 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Speeds up or slows down a named Sidekiq schedule in response to the
# success or failure of the calls that schedule drives.
#
# The interval is read from, and written back to, the schedule's "every"
# entry as a one-element array such as ["5s"]. Each failure doubles the
# interval and each success shortens it by one second; the result is always
# kept within min_interval..max_interval.
class SidekiqSchedulerBackoffService
  # @param name [String, Symbol] key of the schedule in Sidekiq.get_schedule
  # @param min_interval [Integer] shortest interval to schedule, in seconds
  # @param max_interval [Integer] longest interval to schedule, in seconds
  def initialize(name:, min_interval:, max_interval:)
    @name = name.to_s
    @min_interval = min_interval
    @max_interval = max_interval
  end

  # Shortens the interval by one second. The schedule is only rewritten when
  # the resulting interval differs from the current one.
  def record_success
    adjust_interval { |seconds| seconds - 1 }
  end

  # Doubles the interval. The schedule is only rewritten when the resulting
  # interval differs from the current one.
  def record_failure
    adjust_interval { |seconds| seconds * 2 }
  end

  private

  # Yields the current interval, clamps the block's result to the configured
  # bounds and stores it if it changed. Clamping on both sides means an
  # interval that starts outside the bounds is pulled back in by the first
  # recorded event, whichever direction that event moves it.
  def adjust_interval
    initial_interval = current_interval
    target_interval = yield(initial_interval).clamp(@min_interval, @max_interval)
    restart_schedule(target_interval) if target_interval != initial_interval
  end

  # Current interval as an Integer number of seconds.
  #
  # Array() accepts "every" as either a bare string ("5s") or an array whose
  # first element is the interval (["5s"]). The final character is dropped on
  # the assumption that it is the "s" unit written by restart_schedule.
  def current_interval
    every = Array(Sidekiq.get_schedule[@name]["every"]).first
    Integer(every.chop)
  end

  # Stores the schedule again with only its "every" entry replaced.
  def restart_schedule(target_interval)
    schedule = Sidekiq.get_schedule[@name]
    Sidekiq.set_schedule(@name, schedule.merge("every" => ["#{target_interval}s"]))
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
require "spec_helper" | ||
|
||
# In these examples "speed" is the inverse of the scheduled interval:
# "maximum speed" means the schedule is at min_interval, and "minimum speed"
# means it is at max_interval.
RSpec.describe SidekiqSchedulerBackoffService do
  let(:min_interval) { 2 }
  let(:max_interval) { 180 }
  let(:name) { :queue_oldest_postcodes_for_updating }
  subject { SidekiqSchedulerBackoffService.new(name:, min_interval:, max_interval:) }

  describe "#record_failure" do
    context "when the scheduler is going faster than maximum speed" do
      before do
        set_scheduled_interval(min_interval - 1)
      end

      it "sets the scheduler to maximum speed and reloads the schedule" do
        subject.record_failure
        expect(scheduled_interval).to eq(["#{min_interval}s"])
      end
    end

    context "when the scheduler is going faster than minimum speed" do
      before do
        set_scheduled_interval(max_interval / 2)
      end

      it "halves the scheduler speed and reloads the schedule" do
        subject.record_failure
        expect(scheduled_interval).to eq(["#{max_interval}s"])
      end
    end

    context "when the scheduler is going at minimum speed" do
      before do
        set_scheduled_interval(max_interval)
      end

      it "does nothing" do
        subject.record_failure
        expect(scheduled_interval).to eq(["#{max_interval}s"])
      end
    end

    context "when the scheduler is going slower than minimum speed" do
      before do
        set_scheduled_interval(max_interval * 2)
      end

      it "sets the scheduler to minimum speed and reloads the schedule" do
        subject.record_failure
        expect(scheduled_interval).to eq(["#{max_interval}s"])
      end
    end
  end

  describe "#record_success" do
    context "when the scheduler is going faster than maximum speed" do
      before do
        set_scheduled_interval(min_interval - 1)
      end

      it "sets the scheduler to maximum speed and reloads the schedule" do
        subject.record_success
        expect(scheduled_interval).to eq(["#{min_interval}s"])
      end
    end

    context "when the scheduler is going at maximum speed" do
      before do
        set_scheduled_interval(min_interval)
      end

      it "does nothing" do
        subject.record_success
        expect(scheduled_interval).to eq(["#{min_interval}s"])
      end
    end

    context "when the scheduler is going slower than maximum speed" do
      before do
        set_scheduled_interval(min_interval * 4)
      end

      it "decrements the scheduler interval by 1 second and reloads the schedule" do
        subject.record_success
        expect(scheduled_interval).to eq(["#{(min_interval * 4) - 1}s"])
      end
    end

    context "when the scheduler is going slower than minimum speed" do
      before do
        set_scheduled_interval(max_interval + 1)
      end

      it "sets the scheduler to minimum speed and reloads the schedule" do
        subject.record_success
        expect(scheduled_interval).to eq(["#{max_interval}s"])
      end
    end
  end
end
|
||
# Spec helper: stores a schedule entry under `name` (the example group's
# `let`) whose "every" value is the given number of seconds, e.g. ["5s"].
#
# @param interval [Integer] interval in seconds
def set_scheduled_interval(interval)
  config = { "every" => ["#{interval}s"], "class" => "PostcodesCollectionWorker" }
  Sidekiq.set_schedule(name.to_s, config)
end
|
||
# Spec helper: returns the "every" value currently stored for the schedule
# under test. The key is taken from `name` (the example group's `let`), the
# same key set_scheduled_interval writes to, so the two helpers cannot drift
# apart if the schedule name changes.
def scheduled_interval
  Sidekiq.get_schedule[name.to_s]["every"]
end