Skip to content

Commit

Permalink
Merge pull request #24769 from vespa-engine/bratseth/no-traffic
Browse files Browse the repository at this point in the history
Handle no traffic
  • Loading branch information
freva authored Nov 6, 2022
2 parents 3b6df7e + ab9e6d3 commit b40c389
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,14 @@ private double idealCpuLoad() {
growthRateHeadroom = Math.min(growthRateHeadroom, 1 / queryFractionOfMax() + 0.1);

// How much headroom is needed to handle sudden arrival of additional traffic due to another zone going down?
double maxTrafficShiftHeadroom = 10.0; // Cap to avoid extreme sizes from a current very small share
double trafficShiftHeadroom;
if (application.status().maxReadShare() == 0) // No traffic fraction data
trafficShiftHeadroom = 2.0; // assume we currently get half of the global share of traffic
else if (application.status().currentReadShare() == 0)
trafficShiftHeadroom = maxTrafficShiftHeadroom;
trafficShiftHeadroom = 1/application.status().maxReadShare();
else
trafficShiftHeadroom = application.status().maxReadShare() / application.status().currentReadShare();
trafficShiftHeadroom = Math.min(trafficShiftHeadroom, maxTrafficShiftHeadroom);
trafficShiftHeadroom = Math.min(trafficShiftHeadroom, 1/application.status().maxReadShare());

// Assumptions: 1) Write load is not organic so we should not grow to handle more.
// (TODO: But allow applications to set their target write rate and size for that)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,16 @@ public void test_autoscaling_uses_peak() {

@Test
public void test_autoscaling_without_traffic() {
var min = new ClusterResources(1, 1, new NodeResources(2, 4, 10, 0.3));
var now = new ClusterResources(4, 1, new NodeResources(2, 16, 10, 0.3));
var max = new ClusterResources(4, 1, new NodeResources(3, 16, 50, 0.3));
var min = new ClusterResources(1, 1, new NodeResources(0.5, 4, 10, 0.3));
var now = new ClusterResources(4, 1, new NodeResources(8, 16, 10, 0.3));
var max = new ClusterResources(4, 1, new NodeResources(16, 32, 50, 0.3));
var fixture = AutoscalingTester.fixture(min, now, max)
.clusterType(ClusterSpec.Type.container)
.awsProdSetup()
.build();
var duration = fixture.loader().addMeasurements(new Load(0.04, 0.39, 0.01), 20);
fixture.tester().clock().advance(duration.negated());
fixture.loader().zeroTraffic(20);
fixture.loader().zeroTraffic(20, 1);
fixture.tester().assertResources("Scaled down",
2, 1, 2, 16, 10,
fixture.autoscale());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,30 @@ public void test_traffic_headroom() {
// No current traffic share: Ideal load is low but capped
var model1 = clusterModel(new Status(0.0, 1.0),
t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
assertEquals(0.10672097759674132, model1.idealLoad().cpu(), delta);
assertEquals(0.37067209775967414, model1.idealLoad().cpu(), delta);

// Almost no current traffic share: Ideal load is low but capped
var model2 = clusterModel(new Status(0.0001, 1.0),
t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
assertEquals(0.10672097759674132, model2.idealLoad().cpu(), delta);
assertEquals(0.37067209775967414, model2.idealLoad().cpu(), delta);
}

@Test
public void test_growth_headroom() {
// No current traffic: Ideal load is low but capped
// No traffic data: Ideal load assumes 2 regions
var model1 = clusterModel(new Status(0.0, 0.0),
t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
assertEquals(0.2240325865580448, model1.idealLoad().cpu(), delta);

// Almost no current traffic: Ideal load is low but capped
var model2 = clusterModel(new Status(0.0001, 1.0),
// No traffic: Ideal load is higher since we now know there is only one zone
var model2 = clusterModel(new Status(0.0, 1.0),
t -> t == 0 ? 10000.0 : 0.0, t -> 0.0);
assertEquals(0.37067209775967414, model2.idealLoad().cpu(), delta);

// Almost no current traffic: Similar number as above
var model3 = clusterModel(new Status(0.0001, 1.0),
t -> t == 0 ? 10000.0 : 0.0001, t -> 0.0);
assertEquals(0.0326530612244898, model2.idealLoad().cpu(), delta);
assertEquals(0.32653061224489793, model3.idealLoad().cpu(), delta);
}

private ClusterModel clusterModelWithNoData() {
Expand All @@ -72,7 +77,6 @@ private ClusterModel clusterModel(Status status, IntFunction<Double> queryRate,
ClusterSpec clusterSpec = clusterSpec();
Cluster cluster = cluster(resources());
application = application.with(cluster);

return new ClusterModel(application.with(status),
clusterSpec, cluster, clock, Duration.ofMinutes(10),
timeseries(cluster,100, queryRate, writeRate, clock),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@ public Loader(Fixture fixture) {
}

/** Assign measured zero traffic in the same way as the system will. */
public Duration zeroTraffic(int measurements) {
public Duration zeroTraffic(int measurements, int prodRegions) {
try (var lock = fixture.tester().nodeRepository().applications().lock(fixture.applicationId())) {
var statusWithZeroLoad = fixture.application().status().withCurrentReadShare(0).withMaxReadShare(1);
var statusWithZeroLoad = fixture.application().status()
.withCurrentReadShare(0)
// the line below from TrafficShareUpdater
.withMaxReadShare(prodRegions < 2 ? 1.0 : 1.0 / ( prodRegions - 1.0));
fixture.tester().nodeRepository().applications().put(fixture.application().with(statusWithZeroLoad), lock);
}
return addQueryRateMeasurements(measurements, (n) -> 0.0);
Expand Down

0 comments on commit b40c389

Please sign in to comment.