Skip to content

Commit

Permalink
Fix flaky BigtableInstanceAdminClientIT.createClusterWithAutoscalingA…
Browse files Browse the repository at this point in the history
…ndPartialUpdateTest

Change-Id: Ia8bc9aa98922a226b84c19400dac91db05b0c6c8
  • Loading branch information
trollyxia committed Nov 26, 2024
1 parent 687b6df commit 49eaeb0
Showing 1 changed file with 37 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static com.google.common.truth.Truth.assertWithMessage;
import static com.google.common.truth.TruthJUnit.assume;

import com.google.api.gax.rpc.FailedPreconditionException;
import com.google.cloud.Policy;
import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminClient;
import com.google.cloud.bigtable.admin.v2.models.AppProfile;
Expand All @@ -36,7 +37,10 @@
import com.google.cloud.bigtable.test_helpers.env.EmulatorEnv;
import com.google.cloud.bigtable.test_helpers.env.PrefixGenerator;
import com.google.cloud.bigtable.test_helpers.env.TestEnvRule;
import java.time.Duration;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
Expand All @@ -49,6 +53,8 @@
public class BigtableInstanceAdminClientIT {

@ClassRule public static TestEnvRule testEnvRule = new TestEnvRule();
private static final Logger logger =
Logger.getLogger(BigtableInstanceAdminClientIT.class.getName());
@Rule public final PrefixGenerator prefixGenerator = new PrefixGenerator();

private String instanceId = testEnvRule.env().getInstanceId();
Expand Down Expand Up @@ -410,7 +416,7 @@ public void createClusterWithAutoscalingTest() {
}

@Test
public void createClusterWithAutoscalingAndPartialUpdateTest() {
public void createClusterWithAutoscalingAndPartialUpdateTest() throws Exception {
String newInstanceId = prefixGenerator.newPrefix();
String newClusterId = newInstanceId + "-c1";

Expand Down Expand Up @@ -448,8 +454,16 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
assertThat(retrievedCluster.getAutoscalingCpuPercentageTarget()).isEqualTo(20);
assertThat(retrievedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);

// The test might trigger cluster autoscaling, which races against the update cluster calls in
// this test and causing the update cluster calls to fail with "FAILED_PRECONDITION: Cannot
// update cluster that is currently being modified" error.
// In order to avoid test flakiness due to this race condition, we wrap all the update cluster
// call with a retry loop.
// TODO: After we have a proper fix for the issue, remove the
// updateClusterAutoScalingConfigWithRetry function and all the calls to it.

Cluster updatedCluster =
client.updateClusterAutoscalingConfig(
updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig.of(newInstanceId, clusterId).setMaxNodes(3));
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(1);
assertThat(updatedCluster.getAutoscalingMaxServeNodes()).isEqualTo(3);
Expand All @@ -463,7 +477,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);

updatedCluster =
client.updateClusterAutoscalingConfig(
updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig.of(newInstanceId, clusterId).setMinNodes(2));
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(2);
assertThat(updatedCluster.getAutoscalingMaxServeNodes()).isEqualTo(3);
Expand All @@ -477,7 +491,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);

updatedCluster =
client.updateClusterAutoscalingConfig(
updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
.setCpuUtilizationTargetPercent(40));
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(2);
Expand All @@ -492,7 +506,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);

updatedCluster =
client.updateClusterAutoscalingConfig(
updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
.setCpuUtilizationTargetPercent(45)
.setMaxNodes(5));
Expand All @@ -508,7 +522,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);

updatedCluster =
client.updateClusterAutoscalingConfig(
updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
.setStorageUtilizationGibPerNode(2777));
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(2);
Expand All @@ -523,7 +537,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2777);

updatedCluster =
client.updateClusterAutoscalingConfig(
updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
// testing default case
.setStorageUtilizationGibPerNode(0));
Expand Down Expand Up @@ -614,4 +628,20 @@ private void basicClusterOperationTestHelper(String targetInstanceId, String tar
assertThat(updatedCluster.getAutoscalingCpuPercentageTarget()).isEqualTo(0);
assertThat(updatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(0);
}

private Cluster updateClusterAutoScalingConfigWithRetry(
ClusterAutoscalingConfig clusterAutoscalingConfig) throws Exception {
int retryCount = 0;
int maxRetries = 10;
while (true) {
try {
return client.updateClusterAutoscalingConfig(clusterAutoscalingConfig);
} catch (FailedPreconditionException e) {
if (++retryCount == maxRetries) throw e;
logger.log(
Level.INFO, "Retrying updateClusterAutoscalingConfig, retryCount: " + retryCount);
Thread.sleep(Duration.ofMinutes(1).toMillis());
}
}
}
}

0 comments on commit 49eaeb0

Please sign in to comment.