Skip to content

Commit

Permalink
Add load stage optimization to Bigtable YCSB benchmark.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 564804580
  • Loading branch information
bvliu authored and copybara-github committed Sep 12, 2023
1 parent 119f54a commit 385a19b
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 203 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -357,19 +357,6 @@ def _GetYcsbExecutor(
return ycsb.YCSBExecutor(FLAGS.hbase_binding, **executor_flags)


def _LoadDatabase(executor: ycsb.YCSBExecutor,
                  bigtable: gcp_bigtable.GcpBigtableInstance,
                  vms: list[virtual_machine.VirtualMachine],
                  load_kwargs: dict[str, Any]) -> list[sample.Sample]:
  """Runs the YCSB load phase against the given Bigtable instance.

  The instance is resized to its configured load-phase capacity before
  loading and restored to run-phase capacity afterwards. The load is
  skipped entirely (returning no samples) when the instance was restored
  from a backup or the user asked to skip the load stage.

  Args:
    executor: YCSB executor that performs the load.
    bigtable: Instance to load; also controls capacity scaling.
    vms: Client VMs that run the YCSB workload.
    load_kwargs: Keyword arguments forwarded to executor.Load.

  Returns:
    Samples produced by the load phase, or [] when the load is skipped.
  """
  skip_load = bigtable.restored or ycsb.SKIP_LOAD_STAGE.value
  if skip_load:
    return []
  bigtable.UpdateCapacityForLoad()
  load_samples = list(executor.Load(vms, load_kwargs=load_kwargs))
  bigtable.UpdateCapacityForRun()
  return load_samples


def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> List[sample.Sample]:
"""Spawn YCSB and gather the results.
Expand All @@ -391,7 +378,8 @@ def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> List[sample.Sample]:
load_kwargs = _GenerateLoadKwargs(instance)
run_kwargs = _GenerateRunKwargs(instance)
executor: ycsb.YCSBExecutor = _GetYcsbExecutor(vms)
samples += _LoadDatabase(executor, instance, vms, load_kwargs)
if not instance.restored:
samples += list(executor.Load(vms, load_kwargs=load_kwargs))
samples += list(executor.Run(vms, run_kwargs=run_kwargs))

# Optionally add new samples for cluster cpu utilization.
Expand Down
30 changes: 0 additions & 30 deletions perfkitbenchmarker/providers/gcp/gcp_bigtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import json
import logging
import time
from typing import Any, Dict, List, Optional

from absl import flags
Expand Down Expand Up @@ -53,13 +52,6 @@
'Ignored if --bigtable_autoscaling_min_nodes is set.'
),
)
# Optional node-count override used only during the YCSB load phase; the
# run phase resets to the regular node count (see UpdateCapacityForRun).
_LOAD_NODES = flags.DEFINE_integer(
    'bigtable_load_node_count',
    None,
    'The number of nodes for the Bigtable instance to use for the load'
    ' phase. Assumes that the benchmark calls UpdateCapacityForRun to set'
    ' the correct node count manually before the run phase.',
)
# Lower bound for Bigtable autoscaling; None leaves autoscaling unset.
_AUTOSCALING_MIN_NODES = flags.DEFINE_integer(
    'bigtable_autoscaling_min_nodes', None,
    'Minimum number of nodes for autoscaling.')
Expand Down Expand Up @@ -97,7 +89,6 @@ class BigtableSpec(non_relational_db.BaseNonRelationalDbSpec):
zone: str
project: str
node_count: int
load_node_count: int
storage_type: str
replication_cluster: bool
replication_cluster_zone: str
Expand All @@ -119,7 +110,6 @@ def _GetOptionDecoderConstructions(cls):
'zone': (option_decoders.StringDecoder, none_ok),
'project': (option_decoders.StringDecoder, none_ok),
'node_count': (option_decoders.IntDecoder, none_ok),
'load_node_count': (option_decoders.IntDecoder, none_ok),
'storage_type': (option_decoders.StringDecoder, none_ok),
'replication_cluster': (option_decoders.BooleanDecoder, none_ok),
'replication_cluster_zone': (option_decoders.StringDecoder, none_ok),
Expand Down Expand Up @@ -157,7 +147,6 @@ def _ApplyFlags(cls, config_values, flag_values) -> None:
'google_bigtable_zone': 'zone',
'bigtable_storage_type': 'storage_type',
'bigtable_node_count': 'node_count',
'bigtable_load_node_count': 'load_node_count',
'bigtable_replication_cluster': 'replication_cluster',
'bigtable_replication_cluster_zone': 'replication_cluster_zone',
'bigtable_multicluster_routing': 'multicluster_routing',
Expand Down Expand Up @@ -206,7 +195,6 @@ def __init__(self,
project: Optional[str],
zone: Optional[str],
node_count: Optional[int],
load_node_count: Optional[int],
storage_type: Optional[str],
replication_cluster: Optional[bool],
replication_cluster_zone: Optional[str],
Expand All @@ -222,7 +210,6 @@ def __init__(self,
self.zone: str = zone or FLAGS.google_bigtable_zone
self.project: str = project or FLAGS.project or util.GetDefaultProject()
self.node_count: int = node_count or _DEFAULT_NODE_COUNT
self._load_node_count = load_node_count or self.node_count
self.storage_type: str = storage_type or _DEFAULT_STORAGE_TYPE
self.replication_cluster: bool = replication_cluster or False
self.replication_cluster_zone: Optional[str] = (
Expand All @@ -240,7 +227,6 @@ def FromSpec(cls, spec: BigtableSpec) -> 'GcpBigtableInstance':
zone=spec.zone,
project=spec.project,
node_count=spec.node_count,
load_node_count=spec.load_node_count,
storage_type=spec.storage_type,
replication_cluster=spec.replication_cluster,
replication_cluster_zone=spec.replication_cluster_zone,
Expand Down Expand Up @@ -401,29 +387,13 @@ def _GetClusters(self) -> List[Dict[str, Any]]:
return json.loads(stdout)

def _UpdateNodes(self, nodes: int) -> None:
  """Updates clusters to the specified node count and waits until ready.

  Issues one `gcloud bigtable clusters update` per cluster whose current
  serve-node count differs from the target, then polls until the instance
  reports ready again.

  Args:
    nodes: Target serve-node count. A falsy value (0/None) falls back to
      self.node_count via the `nodes or self.node_count` expression below.
  """
  clusters = self._GetClusters()
  for i in range(len(clusters)):
    # Do nothing if the node count is already equal to what we want.
    if clusters[i]['serveNodes'] == nodes:
      continue
    # Cluster names appear to follow an '<instance>-<index>' convention —
    # NOTE(review): confirm this matches the cluster-creation path.
    cmd = _GetBigtableGcloudCommand(self, 'bigtable', 'clusters', 'update',
                                    f'{self.name}-{i}')
    cmd.flags['instance'] = self.name
    cmd.flags['num-nodes'] = nodes or self.node_count
    cmd.Issue()
  # Note that Exists is implemented like IsReady, but should likely be
  # refactored.
  while not self._Exists():
    time.sleep(10)

def UpdateCapacityForLoad(self) -> None:
  """See base class."""
  # Scale to the node count configured for the load phase.
  target_nodes = self._load_node_count
  self._UpdateNodes(target_nodes)

def UpdateCapacityForRun(self) -> None:
  """See base class."""
  # Restore the regular (run-phase) node count.
  target_nodes = self.node_count
  self._UpdateNodes(target_nodes)

def _Freeze(self) -> None:
self._UpdateNodes(_FROZEN_NODE_COUNT)
Expand Down
94 changes: 0 additions & 94 deletions tests/linux_benchmarks/cloud_bigtable_ycsb_benchmark_test.py

This file was deleted.

66 changes: 1 addition & 65 deletions tests/providers/gcp/gcp_bigtable_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
from absl import flags
from absl.testing import flagsaver
import mock

from perfkitbenchmarker import errors
from perfkitbenchmarker import vm_util
from perfkitbenchmarker.providers.gcp import gcp_bigtable
from perfkitbenchmarker.providers.gcp import util
from tests import matchers
from tests import pkb_common_test_case
import requests

Expand All @@ -40,23 +39,6 @@
}}
"""

# Canned _GetClusters() response used by the _UpdateNodes tests below:
# two READY SSD clusters in the pkb-test project, each serving 3 nodes.
_TEST_LIST_CLUSTERS_OUTPUT = [
    {
        'defaultStorageType': 'SSD',
        'location': 'projects/pkb-test/locations/us-west1-a',
        'name': 'projects/pkb-test/instances/pkb-bigtable-730b1e6b/clusters/pkb-bigtable-730b1e6b-1',
        'serveNodes': 3,
        'state': 'READY',
    },
    {
        'defaultStorageType': 'SSD',
        'location': 'projects/pkb-test/locations/us-west1-c',
        'name': 'projects/pkb-test/instances/pkb-bigtable-730b1e6b/clusters/pkb-bigtable-730b1e6b-0',
        'serveNodes': 3,
        'state': 'READY',
    },
]


OUT_OF_QUOTA_STDERR = """
ERROR: (gcloud.beta.bigtable.instances.create) Operation successfully rolled
Expand Down Expand Up @@ -300,52 +282,6 @@ def testBigtableGcloudCommand(self):
self.assertEqual(cmd.flags['project'], PROJECT)
self.assertEmpty(cmd.flags['zone'])

def testSetNodes(self):
  """_UpdateNodes issues one resize command per cluster needing a change."""
  instance = GetTestBigtableInstance()
  self.enter_context(
      mock.patch.object(
          instance, '_GetClusters', return_value=_TEST_LIST_CLUSTERS_OUTPUT
      )
  )
  self.enter_context(
      mock.patch.object(instance, '_Exists', return_value=True)
  )
  issue_command = self.enter_context(
      mock.patch.object(vm_util, 'IssueCommand', return_value=[None, None, 0])
  )

  instance._UpdateNodes(6)

  # Both canned clusters serve 3 nodes, so each gets an update to 6.
  expected_calls = [
      mock.call(matchers.HASALLOF('--num-nodes', '6')),
      mock.call(matchers.HASALLOF('--num-nodes', '6')),
  ]
  self.assertSequenceEqual(expected_calls, issue_command.mock_calls)

def testSetNodesSkipsIfCountAlreadyCorrect(self):
  """_UpdateNodes issues no commands when clusters already match."""
  instance = GetTestBigtableInstance()
  self.enter_context(
      mock.patch.object(
          instance, '_GetClusters', return_value=_TEST_LIST_CLUSTERS_OUTPUT
      )
  )
  self.enter_context(
      mock.patch.object(instance, '_Exists', return_value=True)
  )
  issue_command = self.enter_context(
      mock.patch.object(vm_util, 'IssueCommand', return_value=[None, None, 0])
  )

  # Both canned clusters already serve 3 nodes.
  instance._UpdateNodes(3)

  issue_command.assert_not_called()


if __name__ == '__main__':
unittest.main()

0 comments on commit 385a19b

Please sign in to comment.