From e45a0fa3e4d45a41ef3c7a5344e65d8da9cdc151 Mon Sep 17 00:00:00 2001
From: Brandon Liu <liubrandon@google.com>
Date: Wed, 10 Jan 2024 09:36:34 -0800
Subject: [PATCH] Use the CPU utilization of the hottest cluster rather than
 the average of the whole instance.

PiperOrigin-RevId: 597274776
---
 perfkitbenchmarker/providers/gcp/gcp_bigtable.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/perfkitbenchmarker/providers/gcp/gcp_bigtable.py b/perfkitbenchmarker/providers/gcp/gcp_bigtable.py
index 05798bef4d..a3e5903cfc 100644
--- a/perfkitbenchmarker/providers/gcp/gcp_bigtable.py
+++ b/perfkitbenchmarker/providers/gcp/gcp_bigtable.py
@@ -495,7 +495,7 @@ def _Restore(self) -> None:
   def GetAverageCpuUsage(
       self, duration_minutes: int, end_time: datetime.datetime
   ) -> float:
-    """Gets the average CPU usage for the cluster.
+    """Gets the average CPU usage of the instance's hottest cluster.
 
     Note that there is a delay for the API to get data, so this returns the
     average CPU usage in the period ending at `end_time` with missing data
@@ -507,7 +507,8 @@ def GetAverageCpuUsage(
       end_time: The ending timestamp of the workload.
 
     Returns:
-      The average CPU usage during the time period.
+      The average CPU usage during the time period. In the case of multiple
+      clusters, this returns the average CPU of the hottest cluster.
     """
     if duration_minutes * 60 <= CPU_API_DELAY_SECONDS:
       raise ValueError(
@@ -530,7 +531,7 @@ def GetAverageCpuUsage(
     cpu_query = cpu_query.select_resources(instance=self.name)
     time_series = list(cpu_query)
 
-    instance_total_utilization = 0.0
+    instance_utilization_by_cluster = []
     for cluster_time_series in time_series:
       cluster_total_utilization = 0.0
       cluster_name = cluster_time_series.resource.labels['cluster']
@@ -550,10 +551,13 @@ def GetAverageCpuUsage(
           cluster_name,
           cluster_average_utilization,
       )
-      instance_total_utilization += cluster_average_utilization
+      instance_utilization_by_cluster.append(cluster_average_utilization)
 
-    average_utilization = instance_total_utilization / len(time_series)
-    logging.info('Instance average CPU utilization: %s', average_utilization)
+    average_utilization = max(instance_utilization_by_cluster)
+    logging.info(
+        'Instance average CPU utilization (hottest cluster): %s',
+        average_utilization,
+    )
     return average_utilization
 
   def CalculateTheoreticalMaxThroughput(