From e45a0fa3e4d45a41ef3c7a5344e65d8da9cdc151 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 10 Jan 2024 09:36:34 -0800 Subject: [PATCH] Use the CPU utilization of the hottest cluster rather than the average of the whole instance. PiperOrigin-RevId: 597274776 --- perfkitbenchmarker/providers/gcp/gcp_bigtable.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/perfkitbenchmarker/providers/gcp/gcp_bigtable.py b/perfkitbenchmarker/providers/gcp/gcp_bigtable.py index 05798bef4d..a3e5903cfc 100644 --- a/perfkitbenchmarker/providers/gcp/gcp_bigtable.py +++ b/perfkitbenchmarker/providers/gcp/gcp_bigtable.py @@ -495,7 +495,7 @@ def _Restore(self) -> None: def GetAverageCpuUsage( self, duration_minutes: int, end_time: datetime.datetime ) -> float: - """Gets the average CPU usage for the cluster. + """Gets the average CPU usage for the instance. Note that there is a delay for the API to get data, so this returns the average CPU usage in the period ending at `end_time` with missing data @@ -507,7 +507,8 @@ def GetAverageCpuUsage( end_time: The ending timestamp of the workload. Returns: - The average CPU usage during the time period. + The average CPU usage during the time period. In the case of multiple + clusters, this returns the average CPU of the hottest cluster. """ if duration_minutes * 60 <= CPU_API_DELAY_SECONDS: raise ValueError( @@ -530,7 +531,7 @@ def GetAverageCpuUsage( cpu_query = cpu_query.select_resources(instance=self.name) time_series = list(cpu_query) - instance_total_utilization = 0.0 + instance_utilization_by_cluster = [] for cluster_time_series in time_series: cluster_total_utilization = 0.0 cluster_name = cluster_time_series.resource.labels['cluster'] @@ -550,10 +551,13 @@ def GetAverageCpuUsage( cluster_name, cluster_average_utilization, ) - instance_total_utilization += cluster_average_utilization + instance_utilization_by_cluster.append(cluster_average_utilization) - average_utilization = instance_total_utilization / len(time_series) - logging.info('Instance average CPU utilization: %s', average_utilization) + average_utilization = max(instance_utilization_by_cluster) + logging.info( + 'Instance average CPU utilization (hottest cluster): %s', + average_utilization, + ) return average_utilization def CalculateTheoreticalMaxThroughput(