From 7f0955206323b79fa7d7c18620d93a2c4cf4c1e8 Mon Sep 17 00:00:00 2001
From: Ilya Maximets
Date: Wed, 27 Mar 2024 02:34:40 +0100
Subject: [PATCH] process-stats: Add support for graphs with aggregate
 resource usage.

We take all the stats and generate sum/mean/max/min graphs per process
type, as well as an extra graph for all OVN processes together and
another one for all OVS processes together.

The resulting HTML also contains the peak of the sum of RSS and the peak
of the sum of CPU usage for all OVN processes together.

Signed-off-by: Ilya Maximets
---
 do.sh                  |  10 +++-
 utils/process-stats.py | 116 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 112 insertions(+), 14 deletions(-)

diff --git a/do.sh b/do.sh
index ef2845ab..9d94e03a 100755
--- a/do.sh
+++ b/do.sh
@@ -406,13 +406,19 @@ function mine_data() {
     resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json \
                           | grep -E 'ovn-tester|ovn-central-az[0-2]-')
     python3 ${topdir}/utils/process-stats.py \
-        resource-usage-report-central.html ${resource_usage_logs}
+        -o resource-usage-report-central.html ${resource_usage_logs}
 
     # Collecting stats only for 3 workers to avoid bloating the report.
     resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json \
                           | grep ovn-scale | head -3)
     python3 ${topdir}/utils/process-stats.py \
-        resource-usage-report-worker.html ${resource_usage_logs}
+        -o resource-usage-report-worker.html ${resource_usage_logs}
+
+    # Preparing reports for aggregate resource usage.
+    resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json)
+    python3 ${topdir}/utils/process-stats.py --aggregate \
+        -o resource-usage-report-aggregate.html ${resource_usage_logs}
+
 
     deactivate
     popd
diff --git a/utils/process-stats.py b/utils/process-stats.py
index 40dbeb0e..5ffa4e51 100644
--- a/utils/process-stats.py
+++ b/utils/process-stats.py
@@ -1,45 +1,118 @@
+import argparse
 import json
+import logging
 import os
 import pandas as pd
 import plotly.express as px
 import sys
 
-from datetime import datetime
 from typing import Dict, List
 
+FORMAT = '%(asctime)s |%(levelname)s| %(message)s'
+logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=FORMAT)
+log = logging.getLogger(__name__)
+
 
 def read_file(filename: str) -> Dict:
     with open(filename, "r") as file:
         return json.load(file)
 
 
-def resource_stats_generate(filename: str, data: Dict) -> None:
+def aggregated(df: pd.DataFrame) -> (pd.DataFrame, int):
+    column_names = list(df.columns)
+    value_name = column_names[2]
+
+    log.info(f'Pivot and interpolate {value_name} ...')
+    df = df.pivot_table(
+        index='Time', columns='Process', values=value_name, aggfunc='mean'
+    ).interpolate(method='time', limit_direction='both')
+
+    result = pd.DataFrame(index=df.index)
+    processes = {p.split('|')[0] for p in df.columns}
+
+    log.info(f'Aggregating {value_name} ...')
+    for p in processes:
+        df_filtered = df.filter(regex='^' + p)
+        result[p + '|sum'] = df_filtered.sum(axis=1)
+        result[p + '|mean'] = df_filtered.mean(axis=1)
+        result[p + '|max'] = df_filtered.max(axis=1)
+        result[p + '|min'] = df_filtered.min(axis=1)
+
+    result['ovn|sum'] = df.filter(regex=r'^ovn.*\|ovn-(central|scale).*').sum(
+        axis=1
+    )
+    ovn_max = result['ovn|sum'].astype('int').max()
+
+    result['ovs|sum'] = df.filter(regex=r'^ovs.*\|ovn-(central|scale).*').sum(
+        axis=1
+    )
+
+    result = result.astype('int').reset_index().melt(id_vars=['Time'])
+    result.columns = column_names
+    result = result.sort_values(['Process', 'Time'])
+
+    return result, ovn_max
+
+
+def resource_stats_generate(
+    filename: str,
+    data: Dict, aggregate: bool
+) -> None:
     rss: List[List] = []
     cpu: List[List] = []
 
+    log.info('Preprocessing ...')
     for ts, time_slice in sorted(data.items()):
+        tme = pd.Timestamp.fromtimestamp(float(ts)).round('1s')
         for name, res in time_slice.items():
-            tme = datetime.fromtimestamp(float(ts))
             rss_mb = int(res['rss']) >> 20
             rss.append([tme, name, rss_mb])
             cpu.append([tme, name, float(res['cpu'])])
 
+    log.info('Creating DataFrame ...')
     df_rss = pd.DataFrame(rss, columns=['Time', 'Process', 'RSS (MB)'])
     df_cpu = pd.DataFrame(cpu, columns=['Time', 'Process', 'CPU (%)'])
 
+    if aggregate:
+        df_rss, max_sum_rss = aggregated(df_rss)
+        df_cpu, max_sum_cpu = aggregated(df_cpu)
+
+    log.info('Creating charts ...')
     rss_chart = px.line(
         df_rss,
         x='Time',
         y='RSS (MB)',
         color='Process',
-        title='Resident Set Size',
+        title=('Aggregate ' if aggregate else '') + 'Resident Set Size',
     )
     cpu_chart = px.line(
-        df_cpu, x='Time', y='CPU (%)', color='Process', title='CPU usage'
+        df_cpu,
+        x='Time',
+        y='CPU (%)',
+        color='Process',
+        title=('Aggregate ' if aggregate else '') + 'CPU usage',
     )
 
+    log.info(f'Writing HTML to {filename} ...')
     with open(filename, 'w') as report_file:
         report_file.write('<html>')
+        if aggregate:
+            report_file.write(
+                f'''
+                <table border=1 cellpadding=10>
+                    <tr>
+                        <td> Max(Sum(OVN RSS)) </td>
+                        <td> {max_sum_rss} MB </td>
+                    </tr>
+                    <tr>
+                        <td> Max(Sum(OVN CPU)) </td>
+                        <td> {max_sum_cpu} % </td>
+                    </tr>
+                </table>
+                '''
+            )
         report_file.write(
             rss_chart.to_html(
                 full_html=False,
@@ -60,17 +133,36 @@ def resource_stats_generate(filename: str, data: Dict) -> None:
 
 
 if __name__ == '__main__':
-    if len(sys.argv) < 3:
-        print(f'Usage: {sys.argv[0]} output-file input-file [input-file ...]')
-        sys.exit(1)
+    parser = argparse.ArgumentParser(
+        description='Generate resource usage charts.'
+    )
+    parser.add_argument(
+        '--aggregate', action='store_true', help='generate aggregate charts'
+    )
+    parser.add_argument(
+        '-o', '--output', required=True, help='file to write an HTML result'
+    )
+    parser.add_argument(
+        'input_files',
+        metavar='input-file',
+        type=str,
+        nargs='+',
+        help='JSON file with recorded process statistics',
+    )
 
-    if os.path.isfile(sys.argv[1]):
-        print(f'Output file {sys.argv[1]} already exists')
+    args = parser.parse_args()
+
+    if os.path.isfile(args.output):
+        log.fatal(f'Output file {args.output} already exists')
         sys.exit(2)
 
+    log.info(f'Processing stats from {len(args.input_files)} files.')
+
+    log.info('Reading ...')
     data: Dict = {}
-    for f in sys.argv[2:]:
+    for f in args.input_files:
         d = read_file(f)
         data.update(d)
 
-    resource_stats_generate(sys.argv[1], data)
+    resource_stats_generate(args.output, data, args.aggregate)
+    log.info('Done.')
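
A minimal standalone sketch, not part of the patch, of what the new aggregated() helper does on made-up sample data: the process names, timestamps, and values below are hypothetical, while the pandas calls mirror the pivot/interpolate/sum steps added above.

# Standalone illustration only -- not part of the patch.  Sample process
# names and values are made up; the real ones come from process-stats.json.
import pandas as pd

t0 = pd.Timestamp('2024-03-27 02:00:00')
rows = [
    # Time, Process, RSS (MB)
    [t0, 'ovn-northd|ovn-central-az0-1', 100],
    [t0, 'ovn-northd|ovn-central-az1-1', 120],
    [t0 + pd.Timedelta(seconds=10), 'ovn-northd|ovn-central-az0-1', 110],
    # No az1 sample at +10s: interpolation fills the gap so the sum
    # does not artificially dip at that timestamp.
    [t0 + pd.Timedelta(seconds=20), 'ovn-northd|ovn-central-az0-1', 130],
    [t0 + pd.Timedelta(seconds=20), 'ovn-northd|ovn-central-az1-1', 160],
]
df = pd.DataFrame(rows, columns=['Time', 'Process', 'RSS (MB)'])

# One column per process, indexed by time, with gaps interpolated --
# the same shape aggregated() builds before summing.
wide = df.pivot_table(
    index='Time', columns='Process', values='RSS (MB)', aggfunc='mean'
).interpolate(method='time', limit_direction='both')

# Per-process-type aggregates, like the patch's loop over process types.
northd = wide.filter(regex='^ovn-northd')
agg = pd.DataFrame(index=wide.index)
agg['ovn-northd|sum'] = northd.sum(axis=1)
agg['ovn-northd|max'] = northd.max(axis=1)

print(agg)
# Peak of the summed RSS, i.e. the kind of number reported as
# "Max(Sum(OVN RSS))" in the aggregate HTML report.
print('peak sum:', int(agg['ovn-northd|sum'].max()), 'MB')

The interpolation step is what keeps the aggregate lines honest: without it, a process type whose samples are not perfectly aligned in time would show spurious dips in the summed RSS and CPU curves.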