Merge pull request #198 from igsilya/aggregate-resource-usage
Add aggregate resource usage graphs
dceara authored Apr 2, 2024
2 parents f70fc1a + 7f09552 commit b64185e
Showing 3 changed files with 121 additions and 16 deletions.
10 changes: 8 additions & 2 deletions do.sh
@@ -406,13 +406,19 @@ function mine_data() {
     resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json \
                           | grep -E 'ovn-tester|ovn-central-az[0-2]-')
     python3 ${topdir}/utils/process-stats.py \
-        resource-usage-report-central.html ${resource_usage_logs}
+        -o resource-usage-report-central.html ${resource_usage_logs}
 
     # Collecting stats only for 3 workers to avoid bloating the report.
     resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json \
                           | grep ovn-scale | head -3)
     python3 ${topdir}/utils/process-stats.py \
-        resource-usage-report-worker.html ${resource_usage_logs}
+        -o resource-usage-report-worker.html ${resource_usage_logs}
+
+    # Preparing reports for aggregate resource usage.
+    resource_usage_logs=$(find ${out_dir}/logs -name process-stats.json)
+    python3 ${topdir}/utils/process-stats.py --aggregate \
+        -o resource-usage-report-aggregate.html ${resource_usage_logs}
+
     deactivate
 
     popd
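For context, each process-stats.json consumed above is produced by process-monitor.py: a JSON object mapping a sample timestamp to per-process readings, with rss in bytes and cpu as a percentage. A hand-built example of that shape (values are made up), inferred from how resource_stats_generate() reads it:

import json

# Inferred shape: {timestamp: {process-label: {'rss': bytes, 'cpu': percent}}}.
# The labels follow the 'name|suffix|pid' convention introduced in
# process-monitor.py below.
sample = {
    "1712051000.0": {
        "ovn-northd|ovn-central-az0|11": {"rss": 104857600, "cpu": 12.5},
        "ovn-controller|ovn-scale-1|13": {"rss": 52428800, "cpu": 3.0},
    },
    "1712051001.0": {
        "ovn-northd|ovn-central-az0|11": {"rss": 106954752, "cpu": 14.0},
    },
}

with open('process-stats.json', 'w') as f:
    json.dump(sample, f)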
11 changes: 9 additions & 2 deletions ovn-fake-multinode-utils/process-monitor.py
@@ -22,7 +22,7 @@ def monitor(suffix: str, out_file: str, exit_file: str) -> None:
    for p in psutil.process_iter():
        if any(name in p.name() for name in process_names):
            processes.add(p)
-        elif any(
+        elif p.name() != 'monitor' and any(
            name in part
            for part in p.cmdline()
            for name in process_names
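The guard added above (p.name() != 'monitor') is presumably needed because the monitor's own command line contains the very names it searches for, so the cmdline branch would otherwise pick up the monitor itself. A standalone sketch of the selection logic, assuming psutil is available and using a hypothetical process_names list:

import psutil

# Hypothetical match list; the real one is defined elsewhere in
# process-monitor.py.
process_names = ['ovn-northd', 'ovsdb-server', 'ovn-controller']

processes = set()
for p in psutil.process_iter():
    try:
        if any(name in p.name() for name in process_names):
            # Matched by executable name.
            processes.add(p)
        elif p.name() != 'monitor' and any(
            name in part
            for part in p.cmdline()
            for name in process_names
        ):
            # Matched by command-line arguments; the extra check keeps
            # the monitor (whose argv mentions these names) out.
            processes.add(p)
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        # The process exited, or we may not inspect it.
        pass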
@@ -36,7 +36,14 @@ def monitor(suffix: str, out_file: str, exit_file: str) -> None:
        tme = time.time()
        for p in processes:
            try:
-                name = p.name() + "-" + suffix + "-" + str(p.pid)
+                name = p.name()
+                for arg in p.cmdline():
+                    if arg.endswith('.pid') or arg.endswith('.py'):
+                        name = arg.split('/')[-1].split('.')[0]
+                        break
+
+                name = name + "|" + suffix + "|" + str(p.pid)
+
                # cpu_percent(seconds) call will block
                # for the amount of seconds specified.
                cpu = p.cpu_percent(0.5)
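Several monitored daemons share one executable name (multiple ovsdb-server instances, several Python scripts), so the hunk above derives a more specific label from a .pid or .py command-line argument and switches the separator to '|', which the aggregation code in process-stats.py later splits on. The same transformation as a pure function, exercised with a hypothetical ovsdb-server command line:

def label(proc_name, cmdline, suffix, pid):
    # Prefer the basename of a .pid or .py argument, e.g.
    # '/var/run/ovn/ovnnb_db.pid' -> 'ovnnb_db'.
    name = proc_name
    for arg in cmdline:
        if arg.endswith('.pid') or arg.endswith('.py'):
            name = arg.split('/')[-1].split('.')[0]
            break
    return name + "|" + suffix + "|" + str(pid)

# Hypothetical example; prints 'ovnnb_db|ovn-central-az0|1234'.
print(label('ovsdb-server',
            ['ovsdb-server', '--pidfile=/var/run/ovn/ovnnb_db.pid'],
            'ovn-central-az0', 1234))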
116 changes: 104 additions & 12 deletions utils/process-stats.py
@@ -1,45 +1,118 @@
+import argparse
 import json
+import logging
 import os
 import pandas as pd
 import plotly.express as px
 import sys
 
-from datetime import datetime
 from typing import Dict, List
 
 
+FORMAT = '%(asctime)s |%(levelname)s| %(message)s'
+logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=FORMAT)
+log = logging.getLogger(__name__)
+
+
 def read_file(filename: str) -> Dict:
     with open(filename, "r") as file:
         return json.load(file)
 
 
-def resource_stats_generate(filename: str, data: Dict) -> None:
+def aggregated(df: pd.DataFrame) -> (pd.DataFrame, int):
+    column_names = list(df.columns)
+    value_name = column_names[2]
+
+    log.info(f'Pivot and interpolate {value_name} ...')
+    df = df.pivot_table(
+        index='Time', columns='Process', values=value_name, aggfunc='mean'
+    ).interpolate(method='time', limit_direction='both')
+
+    result = pd.DataFrame(index=df.index)
+    processes = {p.split('|')[0] for p in df.columns}
+
+    log.info(f'Aggregating {value_name} ...')
+    for p in processes:
+        df_filtered = df.filter(regex='^' + p)
+        result[p + '|sum'] = df_filtered.sum(axis=1)
+        result[p + '|mean'] = df_filtered.mean(axis=1)
+        result[p + '|max'] = df_filtered.max(axis=1)
+        result[p + '|min'] = df_filtered.min(axis=1)
+
+    result['ovn|sum'] = df.filter(regex=r'^ovn.*\|ovn-(central|scale).*').sum(
+        axis=1
+    )
+    ovn_max = result['ovn|sum'].astype('int').max()
+
+    result['ovs|sum'] = df.filter(regex=r'^ovs.*\|ovn-(central|scale).*').sum(
+        axis=1
+    )
+
+    result = result.astype('int').reset_index().melt(id_vars=['Time'])
+    result.columns = column_names
+    result = result.sort_values(['Process', 'Time'])
+
+    return result, ovn_max
+
+
+def resource_stats_generate(
+    filename: str, data: Dict, aggregate: bool
+) -> None:
     rss: List[List] = []
     cpu: List[List] = []
 
+    log.info('Preprocessing ...')
     for ts, time_slice in sorted(data.items()):
+        tme = pd.Timestamp.fromtimestamp(float(ts)).round('1s')
         for name, res in time_slice.items():
-            tme = datetime.fromtimestamp(float(ts))
             rss_mb = int(res['rss']) >> 20
             rss.append([tme, name, rss_mb])
             cpu.append([tme, name, float(res['cpu'])])
 
+    log.info('Creating DataFrame ...')
     df_rss = pd.DataFrame(rss, columns=['Time', 'Process', 'RSS (MB)'])
     df_cpu = pd.DataFrame(cpu, columns=['Time', 'Process', 'CPU (%)'])
 
+    if aggregate:
+        df_rss, max_sum_rss = aggregated(df_rss)
+        df_cpu, max_sum_cpu = aggregated(df_cpu)
+
+    log.info('Creating charts ...')
     rss_chart = px.line(
         df_rss,
         x='Time',
         y='RSS (MB)',
         color='Process',
-        title='Resident Set Size',
+        title=('Aggregate ' if aggregate else '') + 'Resident Set Size',
     )
     cpu_chart = px.line(
-        df_cpu, x='Time', y='CPU (%)', color='Process', title='CPU usage'
+        df_cpu,
+        x='Time',
+        y='CPU (%)',
+        color='Process',
+        title=('Aggregate ' if aggregate else '') + 'CPU usage',
     )
 
+    log.info(f'Writing HTML to {filename} ...')
     with open(filename, 'w') as report_file:
         report_file.write('<html>')
+        if aggregate:
+            report_file.write(
+                f'''
+                <table border="1" class="dataframe">
+                  <tbody>
+                    <tr>
+                      <td>Max(Sum(OVN RSS))</td>
+                      <td> {max_sum_rss} MB</td>
+                    </tr>
+                    <tr>
+                      <td>Max(Sum(OVN CPU))</td>
+                      <td> {max_sum_cpu} %</td>
+                    </tr>
+                  </tbody>
+                </table>
+                '''
+            )
         report_file.write(
             rss_chart.to_html(
                 full_html=False,
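The core of aggregated() above is the pivot-then-interpolate step: long-format samples become one column per 'name|suffix|pid' label, and time-based interpolation fills the gaps so that readings taken a second or two apart on different hosts line up and row-wise sums are meaningful. A toy run of that pipeline with invented numbers:

import pandas as pd

rows = [
    [pd.Timestamp('2024-04-02 10:00:00'), 'ovn-northd|ovn-central-az0|11', 100],
    [pd.Timestamp('2024-04-02 10:00:01'), 'ovn-northd|ovn-central-az1|12', 120],
    [pd.Timestamp('2024-04-02 10:00:01'), 'ovn-controller|ovn-scale-1|13', 30],
]
df = pd.DataFrame(rows, columns=['Time', 'Process', 'RSS (MB)'])

# One column per process instance, indexed by timestamp.
wide = df.pivot_table(index='Time', columns='Process',
                      values='RSS (MB)', aggfunc='mean')
# Fill the missing samples; limit_direction='both' also fills the
# leading and trailing gaps.
wide = wide.interpolate(method='time', limit_direction='both')

# Sum across every instance sharing a base name, as aggregated() does.
print(wide.filter(regex='^ovn-northd').sum(axis=1))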
@@ -60,17 +60,36 @@ def resource_stats_generate(filename: str, data: Dict) -> None:
 
 
 if __name__ == '__main__':
-    if len(sys.argv) < 3:
-        print(f'Usage: {sys.argv[0]} output-file input-file [input-file ...]')
-        sys.exit(1)
+    parser = argparse.ArgumentParser(
+        description='Generate resource usage charts.'
+    )
+    parser.add_argument(
+        '--aggregate', action='store_true', help='generate aggregate charts'
+    )
+    parser.add_argument(
+        '-o', '--output', required=True, help='file to write an HTML result'
+    )
+    parser.add_argument(
+        'input_files',
+        metavar='input-file',
+        type=str,
+        nargs='+',
+        help='JSON file with recorded process statistics',
+    )
 
-    if os.path.isfile(sys.argv[1]):
-        print(f'Output file {sys.argv[1]} already exists')
+    args = parser.parse_args()
+
+    if os.path.isfile(args.output):
+        log.fatal(f'Output file {args.output} already exists')
         sys.exit(2)
 
+    log.info(f'Processing stats from {len(args.input_files)} files.')
+
+    log.info('Reading ...')
     data: Dict = {}
-    for f in sys.argv[2:]:
+    for f in args.input_files:
         d = read_file(f)
         data.update(d)
 
-    resource_stats_generate(sys.argv[1], data)
+    resource_stats_generate(args.output, data, args.aggregate)
+    log.info('Done.')
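With argparse in place, the output file is passed with -o and --aggregate selects the new mode, matching the do.sh changes above. A sketch of the aggregate invocation via subprocess (all paths are hypothetical):

import subprocess

subprocess.run(
    [
        'python3', 'utils/process-stats.py',
        '--aggregate',
        '-o', 'resource-usage-report-aggregate.html',
        'logs/ovn-central-az0/process-stats.json',
        'logs/ovn-scale-1/process-stats.json',
    ],
    check=True,
)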
