merge.py
#!/usr/bin/env python3
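"""Merge traces (and/or fix timestamps) into a single Chrome JSON trace file.

Takes a guest uftrace trace and optionally merges in a host uftrace trace (-m),
a trace-cmd recording of kvm events (-k) and perf stat counter samples (-p),
aligning everything via the guest <-> host TSC offset. The resulting JSON can
be opened in chrome://tracing or the Perfetto UI.
"""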
import json, struct, shutil, tempfile, subprocess, re, sys, argparse
from pathlib import Path
verbose = True

def log(val):
    if verbose:
        print(val)

UFTRACE_PATH = "uftrace"
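
# The phase codes ('ph') used below follow the Chrome Trace Event Format:
#   B/E = begin/end of a duration, X = complete event with an explicit 'dur',
#   i = instant event, C = counter sample.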

def parse_uftrace(uftracedir):
    """ parses uftrace trace to get chrome json file """
    print(f"Converting traces from {uftracedir}")
    uftrace_cmd = [UFTRACE_PATH, 'dump', "--chrome"]
    js = subprocess.check_output(uftrace_cmd, cwd=uftracedir)
    trace = json.loads(js)
    return trace

def parse_perf_trace(perf_trace_file):
    """ parses a perf-recording of kvm-events
    perf CLOCK IS MISALIGNED. use trace-cmd instead!
    # enable tracing access for ALL users (else perf needs sudo)
    sudo sysctl kernel.perf_event_paranoid=-1
    # record all kvm events
    perf record -e 'kvm:*' -a sleep 1h
    perf script -F trace:time,event --ns
    """
    perf_cmd = ['perf', 'script', '-F', 'trace:time,event', '--ns', '-i', perf_trace_file]
    events = subprocess.check_output(perf_cmd).split(b"\n")
    print(f"Parsing {len(events)} KVM events")
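    # the slicing below assumes perf's fixed-width output: the first 15 characters are a
    # "seconds.nanoseconds" timestamp, the rest is the event name (trailing chars stripped).
    # removing the dot turns the timestamp into ns; /1000 gives the us expected by the chrome format.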
    out = [{'pid': 1,
            'ts': int(e[:15].replace(b".", b"")) / 1000,
            'ph': 'i',
            'name': e[16:-2].strip().decode()}
           for e in events if e]
    return out

def parse_tracecmd_trace(trace_cmd_trace_file):
    """ parses a trace-cmd recording of kvm-events
    sudo trace-cmd record -e 'kvm:*' -C x86-tsc
    """
    print(f"Parsing kvm traces from {trace_cmd_trace_file}")
    # the full kvm trace can easily be a >1 GB file; even with sed as a filter we get ~4 GB of output.
    trace_cmd = ['trace-cmd', 'report', '-q', '-i', trace_cmd_trace_file]
    trace = subprocess.Popen(trace_cmd, stdout=subprocess.PIPE)
    # align so that we get a csv of time,func. sed is faster and more memory-friendly than python here
    # <...>-105662 [002] 117471684343752: kvm_update_master_clock: masterclock 0 hostclock tsc offsetmatched 0
    # qemu-system-x86-20667 [010] 15813659115036: kvm_exit: reason EXIT_IOIO rip 0xec08e info cf80140 ec08f
    sed = ['sed', '-E', r's/.*\[[0-9][0-9][0-9]] ([^ ]*): ([^ ]*):.*/\1,\2/']
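    # e.g. the kvm_exit sample line above becomes b"15813659115036,kvm_exit"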
    events = subprocess.check_output(sed, stdin=trace.stdout).split(b"\n")
    trace.wait()
    log("events like:")
    log(events[:10])
    # cut off the header: skip ahead to the first real kvm event
    start_idx = next(i for i, e in enumerate(events) if b"kvm_" in e)
    log('events look like:')
    log(events[start_idx])
    print(f"Parsing {len(events)} KVM events")
    out = []
    in_kvm = True  # first event will be a "begin" function event, so we don't get broken frames
    for e in events[start_idx:]:
        pts = e.strip().split(b",")
        if len(pts) < 2:
            continue
        #name = pts[3][:-1].decode()
        #ts = int(pts[2][:-1])
        ts = int(pts[0]) / 1000.0
        name = pts[1].decode()
        # make kvm exit and entry special, so we see how long kvm is exited. all others get 300ns duration bars
        if name == "kvm_exit" and in_kvm:
            tp = "B"  # entry to kvm-host
            name = "kvm exited"
            in_kvm = False
        elif name == "kvm_entry" and not in_kvm:
            tp = "E"  # exit from kvm-host
            name = "kvm exited"
            in_kvm = True
        else:
            tp = "X"  # generic kvm event.
        out.append({
            'pid': 77,
            'tid': 77,
            'ts': ts,
            'ph': tp,  # i = instant event, too small to see.. X = duration event
            'dur': 0.3,  # gets ignored if we are in the entry/exit case
            'name': name
        })
    return out

def get_offset():
    if args.offset == 'auto':
        print("Trying to autodetect offset..")
        # log line where tsc_offset is set
        last_tsc = None
        # if we have a kvm trace, try that for an offset
        if args.kvm:
            # trace-cmd report -q -i tracekvm.dat | grep kvm_write_tsc_offset
            trace_cmd = ['trace-cmd', 'report', '-q', '-i', args.kvm, '-F', 'kvm_write_tsc_offset']
            tscs = subprocess.check_output(trace_cmd).split(b"\n")
            if len(tscs) > 2:
                print("Using kvm trace as offset source")
                log(tscs)
                last_tsc = tscs[-2].decode()
        # try directly accessing the kernel trace buffer instead
        if not last_tsc:
            try:
                with open('/sys/kernel/debug/tracing/instances/tsc_offset/trace') as f:
                    trace = f.read().split("\n")
                    last_tsc = trace[-2]
            except Exception:
                print("Could not determine offset from kernel tracing. If you intended this, please set it up with `setup_kvm_tracing.sh`")
        # if either method succeeded in getting a tsc log line, parse the offset from it
        if last_tsc:
            r = re.search(r"kvm_write_tsc_offset.*next=(\d+)", last_tsc)
            if not r:
                print(f"Cannot parse correct offset from tracing: {last_tsc}!")
                sys.exit(-1)
            offset_raw = int(r.group(1))
        else:
            print("Could not determine offset from either source!")
            offset_raw = 0
        # reinterpret offset u64 as i64
        offset = struct.unpack('q', struct.pack('Q', offset_raw))[0]
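        # (the struct round-trip reinterprets the unsigned value as two's complement,
        #  e.g. 2**64 - 1 becomes -1, so a negative tsc offset is recovered correctly)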
        print(f"Determined offset as {offset_raw} == {offset}")
    else:
        offset = int(args.offset)
    return offset

def fixup_tids(trace, target_tid):
    # if an event has no tid, set it to target_tid
    for e in trace['traceEvents']:
        if "tid" not in e:
            e['tid'] = target_tid

def fixup_missing_starts(trace, fts, lts):
    # go through the trace backwards, keeping track of the current function stack.
    # every time we pass an exit we push onto the stack; on an entry we pop again.
    # we have separate stacks for each tid/pid pair!
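    # e.g. a trace that starts mid-function only contains the "E" event for that function and
    # gets a synthetic "B" at the first timestamp (fts); likewise, a trace that ends inside a
    # function gets a synthetic "E" at the last timestamp (lts).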
    new_events = []
    stacks = {}
    for e in trace['traceEvents'][::-1]:
        if e['ph'] not in ['E', 'B']:
            # only affects entry and exit events
            continue
        if 'tid' not in e:
            print("TID NOT FOUND!")
            print(e)
        key = (e['pid'], e['tid'])
        # get the stack, or create it if it does not exist yet
        stack = stacks.setdefault(key, [])
        if e['ph'] == "E":  # exit
            stack.append(e['name'])
        elif e['ph'] == "B":  # entry
            if len(stack) > 0:
                func = stack.pop()
                if e['name'] != func:
                    log(f"Callstack misaligned! expected {func} got")
                    log(e)
            else:
                log("Entered function which we never exited!")
                log(e)
                new_events.append({
                    'pid': e['pid'],
                    'tid': e['tid'],
                    'ts': lts,
                    'ph': "E",
                    'name': e['name']
                })
    for (pid, tid), names in stacks.items():
        for name in names:
            log(f"Exited function which we never entered: {name}!")
            new_events.append({
                'pid': pid,
                'tid': tid,
                'ts': fts,
                'ph': "B",
                'name': name
            })
    log(new_events)
    trace['traceEvents'] += new_events

# parses the counters output by perf with a command like
#   sudo perf stat -p $(pgrep -f "qemu-system.*iozone") -I 100 -e cycles:G,kvm:kvm_exit -x\#
# optionally: place "rdtsc 0000000" in the first line to get better offsets!
# the timestamps in the counter file are seconds since perf start (not tsc);
# the returned events are already converted to trace time (us), based on starttime.
def parse_perf_counters(filename, starttime, conversion_fac):
    # 15.127678049#114116639##cycles:G#107341775#100.00##
    # 15.127678049#31961##kvm:kvm_exit#107369492#100.00##
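    # fields (split on '#'): time since start, counter value, unit, event name,
    # counter run time, percentage of the time the counter was running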
    new_events = []
    with open(filename, "r") as f:
        # try to get a better rdtsc, if one is available in the counter file.
        head = f.readline()
        if "rdtsc" in head:
            # recompute the start time from the rdtsc value (tsc -> us), since the
            # perf counter timestamps are relative seconds since perf started
            starttime = int(head.split(" ")[1]) / 1000 * conversion_fac
            log(f"Updated perf start time from rdtsc-line to {starttime}")
            f.readline()  # also skip the next line, this is the usual file header
        else:
            print("No accurate timestamp alignment possible for perf counters! They will likely be offset by some tens of ms.")
        for line in f.readlines()[1:]:
            fields = line.strip().split('#')
            if "not counted" in fields[1]:
                # could not sample here? Just continue..
                continue
            time = float(fields[0])
            count = int(fields[1])
            name = fields[3]
            counter_runtime = int(fields[4])
            counter_percentage = float(fields[5])  # percentage of time the counter was actually running (perf multiplexes counters)
            # upscale count by counter_percentage
            count = count / (counter_percentage / 100.0)
            new_events.append({
                'pid': 1,
                'ts': starttime + time * 1000000,  # convert to us
                'ph': "C",
                'name': name,
                'args': {name: count},
            })
    #print(new_events)
    return new_events

def merge():
    # determine the timestamp offset of the guest vm
    offset = get_offset()
    # if a binary is specified, generate symbols for the trace
    if args.binary:
        print("Generating symbols with nm")
        nm_cmd = ['nm', '-n', args.binary]
        with open(f"{args.TRACE}/{args.binaryname}.sym", "w") as symbolfile:
            subprocess.run(nm_cmd, stdout=symbolfile)
    else:
        print("No binary specified, not generating any symbols!")
    hermit_trace = parse_uftrace(args.TRACE)
    if args.merge:
        merge_trace = parse_uftrace(args.merge)
        # uftrace might not include tid data, which will crash the tracy importer.
        # fix that up now
        fixup_tids(merge_trace, 1234)
    else:
        merge_trace = None
    if offset:
        print(f"Offsetting guest traces by {offset} counts")
        for event in hermit_trace["traceEvents"]:
            if event['ts'] > 0:
                event['ts'] -= offset / 1000
            else:
                print("Not offsetting:", event)
    # perf/kvm traces
    if perf_kvm_trace:
        kvm_events = parse_perf_trace(perf_kvm_trace)
    if args.kvm:
        kvm_events = parse_tracecmd_trace(args.kvm)
        # get the first and last timestamp of our trace-cmd trace. Not the best but seems to work.
        # needed later to fix some stuff up, like aligning perf stat samples.
        lts = kvm_events[-1]['ts']
        fts = kvm_events[0]['ts']
        log(f"Min/Max ts: {fts} {lts}")
    else:
        lts = None
        fts = None
    # merging
    print("Merging traces")
    out = hermit_trace
    if merge_trace:
        out['traceEvents'] += merge_trace['traceEvents']
    if args.kvm:
        out['traceEvents'] += kvm_events
    # TSC -> real-time conversion (chrome trace timestamps are in us)
    tsc_khz = args.freq
    if tsc_khz:
        conversion_fac = 1000000.0 / tsc_khz * time_stretch
        print(f"Converting timestamps to us with tsc_khz={tsc_khz}")
        for event in out["traceEvents"]:
            event['ts'] *= conversion_fac
        if time_stretch != 1:
            print(f"Time is off by a factor of {time_stretch}!")
        # also adapt the first/last trace-cmd timestamps (only set if a kvm trace was given)
        if fts is not None:
            fts *= conversion_fac
            lts *= conversion_fac
    # add perf traces after the time conversion, since they are already in wall-clock time
    if perf_kvm_trace:
        if not tsc_khz:
            print("Error: the perf kvm trace is wall-clock aligned -> need to specify tsc_khz for conversion!")
            sys.exit(-1)
        out['traceEvents'] += kvm_events
    # get the first timestamp if it is still missing. will misalign?
    if not fts:
        print("Using hacky first/last timestamp method. This will likely misalign counters?")
        fts = min(out['traceEvents'], key=lambda x: x['ts'] if x['ts'] > 0 else 999999999999999)['ts']
        log(f"Got fts as {fts}")
    # perf stat trace
    if args.perf:
        print("Adding perf stat counters")
        if not tsc_khz:
            print("Error: perf stat counters are time based -> need to specify tsc_khz for conversion!")
            sys.exit(-1)
        # we simply assume that perf started at the first timestamp we have.
        # this is wrong by the time from perf start to kvm start,
        # but since perf samples at ~10 Hz, this does not matter much.
        # it seems to be offset by around 50ms, so just correct for that.
        # this gets overwritten if we provide an 'rdtsc ' line in the perf counter file!
        kvm_start_offset = 50000
        out['traceEvents'] += parse_perf_counters(args.perf, fts - kvm_start_offset, conversion_fac)
    # filter out irrelevant events (outside of the qemu runtime) if specified.
    if args.filter:
        if not args.kvm:
            print("ERROR: You have to specify a kvm trace to filter! Ignoring option.")
        else:
            out['traceEvents'] = list(filter(lambda e: e['ts'] >= fts and e['ts'] <= lts, out['traceEvents']))
    print('Fixing missing start entries if necessary')
    fixup_missing_starts(out, fts, lts)
    print(f"Saving merged trace to {args.OUTPUT}!")
    with open(args.OUTPUT, 'w') as f:
        json.dump(out, f)

def do_uftrace():
    print("uftrace mode, ignoring most options!")
    Path(args.OUTPUT).mkdir(parents=True, exist_ok=True)
    create_fake_uftrace(args.OUTPUT, args.TRACE, args.binary)
    print(f"You can view a replay of the trace with `uftrace replay -d {args.OUTPUT}`")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Merge traces (and/or fix timestamps). Outputs a chrome json trace file.')
    parser.add_argument("TRACE", help="path to one or more guest trace files, as output by the tracing crate")
    parser.add_argument("OUTPUT", help="file or folder where the output gets stored")
    parser.add_argument("-b", "--binary", help="path to the guest binary, used to generate the symbols of the guest trace")
    parser.add_argument("-B", "--binaryname", help="name of the guest binary, has to match the fake uftrace metadata", default="test")
    parser.add_argument("-O", "--offset", help="guest <-> host TSC offset. If 'auto', determines it from a) the given KVM trace or b) linux tracing", default="auto")
    parser.add_argument("-m", "--merge", help="merge with an additional trace, recorded on the host. Has to be a patched-uftrace trace (with TSC time)")
    parser.add_argument("-k", "--kvm", help="path to the trace-cmd trace of kvm samples (trace-cmd record -e 'kvm:*' -C x86-tsc)")
    parser.add_argument("-f", "--freq", help="TSC frequency in kHz. This is approx. your CPU frequency. If specified, converts timestamps to real time.", type=float)
    parser.add_argument("-F", "--filter", action="store_true", help="filter out all events which happened before kvm started. Needs a kvm trace to be specified!")
    parser.add_argument("-p", "--perf", help="additional perf stat file for counter graphs in the trace (perf stat -I 100 -e cycles:G -x'#')")
    args = parser.parse_args()
    # perf KVM traces have unreliable timestamps, so they are not exposed via an argument
    perf_kvm_trace = None
    time_stretch = 1  # can stretch time for better zooming in the perfetto ui
    merge()
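
# Example invocations (paths and flag values are illustrative only):
#   ./merge.py guest_trace_dir merged.json -b guest_binary
#   ./merge.py guest_trace_dir merged.json -k tracekvm.dat -f 2400000 -F -p perfstat.txt
# The merged JSON can then be loaded into chrome://tracing or the Perfetto UI.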