-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.py
executable file
·466 lines (407 loc) · 16.6 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
#!/usr/bin/python3
import subprocess
import signal
from subprocess import CalledProcessError, TimeoutExpired
from threading import Timer
import argparse
import pathlib
import time
import json
import sys
import re
import os
# ANSI SGR escape sequences used to colorize terminal output.
NORMAL='\033[0m'  # reset all attributes
GREEN='\033[32m'
WHITE='\033[97m'  # bright white foreground
RED='\033[91m'  # bright red foreground
YELLOW='\033[33m'
# Control characters (octal escapes): BS is used by run_with_progress to
# overwrite the elapsed-time display in place; CR is prepended to the status
# lines printed by run_tests.
BS='\010'  # backspace
CR='\015'  # carriage return
# SGR 40 (black background) combined with 1 (bold); used for the table header
# printed by analyze().
GRAY_BACKGROUND='\u001b[40;1m'
def stream_read_json(file_name):
    """Yield every JSON document stored back to back in *file_name*.

    The file is a "JSON stream": zero or more JSON values concatenated,
    optionally separated by whitespace.  Documents are decoded one at a time
    with ``JSONDecoder.raw_decode`` instead of parsing the text of the
    "Extra data" error message, so the result does not depend on the wording
    of the json module's exceptions.

    :param file_name: path (str or path-like) of the file to read
    :raises ValueError: (json.JSONDecodeError) if the file contains malformed
        JSON; an empty or whitespace-only file simply yields nothing
    """
    with open(file_name, 'r') as f:
        text = f.read()

    decoder = json.JSONDecoder()
    json_whitespace = ' \t\r\n'
    pos = 0
    end = len(text)
    while True:
        # raw_decode() does not skip leading whitespace itself
        while pos < end and text[pos] in json_whitespace:
            pos += 1
        if pos >= end:
            return
        obj, pos = decoder.raw_decode(text, pos)
        yield obj
def drbdsetup_down(vm):
    """Run 'drbdsetup down all' on *vm* via ssh as root.

    Output is captured rather than echoed.  Raises CalledProcessError when
    the command exits non-zero and TimeoutExpired after 30 seconds.
    """
    ssh_command = ['ssh', 'root@' + vm, 'drbdsetup down all']
    subprocess.run(
        ssh_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
        timeout=30,
    )
def unmount_dev(vm, dev):
    """Unmount *dev* on *vm* via ssh as root.

    Raises Exception carrying the captured stderr (bytes) when umount fails.
    """
    completed = subprocess.run(
        ['ssh', 'root@' + vm, 'umount ' + dev],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if completed.returncode != 0:
        raise Exception(completed.stderr)
def is_process_running(vm, pid):
    """Return True if /proc/<pid> exists on *vm* (checked via ssh as root).

    *pid* is concatenated into the remote command, so it must be a string.
    """
    probe = subprocess.run(
        ['ssh', 'root@' + vm, 'test -d /proc/' + pid],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return probe.returncode == 0
def kill_process(vm, pid):
    """Send SIGKILL to process *pid* on *vm*, retrying up to three times.

    After each 'kill -9' the process is probed with is_process_running();
    the function returns as soon as the process is gone.

    :param vm: host name to ssh into as root
    :param pid: process id as a string (it is concatenated into the command)
    :raises Exception: if the process is still alive after the last attempt;
        the message includes the stderr of the last kill command
    """
    args = ['ssh', 'root@' + vm, 'kill -9 ' + pid]
    max_attempts = 3
    last_stderr = b''
    for attempt in range(1, max_attempts + 1):
        p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, last_stderr = p.communicate()
        if not is_process_running(vm, pid):
            return
        print(' Process was not killed on attempt {0}, retrying'.format(attempt))
    raise Exception('Could not kill pid {0} on {1} after {2} attempts: {3}'.format(
        pid, vm, max_attempts,
        last_stderr.decode('utf-8', errors='replace').strip()))
def kill_node(vm):
    """Write 's' then 'b' to /proc/sysrq-trigger on *vm* after a short delay.

    The remote command is backgrounded with a 0.5 second sleep, so the ssh
    session returns before the trigger is written.  Raises Exception with
    the captured stderr (bytes) if the ssh command itself fails.
    """
    remote_cmd = '{ sleep 0.5; echo -e "s\nb" > /proc/sysrq-trigger; } > /dev/null &'
    completed = subprocess.run(
        ['ssh', 'root@' + vm, remote_cmd],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if completed.returncode != 0:
        raise Exception(completed.stderr)
def handle_down_error(vm, e):
    """
    handle an error produced by 'drbdsetup down'

    Only exit code 11 (device held open) is handled: the holder is parsed
    from the command's stderr, then either the device is unmounted (holder
    is 'mount') or the holding process is killed, and 'drbdsetup down' is
    retried once.

    :param vm: host name the command was run on
    :param e: the CalledProcessError raised by drbdsetup_down()
    :raises Exception: for any other exit code, or when the holder cannot be
        parsed from stderr
    """
    if e.returncode != 11:
        raise Exception('Unknown drbdsetup down exception: {0}'.format(e))

    # held open by someone
    # stderr was captured as bytes; decode it instead of using str(), which
    # would yield the "b'...'" repr and corrupt the captured device name
    stderr = e.stderr
    if stderr is None:
        stderr = ''
    elif isinstance(stderr, bytes):
        stderr = stderr.decode('utf-8', errors='replace')

    regexPid = r'^(.*) opened by (.*) \(pid (\d*)\) at .*$'
    # MULTILINE so the pattern matches the relevant line of multi-line output
    r = re.search(regexPid, stderr, re.MULTILINE)
    if r is None:
        raise Exception(
            'drbdsetup down reported an open device, but the holder could not be determined: {0!r}'.format(stderr))
    dev, procname, pid = r.groups()

    # special case: if 'mount' is holding the device open, we need to unmount it
    if procname == 'mount':
        print(' Device is mounted, unmounting {0}'.format(dev))
        unmount_dev(vm, dev)
    else:
        print(' Device is being held open, killing {0} (pid {1})'.format(procname, pid))
        kill_process(vm, pid)

    # let's try this again...
    drbdsetup_down(vm)
def try_down_all(vm, all_vm_names):
    """Take all DRBD resources on *vm* down, recovering from a failure once.

    A CalledProcessError from drbdsetup_down() is passed to
    handle_down_error().  *all_vm_names* is accepted but not used here.
    """
    try:
        drbdsetup_down(vm)
    except CalledProcessError as down_error:
        handle_down_error(vm, down_error)
def cleanup_and_prepare_vm(vm, all_vm_names, parallel=False):
    """Take DRBD down on *vm* and reload the drbd kernel modules via ssh.

    try_down_all() always runs synchronously first.  The module reload script
    is then started over ssh; with parallel=False this function waits for it
    and raises CalledProcessError on a non-zero exit, with parallel=True the
    still-running process is handed back for the caller to wait on.

    Returns the subprocess.Popen object of the ssh command in both modes.
    """
    try_down_all(vm, all_vm_names)
    cmd = """
rmmod drbd_transport_tcp 2>/dev/null || true
rmmod drbd 2>/dev/null || true
modprobe crc32c 2>/dev/null || true
UNR=$(uname -r)
if test -e /lib/modules/$UNR/updates/drbd.ko; then
insmod /lib/modules/$UNR/updates/drbd.ko
insmod /lib/modules/$UNR/updates/drbd_transport_tcp.ko
elif test -e /lib/modules/$UNR/weak-updates/drbd/drbd.ko; then
insmod /lib/modules/$UNR/weak-updates/drbd/drbd.ko
insmod /lib/modules/$UNR/weak-updates/drbd/drbd_transport_tcp.ko
elif test -e /lib/modules/$UNR/extra/drbd/drbd.ko; then
insmod /lib/modules/$UNR/extra/drbd/drbd.ko
insmod /lib/modules/$UNR/extra/drbd/drbd_transport_tcp.ko
else
echo "drbd.ko is in an unknown location, cannot load"
exit 1
fi
"""
    ssh_proc = subprocess.Popen(['ssh', 'root@' + vm, cmd])
    if parallel:
        # caller is responsible for waiting and checking the exit status
        return ssh_proc

    ssh_proc.wait()
    if ssh_proc.returncode != 0:
        raise subprocess.CalledProcessError(ssh_proc.returncode, 'ssh')
    return ssh_proc
def cleanup_and_prepare_vms(vm_names):
    """Prepare every VM in *vm_names*, running the module reloads in parallel.

    Prints "Preparing <vm> <vm> ..." without a trailing newline.  Each VM is
    started through cleanup_and_prepare_vm(..., parallel=True); afterwards
    all started processes are waited for.

    :raises subprocess.CalledProcessError: with the exit status of the first
        failed ssh process, raised only after every process has been waited
        for so that no child is left unreaped
    """
    processes = []
    print("Preparing", end='')
    for vm in vm_names:
        print(" %s" % (vm), end='')
        # flush before the call below, which blocks while DRBD is taken down
        sys.stdout.flush()
        processes.append(cleanup_and_prepare_vm(vm, vm_names, parallel=True))
    sys.stdout.flush()

    first_failure = None
    for p in processes:
        if p.wait() != 0 and first_failure is None:
            first_failure = p.returncode
    if first_failure is not None:
        raise subprocess.CalledProcessError(first_failure, 'ssh')
def run_with_progress(args, statistic = None):
    """Run *args* as a subprocess and show the elapsed time while it runs.

    The child is started in its own session so that the whole process group
    can be killed with SIGKILL when it exceeds the 5 minute timeout.  Its
    combined stdout/stderr is read line by line (and discarded); whenever a
    line arrives and more than 100ms have passed since the display was last
    refreshed, the elapsed time is reprinted in place using backspaces.

    :param args: argument list for subprocess.Popen
    :param statistic: optional dict; if it has a non-zero 'average_runtime',
        a percentage of that runtime (capped at 100) is shown as well
    :returns: dict with 'exit_code' (child's return code), 'run_time'
        (seconds, float) and 'timeout' (True if the timeout killed the child)
    """
    N_CHARS = 10
    TIMEOUT_SEC = 5*60 # 5 minutes
    UPDATE_INTERVAL_SEC = 0.1 # refresh the display at most every 100ms

    # a missing or zero average cannot be used as a divisor
    average_runtime = statistic.get('average_runtime') if statistic is not None else None

    start = time.time()
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid)
    timeout = False

    def do_timeout():
        nonlocal timeout
        try:
            os.killpg(p.pid, signal.SIGKILL)
        except ProcessLookupError:
            # the process group ended just before the timer fired
            return
        timeout = True

    timer = Timer(TIMEOUT_SEC, do_timeout)
    timer.start()
    try:
        last_update = start
        for _line in iter(p.stdout.readline, b''):
            now = time.time()
            if now - last_update <= UPDATE_INTERVAL_SEC:
                continue
            last_update = now
            time_diff = now - start
            if average_runtime:
                N_CHARS = len('123.1 secs (100.0%)')
                percent = min(time_diff / average_runtime * 100, 100)
                secs = ('%5.1f secs (%5.1f%%)' % (time_diff, percent))
            else:
                N_CHARS = len('123.1 secs')
                secs = ('%5.1f secs' % (time_diff))
            print(secs + (BS * N_CHARS), end='')
            sys.stdout.flush()
        # blank out the progress display and move the cursor back
        print(' ' * N_CHARS + BS * N_CHARS, end='')
        sys.stdout.flush()
        p.wait()
    finally:
        # never leave the timer armed or the pipe open, even on error
        timer.cancel()
        p.stdout.close()
    now = time.time()
    return {'exit_code': p.returncode, 'run_time': now - start, 'timeout': timeout}
def list_LVs(vm):
    """Return the paths of all logical volumes on *vm* as a list of strings.

    Runs 'lvs --noheadings -o lv_path' via ssh as root and strips each output
    line.  Raises CalledProcessError if the command fails.
    """
    completed = subprocess.run(
        ['ssh', 'root@' + vm, 'lvs --noheadings -o lv_path'],
        check=True,
        stdout=subprocess.PIPE,
    )
    output = completed.stdout.decode('utf-8')
    return list(map(str.strip, output.splitlines()))
def remove_excess_LVs(orig_lvs, vm, all_vm_names):
    """Remove every logical volume on *vm* that is not listed in *orig_lvs*.

    Before each removal the VM is cleaned up and prepared again; the volume
    is then removed with 'lvremove -f' via ssh (CalledProcessError on failure).
    """
    excess_lvs = [lv for lv in list_LVs(vm) if lv not in orig_lvs]
    for lv in excess_lvs:
        cleanup_and_prepare_vm(vm, all_vm_names)
        subprocess.run(['ssh', 'root@' + vm, 'lvremove -f %s' % (lv)], check=True)
def cleanup(original_lvs, all_vm_names):
    """Remove excess logical volumes on every VM in *all_vm_names*.

    *original_lvs* maps each VM name to the list of LV paths to keep.
    """
    for vm_name in all_vm_names:
        keep = original_lvs[vm_name]
        remove_excess_LVs(keep, vm_name, all_vm_names)
def generate_test_set(tests, n_vms):
    """Group the named *tests* by the number of VMs each should run with.

    Every test is executed as 'tests/<name> --report-and-quit'; its two output
    lines are parsed for min_nodes and max_nodes.  A test runs with
    min(n_vms, max_nodes) VMs, or is reported and left out when n_vms is
    below its minimum.

    Yields one dict per distinct VM count: {'vms': <count>, 'tests': [names]}.
    """
    tests_by_vm_count = {}
    for test in tests:
        report = subprocess.run(["tests/" + test, '--report-and-quit'],
                                stdout=subprocess.PIPE, check=True)
        min_nodes_str, max_nodes_str = report.stdout.decode('utf-8').splitlines()
        min_nodes = int(re.findall(r'min_nodes=(\d+)', min_nodes_str)[0])
        max_nodes = int(re.findall(r'max_nodes=(\d+)', max_nodes_str)[0])
        if n_vms < min_nodes:
            print("Can not run %s with %d VMs, need %d at minimum" %
                  (WHITE + test + NORMAL, n_vms, min_nodes))
            continue
        use_vms = min(n_vms, max_nodes)
        tests_by_vm_count.setdefault(use_vms, []).append(test)

    for vm_count, test_names in tests_by_vm_count.items():
        yield {'vms': vm_count, 'tests': test_names}
def run_tests(test_set_iter, all_vm_names, exclude_tests, keep_going, num_skip, statistics):
    """Run every test of every test set and collect the results.

    Test sets needing more VMs than are available are skipped, as are tests
    listed in *exclude_tests* and the first *num_skip* tests.  Before each
    test the participating VMs are prepared; after a timeout or failure the
    logical volumes created since the start are removed again.

    Returns (exit_code, results, num_successful, num_total).  Without
    *keep_going* the function returns at the first timeout/failure with that
    test's exit code; with it, all tests run and exit_code is 1 if any of
    them did not succeed, else 0.  *results* maps test name to the dict
    returned by run_with_progress() plus a 'nodes' entry.
    """
    # remember the LVs present up front so only newer ones get removed later
    original_lvs = {vm: list_LVs(vm) for vm in all_vm_names}
    results = {}
    global_exit_code = 0
    num_successful = 0
    num_total = 0
    num_skipped = 0

    for test_set in test_set_iter:
        wanted_vms = test_set['vms']
        if len(all_vm_names) < wanted_vms:
            continue
        vm_names = all_vm_names[:wanted_vms]

        for test in test_set['tests']:
            if test in exclude_tests:
                continue

            index = num_total + num_skipped
            if index < num_skip:
                print(CR + '%2d skipping %s' % (index, WHITE + test + NORMAL))
                num_skipped += 1
                continue

            cleanup_and_prepare_vms(vm_names)
            print(CR + '%2d running %s on %s: ' %
                  (index, WHITE + test + NORMAL, ', '.join(vm_names)), end='')
            num_total += 1

            result = run_with_progress(["tests/" + test, '--cleanup=always', *vm_names], statistics.get(test))
            result['nodes'] = len(vm_names)
            results[test] = result
            exit_code = result.get('exit_code')

            success_rate = ''
            if test in statistics:
                rate = statistics[test].get('success_rate') * 100
                success_rate = ' (%.1f%% success rate)' % (rate)

            if result.get('timeout'):
                verdict = RED + 'TIMEOUT' + NORMAL
            elif exit_code == 0:
                print(GREEN + 'OKAY' + NORMAL + success_rate)
                num_successful += 1
                continue
            else:
                verdict = RED + 'FAILED' + NORMAL

            # timeout and failure share the same handling
            print(verdict + success_rate)
            cleanup(original_lvs, all_vm_names)
            if not keep_going:
                return (exit_code, results, num_successful, num_total)
            # just return 1 for now
            global_exit_code = 1

    return (global_exit_code, results, num_successful, num_total)
def collect_software_versions(all_vm_names):
    """Return (kernel version, DRBD version, DRBD git hash) of the first VM.

    The VM is cleaned up and prepared first, then 'uname -r; cat /proc/drbd'
    is run via ssh and parsed.  The unpacking raises ValueError unless the
    output matches the expected pattern exactly once.
    """
    vm = all_vm_names[0]
    cleanup_and_prepare_vm(vm, all_vm_names)
    completed = subprocess.run(
        ['ssh', 'root@' + vm, 'uname -r; cat /proc/drbd'],
        check=True,
        stdout=subprocess.PIPE,
    )
    matches = re.findall(
        r'([^\s]+)\nversion: ([\d\.-]+)[^G]*GIT-hash: ([0-9a-f]+)',
        completed.stdout.decode('utf-8'))
    [(kern_ver, drbd_ver, drbd_git)] = matches
    return (kern_ver, drbd_ver, drbd_git)
def find_statistics(result_db, statistics):
    """Fill *statistics* with per-test aggregates computed from *result_db*.

    *result_db* is a list of run records whose 'results' entry maps test name
    to a dict with 'exit_code' and 'run_time'.  For every test seen,
    statistics[name] receives 'success_rate' (successes / runs) and 'count'
    (number of runs); tests with at least one successful run (exit code 0)
    also receive 'average_runtime', averaged over the successful runs only.

    *statistics* is modified in place; nothing is returned.
    """
    success_tally = {}
    runtime_tally = {}

    for record in result_db:
        for name, params in record.get('results').items():
            tally = success_tally.setdefault(name, {'successes': 0, 'count': 0})
            tally['count'] += 1
            if params.get('exit_code') != 0:
                # failed runs do not contribute to the runtime average
                continue
            tally['successes'] += 1
            timing = runtime_tally.setdefault(name, {'time': 0, 'count': 0})
            timing['time'] += params.get('run_time')
            timing['count'] += 1

    for name, timing in runtime_tally.items():
        entry = statistics.setdefault(name, {})
        entry['average_runtime'] = timing['time'] / timing['count']

    for name, tally in success_tally.items():
        entry = statistics.setdefault(name, {})
        entry['success_rate'] = tally['successes'] / tally['count']
        entry['count'] = tally['count']
def analyze():
    """Print a table of per-test statistics from 'result_db.json'.

    The database is read from the current directory; a missing file is
    treated as an empty database.  Rows are sorted by success rate, then by
    average runtime, both descending.  The success rate is colored red (0%),
    green (100%) or yellow (anything in between); tests without a successful
    run show a dashed placeholder instead of an average runtime.
    """
    try:
        # context manager so the file handle is closed again
        with open('result_db.json', 'r') as f:
            result_db = json.load(f)
    except FileNotFoundError:
        result_db = []

    statistics = {}
    find_statistics(result_db, statistics)
    sorted_stats = sorted(statistics.items(), key=lambda x: (x[1]['success_rate'], x[1].get('average_runtime')), reverse=True)

    print('{}{:<29} {:>5} {:>8} {:>6}{}'.format(GRAY_BACKGROUND, 'Test Name', 'Count', 'Success', 'Avg. Runtime', NORMAL))
    for name, stats in sorted_stats:
        count = stats.get('count')
        success_rate = round(stats.get('success_rate') * 100, 2)
        if success_rate == 0:
            color = RED
        elif success_rate == 100:
            color = GREEN
        else:
            color = YELLOW
        s = '{:<29} {:>5} {}{:>5}%{}'.format(name, count, color, success_rate, NORMAL)

        av = stats.get('average_runtime')
        if av:
            secs = round(av, 2)
        else:
            secs = ' -----'
        s += ' {:>6}s'.format(secs)
        print(s)
def main():
    """Parse the command line and run the test suite (or print statistics).

    With --analyze only the statistics table is printed.  Otherwise the
    software versions are collected from the first VM, the test set is built
    (from --run names or the JSON stream file) and run --iterations times;
    each iteration's results are appended to 'result_db.json', which is
    written at the end and also from the SIGINT handler.

    :returns: 0 if every iteration's run_tests() exit code was 0 (or nothing
        was run), otherwise the most recent non-zero exit code
    """
    cmdline_parser = argparse.ArgumentParser(
        description="run the testsuite's tests locally"
    )
    cmdline_parser.add_argument('vm_name', type=str, nargs='*',
                                help='name of VM/host that can be used to ssh into it')
    cmdline_parser.add_argument('-f', '--test-set-file', type=pathlib.Path,
                                dest='test_set_file', default='tests.drbd9.json',
                                metavar="FILE", help='JSON stream file to read the test set')
    cmdline_parser.add_argument('-n', '--not', type=str, metavar="TEST_NAME", action='append',
                                dest='exclude', help='exclude these tests', default=[])
    cmdline_parser.add_argument('-r', '--run', type=str, metavar="TEST_NAME", action='append',
                                dest='run', help='run only these named tests', default=[])
    cmdline_parser.add_argument('-k', '--keep-going', action='store_true', dest='keep_going',
                                help='keep going even after a test fails', default=False)
    cmdline_parser.add_argument('-a', '--analyze', action='store_true', dest='analyze',
                                help='view statistics about previous runs', default=False)
    cmdline_parser.add_argument('-i', '--iterations', type=int, dest='iterations',
                                help='how many times to run the test suite', default=1)
    cmdline_parser.add_argument('-s', '--skip', type=int, dest='num_skip',
                                help='skip the first n tests', default=0)
    args = cmdline_parser.parse_args()

    if args.analyze:
        analyze()
        return 0

    all_vm_names = args.vm_name
    if not all_vm_names:
        cmdline_parser.print_usage()
        print('error: the following arguments are required: vm_name')
        sys.exit(1)

    (kern_ver, drbd_ver, drbd_git) = collect_software_versions(all_vm_names)

    try:
        # context manager so the file handle is closed again
        with open('result_db.json', 'r') as f:
            result_db = json.load(f)
    except FileNotFoundError:
        result_db = []

    # register SIGINT handler so that the result_db is saved on Ctrl+C
    def sigint_handler(sig, frame):
        with open('result_db.json', 'w') as f:
            json.dump(result_db, f)
        sys.exit(0)
    signal.signal(signal.SIGINT, sigint_handler)

    total_successful = 0
    total_ran = 0
    # initialized up front so it is defined even when no iteration runs
    exit_code = 0

    for i in range(args.iterations):
        if args.iterations > 1:
            print('Starting iteration {0}{2}{1} of {0}{3}{1}'.format(WHITE, NORMAL, i+1, args.iterations))

        if args.run:
            test_set_iter=generate_test_set(args.run, len(all_vm_names))
        else:
            test_set_iter=stream_read_json(args.test_set_file)

        statistics = {}
        find_statistics(result_db, statistics)

        (iteration_exit_code, results, num_successful, num_total) = run_tests(test_set_iter, all_vm_names, args.exclude, args.keep_going, args.num_skip, statistics)
        if iteration_exit_code != 0:
            # a later successful iteration must not hide an earlier failure
            exit_code = iteration_exit_code

        total_successful += num_successful
        total_ran += num_total

        if args.iterations > 1:
            print('Iteration {0}{4}{3}: {0}{5}{3} tests ran, {1}{6} successful{3}, {2}{7} failed{3}'.format(
                WHITE, GREEN, RED, NORMAL, i+1, num_total, num_successful, num_total - num_successful))

        result_db.append({
            'version': drbd_ver,
            'git_hash': drbd_git,
            'kernel': kern_ver,
            'date_time': time.strftime("%Y%m%d-%H%M%S"),
            'results': results})

    print('{0}{4}{3} tests ran, {1}{5} successful{3}, {2}{6} failed{3}'.format(
        WHITE, GREEN, RED, NORMAL, total_ran, total_successful, total_ran - total_successful))

    with open('result_db.json', 'w') as f:
        json.dump(result_db, f)

    return exit_code
if __name__ == "__main__":
    # main() returns the exit code of the test run; pass it on to the shell
    # instead of always exiting with status 0.
    sys.exit(main())