From cef4d24211876f3dec5543f0a5c39721783e38ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkay=20=C3=9Cr=C3=BCn?= Date: Sat, 24 Jun 2023 16:15:29 +0200 Subject: [PATCH] Switch to HDF5 based backup --- controller.py | 291 +++++++++++++++++++++++++++++++++++++++++++++++--- goldenrun.py | 1 - hdf5logger.py | 181 +++++++++++++++++++++++++++++-- 3 files changed, 448 insertions(+), 25 deletions(-) diff --git a/controller.py b/controller.py index 35ef7cd..a6e8c68 100755 --- a/controller.py +++ b/controller.py @@ -17,13 +17,13 @@ # limitations under the License. import argparse +import hashlib import logging -import lzma from multiprocessing import Manager, Process from pathlib import Path -import pickle import subprocess import sys +import tables import time import pandas as pd @@ -41,6 +41,7 @@ from faultclass import detect_type, detect_model, Fault, Trigger from faultclass import python_worker +from faultclass import Register from hdf5logger import hdf5collector from goldenrun import run_goldenrun @@ -243,15 +244,233 @@ def get_system_ram(): return mem +def create_backup(args, queue_output, config_qemu, expanded_faultlist): + """ + Creates a backup dict with hashes and pushes it to the given queue + + :param args: parameters of the program + :param queue_output: queue to push the created backup + :param config_qemu: qemu.json config dictionary + :param expanded_faultlist: list of expanded faults + :return: void + """ + filenames = { + "qemu_hash": args.qemu.name, + "fault_hash": args.faults.name, + "kernel_hash": config_qemu["kernel"], + "bios_hash": config_qemu["bios"], + } + + config_qemu["hash"] = {} + config_qemu["hash_function"] = "sha256" + + for name, location in filenames.items(): + if Path(location).is_file(): + config_qemu["hash"][name] = hashlib.sha256( + (Path(location).read_bytes()) + ).digest() + + backup = { + "index": -3, + "config": config_qemu, + "expanded_faultlist": expanded_faultlist, + } + + queue_output.put(backup) + + +def check_backup(args, current_config, backup_config): + filenames = { + "qemu_hash": args.qemu.name, + "fault_hash": args.faults.name, + "kernel_hash": current_config["kernel"], + "bios_hash": current_config["bios"], + } + + for name, location in filenames.items(): + if Path(location).is_file(): + file_hash = hashlib.sha256((Path(location).read_bytes())).digest() + if backup_config["hash"][name] != file_hash: + return False + else: + if backup_config["hash"][name]: + clogger.warning( + f"{name} is missing from the current configuration but exists in the backup" + ) + return False + + return True + + +def read_backup(hdf5_file): + """ + :param hdf5_file: path to hdf5 + :return: backup_raw_faults, backup_expanded_faults, backup_config, backup_goldenrun + """ + with tables.open_file(hdf5_file, "r") as f_in: + if "Backup" not in f_in.root: + raise NameError("Backup could not be found in the HDF5 file") + + # Process start points + assert len(f_in.root.Backup.start_address) == 1 + hdf5_start_address = f_in.root.Backup.start_address.read() + + # Process end points + endpoints = [ + {"address": endpoint["address"], "counter": endpoint["counter"]} + for endpoint in f_in.root.Backup.end_addresses + ] + + # Process config + assert len(f_in.root.Backup.parameters) == 1 + hdf5_flags = f_in.root.Backup.parameters.read() + backup_config = { + "qemu": hdf5_flags["qemu"][0].decode("utf-8"), + "kernel": hdf5_flags["kernel"][0].decode("utf-8"), + "plugin": hdf5_flags["plugin"][0].decode("utf-8"), + "machine": hdf5_flags["machine"][0].decode("utf-8"), + "additional_qemu_args": hdf5_flags["additional_qemu_args"][0].decode( + "utf-8" + ), + "bios": hdf5_flags["bios"][0].decode("utf-8"), + "ring_buffer": bool(hdf5_flags["ring_buffer"][0]), + "tb_exec_list": bool(hdf5_flags["tb_exec_list"][0]), + "tb_info": bool(hdf5_flags["tb_info"][0]), + "mem_info": bool(hdf5_flags["mem_info"][0]), + "max_instruction_count": int(hdf5_flags["max_instruction_count"][0]), + "start": { + "address": int(hdf5_start_address["address"][0]), + "counter": int(hdf5_start_address["counter"][0]), + }, + "end": endpoints, + "hash": {}, + } + + # Process hash + assert len(f_in.root.Backup.hash) == 1 + hdf5_hashes = f_in.root.Backup.hash.read() + backup_config["hash"]["qemu_hash"] = hdf5_hashes["qemu_hash"][0] + backup_config["hash"]["fault_hash"] = hdf5_hashes["fault_hash"][0] + backup_config["hash"]["kernel_hash"] = hdf5_hashes["kernel_hash"][0] + backup_config["hash"]["bios_hash"] = hdf5_hashes["bios_hash"][0] + + # Process goldenrun data + backup_goldenrun = {"index": -1} + + backup_goldenrun["faultlist"] = [ + Fault( + fault["fault_address"], + [], + fault["fault_type"], + fault["fault_model"], + fault["fault_lifespan"], + fault["fault_mask"], + fault["trigger_address"], + fault["trigger_hitcounter"], + fault["fault_num_bytes"], + False, + ) + for fault in f_in.root.Goldenrun.faults.iterrows() + ] + + backup_goldenrun["tbinfo"] = [ + { + "assembler": tb_info["assembler"].decode("utf-8"), + "id": tb_info["identity"], + "ins_count": tb_info["ins_count"], + "num_exec": tb_info["num_exec"], + "size": tb_info["size"], + } + for tb_info in f_in.root.Goldenrun.tbinfo.iterrows() + ] + + backup_goldenrun["tbexec"] = [ + {"pos": tb_exec["pos"], "tb": tb_exec["tb"]} + for tb_exec in f_in.root.Goldenrun.tbexeclist.iterrows() + ] + + if backup_config["mem_info"]: + backup_goldenrun["meminfo"] = [ + { + "address": mem["address"], + "counter": mem["counter"], + "direction": mem["direction"], + "ins": mem["insaddr"], + "size": mem["size"], + "tbid": mem["tbid"], + } + for mem in f_in.root.Goldenrun.meminfo.iterrows() + ] + + # Process goldenrun registers + if "riscvregisters" in f_in.root.Goldenrun: + register_backup_name = "riscvregisters" + register_type = Register.RISCV + reg_size = 32 + reg_name = "x" + rows = f_in.root.Goldenrun.riscvregisters.iterrows() + elif "armregisters" in f_in.root.Goldenrun: + register_backup_name = "armregisters" + register_type = Register.ARM + reg_size = 16 + reg_name = "r" + rows = f_in.root.Goldenrun.armregisters.iterrows() + else: + raise tables.NoSuchNodeError( + "No supported register architecture could be found in the HDF5 file" + ) + + backup_goldenrun[register_backup_name] = [] + + for reg in rows: + registers = {"pc": reg["pc"], "tbcounter": reg["tbcounter"]} + for i in range(0, reg_size): + registers[f"{reg_name}{i}"] = reg[f"{reg_name}{i}"] + + # Last element of register_values is XPSR for Arm, PC for RISCV + if register_type == Register.ARM: + registers["xpsr"] = reg["xpsr"] + elif register_type == Register.RISCV: + registers[f"{reg_name}{reg_size}"] = reg[f"{reg_name}{reg_size}"] + + backup_goldenrun[register_backup_name].append(registers) + + # Process expanded faults + backup_expanded_faults = [] + exp_n = 0 + for exp in f_in.root.Backup.expanded_faults: + backup_exp = { + "index": exp_n, + "faultlist": [ + Fault( + fault["fault_address"], + [], + fault["fault_type"], + fault["fault_model"], + fault["fault_lifespan"], + fault["fault_mask"], + fault["trigger_address"], + fault["trigger_hitcounter"], + fault["fault_num_bytes"], + fault["fault_wildcard"], + ) + for fault in exp.faults.iterrows() + ], + } + exp_n = exp_n + 1 + backup_expanded_faults.append(backup_exp) + + return [backup_expanded_faults, backup_config, backup_goldenrun] + + def controller( - hdf5path, + args, hdf5mode, faultlist, config_qemu, num_workers, queuedepth, compressionlevel, - qemu_output, goldenrun=True, logger=hdf5collector, qemu_pre=None, @@ -267,6 +486,9 @@ def controller( t0 = time.time() + hdf5path = args.hdf5file + qemu_output = args.debug + m = Manager() m2 = Manager() queue_output = m.Queue() @@ -275,9 +497,47 @@ def controller( prctl.set_name("Controller") prctl.set_proctitle("Python_Controller") - # Storing and restoring goldenrun_data with pickle is a temporary fix - # A better solution is to parse the goldenrun_data from the existing hdf5 file + log_config = True + log_goldenrun = True + overwrite_faults = False + goldenrun_data = {} + + hdf5_file = Path(hdf5path) + if hdf5_file.is_file(): + clogger.info("HDF5 file already exits") + try: + [ + backup_expanded_faultlist, + backup_config, + backup_goldenrun_data, + ] = read_backup(hdf5_file) + except NameError: + clogger.warning("Backup could not be found in the HDF5 file") + return config_qemu + except tables.NoSuchNodeError: + clogger.warning( + "Invalid/unsupported backup file, run with the overwrite flag to overwrite!" + ) + return config_qemu + + clogger.info("Checking the backup") + if not check_backup(args, config_qemu, backup_config): + clogger.warning("Backup does not match, ending the execution!") + return config_qemu + + clogger.info("Backup matched and will be used") + + faultlist = backup_expanded_faultlist + config_qemu["max_instruction_count"] = backup_config["max_instruction_count"] + goldenrun_data = backup_goldenrun_data + + goldenrun = False + hdf5mode = "a" + + if not args.append: + overwrite_faults = True + if goldenrun: [ config_qemu["max_instruction_count"], @@ -286,16 +546,11 @@ def controller( ] = run_goldenrun( config_qemu, qemu_output, queue_output, faultlist, qemu_pre, qemu_post ) - pickle.dump( - (config_qemu["max_instruction_count"], goldenrun_data, faultlist), - lzma.open("bkup_goldenrun_results.xz", "wb"), - ) + + create_backup(args, queue_output, config_qemu, faultlist) else: - ( - config_qemu["max_instruction_count"], - goldenrun_data, - faultlist, - ) = pickle.load(lzma.open("bkup_goldenrun_results.xz", "rb")) + log_config = False + log_goldenrun = False p_logger = Process( target=logger, @@ -306,6 +561,9 @@ def controller( len(faultlist), compressionlevel, logger_postprocess, + log_config, + log_goldenrun, + overwrite_faults, ), ) @@ -650,14 +908,13 @@ def init_logging(): init_logging() controller( - args.hdf5file, # hdf5path + args, parguments["hdf5mode"], # hdf5mode parguments["faultlist"], # faultlist parguments["qemu_conf"], # config_qemu parguments["num_workers"], # num_workers parguments["queuedepth"], # queuedepth parguments["compressionlevel"], # compressionlevel - args.debug, # qemu_output parguments["goldenrun"], # goldenrun hdf5collector, # logger None, # qemu_pre diff --git a/goldenrun.py b/goldenrun.py index cce6c5d..05fc6b7 100644 --- a/goldenrun.py +++ b/goldenrun.py @@ -26,7 +26,6 @@ from faultclass import Fault from faultclass import python_worker - logger = logging.getLogger(__name__) diff --git a/hdf5logger.py b/hdf5logger.py index ac0c425..316fad7 100644 --- a/hdf5logger.py +++ b/hdf5logger.py @@ -50,6 +50,7 @@ class fault_table(tables.IsDescription): fault_mask_upper = tables.UInt64Col() fault_mask = tables.UInt64Col() fault_num_bytes = tables.UInt8Col() + fault_wildcard = tables.BoolCol() class memory_dump_table(tables.IsDescription): @@ -118,6 +119,34 @@ class riscv_registers_table(tables.IsDescription): x32 = tables.UInt64Col() +class config_table(tables.IsDescription): + qemu = tables.StringCol(1000) + kernel = tables.StringCol(1000) + plugin = tables.StringCol(1000) + machine = tables.StringCol(1000) + additional_qemu_args = tables.StringCol(1000) + bios = tables.StringCol(1000) + ring_buffer = tables.BoolCol() + tb_exec_list = tables.BoolCol() + tb_info = tables.BoolCol() + mem_info = tables.BoolCol() + max_instruction_count = tables.UInt64Col() + memory_dump = tables.BoolCol() + + +class hash_table(tables.IsDescription): + qemu_hash = tables.StringCol(32) + fault_hash = tables.StringCol(32) + kernel_hash = tables.StringCol(32) + bios_hash = tables.StringCol(32) + hash_function = tables.StringCol(16) + + +class address_table(tables.IsDescription): + address = tables.UInt64Col() + counter = tables.UInt64Col() + + binary_atom = tables.UInt8Atom() @@ -218,11 +247,11 @@ def process_dumps(f, group, memdumplist, myfilter): memdumpstable.close() -def process_faults(f, group, faultlist, endpoint, end_reason, myfilter): +def process_faults(f, group, faultlist, endpoint, end_reason, myfilter, name="faults"): # create table faulttable = f.create_table( group, - "faults", + name, fault_table, "Fault list table that contains the fault configuration used for this experiment", expectedrows=(len(faultlist)), @@ -241,6 +270,7 @@ def process_faults(f, group, faultlist, endpoint, end_reason, myfilter): faultrow["fault_mask_upper"] = (fault.mask >> 64) & (pow(2, 64) - 1) faultrow["fault_mask"] = fault.mask & (pow(2, 64) - 1) faultrow["fault_num_bytes"] = fault.num_bytes + faultrow["fault_wildcard"] = fault.wildcard faultrow.append() faulttable.flush() faulttable.close() @@ -316,8 +346,126 @@ def process_memory_info(f, group, meminfolist, myfilter): meminfotable.close() +def process_config(f, configgroup, exp, myfilter): + hashtable = f.create_table( + configgroup, + "hash", + hash_table, + "", + expectedrows=1, + filters=myfilter, + ) + + hash_table_row = hashtable.row + for file, f_hash in exp["hash"].items(): + hash_table_row[file] = f_hash + hash_table_row["hash_function"] = exp["hash_function"] + + hash_table_row.append() + + hashtable.flush() + hashtable.close() + + configtable = f.create_table( + configgroup, + "parameters", + config_table, + "", + expectedrows=1, + filters=myfilter, + ) + + config_row = configtable.row + config_row["qemu"] = exp["qemu"] + config_row["kernel"] = exp["kernel"] + config_row["plugin"] = exp["plugin"] + config_row["machine"] = exp["machine"] + config_row["additional_qemu_args"] = exp["additional_qemu_args"] + config_row["bios"] = exp["bios"] + config_row["ring_buffer"] = exp["ring_buffer"] + config_row["tb_exec_list"] = exp["tb_exec_list"] + config_row["tb_info"] = exp["tb_info"] + config_row["mem_info"] = exp["mem_info"] + config_row["max_instruction_count"] = exp["max_instruction_count"] + + config_row.append() + + configtable.flush() + configtable.close() + + starttable = f.create_table( + configgroup, + "start_address", + address_table, + "", + expectedrows=(len(exp["start"])), + filters=myfilter, + ) + + start_row = starttable.row + start_row["address"] = exp["start"]["address"] + start_row["counter"] = exp["start"]["counter"] + + start_row.append() + + starttable.flush() + starttable.close() + + endtable = f.create_table( + configgroup, + "end_addresses", + address_table, + "", + expectedrows=(len(exp["end"])), + filters=myfilter, + ) + + end_row = endtable.row + for endpoint in exp["end"]: + end_row["address"] = endpoint["address"] + end_row["counter"] = endpoint["counter"] + + end_row.append() + + endtable.flush() + endtable.close() + + +def process_backup(f, configgroup, exp, myfilter): + process_config(f, configgroup, exp["config"], myfilter) + + fault_expanded_group = f.create_group( + configgroup, "expanded_faults", "Group containing expanded input faults" + ) + + tmp = "{}".format(len(exp["expanded_faultlist"])) + exp_name = "experiment{:0" + "{}".format(len(tmp)) + "d}" + + for exp_number in range(0, len(exp["expanded_faultlist"])): + exp_group = f.create_group( + fault_expanded_group, exp_name.format(exp_number), "Group containing faults" + ) + + process_faults( + f, + exp_group, + exp["expanded_faultlist"][exp_number]["faultlist"], + 0, + "not executed", + myfilter, + ) + + def hdf5collector( - hdf5path, mode, queue_output, num_exp, compressionlevel, logger_postprocess=None + hdf5path, + mode, + queue_output, + num_exp, + compressionlevel, + logger_postprocess=None, + log_goldenrun=True, + log_config=False, + overwrite_faults=False, ): prctl.set_name("logger") prctl.set_proctitle("logger") @@ -330,7 +478,14 @@ def hdf5collector( t0 = time.time() tmp = "{}".format(num_exp) groupname = "experiment{:0" + "{}".format(len(tmp)) + "d}" - while num_exp > 0: + + log_pregoldenrun = log_goldenrun + + if overwrite_faults: + for n in f.root.fault._f_iter_nodes(): + n._f_remove(recursive=True) + + while num_exp > 0 or log_goldenrun or log_pregoldenrun or log_config: # readout queue and get next output from qemu. Will block exp = queue_output.get() t1 = time.time() @@ -353,7 +508,7 @@ def hdf5collector( ) ) num_exp = num_exp - 1 - elif exp["index"] == -2: + elif exp["index"] == -2 and log_pregoldenrun: if "Pregoldenrun" in f.root: raise ValueError("Pregoldenrun already exists!") exp_group = f.create_group( @@ -361,14 +516,26 @@ def hdf5collector( "Pregoldenrun", "Group containing all information regarding firmware running before start point is reached", ) - elif exp["index"] == -1: + log_pregoldenrun = False + elif exp["index"] == -1 and log_goldenrun: if "Goldenrun" in f.root: raise ValueError("Goldenrun already exists!") exp_group = f.create_group( "/", "Goldenrun", "Group containing all information about goldenrun" ) + log_goldenrun = False + elif exp["index"] == -3 and log_config: + if "backup" in f.root: + raise ValueError("Backup already exists!") + exp_group = f.create_group( + "/", "Backup", "Group containing backup and run information" + ) + + process_backup(f, exp_group, exp, myfilter) + log_config = False + continue else: - raise ValueError("Index is not supposed to be negative") + continue datasets = [] datasets.append((process_tbinfo, "tbinfo"))