From 87d396f224eb3bb6986da2d9b0f4bf38e5d4d113 Mon Sep 17 00:00:00 2001 From: Han Dai Date: Sun, 10 Nov 2024 19:09:44 -0500 Subject: [PATCH] fallback when regionidentifier fails --- .../components/binary_analyzers/angr.py | 86 ++++++++++++------- 1 file changed, 56 insertions(+), 30 deletions(-) diff --git a/src/patcherex2/components/binary_analyzers/angr.py b/src/patcherex2/components/binary_analyzers/angr.py index 837484a..8b78a6b 100644 --- a/src/patcherex2/components/binary_analyzers/angr.py +++ b/src/patcherex2/components/binary_analyzers/angr.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import traceback import angr from archinfo import ArchARM @@ -73,36 +74,61 @@ def get_basic_block(self, addr: int) -> dict[str, int | list[int]]: addr += 1 addr = self.denormalize_addr(addr) - func = self.p.kb.functions.function( - self.cfg.model.get_any_node(addr, anyaddr=True).function_address - ) - ri = self.p.analyses.RegionIdentifier(func) - graph = ri._graph.copy() - ri._make_supergraph(graph) - - for multinode in graph.nodes(): - nodes = multinode.nodes if hasattr(multinode, "nodes") else [multinode] - start = multinode.addr - size = sum(node.size for node in nodes) - end = start + size - - instr_addrs = [ - instr_addr - for node in nodes - for instr_addr in func.get_block(node.addr).instruction_addrs - ] - - if addr in instr_addrs: - return { - "start": self.normalize_addr(start), - "end": self.normalize_addr(end), - "size": size, - "instruction_addrs": [ - self.normalize_addr(instr_addr) - - (1 if self.is_thumb(self.normalize_addr(instr_addr)) else 0) - for instr_addr in instr_addrs - ], - } + try: + func = self.p.kb.functions.function( + self.cfg.model.get_any_node(addr, anyaddr=True).function_address + ) + ri = self.p.analyses.RegionIdentifier(func) + graph = ri._graph.copy() + ri._make_supergraph(graph) + + for multinode in graph.nodes(): + nodes = multinode.nodes if hasattr(multinode, "nodes") else [multinode] + start = multinode.addr + size = sum(node.size for node in nodes) + end = start + size + + instr_addrs = [ + instr_addr + for node in nodes + for instr_addr in func.get_block(node.addr).instruction_addrs + ] + + if addr in instr_addrs: + return { + "start": self.normalize_addr(start), + "end": self.normalize_addr(end), + "size": size, + "instruction_addrs": [ + self.normalize_addr(instr_addr) + - ( + 1 + if self.is_thumb(self.normalize_addr(instr_addr)) + else 0 + ) + for instr_addr in instr_addrs + ], + } + except Exception: + logger.error( + f"angr RegionIdentifier failed for function containing {hex(addr)}, falling back to use cfg nodes\n{traceback.format_exc()}" + ) + bb = None + for node in self.cfg.model.nodes(): + if addr in node.instruction_addrs: + bb = node + break + assert bb is not None + return { + "start": self.normalize_addr(bb.addr), + "end": self.normalize_addr(bb.addr + bb.size), + "size": bb.size, + "instruction_addrs": [ + self.normalize_addr(addr) + - (1 if self.is_thumb(self.normalize_addr(addr)) else 0) + for addr in bb.instruction_addrs + ], + } raise Exception(f"Cannot find a block containing address {hex(addr)}")