diff --git a/ares/n64/cpu/cpu.cpp b/ares/n64/cpu/cpu.cpp
index 29f03ce916..e7fbf92336 100644
--- a/ares/n64/cpu/cpu.cpp
+++ b/ares/n64/cpu/cpu.cpp
@@ -113,7 +113,15 @@ auto CPU::instruction() -> void {
 
   if(Accuracy::CPU::Recompiler && recompiler.enabled && access.cache) {
     if(vaddrAlignedError(access.vaddr, false)) return;
-    auto block = recompiler.block(ipu.pc, access.paddr, GDB::server.hasBreakpoints());
+    auto block = recompiler.block(ipu.pc, access.paddr,  //the aggregate below is the JITContext the block is looked up under
+      {
+        .singleInstruction = GDB::server.hasBreakpoints(),
+        .endian = Context::Endian(context.endian),
+        .mode = Context::Mode(context.mode),
+        .cop1Enabled = scc.status.enable.coprocessor1 > 0,
+        .floatingPointMode = scc.status.floatingPointMode > 0,
+        .is64bit = context.bits == 64,
+      });
     block->execute(*this);
   } else {
     auto data = fetch(access);
diff --git a/ares/n64/cpu/cpu.hpp b/ares/n64/cpu/cpu.hpp
index bddb7222a1..700fbe8882 100644
--- a/ares/n64/cpu/cpu.hpp
+++ b/ares/n64/cpu/cpu.hpp
@@ -863,7 +863,20 @@ struct CPU : Thread {
     };
 
     struct Pool {
-      Block* blocks[1 << 6];
+      struct Row {
+        Block* block;  //block stored in this row; block() treats a null pointer as a miss
+        u32 tag;  //full computePoolKey() value the stored block was emitted for
+      };
+      Row rows[1 << 6];  //64 rows, indexed by computePoolRow()
+    };
+
+    struct JITContext {  //CPU state that is folded into the pool key by computePoolKey()
+      bool singleInstruction;
+      Context::Endian endian;
+      Context::Mode mode;
+      bool cop1Enabled;
+      bool floatingPointMode;
+      bool is64bit;
     };
 
     auto reset() -> void {
@@ -899,9 +912,11 @@ struct CPU : Thread {
     }
 
     auto pool(u32 address) -> Pool*;
-    auto block(u64 vaddr, u32 address, bool singleInstruction = false) -> Block*;
+    auto computePoolKey(u32 address, JITContext ctx) -> u32;
+    auto computePoolRow(u32 key) -> u32;
+    auto block(u64 vaddr, u32 address, JITContext ctx) -> Block*;
 
-    auto emit(u64 vaddr, u32 address, bool singleInstruction = false) -> Block*;
+    auto emit(u64 vaddr, u32 address, JITContext ctx) -> Block*;
     auto emitZeroClear(u32 n) -> void;
     auto emitEXECUTE(u32 instruction) -> bool;
     auto emitSPECIAL(u32 instruction) -> bool;
diff --git a/ares/n64/cpu/recompiler.cpp b/ares/n64/cpu/recompiler.cpp
index 68a6962243..a7a3bde363 100644
--- a/ares/n64/cpu/recompiler.cpp
+++ b/ares/n64/cpu/recompiler.cpp
@@ -9,10 +9,41 @@ auto CPU::Recompiler::pool(u32 address) -> Pool* {
   return pool;
 }
 
-auto CPU::Recompiler::block(u64 vaddr, u32 address, bool singleInstruction) -> Block* {
-  if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block;
-  auto block = emit(vaddr, address, singleInstruction);
-  pool(address)->blocks[address >> 2 & 0x3f] = block;
+auto CPU::Recompiler::computePoolKey(u32 address, JITContext ctx) -> u32 {  //packs address slot + context into disjoint bit fields
+  u32 key = address >> 2 & 0x3f;  //bits 0-5: word index of the address
+  key |= ctx.singleInstruction ? 1 << 6 : 0;  //bit 6
+  key |= ctx.endian ? 1 << 7 : 0;  //bit 7
+  key |= (ctx.mode & 0x03) << 8;  //bits 8-9; a shift of 9 would spill into bit 10 and alias with cop1Enabled
+  key |= ctx.cop1Enabled ? 1 << 10 : 0;  //bit 10
+  key |= ctx.floatingPointMode ? 1 << 11 : 0;  //bit 11
+  key |= ctx.is64bit ? 1 << 12 : 0;  //bit 12
+  return key;
+}
+
+auto CPU::Recompiler::computePoolRow(u32 key) -> u32 {  //mixes the key and reduces it to a 6-bit row index
+  // Jon Maiga's 'xmx' mixer, see https://jonkagstrom.com/bit-mixer-construction/
+  u64 x = key;
+  x ^= x >> 23;
+  x *= 0xff51afd7ed558ccdull;
+  x ^= x >> 23;
+  u32 row = x & 0x3f;  //keep the low 6 bits of the mixed value
+  assert(row < sizeof(Pool::rows)/sizeof(Pool::rows[0]));
+  return row;
+}
+
+auto CPU::Recompiler::block(u64 vaddr, u32 address, JITContext ctx) -> Block* {  //returns the stored block for (address, ctx), emitting one on a miss
+  u32 key = computePoolKey(address, ctx);
+  u32 row = computePoolRow(key);
+
+  if (pool(address)->rows[row].tag == key) {  //hit requires the full key to match, not just the row
+    if (auto block = pool(address)->rows[row].block) {  //a matching tag with a null block is still a miss
+      return block;
+    }
+  }
+
+  memory::jitprotect(false);
+  auto block = emit(vaddr, address, ctx);
+  pool(address)->rows[row] = {.block = block, .tag = key};  //overwrites whatever the row held before
   memory::jitprotect(true);
   return block;
 }
@@ -21,7 +52,7 @@ auto CPU::Recompiler::block(u64 vaddr, u32 address, bool singleInstruction) -> B
 #define IpuReg(r) sreg(1), offsetof(IPU, r) - IpuBase
 #define PipelineReg(x) mem(sreg(0), offsetof(CPU, pipeline) + offsetof(Pipeline, x))
 
-auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Block* {
+auto CPU::Recompiler::emit(u64 vaddr, u32 address, JITContext ctx) -> Block* {
   if(unlikely(allocator.available() < 1_MiB)) {
     print("CPU allocator flush\n");
     allocator.release();
@@ -60,7 +91,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Bl
     vaddr += 4;
     address += 4;
     jumpToSelf += 4;
-    if(hasBranched || (address & 0xfc) == 0 || singleInstruction) break;  //block boundary
+    if(hasBranched || (address & 0xfc) == 0 || ctx.singleInstruction) break;  //block boundary
     hasBranched = branched;
     jumpEpilog(flag_nz);
   }