n64: access compiled blocks via a hash

We'd like the recompiler to take the execution context, such as kernel mode, into account when compiling blocks. That's why it's necessary to identify blocks not just by address but by all the information used at compile time. This is done by computing a 32-bit key and using that as a block's identifier instead of the last six physical address bits, as was done before. Since we now have 32-bit instead of 6-bit keys, the block() function hashes the keys before mapping them to one of the 64 pool rows. The hash function was chosen arbitrarily to be better than a simple multiplicative hash and is likely not the best choice for this exact task.
ares-emulator · Sep 8, 2024 · 616b3b6 · 616b3b6
1 parent f43da41
commit 616b3b6
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 10 deletions.
diff --git a/ares/n64/cpu/cpu.cpp b/ares/n64/cpu/cpu.cpp
@@ -113,7 +113,15 @@ auto CPU::instruction() -> void {
 
   if(Accuracy::CPU::Recompiler && recompiler.enabled && access.cache) {
     if(vaddrAlignedError<Word>(access.vaddr, false)) return;
-    auto block = recompiler.block(ipu.pc, access.paddr, GDB::server.hasBreakpoints());
+    auto block = recompiler.block(ipu.pc, access.paddr,
+        {
+        .singleInstruction = GDB::server.hasBreakpoints(),
+        .endian = Context::Endian(context.endian),
+        .mode = Context::Mode(context.mode),
+        .cop1Enabled = scc.status.enable.coprocessor1 > 0,
+        .floatingPointMode = scc.status.floatingPointMode > 0,
+        .is64bit = context.bits == 64,
+        });
     block->execute(*this);
   } else {
     auto data = fetch(access);

diff --git a/ares/n64/cpu/cpu.hpp b/ares/n64/cpu/cpu.hpp
@@ -863,7 +863,20 @@ struct CPU : Thread {
     };
 
     struct Pool {
-      Block* blocks[1 << 6];
+      struct Row {  //one pool slot: a compiled block together with the key it was stored under
+        Block* block;  //compiled block held by this slot; block() treats null as "nothing cached"
+        u32 tag;  //key from computePoolKey() that was stored alongside `block`
+      };
+      Row rows[1 << 6];  //64 slots, indexed by computePoolRow(key)
+    };
+
+    struct JITContext {  //execution context a block is compiled under; computePoolKey() folds every field into the key
+      bool singleInstruction;  //emit() stops after one instruction when set; caller passes GDB::server.hasBreakpoints()
+      Context::Endian endian;  //caller passes Context::Endian(context.endian)
+      Context::Mode mode;  //caller passes Context::Mode(context.mode)
+      bool cop1Enabled;  //caller passes scc.status.enable.coprocessor1 > 0
+      bool floatingPointMode;  //caller passes scc.status.floatingPointMode > 0
+      bool is64bit;  //caller passes context.bits == 64
     };
 
     auto reset() -> void {
@@ -899,9 +912,11 @@ struct CPU : Thread {
     }
 
     auto pool(u32 address) -> Pool*;
-    auto block(u64 vaddr, u32 address, bool singleInstruction = false) -> Block*;
+    auto computePoolKey(u32 address, JITContext ctx) -> u32;
+    auto computePoolRow(u32 key) -> u32;
+    auto block(u64 vaddr, u32 address, JITContext ctx) -> Block*;
 
-    auto emit(u64 vaddr, u32 address, bool singleInstruction = false) -> Block*;
+    auto emit(u64 vaddr, u32 address, JITContext ctx) -> Block*;
     auto emitZeroClear(u32 n) -> void;
     auto emitEXECUTE(u32 instruction) -> bool;
     auto emitSPECIAL(u32 instruction) -> bool;

diff --git a/ares/n64/cpu/recompiler.cpp b/ares/n64/cpu/recompiler.cpp
@@ -9,10 +9,41 @@ auto CPU::Recompiler::pool(u32 address) -> Pool* {
   return pool;
 }
 
-auto CPU::Recompiler::block(u64 vaddr, u32 address, bool singleInstruction) -> Block* {
-  if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block;
-  auto block = emit(vaddr, address, singleInstruction);
-  pool(address)->blocks[address >> 2 & 0x3f] = block;
+// Builds the identifier a compiled block is stored under: the block's word
+// index within its pool plus every context flag used at compile time.
+// Each field gets its own bits so distinct contexts never share a key:
+//   bits 0-5   address >> 2 & 0x3f
+//   bit  6     singleInstruction
+//   bit  7     endian
+//   bits 8-9   mode (two-bit field)
+//   bit  10    cop1Enabled
+//   bit  11    floatingPointMode
+//   bit  12    is64bit
+auto CPU::Recompiler::computePoolKey(u32 address, JITContext ctx) -> u32 {
+  u32 key = address >> 2 & 0x3f;
+  key |= ctx.singleInstruction ? 1 << 6 : 0;
+  key |= ctx.endian ? 1 << 7 : 0;
+  // mode is masked to two bits; shifting it by 9 would place its upper bit
+  // on bit 10 and alias cop1Enabled, so it is placed at bits 8-9 instead.
+  key |= (ctx.mode & 0x03) << 8;
+  key |= ctx.cop1Enabled ? 1 << 10 : 0;
+  key |= ctx.floatingPointMode ? 1 << 11 : 0;
+  key |= ctx.is64bit ? 1 << 12 : 0;
+  return key;
+}
+
+// Hashes a block key down to an index into Pool::rows.
+auto CPU::Recompiler::computePoolRow(u32 key) -> u32 {
+  // Jon Maiga's 'xmx' mixer (xorshift, multiply, xorshift),
+  // see https://jonkagstrom.com/bit-mixer-construction/
+  u64 mixed = key;
+  mixed = (mixed ^ (mixed >> 23)) * 0xff51afd7ed558ccdull;
+  mixed ^= mixed >> 23;
+  // Only the low six bits of the mixed value select the row.
+  u32 row = mixed & 0x3f;
+  assert(row < sizeof(Pool::rows)/sizeof(Pool::rows[0]));
+  return row;
+}
+
+auto CPU::Recompiler::block(u64 vaddr, u32 address, JITContext ctx) -> Block* {  //returns the cached block for (address, ctx), emitting and caching one on a miss
+  u32 key = computePoolKey(address, ctx);  //address bits plus context flags
+  u32 row = computePoolRow(key);  //hashed slot index into the pool's rows
+
+  if (pool(address)->rows[row].tag == key) {  //a hit requires the slot's stored key to match exactly
+    if (auto block = pool(address)->rows[row].block) {  //a null block means nothing is cached in this slot
+      return block;
+    }
+  }
+
+  memory::jitprotect(false);  //NOTE(review): presumably lifts write protection on JIT memory while emitting - confirm
+  auto block = emit(vaddr, address, ctx);
+  pool(address)->rows[row] = {.block = block, .tag = key};  //pool() is looked up again after emit(), which may release the allocator (presumably invalidating earlier pool pointers - confirm); replaces whatever the slot held
   memory::jitprotect(true);
   return block;
 }
@@ -21,7 +52,7 @@ auto CPU::Recompiler::block(u64 vaddr, u32 address, bool singleInstruction) -> B
 #define IpuReg(r)      sreg(1), offsetof(IPU, r) - IpuBase
 #define PipelineReg(x) mem(sreg(0), offsetof(CPU, pipeline) + offsetof(Pipeline, x))
 
-auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Block* {
+auto CPU::Recompiler::emit(u64 vaddr, u32 address, JITContext ctx) -> Block* {
   if(unlikely(allocator.available() < 1_MiB)) {
     print("CPU allocator flush\n");
     allocator.release();
@@ -60,7 +91,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, bool singleInstruction) -> Bl
     vaddr += 4;
     address += 4;
     jumpToSelf += 4;
-    if(hasBranched || (address & 0xfc) == 0 || singleInstruction) break;  //block boundary
+    if(hasBranched || (address & 0xfc) == 0 || ctx.singleInstruction) break;  //block boundary
     hasBranched = branched;
     jumpEpilog(flag_nz);
   }