From 382c633052850aa8385c48fc3cb05033c024bd1b Mon Sep 17 00:00:00 2001 From: tykkiman Date: Sun, 8 Sep 2024 17:40:22 +0300 Subject: [PATCH] n64: inline simple dual mode operations * Pass JITContext down to leaf emit functions. * Emit inline implementations of basic 64-bit operations. * Use block compile-time information to elide kernel mode checks of the now inlined operations. --- ares/n64/cpu/cpu.hpp | 6 +- ares/n64/cpu/recompiler.cpp | 137 +++++++++++++++++++----------------- 2 files changed, 77 insertions(+), 66 deletions(-) diff --git a/ares/n64/cpu/cpu.hpp b/ares/n64/cpu/cpu.hpp index 700fbe888..a1fcbf19e 100644 --- a/ares/n64/cpu/cpu.hpp +++ b/ares/n64/cpu/cpu.hpp @@ -917,9 +917,11 @@ struct CPU : Thread { auto block(u64 vaddr, u32 address, JITContext ctx) -> Block*; auto emit(u64 vaddr, u32 address, JITContext ctx) -> Block*; + auto emitOverflowCheck(reg temp) -> sljit_jump*; auto emitZeroClear(u32 n) -> void; - auto emitEXECUTE(u32 instruction) -> bool; - auto emitSPECIAL(u32 instruction) -> bool; + auto checkDualAllowed(const JITContext& ctx) -> bool; + auto emitEXECUTE(u32 instruction, JITContext ctx) -> bool; + auto emitSPECIAL(u32 instruction, JITContext ctx) -> bool; auto emitREGIMM(u32 instruction) -> bool; auto emitSCC(u32 instruction) -> bool; auto emitFPU(u32 instruction) -> bool; diff --git a/ares/n64/cpu/recompiler.cpp b/ares/n64/cpu/recompiler.cpp index a7a3bde36..eacad5022 100644 --- a/ares/n64/cpu/recompiler.cpp +++ b/ares/n64/cpu/recompiler.cpp @@ -77,7 +77,7 @@ auto CPU::Recompiler::emit(u64 vaddr, u32 address, JITContext ctx) -> Block* { mov32(reg(2), imm(instruction)); call(&CPU::instructionPrologue); } - bool branched = emitEXECUTE(instruction); + bool branched = emitEXECUTE(instruction, ctx); if(unlikely(instruction == branchToSelf || instruction == jumpToSelf)) { //accelerate idle loops mov32(reg(1), imm(64 * 2)); @@ -134,12 +134,31 @@ auto CPU::Recompiler::emitZeroClear(u32 n) -> void { if(n == 0) mov64(mem(IpuReg(r[0])), imm(0)); } 
-auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { +auto CPU::Recompiler::emitOverflowCheck(reg temp) -> sljit_jump* { + //emits the test for the overflow flag of the preceding operation (temp is clobbered): on overflow, raise arithmeticOverflow and jump away; the returned jump must be bound by the caller past the destination write-back + mov32_f(temp, flag_o); + auto didntOverflow = cmp32_jump(temp, imm(0), flag_eq); + call(&CPU::Exception::arithmeticOverflow, &cpu.exception); + auto end = jump(); + setLabel(didntOverflow); + return end; +} + +auto CPU::Recompiler::checkDualAllowed(const JITContext& ctx) -> bool { //decided while compiling the block: false (after emitting a reservedInstruction call) when ctx is neither kernel mode nor is64bit + if (ctx.mode != Context::Mode::Kernel && !ctx.is64bit) { + call(&CPU::Exception::reservedInstruction, &self.exception); + return false; + } + + return true; +} + +auto CPU::Recompiler::emitEXECUTE(u32 instruction, JITContext ctx) -> bool { switch(instruction >> 26) { //SPECIAL case 0x00: { - return emitSPECIAL(instruction); + return emitSPECIAL(instruction, ctx); } //REGIMM @@ -315,21 +334,19 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { //DADDI Rt,Rs,i16 case 0x18: { - lea(reg(1), Rt); - lea(reg(2), Rs); - mov32(reg(3), imm(i16)); - call(&CPU::DADDI); - emitZeroClear(Rtn); + if (!checkDualAllowed(ctx)) return 1; + add64(reg(0), mem(Rs), imm(i16), set_o); + auto skip = emitOverflowCheck(reg(2)); + if(Rtn > 0) mov64(mem(Rt), reg(0)); + setLabel(skip); return 0; } //DADDIU Rt,Rs,i16 case 0x19: { - lea(reg(1), Rt); - lea(reg(2), Rs); - mov32(reg(3), imm(i16)); - call(&CPU::DADDIU); - emitZeroClear(Rtn); + if (!checkDualAllowed(ctx)) return 1; + add64(reg(0), mem(Rs), imm(i16)); //no overflow check follows, so no flag is requested + if(Rtn > 0) mov64(mem(Rt), reg(0)); return 0; } @@ -647,7 +664,7 @@ auto CPU::Recompiler::emitEXECUTE(u32 instruction) -> bool { return 0; } -auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { +auto CPU::Recompiler::emitSPECIAL(u32 instruction, JITContext ctx) -> bool { switch(instruction & 0x3f) { //SLL Rd,Rt,Sa @@ -791,11 +808,10 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { //DSLLV Rd,Rt,Rs case 0x14: { - lea(reg(1), Rd); - lea(reg(2), Rt); - 
lea(reg(3), Rs); - call(&CPU::DSLLV); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + if (Rdn == 0) return 0; + and64(reg(0), mem(Rs), imm(63)); //shift amount: low 6 bits of Rs + shl64(mem(Rd), mem(Rt), reg(0)); return 0; } @@ -807,21 +823,19 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { //DSRLV Rd,Rt,Rs case 0x16: { - lea(reg(1), Rd); - lea(reg(2), Rt); - lea(reg(3), Rs); - call(&CPU::DSRLV); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + if (Rdn == 0) return 0; + and64(reg(0), mem(Rs), imm(63)); //shift amount: low 6 bits of Rs + lshr64(mem(Rd), mem(Rt), reg(0)); return 0; } //DSRAV Rd,Rt,Rs case 0x17: { - lea(reg(1), Rd); - lea(reg(2), Rt); - lea(reg(3), Rs); - call(&CPU::DSRAV); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + if (Rdn == 0) return 0; + and64(reg(0), mem(Rs), imm(63)); //shift amount: low 6 bits of Rs + ashr64(mem(Rd), mem(Rt), reg(0)); return 0; } @@ -981,41 +995,42 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { //DADD Rd,Rs,Rt case 0x2c: { - lea(reg(1), Rd); - lea(reg(2), Rs); - lea(reg(3), Rt); - call(&CPU::DADD); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + add64(reg(0), mem(Rs), mem(Rt), set_o); + auto skip = emitOverflowCheck(reg(2)); + if(Rdn > 0) mov64(mem(Rd), reg(0)); + setLabel(skip); return 0; } //DADDU Rd,Rs,Rt case 0x2d: { - lea(reg(1), Rd); - lea(reg(2), Rs); - lea(reg(3), Rt); - call(&CPU::DADDU); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) { + return 1; + } + + if(Rdn == 0) return 0; + + add64(reg(0), mem(Rs), mem(Rt)); + mov64(mem(Rd), reg(0)); return 0; } //DSUB Rd,Rs,Rt case 0x2e: { - lea(reg(1), Rd); - lea(reg(2), Rs); - lea(reg(3), Rt); - call(&CPU::DSUB); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + sub64(reg(0), mem(Rs), mem(Rt), set_o); + auto skip = emitOverflowCheck(reg(2)); + if(Rdn > 0) mov64(mem(Rd), reg(0)); + setLabel(skip); return 0; } //DSUBU Rd,Rs,Rt case 0x2f: { - lea(reg(1), Rd); - lea(reg(2), Rs); - lea(reg(3), Rt); - call(&CPU::DSUBU); - emitZeroClear(Rdn); + if 
(!checkDualAllowed(ctx)) return 1; + sub64(reg(0), mem(Rs), mem(Rt)); //no overflow check follows, so no flag is requested + if(Rdn > 0) mov64(mem(Rd), reg(0)); return 0; } @@ -1081,11 +1096,9 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { //DSLL Rd,Rt,Sa case 0x38: { - lea(reg(1), Rd); - lea(reg(2), Rt); - mov32(reg(3), imm(Sa)); - call(&CPU::DSLL); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + if (Rdn == 0) return 0; + shl64(mem(Rd), mem(Rt), imm(Sa)); return 0; } @@ -1107,21 +1120,17 @@ auto CPU::Recompiler::emitSPECIAL(u32 instruction) -> bool { //DSRA Rd,Rt,Sa case 0x3b: { - lea(reg(1), Rd); - lea(reg(2), Rt); - mov32(reg(3), imm(Sa)); - call(&CPU::DSRA); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + if (Rdn == 0) return 0; + ashr64(mem(Rd), mem(Rt), imm(Sa)); return 0; } //DSLL32 Rd,Rt,Sa case 0x3c: { - lea(reg(1), Rd); - lea(reg(2), Rt); - mov32(reg(3), imm(Sa+32)); - call(&CPU::DSLL); - emitZeroClear(Rdn); + if (!checkDualAllowed(ctx)) return 1; + if (Rdn == 0) return 0; + shl64(mem(Rd), mem(Rt), imm(Sa+32)); return 0; }