diff --git a/Makefile b/Makefile index 713cbe90d..7a623c2e3 100644 --- a/Makefile +++ b/Makefile @@ -109,6 +109,10 @@ endif -include Makefile.local +#OBJS += platform/common/host_dasm.o cpu/sh2/mame/sh2dasm.o +#OBJS += platform/libpicofe/linux/host_dasm.o cpu/sh2/mame/sh2dasm.o +#LDFLAGS += -lbfd -lopcodes -liberty + # TODO this should somehow go to the platform directory? ifeq "$(PLATFORM)" "generic" PicoDrive.zip: $(TARGET) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index ecaf06e29..27d988d26 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -92,6 +92,7 @@ static inline void emith_update_add(void *base, void *end) pageflush[i].base = p_base, pageflush[i].end = p_end; } +#if 1 // peephole optimizer. ATM only tries to reduce interlock #define EMIT_CACHE_SIZE 6 struct emit_op { @@ -187,6 +188,16 @@ static void emith_flush(void) EMIT_PTR(emit_ptr, emit_cache[i].op); emit_index = 0; } +#else +static NOINLINE void EMIT(u32 op, u32 dst, u32 src) +{ + EMIT_PTR(tcache_ptr, op); \ + COUNT_OP; \ +} +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#endif + #define A_COND_AL 0xe #define A_COND_EQ 0x0 @@ -574,6 +585,7 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int u32 ror2, v; \ for (ror2 = 0, v = imm; v && !(v & 3); v >>= 2) \ ror2--; \ +if (v & ~0xff) { printf ("xxl %x op %x\n",imm,op); exit(1); } \ EOP_C_DOP_IMM(cond, op, 1, r, 0, ror2 & 0x0f, v & 0xff); \ } while (0) @@ -1466,6 +1478,7 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) #ifdef T // T bit handling +#if 1 static int tcond = -1; #define emith_invert_cond(cond) \ @@ -1512,4 +1525,41 @@ static int emith_tst_t(int sr, int tf) } else return tf ? tcond : emith_invert_cond(tcond); } +#else +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + EMITH_SJMP_START(emith_invert_cond(cond)); + emith_or_r_imm_c(cond, sr, T); + EMITH_SJMP_END(emith_invert_cond(cond)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif #endif diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 0714eb941..71226eed4 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -194,7 +194,7 @@ static char sh2dasm_buff[64]; (ulong)(sh2)->r[8], (ulong)(sh2)->r[9], (ulong)(sh2)->r[10], (ulong)(sh2)->r[11], \ (ulong)(sh2)->r[12], (ulong)(sh2)->r[13], (ulong)(sh2)->r[14], (ulong)(sh2)->r[15]); \ printf("%csh2 pc-ml %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", ms, \ - (ulong)(sh2)->pc, (ulong)(sh2)->ppc, (ulong)(sh2)->pr, (ulong)(sh2)->sr&0xfff, \ + (ulong)(sh2)->pc, (ulong)(sh2)->ppc, (ulong)(sh2)->pr, (ulong)(sh2)->sr/*&0xfff*/, \ (ulong)(sh2)->gbr, (ulong)(sh2)->vbr, (ulong)(sh2)->mach, (ulong)(sh2)->macl); \ printf("%csh2 tmp-p %08x %08x %08x %08x %08x %08lx %08x %08x\n", ms, \ (sh2)->drc_tmp, (sh2)->irq_cycles, \ @@ -260,7 +260,7 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) printf("trace eof at %08lx\n",ftell(trace[idx])); exit(1); } - fsh2.sr = (fsh2.sr & 0x3ff) | (sh2->sr & ~0x3ff); +// fsh2.sr = (fsh2.sr & 0x3ff) | (sh2->sr & ~0x3ff); fsh2.is_slave = idx; if (memcmp(&fsh2, sh2, offsetof(SH2, read8_map)) || 0)//memcmp(&fsh2.pdb_io_csum, &sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum))) @@ -1792,7 +1792,6 @@ static int rcache_allocate(int what, int minprio) } } - if (prio < minprio || oldest == -1) return -1; @@ -3029,6 +3028,16 @@ static void emit_do_static_regs(int is_write, int tmpr) } } +static void REGPARM(3) sh2_delay_before(SH2 *sh2, int cycles, int reg) +{ + printf("delay: s %d pc %x c %d/%d r %d(%d)\n",sh2->is_slave,sh2->pc,sh2->sr>>12,cycles,reg,sh2->r[reg]); +} + +static void REGPARM(3) sh2_delay_after(SH2 *sh2, int cycles, int reg) +{ + printf("delay: s %d c %d r %d(%d)\n",sh2->is_slave,sh2->sr>>12,reg,sh2->r[reg]); +} + #if DIV_OPTIMIZER // divide operation replacement functions, called by compiled code. Only the // 32:16 cases and the 64:32 cases described in the SH2 prog man are replaced. @@ -3322,7 +3331,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) block = dr_find_inactive_block(tcache_id, crc, base_pc, end_pc - base_pc, base_literals, end_literals - base_literals); -#if (DRC_DEBUG & (256|512)) +#if 0//(DRC_DEBUG & (256|512)) // remove any (partial) old blocks which might get in the way, to make sure // the same branch targets are used in the recording/playback code. Not needed // normally since the SH2 code wasn't overwritten and should be the same. @@ -4964,7 +4973,40 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // idle or delay loop emit_sync_t_to_sr(); +#if 0 + v = SHR_SR; + if (guest_regs[v].vreg >= 0 && (guest_regs[v].flags & GRF_DIRTY)) + emith_ctx_write(cache_regs[guest_regs[v].vreg].hreg, v * 4); + v = drcf.delay_reg; + if (guest_regs[v].vreg >= 0 && (guest_regs[v].flags & GRF_DIRTY)) + emith_ctx_write(cache_regs[guest_regs[v].vreg].hreg, v * 4); + tmp = rcache_used_hregs_mask(); + emith_save_caller_regs(tmp); + host_arg2reg(tmp2, 0); + emith_move_r_imm(tmp2, pc); + emith_ctx_write(tmp2, SHR_PC * 4); + emith_pass_arg_r(0, CONTEXT_REG); + emith_pass_arg_imm(1, cycles); + emith_pass_arg_imm(2, drcf.delay_reg); + emith_abicall(sh2_delay_before); + emith_restore_caller_regs(tmp); +#endif emith_sh2_delay_loop(cycles, drcf.delay_reg); +#if 0 + v = SHR_SR; + if (guest_regs[v].vreg >= 0 && (guest_regs[v].flags & GRF_DIRTY)) + emith_ctx_write(cache_regs[guest_regs[v].vreg].hreg, v * 4); + v = drcf.delay_reg; + if (guest_regs[v].vreg >= 0 && (guest_regs[v].flags & GRF_DIRTY)) + emith_ctx_write(cache_regs[guest_regs[v].vreg].hreg, v * 4); + tmp = rcache_used_hregs_mask(); + emith_save_caller_regs(tmp); + emith_pass_arg_r(0, CONTEXT_REG); + emith_pass_arg_imm(1, cycles); + emith_pass_arg_imm(2, drcf.delay_reg); + emith_abicall(sh2_delay_after); + emith_restore_caller_regs(tmp); +#endif rcache_unlock_all(); // may lock delay_reg drcf.polling = drcf.loop_type = drcf.pinning = 0; } diff --git a/pico/cd/libchdr b/pico/cd/libchdr index 0b5a86fe0..e62ac5995 160000 --- a/pico/cd/libchdr +++ b/pico/cd/libchdr @@ -1 +1 @@ -Subproject commit 0b5a86fe0c8b64497d7c6cbc23db72fd6fb1b982 +Subproject commit e62ac5995b1c7ef65ece35293914843b8ee57d49 diff --git a/pico/videoport.c b/pico/videoport.c index 4adf7a7ed..1410d1e9b 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -944,13 +944,8 @@ PICO_INTERNAL_ASM void PicoVideoWrite(u32 a,unsigned short d) } d &= 0xff; - if (num == 0 && !(pvid->reg[0]&2) && (d&2)) - pvid->hv_latch = PicoVideoRead(0x08); - if (num == 12 && ((pvid->reg[12]^d)&0x01)) - PicoVideoFIFOMode(pvid->reg[1]&0x40, d & 1); if (num == 1 && ((pvid->reg[1]^d)&0x40)) { - PicoVideoFIFOMode(d & 0x40, pvid->reg[12]&1); // handle line blanking before line rendering. Only the last switch // before the 1st sync for other reasons is honoured. PicoVideoSync(1); @@ -965,15 +960,21 @@ PICO_INTERNAL_ASM void PicoVideoWrite(u32 a,unsigned short d) switch (num) { case 0x00: + if ((~dold&d)&2) { + unsigned c = SekCyclesDone() - Pico.t.m68c_line_start; + pvid->hv_latch = VdpFIFO.fifo_hcounts[c/clkdiv] | (pvid->v_counter << 8); + } elprintf(EL_INTSW, "hint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x10)>>4, (d&0x10)>>4, SekCyclesDone(), (pvid->pending_ints&0x10)>>4, SekPc); goto update_irq; case 0x01: - elprintf(EL_INTSW, "vint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x20)>>5, - (d&0x20)>>5, SekCyclesDone(), (pvid->pending_ints&0x20)>>5, SekPc); + if ((d^dold)&0x40) + PicoVideoFIFOMode(d & 0x40, pvid->reg[12]&1); if (!(pvid->status & PVS_VB2)) pvid->status &= ~SR_VB; pvid->status |= ((d >> 3) ^ SR_VB) & SR_VB; // forced blanking + elprintf(EL_INTSW, "vint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x20)>>5, + (d&0x20)>>5, SekCyclesDone(), (pvid->pending_ints&0x20)>>5, SekPc); goto update_irq; case 0x05: case 0x06: @@ -982,7 +983,10 @@ PICO_INTERNAL_ASM void PicoVideoWrite(u32 a,unsigned short d) case 0x0c: // renderers should update their palettes if sh/hi mode is changed if ((d^dold)&8) Pico.m.dirtyPal = 1; - if ((d^dold)&1) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + if ((d^dold)&1) { + PicoVideoFIFOMode(pvid->reg[1]&0x40, d & 1); + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + } break; default: return; @@ -1154,8 +1158,11 @@ unsigned char PicoVideoRead8CtlL(int is_from_z80) unsigned char PicoVideoRead8HV_H(int is_from_z80) { - elprintf(EL_HVCNT, "vcounter: %02x [%u] @ %06x", Pico.video.v_counter, SekCyclesDone(), SekPc); - return Pico.video.v_counter; + u32 d = Pico.video.v_counter; + if (Pico.video.reg[0]&2) + d = Pico.video.hv_latch >> 8; + elprintf(EL_HVCNT, "vcounter: %02x [%u] @ %06x", d, SekCyclesDone(), SekPc); + return d; } // FIXME: broken diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index fa3f7e5ac..d8be1670b 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -223,17 +223,18 @@ void cache_flush_d_inval_i(void *start, void *end) { #ifdef __arm__ size_t len = (char *)end - (char *)start; - (void)len; #if defined(__BLACKBERRY_QNX__) - msync(start, end - start, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); + msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); #elif defined(__MACH__) sys_dcache_flush(start, len); sys_icache_invalidate(start, len); #elif defined(_3DS) + (void)len; ctr_flush_invalidate_cache(); #elif defined(VITA) sceKernelSyncVMDomain(sceBlock, start, len); #else + (void)len; __clear_cache(start, end); #endif #endif @@ -550,7 +551,7 @@ void emu_video_mode_change(int start_line, int line_count, int start_col, int co vm_current_start_col = start_col; vm_current_col_count = col_count; - // 8bit renderes create a 328x256 CLUT image, while 16bit creates 320x240 RGB + // 8bit renderers create a 328x256 CLUT image, 16bit creates 320x240 RGB #if defined(RENDER_GSKIT_PS2) // calculate the borders of the real image inside the picodrive image vout_width = (vout_16bit ? VOUT_MAX_WIDTH : VOUT_8BIT_WIDTH); diff --git a/tools/release.sh b/tools/release.sh index 6e789bf83..26b791481 100755 --- a/tools/release.sh +++ b/tools/release.sh @@ -14,7 +14,7 @@ # gcw0,opendingux:ghcr.io/irixxxx/toolchain-opendingux # miyoo: ghcr.io/irixxxx/toolchain-miyoo # psp: docker.io/pspdev/pspdev -# psp: docker.io/ps2dev/ps2dev +# ps2: docker.io/ps2dev/ps2dev # pandora: ghcr.io/irixxxx/toolchain-pandora # odbeta-gcw0: ghcr.io/irixxxx/toolchain-odbeta-gcw0 # odbeta-lepus: ghcr.io/irixxxx/toolchain-odbeta-lepus