From cf67927f76099e9c1a922a5ba3514636e8d4caab Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Dec 2024 21:34:32 +0200 Subject: [PATCH 1/4] vita: yet another try to fix crashes libretro/pcsx_rearmed#856 --- Makefile.libretro | 2 +- libpcsxcore/new_dynarec/new_dynarec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index c8f99f93..7ef285d5 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -332,7 +332,7 @@ else ifeq ($(platform), vita) CFLAGS += -fsingle-precision-constant -mword-relocations -fno-unwind-tables CFLAGS += -fno-asynchronous-unwind-tables -ftree-vectorize #CFLAGS += -funroll-loops # ~280K bloat - #CFLAGS += -fno-optimize-sibling-calls # debug? + CFLAGS += -fno-optimize-sibling-calls # broken arm->thumb tailcalls? CFLAGS += -I$(VITASDK)/include -Ifrontend/vita CFLAGS += -DNO_DYLIB CFLAGS_LAST += -O3 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index f1f5e609..10315b53 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -9010,7 +9010,7 @@ static struct block_info *new_block_info(u_int start, u_int len, return block; } -static int new_recompile_block(u_int addr) +static int noinline new_recompile_block(u_int addr) { u_int pagelimit = 0; u_int state_rflags = 0; From bbdd626a0c0ee655391bb7454303b475c2443db0 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Dec 2024 21:36:52 +0200 Subject: [PATCH 2/4] drc: avoid mov pc DDI0406 A4.1.1 "Changing between Thumb state and ARM state" recommends against mov pc, and recent compilers completely avoid it too. 
--- libpcsxcore/new_dynarec/linkage_arm.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 01cb42ca..37afa972 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -165,7 +165,7 @@ FUNCTION(dyna_linker): beq 0f add r6, r5, r6, asr #6 /* old target */ teq r0, r6 - moveq pc, r0 /* Stale i-cache */ + bxeq r0 /* Stale i-cache */ mov r0, r4 mov r1, r6 bl ndrc_add_jump_out @@ -176,7 +176,7 @@ FUNCTION(dyna_linker): sub r1, r1, #2 add r1, r1, r2, lsr #8 str r1, [r5] - mov pc, r8 + bx r8 0: mov r0, r4 #else @@ -184,7 +184,7 @@ FUNCTION(dyna_linker): #endif ldr r1, [fp, #LO_hash_table_ptr] bl ndrc_get_addr_ht - mov pc, r0 + bx r0 .size dyna_linker, .-dyna_linker .align 2 @@ -234,7 +234,7 @@ FUNCTION(jump_vaddr_r7): FUNCTION(jump_vaddr_r0): ldr r1, [fp, #LO_hash_table_ptr] bl ndrc_get_addr_ht - mov pc, r0 + bx r0 .size jump_vaddr_r0, .-jump_vaddr_r0 .align 2 @@ -257,10 +257,10 @@ FUNCTION(cc_interrupt): tst r2, r2 ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} cmp r0, r9 - moveq pc, lr + bxeq lr ldr r1, [fp, #LO_hash_table_ptr] bl ndrc_get_addr_ht - mov pc, r0 + bx r0 .size cc_interrupt, .-cc_interrupt .align 2 @@ -317,7 +317,7 @@ FUNCTION(jump_to_new_pc): bne new_dyna_leave ldr r1, [fp, #LO_hash_table_ptr] bl ndrc_get_addr_ht - mov pc, r0 + bx r0 .size jump_to_new_pc, .-jump_to_new_pc .align 2 @@ -427,7 +427,7 @@ new_dyna_start_at_e: ldr r10, [fp, #LO_cycle] str r1, [fp, #LO_last_count] sub r10, r10, r1 - mov pc, r0 + bx r0 .size new_dyna_start, .-new_dyna_start /* --------------------------------------- */ @@ -456,7 +456,7 @@ new_dyna_start_at_e: .else \readop r0, [r1, r3, lsl #\tab_shift] .endif - movcc pc, lr + bxcc lr mov r2, r12 str r12, [fp, #LO_cycle] .endm @@ -495,7 +495,7 @@ FUNCTION(jump_handler_read32): .else \wrtop r1, [r3, r12, lsl #\tab_shift] .endif - movcc pc, lr + bxcc lr ldr r12, [fp, 
#LO_last_count] mov r0, r1 add r2, r2, r12 From 802ff0a44626c8ac8e0057b7645f8a11d3657d42 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 18 Dec 2024 01:28:11 +0200 Subject: [PATCH 3/4] psxinterpreter: avoid excessive icache clearing --- libpcsxcore/psxinterpreter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index c19f1c21..7e732558 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1243,7 +1243,8 @@ static void intNotify(enum R3000Anote note, void *data) { setupCop(psxRegs.CP0.n.SR); // fallthrough case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core? - memset(&ICache, 0xff, sizeof(ICache)); + if (fetch == fetchICache) + memset(&ICache, 0xff, sizeof(ICache)); break; case R3000ACPU_NOTIFY_CACHE_UNISOLATED: break; @@ -1340,8 +1341,10 @@ void intApplyConfig() { // the dynarec may occasionally call the interpreter, in such a case the // cache won't work (cache only works right if all fetches go through it) - if (!Config.icache_emulation || psxCpu != &psxInt) + if (!Config.icache_emulation || psxCpu != &psxInt) { fetch = fetchNoCache; + memset(&ICache, 0xff, sizeof(ICache)); + } else fetch = fetchICache; From 6994d2404b8ddcb818202d84b77bdc389724fa65 Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 18 Dec 2024 01:28:49 +0200 Subject: [PATCH 4/4] drc: allow address 0 in thread mode Otherwise the compile thread would spin endlessly. Noticed it when another bug caused exec at address 0. 
--- libpcsxcore/new_dynarec/emu_if.c | 32 +++++++++++++-------------- libpcsxcore/new_dynarec/new_dynarec.h | 3 +-- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 9ceca916..a9122c81 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -342,11 +342,10 @@ static void clear_local_cache(void) static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs, enum blockExecCaller block_caller) { - if (!ndrc_g.thread.busy) { + if (ndrc_g.thread.busy_addr == ~0u) { memcpy(ndrc_smrv_regs, regs->GPR.r, sizeof(ndrc_smrv_regs)); slock_lock(ndrc_g.thread.lock); - ndrc_g.thread.addr = regs->pc; - ndrc_g.thread.busy = 1; + ndrc_g.thread.busy_addr = regs->pc; slock_unlock(ndrc_g.thread.lock); scond_signal(ndrc_g.thread.cond); } @@ -357,7 +356,7 @@ static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs, { psxInt.ExecuteBlock(regs, block_caller); } - while (!regs->stop && ndrc_g.thread.busy && block_caller == EXEC_CALLER_OTHER); + while (!regs->stop && ndrc_g.thread.busy_addr != ~0u && block_caller == EXEC_CALLER_OTHER); psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); @@ -372,8 +371,7 @@ static void ari64_execute_threaded_once(struct psxRegisters *regs, *(void **)((char *)drc_local + LO_hash_table_ptr); void *target; - if (likely(!ndrc_g.thread.busy)) { - ndrc_g.thread.addr = 0; + if (likely(ndrc_g.thread.busy_addr == ~0u)) { target = ndrc_get_addr_ht_param(hash_table, regs->pc, ndrc_cm_no_compile); if (target) { @@ -412,12 +410,12 @@ static void ari64_execute_threaded_block(struct psxRegisters *regs, static void ari64_thread_sync(void) { - if (!ndrc_g.thread.lock || !ndrc_g.thread.busy) + if (!ndrc_g.thread.lock || ndrc_g.thread.busy_addr == ~0u) return; for (;;) { slock_lock(ndrc_g.thread.lock); slock_unlock(ndrc_g.thread.lock); - if (!ndrc_g.thread.busy) + if 
(ndrc_g.thread.busy_addr == ~0u) break; retro_sleep(0); } @@ -425,8 +423,8 @@ static void ari64_thread_sync(void) static int ari64_thread_check_range(unsigned int start, unsigned int end) { - u32 addr = ndrc_g.thread.addr; - if (!addr) + u32 addr = ndrc_g.thread.busy_addr; + if (addr == ~0u) return 0; addr &= 0x1fffffff; @@ -451,16 +449,17 @@ static void ari64_compile_thread(void *unused) slock_lock(ndrc_g.thread.lock); while (!ndrc_g.thread.exit) { - if (!ndrc_g.thread.busy) + addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr; + if (addr == ~0u) scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock); - addr = ndrc_g.thread.addr; - if (!ndrc_g.thread.busy || !addr || ndrc_g.thread.exit) + addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr; + if (addr == ~0u || ndrc_g.thread.exit) continue; target = ndrc_get_addr_ht_param(hash_table, addr, ndrc_cm_compile_in_thread); //printf("c %08x -> %p\n", addr, target); - ndrc_g.thread.busy = 0; + ndrc_g.thread.busy_addr = ~0u; } slock_unlock(ndrc_g.thread.lock); (void)target; @@ -490,7 +489,7 @@ static void ari64_thread_shutdown(void) slock_free(ndrc_g.thread.lock); ndrc_g.thread.lock = NULL; } - ndrc_g.thread.busy = ndrc_g.thread.addr = 0; + ndrc_g.thread.busy_addr = ~0u; } static void ari64_thread_init(void) @@ -514,7 +513,8 @@ static void ari64_thread_init(void) return; ari64_thread_shutdown(); - ndrc_g.thread.busy = ndrc_g.thread.addr = ndrc_g.thread.exit = 0; + ndrc_g.thread.exit = 0; + ndrc_g.thread.busy_addr = ~0u; if (enable) { ndrc_g.thread.lock = slock_new(); diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index 5d3057b7..0d3a6868 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -24,8 +24,7 @@ struct ndrc_globals void *cond; void *dirty_start; void *dirty_end; - unsigned int addr; - int busy; + unsigned int busy_addr; // 0 is valid, ~0 == none int exit; } thread; };