From d8f2909e2386cc848ec1a6f8120e9330979f9823 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 17 Dec 2024 02:20:39 +0200 Subject: [PATCH] drc: another try to fix thread cache stuff libretro/pcsx_rearmed#856 --- libpcsxcore/new_dynarec/emu_if.c | 24 ++++++++++++++++++------ libpcsxcore/new_dynarec/new_dynarec.c | 24 +++++++++++++++++------- libpcsxcore/new_dynarec/new_dynarec.h | 4 +++- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index a19bd2dd..9ceca916 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -17,6 +17,7 @@ #include "../gte_arm.h" #include "../gte_neon.h" #include "compiler_features.h" +#include "arm_features.h" #define FLAGLESS #include "../gte.h" #ifdef NDRC_THREAD @@ -317,13 +318,24 @@ static void ari64_apply_config() #ifdef NDRC_THREAD static void clear_local_cache(void) { -#ifdef _3DS - if (ndrc_g.thread.cache_dirty) { - ndrc_g.thread.cache_dirty = 0; - ctr_invalidate_icache(); - } +#if defined(__arm__) || defined(__aarch64__) + if (ndrc_g.thread.dirty_start) { + // see "Ensuring the visibility of updates to instructions" + // in v7/v8 reference manuals (DDI0406, DDI0487 etc.) +#if defined(__aarch64__) || defined(HAVE_ARMV8) + // the actual clean/invalidate is broadcast to all cores, + // the manual only prescribes an isb + __asm__ volatile("isb"); +//#elif defined(_3DS) +// ctr_invalidate_icache(); #else - // hopefully nothing is needed, as tested on r-pi4 and switch + // while on v6 this is always required, on v7 it depends on + // "Multiprocessing Extensions" being present, but that is difficult + // to detect so do it always for now + new_dyna_clear_cache(ndrc_g.thread.dirty_start, ndrc_g.thread.dirty_end); +#endif + ndrc_g.thread.dirty_start = ndrc_g.thread.dirty_end = 0; + } #endif } diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 6f64e961..f1f5e609 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -474,12 +474,7 @@ static void mprotect_w_x(void *start, void *end, int is_x) #endif } -static void start_tcache_write(void *start, void *end) -{ - mprotect_w_x(start, end, 0); -} - -static void end_tcache_write(void *start, void *end) +void new_dyna_clear_cache(void *start, void *end) { #if defined(__arm__) || defined(__aarch64__) size_t len = (char *)end - (char *)start; @@ -495,7 +490,6 @@ static void end_tcache_write(void *start, void *end) ctr_clear_cache_range(start, end); else ctr_clear_cache(); - ndrc_g.thread.cache_dirty = 1; #elif defined(HAVE_LIBNX) if (g_jit.type == JitType_CodeMemory) { armDCacheClean(start, len); @@ -512,6 +506,22 @@ static void end_tcache_write(void *start, void *end) #endif (void)len; #endif +} + +static void start_tcache_write(void *start, void *end) +{ + mprotect_w_x(start, end, 0); +} + +static void end_tcache_write(void *start, void *end) +{ +#ifdef NDRC_THREAD + if (!ndrc_g.thread.dirty_start || (size_t)ndrc_g.thread.dirty_start > (size_t)start) + ndrc_g.thread.dirty_start = start; + if ((size_t)ndrc_g.thread.dirty_end < (size_t)end) + ndrc_g.thread.dirty_end = end; +#endif + new_dyna_clear_cache(start, end); mprotect_w_x(start, end, 1); } diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index 411d8678..5d3057b7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -22,10 +22,11 @@ struct ndrc_globals void *handle; void *lock; void *cond; + void *dirty_start; + void *dirty_end; unsigned int addr; int busy; int exit; - int cache_dirty; // 3ds only } thread; }; extern struct ndrc_globals ndrc_g; @@ -40,6 +41,7 @@ void new_dynarec_print_stats(void); int new_dynarec_quick_check_range(unsigned int start, unsigned int end); void new_dynarec_invalidate_range(unsigned int start, unsigned int end); void new_dynarec_invalidate_all_pages(void); +void new_dyna_clear_cache(void *start, void *end); void new_dyna_start(void *context); void new_dyna_start_at(void *context, void *compiled_code);