From 51ce53be52b626176e58bde93664c0d86989377a Mon Sep 17 00:00:00 2001 From: Yen-Fu Chen Date: Tue, 18 Jun 2024 13:15:17 +0800 Subject: [PATCH] Introduce background compilation Given the significant runtime compilation overhead associated with performing aggressive optimizations, we have implemented a background compilation mechanism to mitigate this issue. When the runtime profiler identifies a strong hotspot, it adds a T2C compilation request to the wait queue. A background thread, which continuously monitors this queue, triggers T2C to process the requests and notifies the main thread upon completion by updating a flag. --- .github/workflows/main.yml | 5 ++++- src/emulate.c | 18 +++++++++++------- src/feature.h | 6 ++++++ src/riscv.c | 37 +++++++++++++++++++++++++++++++++++++ src/riscv_private.h | 19 ++++++++++++++++++- 5 files changed, 76 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3da6de63..9fbb2676 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -75,7 +75,7 @@ jobs: - name: undefined behavior test run: | make clean && make ENABLE_UBSAN=1 check -j$(nproc) - make ENABLE_JIT=1 clean clean && make ENABLE_JIT=1 ENABLE_UBSAN=1 check -j$(nproc) + make ENABLE_JIT=1 clean && make ENABLE_JIT=1 ENABLE_UBSAN=1 check -j$(nproc) host-arm64: needs: [detect-code-related-file-changes] @@ -134,6 +134,9 @@ jobs: run: | sudo apt-get update -q -y sudo apt-get install -q -y clang clang-tools libsdl2-dev libsdl2-mixer-dev + wget https://apt.llvm.org/llvm.sh + chmod +x ./llvm.sh + sudo ./llvm.sh 17 shell: bash - name: run scan-build without JIT run: make distclean && scan-build -v -o ~/scan-build --status-bugs --use-cc=clang --force-analyze-debug-code --show-description -analyzer-config stable-report-filename=true -enable-checker valist,nullability make ENABLE_EXT_F=0 ENABLE_SDL=0 ENABLE_JIT=0 diff --git a/src/emulate.c b/src/emulate.c index 3e6bf1e3..e156aaba 100644 --- a/src/emulate.c +++ 
b/src/emulate.c @@ -308,6 +308,9 @@ static block_t *block_alloc(riscv_t *rv) block->has_loops = false; block->n_invoke = 0; INIT_LIST_HEAD(&block->list); +#if RV32_HAS(T2C) + block->compiled = false; +#endif #endif return block; } @@ -993,13 +996,14 @@ void rv_step(void *arg) ((exec_t2c_func_t) block->func)(rv); prev = NULL; continue; - } /* check if the execution path is strong hotspot */ - if (block->n_invoke >= THRESHOLD) { - t2c_compile(block, - (uint64_t) ((memory_t *) PRIV(rv)->mem)->mem_base); - ((exec_t2c_func_t) block->func)(rv); - prev = NULL; - continue; + } /* check if invoking times of t1 generated code exceed threshold */ + else if (!block->compiled && block->n_invoke >= THRESHOLD) { + block->compiled = true; + queue_entry_t *entry = malloc(sizeof(queue_entry_t)); + entry->block = block; + pthread_mutex_lock(&rv->wait_queue_lock); + list_add(&entry->list, &rv->wait_queue); + pthread_mutex_unlock(&rv->wait_queue_lock); } #endif /* executed through the tier-1 JIT compiler */ diff --git a/src/feature.h b/src/feature.h index 706c4603..ee27936a 100644 --- a/src/feature.h +++ b/src/feature.h @@ -57,5 +57,11 @@ #define RV32_FEATURE_T2C 0 #endif +/* T2C depends on JIT configuration */ +#if !RV32_FEATURE_JIT +#undef RV32_FEATURE_T2C +#define RV32_FEATURE_T2C 0 +#endif + /* Feature test macro */ #define RV32_HAS(x) RV32_FEATURE_##x diff --git a/src/riscv.c b/src/riscv.c index 0657f692..a0d385cc 100644 --- a/src/riscv.c +++ b/src/riscv.c @@ -28,6 +28,9 @@ #include "riscv_private.h" #include "utils.h" #if RV32_HAS(JIT) +#if RV32_HAS(T2C) +#include <pthread.h> +#endif #include "cache.h" #include "jit.h" #define CODE_CACHE_SIZE (4 * 1024 * 1024) @@ -184,6 +187,27 @@ IO_HANDLER_IMPL(byte, write_b, W) #undef R #undef W +#if RV32_HAS(T2C) +static pthread_t t2c_thread; +static void *t2c_runloop(void *arg) +{ + riscv_t *rv = (riscv_t *) arg; + while (!rv->quit) { + if (!list_empty(&rv->wait_queue)) { + queue_entry_t *entry = + list_last_entry(&rv->wait_queue, queue_entry_t, list); 
pthread_mutex_lock(&rv->wait_queue_lock); + list_del_init(&entry->list); + pthread_mutex_unlock(&rv->wait_queue_lock); + t2c_compile(entry->block, + (uint64_t) ((memory_t *) PRIV(rv)->mem)->mem_base); + free(entry); + } + } + return NULL; +} +#endif + riscv_t *rv_create(riscv_user_t rv_attr) { assert(rv_attr); @@ -269,6 +293,14 @@ riscv_t *rv_create(riscv_user_t rv_attr) rv->jit_state = jit_state_init(CODE_CACHE_SIZE); rv->block_cache = cache_create(BLOCK_MAP_CAPACITY_BITS); assert(rv->block_cache); +#if RV32_HAS(T2C) + rv->quit = false; + /* prepare wait queue. */ + pthread_mutex_init(&rv->wait_queue_lock, NULL); + INIT_LIST_HEAD(&rv->wait_queue); + /* activate the background compilation thread. */ + pthread_create(&t2c_thread, NULL, t2c_runloop, rv); +#endif #endif return rv; @@ -353,6 +385,11 @@ void rv_delete(riscv_t *rv) memory_delete(attr->mem); block_map_destroy(rv); #else +#if RV32_HAS(T2C) + rv->quit = true; + pthread_join(t2c_thread, NULL); + pthread_mutex_destroy(&rv->wait_queue_lock); +#endif mpool_destroy(rv->chain_entry_mp); jit_state_exit(rv->jit_state); cache_free(rv->block_cache); diff --git a/src/riscv_private.h b/src/riscv_private.h index 95efe76d..33d8cb3e 100644 --- a/src/riscv_private.h +++ b/src/riscv_private.h @@ -14,6 +14,9 @@ #include "riscv.h" #include "utils.h" #if RV32_HAS(JIT) +#if RV32_HAS(T2C) +#include <pthread.h> +#endif #include "cache.h" #endif @@ -70,7 +73,10 @@ typedef struct block { bool translatable; /**< Determine the block has RV32AF insturctions or not */ bool has_loops; /**< Determine the block has loop or not */ - uint32_t offset; /**< The machine code offset in T1 code cache */ +#if RV32_HAS(T2C) + bool compiled; /**< The T2C request is enqueued or not */ +#endif + uint32_t offset; /**< The machine code offset in T1 code cache */ uint32_t n_invoke; /**< The invoking times of T1 machine code */ void *func; /**< The function pointer of T2 machine code */ struct list_head list; @@ -82,6 +88,12 @@ typedef struct { block_t *block; 
struct list_head list; } chain_entry_t; +#if RV32_HAS(T2C) +typedef struct { + block_t *block; + struct list_head list; +} queue_entry_t; +#endif #endif typedef struct { @@ -134,6 +146,11 @@ struct riscv_internal { #else struct cache *block_cache; struct mpool *chain_entry_mp; +#if RV32_HAS(T2C) + struct list_head wait_queue; + pthread_mutex_t wait_queue_lock; + volatile bool quit; /**< Determine the main thread is terminated or not */ +#endif #endif struct mpool *block_mp, *block_ir_mp;