diff --git a/memory.x b/memory.x
index e6b80c473..add5c902f 100644
--- a/memory.x
+++ b/memory.x
@@ -34,3 +34,38 @@ SECTIONS {
         KEEP(*(.boot2));
     } > BOOT2
 } INSERT BEFORE .text;
+
+/* Place the initial image of per-core (thread-local) data, .tdata, in FLASH */
+SECTIONS {
+    .tdata : ALIGN(4)
+    {
+        . = ALIGN(4);
+        PROVIDE(__tdata_start = .);
+        *(.tdata .tdata.*);
+        . = ALIGN(4);
+        PROVIDE(__tdata_end = .);
+    } > FLASH
+    PROVIDE(__tdata_len = __tdata_end - __tdata_start); /* byte size of the .tdata image */
+} INSERT AFTER .data;
+
+/* Measure .tbss, then reserve one (.tdata + .tbss)-sized block in RAM for each core */
+SECTIONS {
+    .tbss (NOLOAD) : ALIGN(4)
+    {
+        . = ALIGN(4);
+        PROVIDE(__tbss_start = .);
+        *(.tbss .tbss.*);
+        *(.tcommon);
+        . = ALIGN(4);
+        PROVIDE(__tbss_end = .);
+    } > RAM
+    PROVIDE(__tbss_len = __tbss_end - __tbss_start); /* byte size of .tbss */
+
+    .tls_state (NOLOAD) : ALIGN(4) {
+        PROVIDE(TLS_CORE_0 = ALIGN(4)); /* start of the first block (TLS_STATE[0] in multicore.rs) */
+        . += __tdata_len + __tbss_len;
+        PROVIDE(TLS_CORE_1 = ALIGN(4)); /* start of the second block (TLS_STATE[1] in multicore.rs) */
+        . += __tdata_len + __tbss_len;
+    } > RAM
+} INSERT AFTER .bss;
+
diff --git a/rp2040-hal/Cargo.toml b/rp2040-hal/Cargo.toml
index 0718629ff..fec750101 100644
--- a/rp2040-hal/Cargo.toml
+++ b/rp2040-hal/Cargo.toml
@@ -106,6 +106,9 @@ rtic-monotonic = ["dep:rtic-monotonic"]
 # Implement `i2c-write-iter` traits
 i2c-write-iter = ["dep:i2c-write-iter"]
 
+# Enable use of thread-local variables for multicore state
+thread_local = []
+
 [[example]]
 # irq example uses cortex-m-rt::interrupt, need rt feature for that
 name = "gpio_irq_example"
diff --git a/rp2040-hal/src/multicore.rs b/rp2040-hal/src/multicore.rs
index d1b018402..afe809cf4 100644
--- a/rp2040-hal/src/multicore.rs
+++ b/rp2040-hal/src/multicore.rs
@@ -290,3 +290,68 @@ impl<'p> Core<'p> {
         }
     }
 }
+
+#[cfg(all(target_arch = "arm", feature = "thread_local"))]
+mod thread_local {
+    use core::arch::global_asm;
+    use core::ptr::addr_of;
+
+    extern "C" {
+        static TLS_CORE_0: u8;
+        static TLS_CORE_1: u8;
+    }
+    // Per-core TLS block addresses, indexed by CPUID in `__aeabi_read_tp`; typed `*const` here, but the asm hands the address back untyped in r0.
+    static mut TLS_STATE: [*const u8; 2] = [
+        // Point to the linker-allocated per-core blocks in `.tls_state` (placed after .bss by memory.x)
+        unsafe { addr_of!(TLS_CORE_0) },
+        unsafe { addr_of!(TLS_CORE_1) },
+    ];
+
+    // Define `__aeabi_read_tp` called by the compiler to get access to
+    // thread-local storage. Returns the current core's `TLS_STATE` entry in r0.
+    global_asm! {
+        ".pushsection .text.__aeabi_read_tp",
+        ".align 4",
+        ".p2align 4,,15",
+        ".global __aeabi_read_tp",
+        ".type __aeabi_read_tp,%function",
+
+        "__aeabi_read_tp:",
+        " push {{r1, lr}}",          // r1 is used as scratch below, so save it
+        " ldr r1, =0xd0000000",      // Load SIO CPUID addr
+        " ldr r1, [r1]",             // Get current CPUID
+        " lsls r1, r1, #2",          // Scale by 4 (pointer size) to index TLS_STATE
+        " ldr r0, ={tls_state}",     // Load TLS_STATE base addr
+        " ldr r0, [r0, r1]",         // r0 = TLS_STATE[CPUID], this core's TLS block
+        " pop {{r1, pc}}",           // Restore r1 and return
+
+        ".popsection",
+        tls_state = sym TLS_STATE,
+    }
+
+    // Intercept `__pre_init` to hook into the startup code and copy the .tdata image into both TLS_CORE_0 and TLS_CORE_1.
+    // NOTE(review): only __tdata_len bytes are written per block; the .tbss tail of each block is not cleared here - confirm it is zeroed elsewhere.
+    global_asm! {
+        ".pushsection .text.__pre_init",
+        ".align 4",
+        ".p2align 4,,15",
+        ".global __pre_init",
+        ".type __pre_init,%function",
+
+        "__pre_init:",
+        " push {{lr}}",              // Save return address across the calls below
+        " ldr r0, ={tls_core_0}",    // r0 = destination: TLS_CORE_0
+        " ldr r1, =__tdata_start",   // r1 = source: start of the .tdata image
+        " ldr r2, =__tdata_len",     // r2 = byte count: size of the .tdata image
+        " bl __aeabi_memcpy",        // Copy the initial values into the first block
+        " ldr r0, ={tls_core_1}",    // r0 = destination: TLS_CORE_1
+        " ldr r1, =__tdata_start",   // r1 = source (reloaded after the call)
+        " ldr r2, =__tdata_len",     // r2 = byte count (reloaded after the call)
+        " bl __aeabi_memcpy",        // Copy the initial values into the second block
+        " pop {{pc}}",               // Return to the startup code
+
+        ".popsection",
+        tls_core_0 = sym TLS_CORE_0,
+        tls_core_1 = sym TLS_CORE_1,
+    }
+}