Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support per-core state using #[thread_local] #794

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions memory.x
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,37 @@ SECTIONS {
KEEP(*(.boot2));
} > BOOT2
} INSERT BEFORE .text;

/*
 * Per-core (thread) data into flash.
 *
 * Gathers every `.tdata` / `.tdata.*` input section (thread-local variables
 * with a non-zero initializer) into one output section placed in FLASH. This
 * is the read-only initial image; it is bracketed by `__tdata_start` and
 * `__tdata_end`, and `__tdata_len` is its size in bytes. Both ends are padded
 * to a 4-byte boundary, so `__tdata_len` is always a multiple of 4.
 */
SECTIONS {
.tdata : ALIGN(4)
{
. = ALIGN(4);
PROVIDE(__tdata_start = .);
*(.tdata .tdata.*);
. = ALIGN(4);
PROVIDE(__tdata_end = .);
} > FLASH
/* Absolute symbol: its *address* is the byte length of the image. */
PROVIDE(__tdata_len = __tdata_end - __tdata_start);
} INSERT AFTER .data;

/*
 * Size per-core state and allocate bss space for each core.
 *
 * `.tbss` gathers the zero-initialized thread-local input sections so that
 * their total size (`__tbss_len`, a multiple of 4) is known. `.tls_state`
 * then reserves two RAM blocks of `__tdata_len + __tbss_len` bytes each,
 * one starting at `TLS_CORE_0` and one at `TLS_CORE_1`.
 *
 * NOTE(review): `.tls_state` is a separate NOLOAD section inserted after
 * `.bss`, so it is presumably not covered by the runtime's `.bss` zeroing;
 * confirm that something zeroes the `.tbss` part of each block at startup.
 * NOTE(review): everything here is aligned to 4 bytes; confirm that no
 * thread-local variable requires a larger alignment.
 */
SECTIONS {
.tbss (NOLOAD) : ALIGN(4)
{
. = ALIGN(4);
PROVIDE(__tbss_start = .);
*(.tbss .tbss.*);
*(.tcommon);
. = ALIGN(4);
PROVIDE(__tbss_end = .);
} > RAM
/* Absolute symbol: its *address* is the byte length of the zero-init data. */
PROVIDE(__tbss_len = __tbss_end - __tbss_start);

.tls_state (NOLOAD) : ALIGN(4) {
/* Core 0's block: room for the .tdata copy followed by the .tbss data. */
PROVIDE(TLS_CORE_0 = ALIGN(4));
. += __tdata_len + __tbss_len;
/* Core 1's block, laid out identically and immediately after core 0's. */
PROVIDE(TLS_CORE_1 = ALIGN(4));
. += __tdata_len + __tbss_len;
} > RAM
} INSERT AFTER .bss;
9 changes: 9 additions & 0 deletions rp2040-hal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ bitfield = { version = "0.14.0" }

i2c-write-iter = { version = "1.0.0", features = ["async"], optional = true }

cortex-m-rt = { version = "0.7", optional = true }

[dev-dependencies]
cortex-m-rt = "0.7"
cortex-m-rtic = "1.1.4"
Expand Down Expand Up @@ -106,6 +108,9 @@ rtic-monotonic = ["dep:rtic-monotonic"]
# Implement `i2c-write-iter` traits
i2c-write-iter = ["dep:i2c-write-iter"]

# Enable use of thread-local variables for multicore state
thread_local = ["dep:cortex-m-rt"]

[[example]]
# irq example uses cortex-m-rt::interrupt, need rt feature for that
name = "gpio_irq_example"
Expand Down Expand Up @@ -198,6 +203,10 @@ required-features = ["critical-section-impl"]
name = "multicore_polyblink"
required-features = ["critical-section-impl"]

[[example]]
name = "multicore_percore_data"
required-features = ["critical-section-impl", "thread_local"]

[[example]]
name = "pio_blink"
required-features = ["critical-section-impl"]
Expand Down
166 changes: 166 additions & 0 deletions rp2040-hal/examples/multicore_percore_data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
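//! # Multicore Per-Core Data Example
//!
//! This application blinks two LEDs on GPIOs 2 and 3 at different rates
//! (toggling them 3 and 4 times per second respectively). Both cores run the
//! same `blinker` function; each one reads its own copy of `STATE`, which is
//! declared `#[thread_local]`.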
//!
//! See the `Cargo.toml` file for Copyright and licence details.
#![no_std]
//#![cfg(feature = "thread_local")]
#![feature(thread_local)]
#![no_main]

use core::cell::RefCell;

use cortex_m::delay::Delay;

use hal::clocks::Clock;
use hal::gpio::{DynPinId, FunctionSio, Pin, Pins, PullDown, SioOutput};
use hal::multicore::{Multicore, Stack};
use hal::sio::Sio;
// Ensure we halt the program on panic (if we don't mention this crate it won't
// be linked)
use panic_halt as _;

// Alias for our HAL crate
use rp2040_hal as hal;

// A shorter alias for the Peripheral Access Crate, which provides low-level
// register access
use hal::pac;

// Some traits we need
use embedded_hal::digital::StatefulOutputPin;

/// The linker will place this boot block at the start of our program image. We
/// need this to help the ROM bootloader get our code up and running.
/// Note: This boot block is not necessary when using a rp-hal based BSP
/// as the BSPs already perform this step.
#[link_section = ".boot2"]
#[used]
pub static BOOT2: [u8; 256] = rp2040_boot2::BOOT_LOADER_GENERIC_03H;

/// External high-speed crystal on the Raspberry Pi Pico board is 12 MHz. Adjust
/// if your board has a different frequency
const XTAL_FREQ_HZ: u32 = 12_000_000u32;

/// The rate at which core 0 will toggle its LED (Hz). One toggle flips the LED
/// once, so a complete on/off cycle takes two toggles.
const CORE0_FREQ: u32 = 3;
/// The rate at which core 1 will toggle its LED (Hz). One toggle flips the LED
/// once, so a complete on/off cycle takes two toggles.
const CORE1_FREQ: u32 = 4;
/// The delay between each toggle of core 0's LED (us). Integer division, so
/// the value is truncated (333_333 us).
const CORE0_DELAY: u32 = 1_000_000 / CORE0_FREQ;
/// The delay between each toggle of core 1's LED (us). Integer division
/// (250_000 us).
const CORE1_DELAY: u32 = 1_000_000 / CORE1_FREQ;

/// Stack for core 1
///
/// Core 0 gets its stack via the normal route - any memory not used by static
/// values is reserved for stack and initialised by cortex-m-rt.
/// To get the same for Core 1, we would need to compile everything separately
/// and modify the linker file for both programs, and that's quite annoying.
/// So instead, core1.spawn takes a [usize] which gets used for the stack.
/// NOTE: We use the `Stack` struct here to ensure that it has 32-byte
/// alignment, which allows the stack guard to take up the least amount of
/// usable RAM.
static mut CORE1_STACK: Stack<4096> = Stack::new();

/// State for the blinker
///
/// Everything `blinker` needs to toggle one LED at a fixed interval. One
/// value of this type is stored per core in `STATE`.
struct BlinkState {
/// The output pin driving the LED; type-erased to `DynPinId` so that both
/// cores can use the same struct type for different GPIOs.
led: Pin<DynPinId, FunctionSio<SioOutput>, PullDown>,
/// Delay provider used to wait between toggles.
delay: Delay,
/// Time to wait between toggles, in microseconds (passed to `delay_us`).
delay_time: u32,
}

/// Per core blinker state
///
/// Declared `#[thread_local]` so that each core gets its own instance. It
/// starts out as `None`; `main` fills in core 0's copy, and the closure
/// spawned on core 1 fills in core 1's copy, before each calls `blinker`.
#[thread_local]
static STATE: RefCell<Option<BlinkState>> = RefCell::new(None);

/// Blink which ever LED with whatever delay, according to the per-core state.
fn blinker() -> ! {
let mut state = STATE.borrow_mut();
let BlinkState {
led,
delay,
delay_time,
} = state.as_mut().unwrap();
loop {
led.toggle().unwrap();
delay.delay_us(*delay_time);
}
}

/// Entry point to our bare-metal application.
///
/// The `#[rp2040_hal::entry]` macro ensures the Cortex-M start-up code calls this function
/// as soon as all global variables and the spinlock are initialised.
#[rp2040_hal::entry]
fn main() -> ! {
// Grab our singleton objects
let mut pac = pac::Peripherals::take().unwrap();
let core = pac::CorePeripherals::take().unwrap();

// Set up the watchdog driver - needed by the clock setup code
let mut watchdog = hal::watchdog::Watchdog::new(pac.WATCHDOG);

// Configure the clocks
let clocks = hal::clocks::init_clocks_and_plls(
XTAL_FREQ_HZ,
pac.XOSC,
pac.CLOCKS,
pac.PLL_SYS,
pac.PLL_USB,
&mut pac.RESETS,
&mut watchdog,
)
.unwrap();

let sys_freq = clocks.system_clock.freq().to_Hz();

// Set up the GPIO pins
let mut sio = Sio::new(pac.SIO);
let pins = Pins::new(
pac.IO_BANK0,
pac.PADS_BANK0,
sio.gpio_bank0,
&mut pac.RESETS,
);
let led1 = pins.gpio2.into_push_pull_output();
let led2 = pins.gpio3.into_push_pull_output();

// Start up the second core to blink the second LED
let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo);
let cores = mc.cores();
let core1 = &mut cores[1];
core1
.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
// Get the second core's copy of the `CorePeripherals`, which are per-core.
// Unfortunately, `cortex-m` doesn't support this properly right now,
// so we have to use `steal`.
let core = unsafe { pac::CorePeripherals::steal() };
// Set up the delay for the second core.
let delay = Delay::new(core.SYST, sys_freq);

STATE.borrow_mut().replace(BlinkState {
led: led2.into_dyn_pin(),
delay,
delay_time: CORE1_DELAY,
});

// Blink the second LED.
blinker();
})
.unwrap();

// Set up the delay for the first core.
let delay = Delay::new(core.SYST, sys_freq);

// Blink the first LED.
STATE.borrow_mut().replace(BlinkState {
led: led1.into_dyn_pin(),
delay,
delay_time: CORE0_DELAY,
});
blinker();
}

// End of file
89 changes: 89 additions & 0 deletions rp2040-hal/src/multicore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,47 @@
//! For inter-processor communications, see [`crate::sio::SioFifo`] and [`crate::sio::Spinlock0`]
//!
//! For a detailed example, see [examples/multicore_fifo_blink.rs](https://github.com/rp-rs/rp-hal/tree/main/rp2040-hal/examples/multicore_fifo_blink.rs)
//!
//! ## Per-core static data
//!
//! Both cores share the same memory, so a `static` variable will be accessible
//! and shared by both, requiring the same care as it would in a multi-threaded
//! program.
//!
//! With the `thread_local` feature enabled, this module supports the use of the
//! ([unstable](https://github.com/rust-lang/rust/issues/29594))
//! `#[thread_local]` attribute to make these per-core variables. This allows
//! the same code to run on both cores but with its own core-specific static
//! state, such as maintaining program state, or for things like DMA buffers.
//!
//! For example:
//! ```rust,ignore
//! #![feature(thread_local)]
//! # use core::cell::RefCell;
//!
//! #[thread_local]
//! static MY_COUNTER: RefCell<usize> = RefCell::new(0);
//!
//! fn next_id() -> usize {
//! MY_COUNTER.replace_with(|c| *c + 1)
//! }
//! ```
//!
//! Each core will get its own instance of the `MY_COUNTER` variable. Since
//! these are not shared, they do not need atomic operations to update.
//!
//! These core-local variables are initialized on program startup and retain
//! their value from there on, even between invocations of [`Core::spawn`].
//!
//! Note that this requires some setup in the linker script to allocate space
//! for the static data. See memory.x for details.
//!
//! Zero-initialized variables are placed in the executable's `.tbss` section,
//! and variables initialized with non-zero constants are placed in its
//! `.tdata` section. Each core then gets its own block of RAM (in the
//! `.tls_state` section, which the linker script places after `.bss`) large
//! enough to hold both; the `.tdata` initial values are copied into each
//! core's block at program startup. Note that this uses the `__pre_init`
//! hook to do this, so it won't be available for other uses.

use core::mem::ManuallyDrop;
use core::sync::atomic::compiler_fence;
Expand Down Expand Up @@ -290,3 +331,51 @@ impl<'p> Core<'p> {
}
}
}

/// Runtime support for `#[thread_local]` statics, one instance per core.
///
/// Provides `__aeabi_read_tp`, which selects the calling core's TLS block,
/// and a `pre_init` hook that initialises both blocks before `main` runs.
/// The blocks (`TLS_CORE_0`, `TLS_CORE_1`) and the `__tdata_*` / `__tbss_*`
/// symbols are provided by the linker script.
#[cfg(all(target_arch = "arm", feature = "thread_local"))]
mod thread_local {
    use core::arch::global_asm;
    use core::ptr::{addr_of, addr_of_mut};

    extern "C" {
        /// First byte of core 0's TLS block in RAM.
        static mut TLS_CORE_0: u8;
        /// First byte of core 1's TLS block in RAM.
        static mut TLS_CORE_1: u8;
        /// First byte of the `.tdata` initial-value image.
        static __tdata_start: u8;
        /// Absolute linker symbol: its *address* is the `.tdata` size in bytes.
        static __tdata_len: u8;
        /// Absolute linker symbol: its *address* is the `.tbss` size in bytes.
        static __tbss_len: u8;
    }

    // Define `__aeabi_read_tp` called by the compiler to get access to
    // thread-local storage. It returns, in r0, the address of the TLS block
    // belonging to the core it is called on.
    //
    // NOTE(review): the ARM EABI TLS layout places a two-word control block
    // at the thread pointer, ahead of the TLS data. Confirm that the offsets
    // the toolchain generates for this linker layout match returning the
    // block start unadjusted.
    global_asm! {
        ".pushsection .text.__aeabi_read_tp",
        ".align 4",
        ".p2align 4,,15",
        ".global __aeabi_read_tp",
        ".type __aeabi_read_tp,%function",

        "__aeabi_read_tp:",
        "    ldr r0, =0xd0000000",  // Load SIO CPUID addr
        "    ldr r0, [r0]",         // Load CPUID
        "    cmp r0, #0",           // Check core 0
        "    ldr r0, ={core_0}",    // Set TLS_CORE_0 (ldr leaves the flags alone)
        "    beq 1f",               // skip if done
        "    ldr r0, ={core_1}",    // Set TLS_CORE_1
        "1:  bx lr",

        ".popsection",
        core_0 = sym TLS_CORE_0,
        core_1 = sym TLS_CORE_1,
    }

    // Intercept __pre_init to hook into the startup code to initialise
    // TLS_CORE_[01]: the `.tdata` image is copied to the start of each block
    // and the `.tbss` part that follows it is zero-filled. The blocks live in
    // a NOLOAD section outside `.bss`, so without the explicit zero-fill the
    // zero-initialised thread-local variables would start out holding
    // whatever happened to be in RAM.
    //
    // NB: Run as the very first thing, nothing has been initialized and memory
    // could be in arbitrary state, so we only deal with things via raw pointers.
    #[cortex_m_rt::pre_init]
    unsafe fn tls_pre_init_hook() {
        // The lengths are encoded as the addresses of absolute linker symbols.
        let tdata_len = addr_of!(__tdata_len) as usize;
        let tbss_len = addr_of!(__tbss_len) as usize;

        for dst in [addr_of_mut!(TLS_CORE_0), addr_of_mut!(TLS_CORE_1)] {
            // SAFETY (both calls): the linker script reserves
            // `__tdata_len + __tbss_len` bytes at each `TLS_CORE_n`, and
            // `__tdata_start` heads an image of `__tdata_len` bytes.
            core::ptr::copy(addr_of!(__tdata_start), dst, tdata_len);
            core::ptr::write_bytes(dst.add(tdata_len), 0, tbss_len);
        }
    }
}
Loading