Skip to content

Commit

Permalink
refactor: replace hwloc2 with hwlocality (#91)
Browse files Browse the repository at this point in the history
* replace hwloc2 by hwlocality

* fix thread binding code

this is patchwork; I'm not sure this works as intended

* add a CLI switch to bind threads

when `--bind-threads` is present, rayon threads will be bound
to cores

* add hwloc dep to clippy & build CI

* fix ci?

* add railguards for arch that do not support thread binding
  • Loading branch information
imrn99 authored Jul 15, 2024
1 parent 343c5a3 commit 6c0ad21
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 21 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/simple-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- run: sudo apt-get install -y libhwloc-dev
- run: sudo apt-get install -y libhwloc-dev libudev-dev
- name: Run rust tests
run: cargo test --all

Expand All @@ -38,6 +38,7 @@ jobs:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- run: sudo apt-get install -y libhwloc-dev
- name: Run clippy
run: cargo clippy -- -D warnings

Expand All @@ -48,6 +49,6 @@ jobs:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- run: sudo apt-get install -y libhwloc-dev
- run: sudo apt-get install -y libhwloc-dev libudev-dev
- name: Build the crate
run: cargo build --all
2 changes: 1 addition & 1 deletion fastiron/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ rand = { version = "0.8.5", features = ["small_rng"] }
tinyvec = { version = "1.6.1" }
rayon = { version = "1.10.0" }
atomic = { version = "0.5.3" } # further upgrade == breaking change
hwloc2 = { version = "2.2.0" } # should be replaced with hwlocality
hwlocality = { version = "1.0.0-alpha.5" }
libc = { version = "0.2.155" }
rustc-hash = { version = "2.0.0" }

Expand Down
61 changes: 44 additions & 17 deletions fastiron/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ use std::sync::{Arc, Mutex};
use std::time::Instant;

use clap::Parser;
use fastiron::data::tallies::TalliedEvent;
use hwloc2::{CpuBindFlags, ObjectType, Topology, TopologyObject};
use hwlocality::topology::support::{DiscoverySupport, FeatureSupport};
use num::{one, zero, FromPrimitive};

use hwlocality::cpu::binding::CpuBindingFlags;
use hwlocality::object::types::ObjectType;
use hwlocality::object::TopologyObject;
use hwlocality::Topology;
use rayon::ThreadPoolBuilder;

use fastiron::constants::sim::SRC_FRACTION;
use fastiron::constants::CustomFloat;
use fastiron::data::tallies::TalliedEvent;
use fastiron::init::{init_mcdata, init_mcunits, init_particle_containers, init_results};
use fastiron::montecarlo::{MonteCarloData, MonteCarloResults, MonteCarloUnit};
use fastiron::parameters::Parameters;
Expand Down Expand Up @@ -70,13 +75,27 @@ pub fn run<T: CustomFloat>(cli: Cli) {
// rayon only => one global thread pool
if mcdata.exec_info.exec_policy == ExecPolicy::Rayon {
// custom thread-pool init in this case
if mcdata.exec_info.n_rayon_threads != 0 {
let topo = Arc::new(Mutex::new(Topology::new().unwrap()));
ThreadPoolBuilder::new()
.num_threads(mcdata.exec_info.n_rayon_threads)
.start_handler(move |thread_id| bind_threads(thread_id, &topo))
.build_global()
.unwrap();
if mcdata.exec_info.n_rayon_threads != 0 && mcdata.exec_info.bind_threads {
let topology = Topology::new().unwrap();
// these railguards were taken from the bind_threads_cpu of hwlocality
if !topology.supports(FeatureSupport::discovery, DiscoverySupport::pu_count) {
println!("[Warning] cannot bind threads to core -- reporting of PU objects not supported");
} else if topology.feature_support().cpu_binding().is_none() {
println!("[Warning] cannot bind threads to core -- CPU binding not supported");
} else {
let cpu_support = topology.feature_support().cpu_binding().unwrap();
if !(cpu_support.get_thread() && cpu_support.set_thread()) {
println!("[Warning] cannot bind threads to core -- CPU binding queries not supported");
} else {
// we can get to work
let topo = Arc::new(Mutex::new(topology));
ThreadPoolBuilder::new()
.num_threads(mcdata.exec_info.n_rayon_threads)
.start_handler(move |thread_id| bind_threads(thread_id, &topo))
.build_global()
.unwrap();
}
}
}
mcdata.exec_info.n_rayon_threads = rayon::current_num_threads();
}
Expand Down Expand Up @@ -132,6 +151,10 @@ pub fn run<T: CustomFloat>(cli: Cli) {
coral_benchmark_correctness(&mcresults);
}

//==========================
// End of simulation cleanup
//==========================

pub fn game_over<T: CustomFloat>(
mcdata: &MonteCarloData<T>,
mcunits: &mut [MonteCarloUnit<T>],
Expand All @@ -153,29 +176,33 @@ pub fn game_over<T: CustomFloat>(
}
}

//========================
// Thread binding routines
//========================

pub fn bind_threads(thread_id: usize, topo: &Arc<Mutex<Topology>>) {
// get thread id
let pthread_id = unsafe { libc::pthread_self() };
// get cpu topology
let mut locked_topo = topo.lock().unwrap();
let locked_topo = topo.lock().unwrap();
// get current thread's cpu affinity
let cpu_set = {
let ancestor_lvl = locked_topo
.depth_or_above_for_type(&ObjectType::NUMANode)
.unwrap_or(0);
let targets = locked_topo.objects_at_depth(ancestor_lvl);
let ancestor = targets.first().expect("No common ancestor found");
let processing_units = locked_topo.objects_with_type(&ObjectType::PU).unwrap();
.depth_or_above_for_type(ObjectType::NUMANode)
.unwrap_or_default();
let mut targets = locked_topo.objects_at_depth(ancestor_lvl);
let ancestor = targets.next().expect("No common ancestor found");
let processing_units = locked_topo.objects_with_type(ObjectType::PU);
let unit = processing_units
.iter()
.into_iter()
.filter(|pu| has_ancestor(pu, ancestor))
.cycle()
.nth(thread_id)
.expect("No cores below given ancestor");
unit.cpuset().unwrap()
};

match locked_topo.set_cpubind_for_thread(pthread_id, cpu_set, CpuBindFlags::CPUBIND_THREAD) {
match locked_topo.bind_thread_cpu(pthread_id, cpu_set, CpuBindingFlags::THREAD) {
Ok(_) => {}
Err(e) => {
println!("[Error]: Could not bind threads to cpu cores:");
Expand Down
4 changes: 4 additions & 0 deletions fastiron/src/parameters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ pub struct SimulationParameters<T: CustomFloat> {
pub chunk_size: u64,
/// Number of threads that should be used to run the simulation.
pub n_rayon_threads: u64,
/// Switch used to bind rayon threads to physical cores.
pub bind_threads: bool,
/// Number of units that should be used to run the simulation.
pub n_units: u64,
/// Number of steps simulated by the program.
Expand Down Expand Up @@ -403,6 +405,7 @@ impl<T: CustomFloat> SimulationParameters<T> {
fetch_from_cli!(n_particles);
fetch_from_cli!(chunk_size);
fetch_from_cli!(n_rayon_threads);
simulation_params.bind_threads = cli.bind_threads;
fetch_from_cli!(n_units);
fetch_from_cli!(n_steps);
fetch_from_cli!(nx);
Expand All @@ -427,6 +430,7 @@ impl<T: CustomFloat> Default for SimulationParameters<T> {
n_particles: 1000000,
chunk_size: 0,
n_rayon_threads: 1,
bind_threads: false,
n_units: 1,
n_steps: 10,
nx: 10,
Expand Down
6 changes: 5 additions & 1 deletion fastiron/src/utils/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub struct Cli {

/// enable thread debugging if present
#[arg(short = 't', long = "debug-threads", num_args(0))]
pub debug_threads: bool,
pub debug_threads: bool, // currently unused

/// enable single-precision float type usage if present
#[arg(short = 'p', long = "single-precision", num_args(0))]
Expand Down Expand Up @@ -97,6 +97,10 @@ pub struct Cli {
)]
pub n_rayon_threads: Option<u64>,

/// bind rayon threads to physical cores -- can improve performance on large scale NUMA systems
#[arg(long = "bind-threads", num_args(0))]
pub bind_threads: bool,

/// number of units that should be used to run the simulation
#[arg(
short = 'u',
Expand Down
4 changes: 4 additions & 0 deletions fastiron/src/utils/mc_processor_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ pub struct MCProcessorInfo {
pub n_processors: usize,
/// Number of thread(s) used for execution.
pub n_rayon_threads: usize,
/// Switch to bind rayon threads to physical cores.
pub bind_threads: bool,
/// Size of the chunks used by rayon.
pub chunk_size: usize,
/// Number of unit(s) used for (distributed) execution.
Expand All @@ -59,6 +61,7 @@ impl MCProcessorInfo {
}
} else if sim_params.n_rayon_threads != 1 {
res.n_rayon_threads = sim_params.n_rayon_threads as usize;
res.bind_threads = sim_params.bind_threads;
res.exec_policy = ExecPolicy::Rayon;
};
res.chunk_size = sim_params.chunk_size as usize;
Expand All @@ -78,6 +81,7 @@ impl Default for MCProcessorInfo {
exec_policy: Default::default(),
n_processors: 1,
n_rayon_threads: 1,
bind_threads: false,
chunk_size: 0,
n_units: 1,
}
Expand Down

0 comments on commit 6c0ad21

Please sign in to comment.