Skip to content

Commit

Permalink
Further async integration progress, rustcoalescence fails to compile
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Jan 9, 2024
1 parent 29b098e commit 52f4ce2
Show file tree
Hide file tree
Showing 19 changed files with 517 additions and 387 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions necsim/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ contracts = "0.6.3"
serde = { version = "1.0", default-features = false, features = ["derive"] }

[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"], optional = true }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"], optional = true }
4 changes: 2 additions & 2 deletions necsim/impls/cuda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ contracts = "0.6.3"
serde = { version = "1.0", default-features = false, features = ["derive"] }

[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"] }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"] }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"] }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"] }
163 changes: 113 additions & 50 deletions necsim/impls/cuda/src/event_buffer.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
use core::fmt;

use const_type_layout::TypeGraphLayout;
#[cfg(not(target_os = "cuda"))]
use rust_cuda::deps::rustacuda::{
error::CudaResult,
function::{BlockSize, GridSize},
};

use rust_cuda::utils::{
aliasing::SplitSliceOverCudaThreadsDynamicStride, exchange::buffer::CudaExchangeBuffer,
use rust_cuda::{
lend::RustToCudaProxy,
safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly},
utils::{
aliasing::SplitSliceOverCudaThreadsDynamicStride,
exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem},
},
};

use necsim_core::{
Expand All @@ -27,8 +33,13 @@ use super::utils::MaybeSome;
#[derive(rust_cuda::lend::LendRustToCuda)]
#[cuda(free = "ReportSpeciation", free = "ReportDispersal")]
pub struct EventBuffer<ReportSpeciation: Boolean, ReportDispersal: Boolean> {
#[cfg(not(target_os = "cuda"))]
#[cuda(embed)]
event_mask: SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<bool, true, true>>,
#[cfg(target_os = "cuda")]
#[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<bool, true, true>>")]
event_mask: CudaExchangeSlice<CudaExchangeItem<bool, true, true>>,
#[cfg(not(target_os = "cuda"))]
#[cuda(embed)]
event_buffer: SplitSliceOverCudaThreadsDynamicStride<
CudaExchangeBuffer<
Expand All @@ -37,8 +48,41 @@ pub struct EventBuffer<ReportSpeciation: Boolean, ReportDispersal: Boolean> {
true,
>,
>,
max_events: usize,
event_counter: usize,
#[cfg(target_os = "cuda")]
#[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride<
CudaExchangeBuffer<
MaybeSome<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
false,
true,
>,
>")]
event_buffer: CudaExchangeSlice<
CudaExchangeItem<
MaybeSome<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
false,
true,
>,
>,
}

// SAFETY (argument as stated by the original author; not independently
// re-verified here):
// - no mutable aliasing occurs since all parts implement SafeMutableAliasing
// - dropping does not trigger (de)alloc since EventBuffer doesn't impl Drop and
//   all parts implement SafeMutableAliasing
// - EventBuffer has no shallow mutable state
//
// The `where` clause below names the same two types that appear as the embedded
// `event_mask` and `event_buffer` representations of `EventBuffer`, and makes
// this marker impl conditional on both of them being `SafeMutableAliasing`.
// The impl body is intentionally empty: the trait is implemented purely as a
// marker here.
unsafe impl<ReportSpeciation: Boolean, ReportDispersal: Boolean> SafeMutableAliasing
for EventBuffer<ReportSpeciation, ReportDispersal>
where
SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<bool, true, true>>:
SafeMutableAliasing,
SplitSliceOverCudaThreadsDynamicStride<
CudaExchangeBuffer<
MaybeSome<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
false,
true,
>,
>: SafeMutableAliasing,
{
}

pub trait EventType {
Expand Down Expand Up @@ -78,10 +122,7 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean> fmt::Debug
for EventBuffer<ReportSpeciation, ReportDispersal>
{
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
fmt.debug_struct("EventBuffer")
.field("max_events", &self.max_events)
.field("event_counter", &self.event_counter)
.finish_non_exhaustive()
fmt.debug_struct("EventBuffer").finish_non_exhaustive()
}
}

Expand Down Expand Up @@ -122,8 +163,6 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
CudaExchangeBuffer::from_vec(event_buffer)?,
max_events,
),
max_events,
event_counter: 0_usize,
})
}

Expand All @@ -148,9 +187,26 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
mask.write(false);
}
}
}

pub fn max_events_per_individual(&self) -> usize {
self.max_events
#[cfg(target_os = "cuda")]
impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
    EventBuffer<ReportSpeciation, ReportDispersal>
{
    /// Stores `event` in the next free slot and advances past that slot.
    ///
    /// Both the mask slice and the event slice are first moved out of `self`
    /// (leaving empty slices behind). If both still have a leading element,
    /// the mask element is set to `true`, the event element receives
    /// `MaybeSome::Some(event.into())`, and the remaining tails are stored
    /// back into `self`.
    ///
    /// If either slice has no leading element, nothing is written and both
    /// slices stay empty.
    fn report_event(
        &mut self,
        event: impl Into<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
    ) {
        // Move the slices out so that the element/tail split below can hand
        // the tails back to `self` by value.
        let mask_slots = core::mem::take(&mut self.event_mask.0);
        let event_slots = core::mem::take(&mut self.event_buffer.0);

        match (mask_slots, event_slots) {
            ([mask_slot, mask_tail @ ..], [event_slot, event_tail @ ..]) => {
                mask_slot.write(true);
                event_slot.write(MaybeSome::Some(event.into()));

                self.event_mask.0 = mask_tail;
                self.event_buffer.0 = event_tail;
            },
            // No free slot left in at least one slice: drop the event.
            _ => {},
        }
    }
}

Expand All @@ -169,19 +225,11 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean> Reporter
impl Reporter for EventBuffer<False, True> {
impl_report!(
#[debug_requires(
self.event_counter < self.max_events,
!self.event_buffer.0.is_empty(),
"does not report extraneous dispersal events"
)]
dispersal(&mut self, event: Used) {
if let Some(mask) = self.event_mask.get_mut(self.event_counter) {
mask.write(true);

unsafe {
self.event_buffer.get_unchecked_mut(self.event_counter)
}.write(MaybeSome::Some(event.clone().into()));
}

self.event_counter += 1;
self.report_event(event.clone());
}
);
}
Expand All @@ -190,19 +238,14 @@ impl Reporter for EventBuffer<False, True> {
impl Reporter for EventBuffer<True, False> {
impl_report!(
#[debug_requires(
self.event_counter == 0,
!self.event_buffer.0.is_empty(),
"does not report extraneous speciation events"
)]
speciation(&mut self, event: Used) {
if let Some(mask) = self.event_mask.get_mut(0) {
mask.write(true);
self.report_event(event.clone());

unsafe {
self.event_buffer.get_unchecked_mut(0)
}.write(MaybeSome::Some(event.clone()));
}

self.event_counter = self.max_events;
self.event_mask.0 = &mut [];
self.event_buffer.0 = &mut [];
}
);
}
Expand All @@ -211,37 +254,57 @@ impl Reporter for EventBuffer<True, False> {
impl Reporter for EventBuffer<True, True> {
impl_report!(
#[debug_requires(
self.event_counter < self.max_events,
!self.event_buffer.0.is_empty(),
"does not report extraneous speciation events"
)]
speciation(&mut self, event: Used) {
if let Some(mask) = self.event_mask.get_mut(self.event_counter) {
mask.write(true);
self.report_event(event.clone());

unsafe {
self.event_buffer.get_unchecked_mut(self.event_counter)
}.write(MaybeSome::Some(event.clone().into()));
}

self.event_counter = self.max_events;
self.event_mask.0 = &mut [];
self.event_buffer.0 = &mut [];
}
);

impl_report!(
#[debug_requires(
self.event_counter < self.max_events,
!self.event_buffer.0.is_empty(),
"does not report extraneous dispersal events"
)]
dispersal(&mut self, event: Used) {
if let Some(mask) = self.event_mask.get_mut(self.event_counter) {
mask.write(true);

unsafe {
self.event_buffer.get_unchecked_mut(self.event_counter)
}.write(MaybeSome::Some(event.clone().into()));
}

self.event_counter += 1;
self.report_event(event.clone());
}
);
}

// FIXME: find a less hacky hack
//
// Newtype around a `&'static mut [T]`.
//
// In this file it is the type of `EventBuffer`'s `event_mask` and
// `event_buffer` fields when compiling for `target_os = "cuda"`, where it is
// instantiated with `CudaExchangeItem<..>` elements. Values are produced by
// the `RustToCudaProxy::into` implementation for
// `SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<..>>`.
struct CudaExchangeSlice<T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout>(
&'static mut [T],
);

// Proxy conversion from a split `CudaExchangeBuffer` to a `CudaExchangeSlice`
// over its `CudaExchangeItem`s.
//
// Only `into` performs a real conversion. `from_ref` and `from_mut` never
// return: they call the diverging foreign function
// `unreachable_cuda_event_buffer_hack`.
impl<
T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout,
const M2D: bool,
const M2H: bool,
> RustToCudaProxy<CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>>>
for SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<T, M2D, M2H>>
{
// Never returns; the argument is ignored.
fn from_ref(_val: &CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>>) -> &Self {
// NOTE(review): no SAFETY justification is given in the source for calling
// the foreign function — presumably this path is expected to be unused;
// confirm.
unsafe { unreachable_cuda_event_buffer_hack() }
}

// Never returns; the argument is ignored.
fn from_mut(_val: &mut CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>>) -> &mut Self {
// NOTE(review): same as `from_ref` — presumably expected to be unused;
// confirm.
unsafe { unreachable_cuda_event_buffer_hack() }
}

// Consumes the split buffer and returns a `CudaExchangeSlice` covering the
// same items.
fn into(mut self) -> CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>> {
// Borrow `self` as a mutable slice of exchange items (the type annotation
// drives the coercion from `&mut Self` to `&mut [CudaExchangeItem<..>]`).
let slice: &mut [CudaExchangeItem<T, M2D, M2H>] = &mut self;

// Rebuild the slice from its raw pointer and length, which detaches the
// borrow from the local `self` so it can be stored as `&'static mut`.
// NOTE(review): `self` goes out of scope when this function returns, so
// soundness presumably depends on that not invalidating the pointed-to
// items — TODO confirm and record as a proper SAFETY argument.
let slice = unsafe { core::slice::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) };

CudaExchangeSlice(slice)
}
}

// Diverging foreign function called by the `from_ref` / `from_mut` proxy
// methods in this file. Only the declaration is visible here.
// NOTE(review): presumably left without a definition on purpose so that any
// call that survives compilation is reported at link time — confirm.
extern "C" {
fn unreachable_cuda_event_buffer_hack() -> !;
}
17 changes: 16 additions & 1 deletion necsim/impls/cuda/src/value_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use core::iter::Iterator;

use const_type_layout::TypeGraphLayout;
use rust_cuda::{
safety::{PortableBitSemantics, StackOnly},
safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly},
utils::{
aliasing::SplitSliceOverCudaThreadsConstStride,
exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem},
Expand Down Expand Up @@ -32,6 +32,21 @@ where
SplitSliceOverCudaThreadsConstStride<CudaExchangeBuffer<MaybeSome<T>, M2D, M2H>, 1_usize>,
}

// SAFETY (argument as stated by the original author; not independently
// re-verified here):
// - no mutable aliasing occurs since all parts implement SafeMutableAliasing
// - dropping does not trigger (de)alloc since ValueBuffer doesn't impl Drop and
//   all parts implement SafeMutableAliasing
// - ValueBuffer has no shallow mutable state
//
// The marker impl is conditional on the two split-buffer types named in the
// `where` clause being `SafeMutableAliasing`. The second one matches the
// `MaybeSome<T>` buffer field type of `ValueBuffer`; the first is presumably
// the type of its mask field — confirm against the struct definition.
// The impl body is intentionally empty.
unsafe impl<T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, const M2H: bool>
SafeMutableAliasing for ValueBuffer<T, M2D, M2H>
where
SplitSliceOverCudaThreadsConstStride<CudaExchangeBuffer<bool, true, true>, 1_usize>:
SafeMutableAliasing,
SplitSliceOverCudaThreadsConstStride<CudaExchangeBuffer<MaybeSome<T>, M2D, M2H>, 1_usize>:
SafeMutableAliasing,
{
}

#[cfg(not(target_os = "cuda"))]
impl<T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, const M2H: bool>
ValueBuffer<T, M2D, M2H>
Expand Down
4 changes: 2 additions & 2 deletions necsim/impls/no-std/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] }
rand_core = "0.6"

[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final"], optional = true }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final", "host"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final", "host"], optional = true }
2 changes: 1 addition & 1 deletion rustcoalescence/algorithms/cuda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ thiserror = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_state = "0.4"
serde_derive_state = "0.4"
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] }
2 changes: 1 addition & 1 deletion rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c
necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" }
rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" }

rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] }
19 changes: 18 additions & 1 deletion rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,21 @@ pub struct SimulationKernelPtx<
A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda + Sync,
ReportSpeciation: Boolean,
ReportDispersal: Boolean,
>(std::marker::PhantomData<(M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal)>);
>(
std::marker::PhantomData<(
M,
H,
G,
S,
X,
D,
C,
T,
N,
E,
I,
A,
ReportSpeciation,
ReportDispersal,
)>,
);
8 changes: 5 additions & 3 deletions rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use necsim_impls_no_std::cogs::{
event_sampler::tracking::MinSpeciationTrackingEventSampler,
};

use rust_cuda::{lend::RustToCuda, kernel::CompiledKernelPtx};
use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda};

use rustcoalescence_algorithms_cuda_gpu_kernel::simulate;

Expand Down Expand Up @@ -43,8 +43,10 @@ unsafe impl<
A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda + Sync,
ReportSpeciation: Boolean,
ReportDispersal: Boolean,
> CompiledKernelPtx<simulate<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>>
for SimulationKernelPtx<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>
>
CompiledKernelPtx<
simulate<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>,
> for SimulationKernelPtx<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>
where
SimulationKernelPtx<M, H, G, S, X, D, C, T, N, E, I, A, False, False>:
CompiledKernelPtx<simulate<M, H, G, S, X, D, C, T, N, E, I, A, False, False>>,
Expand Down
Loading

0 comments on commit 52f4ce2

Please sign in to comment.