Further progress on async integration; rustcoalescence fails to compile

juntyr · Jan 9, 2024 · 52f4ce2
1 parent 29b098e
commit 52f4ce2
Show file tree

Hide file tree

Showing 19 changed files with 517 additions and 387 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml
@@ -20,7 +20,7 @@ contracts = "0.6.3"
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 
 [target.'cfg(target_os = "cuda")'.dependencies]
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"], optional = true }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"], optional = true }
 
 [target.'cfg(not(target_os = "cuda"))'.dependencies]
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"], optional = true }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"], optional = true }
diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml
@@ -15,7 +15,7 @@ contracts = "0.6.3"
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 
 [target.'cfg(target_os = "cuda")'.dependencies]
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive"] }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"] }
 
 [target.'cfg(not(target_os = "cuda"))'.dependencies]
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "host"] }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"] }
diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs
@@ -1,13 +1,19 @@
 use core::fmt;
 
+use const_type_layout::TypeGraphLayout;
 #[cfg(not(target_os = "cuda"))]
 use rust_cuda::deps::rustacuda::{
     error::CudaResult,
     function::{BlockSize, GridSize},
 };
 
-use rust_cuda::utils::{
-    aliasing::SplitSliceOverCudaThreadsDynamicStride, exchange::buffer::CudaExchangeBuffer,
+use rust_cuda::{
+    lend::RustToCudaProxy,
+    safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly},
+    utils::{
+        aliasing::SplitSliceOverCudaThreadsDynamicStride,
+        exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem},
+    },
 };
 
 use necsim_core::{
@@ -27,8 +33,13 @@ use super::utils::MaybeSome;
 #[derive(rust_cuda::lend::LendRustToCuda)]
 #[cuda(free = "ReportSpeciation", free = "ReportDispersal")]
 pub struct EventBuffer<ReportSpeciation: Boolean, ReportDispersal: Boolean> {
+    #[cfg(not(target_os = "cuda"))]
     #[cuda(embed)]
     event_mask: SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<bool, true, true>>,
+    #[cfg(target_os = "cuda")]
+    #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<bool, true, true>>")]
+    event_mask: CudaExchangeSlice<CudaExchangeItem<bool, true, true>>,
+    #[cfg(not(target_os = "cuda"))]
     #[cuda(embed)]
     event_buffer: SplitSliceOverCudaThreadsDynamicStride<
         CudaExchangeBuffer<
@@ -37,8 +48,41 @@ pub struct EventBuffer<ReportSpeciation: Boolean, ReportDispersal: Boolean> {
             true,
         >,
     >,
-    max_events: usize,
-    event_counter: usize,
+    #[cfg(target_os = "cuda")]
+    #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride<
+    CudaExchangeBuffer<
+        MaybeSome<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
+        false,
+        true,
+    >,
+>")]
+    event_buffer: CudaExchangeSlice<
+        CudaExchangeItem<
+            MaybeSome<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
+            false,
+            true,
+        >,
+    >,
+}
+
+// Safety:
+// - no mutable aliasing occurs since all parts implement SafeMutableAliasing
+// - dropping does not trigger (de)alloc since EventBuffer doesn't impl Drop and
+//   all parts implement SafeMutableAliasing
+// - EventBuffer has no shallow mutable state
+unsafe impl<ReportSpeciation: Boolean, ReportDispersal: Boolean> SafeMutableAliasing
+    for EventBuffer<ReportSpeciation, ReportDispersal>
+where
+    SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<bool, true, true>>:
+        SafeMutableAliasing,
+    SplitSliceOverCudaThreadsDynamicStride<
+        CudaExchangeBuffer<
+            MaybeSome<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
+            false,
+            true,
+        >,
+    >: SafeMutableAliasing,
+{
 }
 
 pub trait EventType {
@@ -78,10 +122,7 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean> fmt::Debug
     for EventBuffer<ReportSpeciation, ReportDispersal>
 {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        fmt.debug_struct("EventBuffer")
-            .field("max_events", &self.max_events)
-            .field("event_counter", &self.event_counter)
-            .finish_non_exhaustive()
+        fmt.debug_struct("EventBuffer").finish_non_exhaustive()
     }
 }
 
@@ -122,8 +163,6 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
                 CudaExchangeBuffer::from_vec(event_buffer)?,
                 max_events,
             ),
-            max_events,
-            event_counter: 0_usize,
         })
     }
 
@@ -148,9 +187,26 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
             mask.write(false);
         }
     }
+}
 
-    pub fn max_events_per_individual(&self) -> usize {
-        self.max_events
+#[cfg(target_os = "cuda")]
+impl<ReportSpeciation: Boolean, ReportDispersal: Boolean>
+    EventBuffer<ReportSpeciation, ReportDispersal>
+{
+    fn report_event(
+        &mut self,
+        event: impl Into<<EventBuffer<ReportSpeciation, ReportDispersal> as EventType>::Event>,
+    ) {
+        if let ([mask, mask_rest @ ..], [buffer, buffer_rest @ ..]) = (
+            core::mem::take(&mut self.event_mask.0),
+            core::mem::take(&mut self.event_buffer.0),
+        ) {
+            mask.write(true);
+            buffer.write(MaybeSome::Some(event.into()));
+
+            self.event_mask.0 = mask_rest;
+            self.event_buffer.0 = buffer_rest;
+        }
     }
 }
 
@@ -169,19 +225,11 @@ impl<ReportSpeciation: Boolean, ReportDispersal: Boolean> Reporter
 impl Reporter for EventBuffer<False, True> {
     impl_report!(
         #[debug_requires(
-            self.event_counter < self.max_events,
+            !self.event_buffer.0.is_empty(),
             "does not report extraneous dispersal events"
         )]
         dispersal(&mut self, event: Used) {
-            if let Some(mask) = self.event_mask.get_mut(self.event_counter) {
-                mask.write(true);
-
-                unsafe {
-                    self.event_buffer.get_unchecked_mut(self.event_counter)
-                }.write(MaybeSome::Some(event.clone().into()));
-            }
-
-            self.event_counter += 1;
+            self.report_event(event.clone());
         }
     );
 }
@@ -190,19 +238,14 @@ impl Reporter for EventBuffer<False, True> {
 impl Reporter for EventBuffer<True, False> {
     impl_report!(
         #[debug_requires(
-            self.event_counter == 0,
+            !self.event_buffer.0.is_empty(),
             "does not report extraneous speciation events"
         )]
         speciation(&mut self, event: Used) {
-            if let Some(mask) = self.event_mask.get_mut(0) {
-                mask.write(true);
+            self.report_event(event.clone());
 
-                unsafe {
-                    self.event_buffer.get_unchecked_mut(0)
-                }.write(MaybeSome::Some(event.clone()));
-            }
-
-            self.event_counter = self.max_events;
+            self.event_mask.0 = &mut [];
+            self.event_buffer.0 = &mut [];
         }
     );
 }
@@ -211,37 +254,57 @@ impl Reporter for EventBuffer<True, False> {
 impl Reporter for EventBuffer<True, True> {
     impl_report!(
         #[debug_requires(
-            self.event_counter < self.max_events,
+            !self.event_buffer.0.is_empty(),
             "does not report extraneous speciation events"
         )]
         speciation(&mut self, event: Used) {
-            if let Some(mask) = self.event_mask.get_mut(self.event_counter) {
-                mask.write(true);
+            self.report_event(event.clone());
 
-                unsafe {
-                    self.event_buffer.get_unchecked_mut(self.event_counter)
-                }.write(MaybeSome::Some(event.clone().into()));
-            }
-
-            self.event_counter = self.max_events;
+            self.event_mask.0 = &mut [];
+            self.event_buffer.0 = &mut [];
         }
     );
 
     impl_report!(
         #[debug_requires(
-            self.event_counter < self.max_events,
+            !self.event_buffer.0.is_empty(),
             "does not report extraneous dispersal events"
         )]
         dispersal(&mut self, event: Used) {
-            if let Some(mask) = self.event_mask.get_mut(self.event_counter) {
-                mask.write(true);
-
-                unsafe {
-                    self.event_buffer.get_unchecked_mut(self.event_counter)
-                }.write(MaybeSome::Some(event.clone().into()));
-            }
-
-            self.event_counter += 1;
+            self.report_event(event.clone());
         }
     );
 }
+
+// FIXME: find a less hacky hack
+struct CudaExchangeSlice<T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout>(
+    &'static mut [T],
+);
+
+impl<
+        T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout,
+        const M2D: bool,
+        const M2H: bool,
+    > RustToCudaProxy<CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>>>
+    for SplitSliceOverCudaThreadsDynamicStride<CudaExchangeBuffer<T, M2D, M2H>>
+{
+    fn from_ref(_val: &CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>>) -> &Self {
+        unsafe { unreachable_cuda_event_buffer_hack() }
+    }
+
+    fn from_mut(_val: &mut CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>>) -> &mut Self {
+        unsafe { unreachable_cuda_event_buffer_hack() }
+    }
+
+    fn into(mut self) -> CudaExchangeSlice<CudaExchangeItem<T, M2D, M2H>> {
+        let slice: &mut [CudaExchangeItem<T, M2D, M2H>] = &mut self;
+
+        let slice = unsafe { core::slice::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) };
+
+        CudaExchangeSlice(slice)
+    }
+}
+
+extern "C" {
+    fn unreachable_cuda_event_buffer_hack() -> !;
+}
diff --git a/necsim/impls/cuda/src/value_buffer.rs b/necsim/impls/cuda/src/value_buffer.rs
@@ -3,7 +3,7 @@ use core::iter::Iterator;
 
 use const_type_layout::TypeGraphLayout;
 use rust_cuda::{
-    safety::{PortableBitSemantics, StackOnly},
+    safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly},
     utils::{
         aliasing::SplitSliceOverCudaThreadsConstStride,
         exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem},
@@ -32,6 +32,21 @@ where
         SplitSliceOverCudaThreadsConstStride<CudaExchangeBuffer<MaybeSome<T>, M2D, M2H>, 1_usize>,
 }
 
+// Safety:
+// - no mutable aliasing occurs since all parts implement SafeMutableAliasing
+// - dropping does not trigger (de)alloc since ValueBuffer doesn't impl Drop and
+//   all parts implement SafeMutableAliasing
+// - ValueBuffer has no shallow mutable state
+unsafe impl<T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, const M2H: bool>
+    SafeMutableAliasing for ValueBuffer<T, M2D, M2H>
+where
+    SplitSliceOverCudaThreadsConstStride<CudaExchangeBuffer<bool, true, true>, 1_usize>:
+        SafeMutableAliasing,
+    SplitSliceOverCudaThreadsConstStride<CudaExchangeBuffer<MaybeSome<T>, M2D, M2H>, 1_usize>:
+        SafeMutableAliasing,
+{
+}
+
 #[cfg(not(target_os = "cuda"))]
 impl<T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, const M2H: bool>
     ValueBuffer<T, M2D, M2H>

diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml
@@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] }
 rand_core = "0.6"
 
 [target.'cfg(target_os = "cuda")'.dependencies]
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final"], optional = true }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final"], optional = true }
 
 [target.'cfg(not(target_os = "cuda"))'.dependencies]
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["derive", "final", "host"], optional = true }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final", "host"], optional = true }
diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml
@@ -23,4 +23,4 @@ thiserror = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_state = "0.4"
 serde_derive_state = "0.4"
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] }
diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml
@@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c
 necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" }
 rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" }
 
-rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "8dc0c6d", features = ["host"] }
+rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] }
diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs
@@ -37,4 +37,21 @@ pub struct SimulationKernelPtx<
     A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda + Sync,
     ReportSpeciation: Boolean,
     ReportDispersal: Boolean,
->(std::marker::PhantomData<(M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal)>);
+>(
+    std::marker::PhantomData<(
+        M,
+        H,
+        G,
+        S,
+        X,
+        D,
+        C,
+        T,
+        N,
+        E,
+        I,
+        A,
+        ReportSpeciation,
+        ReportDispersal,
+    )>,
+);
diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs
@@ -12,7 +12,7 @@ use necsim_impls_no_std::cogs::{
     event_sampler::tracking::MinSpeciationTrackingEventSampler,
 };
 
-use rust_cuda::{lend::RustToCuda, kernel::CompiledKernelPtx};
+use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda};
 
 use rustcoalescence_algorithms_cuda_gpu_kernel::simulate;
 
@@ -43,8 +43,10 @@ unsafe impl<
         A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda + Sync,
         ReportSpeciation: Boolean,
         ReportDispersal: Boolean,
-    > CompiledKernelPtx<simulate<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>>
-    for SimulationKernelPtx<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>
+    >
+    CompiledKernelPtx<
+        simulate<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>,
+    > for SimulationKernelPtx<M, H, G, S, X, D, C, T, N, E, I, A, ReportSpeciation, ReportDispersal>
 where
     SimulationKernelPtx<M, H, G, S, X, D, C, T, N, E, I, A, False, False>:
         CompiledKernelPtx<simulate<M, H, G, S, X, D, C, T, N, E, I, A, False, False>>,