Generalise Clark2Dt to p/=0 + add tests

juntyr · Feb 21, 2024 · f3d070b · f3d070b
1 parent 1a44fc8
commit f3d070b
Show file tree

Hide file tree

Showing 6 changed files with 700 additions and 20 deletions.
diff --git a/docs/simulate.ron b/docs/simulate.ron
@@ -267,6 +267,8 @@
             ),
             /* shape (u) for the Clark 2Dt dispersal kernel */
             shape_u: (0.0 < f64),
+            /* tail (p) for the Clark 2Dt dispersal kernel */
+            tail_p: (0.0 < f64),
         )
         /* (almost) infinite spatially-explicit scenario with (approximate) Gaussian distributed dispersal
            each location (x, y) in the landscape has either habitat for exactly one individual,

diff --git a/necsim/core/maths/src/lib.rs b/necsim/core/maths/src/lib.rs
@@ -14,6 +14,8 @@ pub trait MathsCore: 'static + Clone + core::fmt::Debug {
     #[must_use]
     fn sqrt(x: f64) -> f64;
     #[must_use]
+    fn pow(x: f64, exp: f64) -> f64;
+    #[must_use]
     fn sin(x: f64) -> f64;
     #[must_use]
     fn cos(x: f64) -> f64;
@@ -50,6 +52,11 @@ impl MathsCore for IntrinsicsMathsCore {
         unsafe { core::intrinsics::sqrtf64(x) }
     }
 
+    #[inline]
+    fn pow(x: f64, exp: f64) -> f64 {
+        unsafe { core::intrinsics::powf64(x, exp) }
+    }
+
     #[inline]
     fn sin(x: f64) -> f64 {
         unsafe { core::intrinsics::sinf64(x) }

diff --git a/necsim/impls/cuda/src/cogs/maths.rs b/necsim/impls/cuda/src/cogs/maths.rs
@@ -74,6 +74,41 @@ impl MathsCore for NvptxMathsCore {
         unsafe { core::intrinsics::sqrtf64(x) }
     }
 
+    #[inline]
+    fn pow(x: f64, exp: f64) -> f64 {
+        // Guard against usage on the CPU as results will NOT match
+
+        #[cfg(target_os = "cuda")]
+        unsafe {
+            // Compute x ^ exp = 2 ^ (exp * log2(x)) in approximate single precision (f32)
+            // https://stackoverflow.com/a/54273307
+            // by https://stackoverflow.com/users/2341466/andars
+            // Licensed under CC BY-SA 4.0
+            #[allow(clippy::cast_possible_truncation)]
+            let x: f32 = x as f32;
+            #[allow(clippy::cast_possible_truncation)]
+            let exp: f32 = exp as f32;
+
+            let log2_x: f32;
+            core::arch::asm!("lg2.approx.f32 {}, {};", out(reg32) log2_x, in(reg32) x, options(pure, nomem, nostack));
+
+            let exp_log2_x = log2_x * exp;
+
+            let f: f32;
+            core::arch::asm!("ex2.approx.f32 {}, {};", out(reg32) f, in(reg32) exp_log2_x, options(pure, nomem, nostack));
+
+            f64::from(f)
+        }
+        #[cfg(not(target_os = "cuda"))]
+        {
+            extern "C" {
+                fn nvptx_maths_core_pow_on_cpu(_x: f64, _exp: f64) -> !;
+            }
+
+            unsafe { nvptx_maths_core_pow_on_cpu(x, exp) }
+        }
+    }
+
     #[inline]
     fn sin(x: f64) -> f64 {
         // Guard against usage on the CPU as results will NOT match