Skip to content

Commit

Permalink
Generalise Clark2Dt to p/=0 + add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Feb 21, 2024
1 parent 1a44fc8 commit f3d070b
Show file tree
Hide file tree
Showing 6 changed files with 700 additions and 20 deletions.
2 changes: 2 additions & 0 deletions docs/simulate.ron
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@
),
/* shape (u) for the Clark 2Dt dispersal kernel */
shape_u: (0.0 < f64),
/* tail (p) for the Clark 2Dt dispersal kernel */
tail_p: (0.0 < f64),
)
/* (almost) infinite spatially-explicit scenario with (approximate) Gaussian distributed dispersal
each location (x, y) in the landscape has either habitat for exactly one individual,
Expand Down
7 changes: 7 additions & 0 deletions necsim/core/maths/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ pub trait MathsCore: 'static + Clone + core::fmt::Debug {
/// Returns the square root of `x`.
#[must_use]
fn sqrt(x: f64) -> f64;
/// Returns `x` raised to the power `exp`.
#[must_use]
fn pow(x: f64, exp: f64) -> f64;
/// Returns the sine of `x`.
#[must_use]
fn sin(x: f64) -> f64;
/// Returns the cosine of `x`.
#[must_use]
fn cos(x: f64) -> f64;
Expand Down Expand Up @@ -50,6 +52,11 @@ impl MathsCore for IntrinsicsMathsCore {
unsafe { core::intrinsics::sqrtf64(x) }
}

/// Returns `x` raised to the power `exp` by forwarding both arguments to
/// the `core::intrinsics::powf64` compiler intrinsic.
#[inline]
fn pow(x: f64, exp: f64) -> f64 {
// SAFETY: only plain `f64` values are passed by value; no pointers or
// references are involved in this call.
// NOTE(review): presumably the intrinsic has no further preconditions on
// its inputs -- confirm against the `core::intrinsics` documentation.
unsafe { core::intrinsics::powf64(x, exp) }
}

#[inline]
fn sin(x: f64) -> f64 {
unsafe { core::intrinsics::sinf64(x) }
Expand Down
35 changes: 35 additions & 0 deletions necsim/impls/cuda/src/cogs/maths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,41 @@ impl MathsCore for NvptxMathsCore {
unsafe { core::intrinsics::sqrtf64(x) }
}

/// Approximates `x` raised to the power `exp` when compiled for CUDA.
///
/// On `target_os = "cuda"`, both arguments are narrowed to `f32`, the
/// result is computed as `2 ^ (exp * log2(x))` using the PTX
/// `lg2.approx.f32` and `ex2.approx.f32` instructions, and the `f32`
/// result is widened back to `f64`. The result therefore carries at most
/// `f32` precision.
///
/// On every other target, this calls the diverging extern function
/// `nvptx_maths_core_pow_on_cpu` and never returns a value.
#[inline]
fn pow(x: f64, exp: f64) -> f64 {
// Guard against usage on the CPU as results will NOT match

#[cfg(target_os = "cuda")]
unsafe {
// Compute x ^ exp = 2 ^ (exp * log2(x))
// https://stackoverflow.com/a/54273307
// by https://stackoverflow.com/users/2341466/andars
// Licensed under CC BY-SA 4.0
// NOTE(review): since the result is derived from log2(x), a negative
// base presumably does not produce a meaningful value here -- confirm
// against the PTX ISA documentation for `lg2.approx.f32`.

// The narrowing casts are deliberate: the PTX instructions used below
// are the `.f32` variants, so the clippy truncation lint is silenced.
#[allow(clippy::cast_possible_truncation)]
let x: f32 = x as f32;
#[allow(clippy::cast_possible_truncation)]
let exp: f32 = exp as f32;

// log2_x = log2(x), written by the inline PTX instruction below
let log2_x: f32;
core::arch::asm!("lg2.approx.f32 {}, {};", out(reg32) log2_x, in(reg32) x, options(pure, nomem, nostack));

let exp_log2_x = log2_x * exp;

// f = 2 ^ (exp * log2(x)), written by the inline PTX instruction below
let f: f32;
core::arch::asm!("ex2.approx.f32 {}, {};", out(reg32) f, in(reg32) exp_log2_x, options(pure, nomem, nostack));

// Widen the f32 approximation back to the f64 return type (lossless)
f64::from(f)
}
#[cfg(not(target_os = "cuda"))]
{
// The function is only declared here and typed as diverging (`-> !`).
// NOTE(review): presumably it is intentionally never defined so that any
// CPU-side use of this method fails at link time -- confirm.
extern "C" {
fn nvptx_maths_core_pow_on_cpu(_x: f64, _exp: f64) -> !;
}

unsafe { nvptx_maths_core_pow_on_cpu(x, exp) }
}
}

#[inline]
fn sin(x: f64) -> f64 {
// Guard against usage on the CPU as results will NOT match
Expand Down
Loading

0 comments on commit f3d070b

Please sign in to comment.