diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 86631fc..bb866e5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -9,7 +9,7 @@ jobs:
   build:
     runs-on: ubuntu-20.04

-    steps:
+    steps:
      - uses: actions/checkout@v3
        with:
          submodules: "recursive"
@@ -29,10 +29,9 @@ jobs:
          sudo apt-get update -y
          sudo apt-get install -y nasm
          cargo doc --no-deps
+          cargo clippy
          cargo fmt --check
          cargo test --release
      - name: Run sccache stat for check after
        shell: bash
        run: ${SCCACHE_PATH} --show-stats
-
-
diff --git a/.github/workflows/main_nightly.yml b/.github/workflows/main_nightly.yml
new file mode 100644
index 0000000..425c720
--- /dev/null
+++ b/.github/workflows/main_nightly.yml
@@ -0,0 +1,36 @@
+name: main
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+  workflow_dispatch:
+jobs:
+  build:
+    runs-on: ubuntu-20.04
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: "recursive"
+      - name: Run sccache-cache
+        uses: mozilla-actions/sccache-action@v0.0.3
+      - name: Run sccache stat for check before
+        shell: bash
+        run: ${SCCACHE_PATH} --show-stats
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: nightly
+      - name: cargo
+        env:
+          SCCACHE_GHA_ENABLED: "true"
+          RUSTC_WRAPPER: "sccache"
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y nasm
+          cargo doc --no-deps --features simd
+          cargo fmt --check
+          cargo test --release --features simd
+      - name: Run sccache stat for check after
+        shell: bash
+        run: ${SCCACHE_PATH} --show-stats
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000..9373a29
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,22 @@
+name: pre-commit
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  pre-commit-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout workspace
+        uses: actions/checkout@v3
+      - name: Install pre-commit and hooks
+        run: |
+          pip install pre-commit
+          pre-commit install
+      - name: Run pre-commit checks
+        run: pre-commit run --all-files
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..66686d7
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,17 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.1.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+        exclude: (thirdparty/.*)|(SOPHUS_VERSION)|(.txt)$
+      - id: check-yaml
+        args: ["--unsafe"]
+      - id: check-json
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
+        args:
+          - --ignore-words-list
+          - "te,tring,crate"
diff --git a/Cargo.toml b/Cargo.toml
index 3dd5331..c352b62 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,17 +21,17 @@ include = [
 keywords = ["robotics", "optimization"]
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/farm-ng/sophus-rs/"
-version = "0.6.1"
+version = "0.7.0"

 [workspace.dependencies]
-sophus = {path = "crates/sophus", version = "0.6.1"}
-sophus_core = {path = "crates/sophus_core", version = "0.6.1"}
-sophus_image = {path = "crates/sophus_image", version = "0.6.1"}
-sophus_lie = {path = "crates/sophus_lie", version = "0.6.1"}
-sophus_opt = {path = "crates/sophus_opt", version = "0.6.1"}
-sophus_pyo3 = {path = "crates/sophus_pyo3", version = "0.6.1"}
-sophus_sensor = {path = "crates/sophus_sensor", version = "0.6.1"}
-sophus_viewer = {path = "crates/sophus_viewer", version = "0.6.1"}
+sophus = {path = "crates/sophus", version = "0.7.0"}
+sophus_core = {path = "crates/sophus_core", version = "0.7.0"}
+sophus_image = {path = "crates/sophus_image", version = "0.7.0"}
+sophus_lie = {path = "crates/sophus_lie", version = "0.7.0"}
+sophus_opt = {path = "crates/sophus_opt", version = "0.7.0"}
+sophus_pyo3 = {path = "crates/sophus_pyo3", version = "0.7.0"}
+sophus_sensor = {path = "crates/sophus_sensor", version = "0.7.0"}
+sophus_viewer = {path = "crates/sophus_viewer", version = "0.7.0"}

 approx = "0.5"
 as-any = "0.3"
@@ -58,7 +58,6 @@ num-traits = "0.2"
 numpy = "0.21"
 png = "0.17"
 rand = "0.8"
-sleef = "0.3"
 tokio = {version = "1", features = ["full"]}
 typenum = {version = "1.17", features = ["const-generics"]}
 wgpu = "0.19"
diff --git a/LICENSE-MIT b/LICENSE-MIT
index 9e82be3..e8864fe 100644
--- a/LICENSE-MIT
+++ b/LICENSE-MIT
@@ -23,4 +23,4 @@ SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
index 0d69f40..14613b4 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,58 @@
 # sophus-rs

-This library is in an early development stage - hence API is highly unstable.
+sophus-rs is a Rust library for 2d and 3d geometry for Computer Vision and Robotics applications.
+It is a spin-off of the [Sophus](https://github.com/strasdat/Sophus) C++ library which
+focuses on **Lie groups** (e.g. rotations and transformations in 2d and 3d).

-Sophus-rs currently requires rust nightly. The only nightly feature it uses is `portable-simd`.
-It is the intention to move to rust stable once `portable-simd` is stabilized. There is no current
-plan to depend on other nightly features.
+In addition to Lie groups, sophus-rs also includes other geometric/maths concepts such as unit
+vectors, splines, image classes, and camera models, as well as other utilities such as non-linear
+least squares optimization.
+
+## Status
+
+This library is in an early development stage - hence the API is highly unstable. It is likely
+that existing features will be removed or changed in the future.
+
+However, the intent is to strive for correctness, facilitated by a comprehensive test suite.
+
+## Building
+
+sophus-rs builds on stable Rust.
+
+```toml
+[dependencies]
+sophus = "0.7.0"
+```
+
+To allow for batch types, such as BatchScalarF64, the 'simd' feature is required. This feature
+depends on [`portable-simd`](https://doc.rust-lang.org/std/simd/index.html), which is currently
+only available on [nightly](https://doc.rust-lang.org/book/appendix-07-nightly-rust.html). There
+are no plans to use any other nightly features.
+
+```toml
+[dependencies]
+sophus = { version = "0.7.0", features = ["simd"] }
+```
diff --git a/crates/sophus/Cargo.toml b/crates/sophus/Cargo.toml
index 277f003..f23004c 100644
--- a/crates/sophus/Cargo.toml
+++ b/crates/sophus/Cargo.toml
@@ -30,3 +30,13 @@ nalgebra.workspace = true
 ndarray.workspace = true
 tokio.workspace = true
 wgpu.workspace = true
+
+[features]
+simd = [
+  "sophus_core/simd",
+  "sophus_image/simd",
+  "sophus_lie/simd",
+  "sophus_opt/simd",
+  "sophus_sensor/simd",
+  "sophus_viewer/simd",
+]
diff --git a/crates/sophus/src/lib.rs b/crates/sophus/src/lib.rs
index 4ae3763..f316375 100644
--- a/crates/sophus/src/lib.rs
+++ b/crates/sophus/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(portable_simd)]
+#![cfg_attr(feature = "simd", feature(portable_simd))]
 #![allow(clippy::needless_range_loop)]

 pub mod examples;
diff --git a/crates/sophus_core/Cargo.toml b/crates/sophus_core/Cargo.toml
index c4786f2..4ca03b5 100644
--- a/crates/sophus_core/Cargo.toml
+++ b/crates/sophus_core/Cargo.toml
@@ -17,5 +17,10 @@ concat-arrays.workspace = true
 nalgebra.workspace = true
 ndarray.workspace = true
 num-traits.workspace = true
-sleef.workspace = true
 typenum.workspace = true
+
+sleef = { version = "0.3", optional = true }
+
+
+[features]
+simd = ["sleef"]
diff --git a/crates/sophus_core/src/calculus/dual.rs b/crates/sophus_core/src/calculus/dual.rs
index bb7ec3a..5f0d6a3 100644
--- a/crates/sophus_core/src/calculus/dual.rs
+++ b/crates/sophus_core/src/calculus/dual.rs
@@ -1,14 +1,30 @@
-/// DualScalar matrix.
+/// Dual matrix.
 pub mod dual_matrix;
-pub use crate::calculus::dual::dual_matrix::DualBatchMatrix;
 pub use crate::calculus::dual::dual_matrix::DualMatrix;

-/// DualScalar scalar.
+#[cfg(feature = "simd")]
+/// Dual batch matrix.
+pub mod dual_batch_matrix;
+#[cfg(feature = "simd")]
+pub use crate::calculus::dual::dual_batch_matrix::DualBatchMatrix;
+
+/// Dual scalar.
 pub mod dual_scalar;
-pub use crate::calculus::dual::dual_scalar::DualBatchScalar;
 pub use crate::calculus::dual::dual_scalar::DualScalar;

-/// DualScalar vector.
+#[cfg(feature = "simd")]
+/// Dual batch scalar.
+pub mod dual_batch_scalar;
+#[cfg(feature = "simd")]
+pub use crate::calculus::dual::dual_batch_scalar::DualBatchScalar;
+
+
+/// Dual vector.
 pub mod dual_vector;
-pub use crate::calculus::dual::dual_vector::DualBatchVector;
 pub use crate::calculus::dual::dual_vector::DualVector;
+
+#[cfg(feature = "simd")]
+/// Dual batch vector.
+pub mod dual_batch_vector; +#[cfg(feature = "simd")] +pub use crate::calculus::dual::dual_batch_vector::DualBatchVector; diff --git a/crates/sophus_core/src/calculus/dual/dual_batch_matrix.rs b/crates/sophus_core/src/calculus/dual/dual_batch_matrix.rs new file mode 100644 index 0000000..c5ba96b --- /dev/null +++ b/crates/sophus_core/src/calculus/dual/dual_batch_matrix.rs @@ -0,0 +1,694 @@ +use crate::calculus::dual::dual_batch_scalar::DualBatchScalar; +use crate::calculus::dual::dual_matrix::DijPairMV; +use crate::calculus::dual::DualBatchVector; +use crate::linalg::BatchMatF64; +use crate::linalg::BatchScalarF64; +use crate::linalg::BatchVecF64; +use crate::prelude::*; +use crate::tensor::mut_tensor::MutTensorDD; +use crate::tensor::mut_tensor::MutTensorDDR; +use crate::tensor::mut_tensor::MutTensorDDRC; +use approx::AbsDiffEq; +use approx::RelativeEq; +use num_traits::Zero; +use std::fmt::Debug; +use std::ops::Add; +use std::ops::Mul; +use std::ops::Neg; +use std::ops::Sub; +use std::simd::LaneCount; +use std::simd::Mask; +use std::simd::SupportedLaneCount; + +use crate::calculus::dual::dual_matrix::DijPairM; + +/// DualScalarLike matrix +#[derive(Clone)] +pub struct DualBatchMatrix +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + /// real part + pub real_part: BatchMatF64, + /// infinitesimal part - represents derivative + pub dij_part: Option, ROWS, COLS>>, +} + +impl IsDual + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ +} + +impl + IsDualMatrix, ROWS, COLS, BATCH> for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + /// Create a new dual number + fn new_with_dij(val: BatchMatF64) -> Self { + let mut dij_val = + MutTensorDDRC::, ROWS, COLS>::from_shape([ROWS, COLS]); + for i in 0..ROWS { + for j in 0..COLS { + dij_val.mut_view().get_mut([i, j])[(i, j)] = BatchScalarF64::::from_f64(1.0); + } + } + + Self { + real_part: val, + dij_part: Some(dij_val), + } + } + + /// Get the derivative + fn dij_val(self) -> Option, ROWS, COLS>> { + self.dij_part + } +} + +impl DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + fn binary_mm_dij< + const R0: usize, + const R1: usize, + const C0: usize, + const C1: usize, + F: FnMut(&BatchMatF64) -> BatchMatF64, + G: FnMut(&BatchMatF64) -> BatchMatF64, + >( + lhs_dx: &Option, R0, C0>>, + rhs_dx: &Option, R1, C1>>, + mut left_op: F, + mut right_op: G, + ) -> Option, ROWS, COLS>> { + match (lhs_dx, rhs_dx) { + (None, None) => None, + (None, Some(rhs_dij)) => { + let out_dij = MutTensorDDRC::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); + Some(out_dij) + } + (Some(lhs_dij), None) => { + let out_dij = MutTensorDDRC::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); + Some(out_dij) + } + (Some(lhs_dij), Some(rhs_dij)) => { + let dyn_mat = + MutTensorDDRC::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { + left_op(l_dij) + right_op(r_dij) + }); + Some(dyn_mat) + } + } + } + + fn binary_mv_dij< + const R0: usize, + const R1: usize, + const C0: usize, + F: FnMut(&BatchMatF64) -> BatchVecF64, + G: FnMut(&BatchVecF64) -> BatchVecF64, + >( + lhs_dx: &Option, R0, C0>>, + rhs_dx: &Option, R1>>, + mut left_op: F, + mut right_op: G, + ) -> Option, ROWS>> { + match (lhs_dx, rhs_dx) { + (None, None) => None, + (None, Some(rhs_dij)) => { + let out_dij = MutTensorDDR::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); + Some(out_dij) + } + (Some(lhs_dij), None) => { + let out_dij = MutTensorDDR::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); + Some(out_dij) + } + (Some(lhs_dij), 
Some(rhs_dij)) => { + let dyn_mat = + MutTensorDDR::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { + left_op(l_dij) + right_op(r_dij) + }); + Some(dyn_mat) + } + } + } + + fn binary_ms_dij< + const R0: usize, + const C0: usize, + F: FnMut(&BatchMatF64) -> BatchMatF64, + G: FnMut(&BatchScalarF64) -> BatchMatF64, + >( + lhs_dx: &Option, R0, C0>>, + rhs_dx: &Option>>, + mut left_op: F, + mut right_op: G, + ) -> Option, ROWS, COLS>> { + match (lhs_dx, rhs_dx) { + (None, None) => None, + (None, Some(rhs_dij)) => { + let out_dij = MutTensorDDRC::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); + Some(out_dij) + } + (Some(lhs_dij), None) => { + let out_dij = MutTensorDDRC::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); + Some(out_dij) + } + (Some(lhs_dij), Some(rhs_dij)) => { + let dyn_mat = + MutTensorDDRC::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { + left_op(l_dij) + right_op(r_dij) + }); + Some(dyn_mat) + } + } + } + + /// derivatives + pub fn two_dx( + mut lhs_dx: Option, R1, C1>>, + mut rhs_dx: Option, R2, C2>>, + ) -> Option, R1, C1, R2, C2>> { + if lhs_dx.is_none() && rhs_dx.is_none() { + return None; + } + + if lhs_dx.is_some() && rhs_dx.is_some() { + assert_eq!( + lhs_dx.clone().unwrap().dims(), + rhs_dx.clone().unwrap().dims() + ); + } + + if lhs_dx.is_none() { + lhs_dx = Some(MutTensorDDRC::, R1, C1>::from_shape( + rhs_dx.clone().unwrap().dims(), + )) + } else if rhs_dx.is_none() { + rhs_dx = Some(MutTensorDDRC::, R2, C2>::from_shape( + lhs_dx.clone().unwrap().dims(), + )) + } + + Some(DijPairM { + lhs: lhs_dx.unwrap(), + rhs: rhs_dx.unwrap(), + }) + } + + /// derivatives + pub fn two_dx_from_vec( + mut lhs_dx: Option, ROWS, COLS>>, + mut rhs_dx: Option, COLS>>, + ) -> Option, ROWS, COLS>> { + if lhs_dx.is_none() && rhs_dx.is_none() { + return None; + } + + if lhs_dx.is_some() && rhs_dx.is_some() { + assert_eq!( + lhs_dx.clone().unwrap().dims(), + rhs_dx.clone().unwrap().dims() + ); + } + + if lhs_dx.is_none() { + lhs_dx = Some( + MutTensorDDRC::, ROWS, COLS>::from_shape( + rhs_dx.clone().unwrap().dims(), + ), + ) + } else if rhs_dx.is_none() { + rhs_dx = Some(MutTensorDDR::, COLS>::from_shape( + lhs_dx.clone().unwrap().dims(), + )) + } + + Some(DijPairMV::, ROWS, COLS> { + lhs: lhs_dx.unwrap(), + rhs: rhs_dx.unwrap(), + }) + } +} + +impl PartialEq + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + fn eq(&self, other: &Self) -> bool { + self.real_part == other.real_part && self.dij_part == other.dij_part + } +} + +impl AbsDiffEq + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + type Epsilon = f64; + + fn default_epsilon() -> Self::Epsilon { + f64::default_epsilon() + } + + fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool { + self.real_part.abs_diff_eq(&other.real_part, epsilon) + } +} + +impl RelativeEq + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + fn default_max_relative() -> Self::Epsilon { + f64::default_max_relative() + } + + fn relative_eq( + &self, + other: &Self, + epsilon: Self::Epsilon, + max_relative: Self::Epsilon, + ) -> bool { + self.real_part + .relative_eq(&other.real_part, epsilon, max_relative) + } +} + +impl + IsMatrix, ROWS, COLS, BATCH> for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + fn from_f64(val: f64) -> Self { + DualBatchMatrix { + real_part: BatchMatF64::::from_f64(val), + dij_part: None, + } + } + + fn set_elem(&mut self, idx: [usize; 2], val: DualBatchScalar) { + self.real_part.set_elem(idx, val.real_part); + if 
self.dij_part.is_some() { + let dij = &mut self.dij_part.as_mut().unwrap(); + for i in 0..dij.dims()[0] { + for j in 0..dij.dims()[1] { + dij.mut_view().get_mut([i, j])[(idx[0], idx[1])] = + val.dij_part.clone().unwrap().get([i, j]); + } + } + } + } + + fn from_scalar(val: DualBatchScalar) -> Self { + DualBatchMatrix { + real_part: BatchMatF64::::from_scalar(val.real_part), + dij_part: val.dij_part.map(|dij_val| { + MutTensorDDRC::from_map(&dij_val.view(), |v| { + BatchMatF64::::from_scalar(*v) + }) + }), + } + } + + fn mat_mul( + &self, + rhs: DualBatchMatrix, + ) -> DualBatchMatrix { + DualBatchMatrix { + real_part: self.real_part * rhs.real_part, + dij_part: DualBatchMatrix::::binary_mm_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| l_dij * rhs.real_part, + |r_dij| self.real_part * r_dij, + ), + } + } + + fn from_real_matrix(val: BatchMatF64) -> Self { + Self { + real_part: val, + dij_part: None, + } + } + + fn scaled(&self, s: DualBatchScalar) -> Self { + DualBatchMatrix { + real_part: self.real_part * s.real_part, + dij_part: DualBatchMatrix::::binary_ms_dij( + &self.dij_part, + &s.dij_part, + |l_dij| l_dij * s.real_part, + |r_dij| self.real_part * *r_dij, + ), + } + } + + fn identity() -> Self { + DualBatchMatrix::from_real_matrix(BatchMatF64::::identity()) + } + + fn get_elem(&self, idx: [usize; 2]) -> DualBatchScalar { + DualBatchScalar:: { + real_part: self.real_part.get_elem(idx), + dij_part: self + .dij_part + .clone() + .map(|dij_val| MutTensorDD::from_map(&dij_val.view(), |v| v[(idx[0], idx[1])])), + } + } + + fn from_array2(duals: [[DualBatchScalar; COLS]; ROWS]) -> Self { + let mut shape = None; + let mut val_mat = BatchMatF64::::zeros(); + for i in 0..duals.len() { + let d_rows = duals[i].clone(); + for j in 0..d_rows.len() { + let d = d_rows.clone()[j].clone(); + + val_mat[(i, j)] = d.real_part; + if d.dij_part.is_some() { + shape = Some(d.dij_part.clone().unwrap().dims()); + } + } + } + + if shape.is_none() { + return DualBatchMatrix { + real_part: val_mat, + dij_part: None, + }; + } + let shape = shape.unwrap(); + + let mut r = MutTensorDDRC::, ROWS, COLS>::from_shape(shape); + for i in 0..duals.len() { + let d_rows = duals[i].clone(); + for j in 0..d_rows.len() { + let d = d_rows.clone()[j].clone(); + if d.dij_part.is_some() { + for d0 in 0..shape[0] { + for d1 in 0..shape[1] { + r.mut_view().get_mut([d0, d1])[(i, j)] = + d.dij_part.clone().unwrap().get([d0, d1]); + } + } + } + } + } + DualBatchMatrix { + real_part: val_mat, + dij_part: Some(r), + } + } + + fn real_matrix(&self) -> &BatchMatF64 { + &self.real_part + } + + fn block_mat2x2( + top_row: ( + DualBatchMatrix, + DualBatchMatrix, + ), + bot_row: ( + DualBatchMatrix, + DualBatchMatrix, + ), + ) -> Self { + assert_eq!(R0 + R1, ROWS); + assert_eq!(C0 + C1, COLS); + + Self::block_mat2x1( + DualBatchMatrix::::block_mat1x2(top_row.0, top_row.1), + DualBatchMatrix::::block_mat1x2(bot_row.0, bot_row.1), + ) + } + + fn block_mat2x1( + top_row: DualBatchMatrix, + bot_row: DualBatchMatrix, + ) -> Self { + assert_eq!(R0 + R1, ROWS); + let maybe_dij = Self::two_dx(top_row.dij_part, bot_row.dij_part); + + Self { + real_part: BatchMatF64::::block_mat2x1( + top_row.real_part, + bot_row.real_part, + ), + dij_part: match maybe_dij { + Some(dij_val) => { + let mut r = MutTensorDDRC::, ROWS, COLS>::from_shape( + dij_val.shape(), + ); + for d0 in 0..dij_val.shape()[0] { + for d1 in 0..dij_val.shape()[1] { + *r.mut_view().get_mut([d0, d1]) = + BatchMatF64::::block_mat2x1( + dij_val.lhs.get([d0, d1]), + dij_val.rhs.get([d0, d1]), 
+ ); + } + } + Some(r) + } + None => None, + }, + } + } + + fn block_mat1x2( + left_col: DualBatchMatrix, + righ_col: DualBatchMatrix, + ) -> Self { + assert_eq!(C0 + C1, COLS); + let maybe_dij = Self::two_dx(left_col.dij_part, righ_col.dij_part); + + Self { + real_part: BatchMatF64::::block_mat1x2( + left_col.real_part, + righ_col.real_part, + ), + dij_part: match maybe_dij { + Some(dij_val) => { + let mut r = MutTensorDDRC::, ROWS, COLS>::from_shape( + dij_val.shape(), + ); + for d0 in 0..dij_val.shape()[0] { + for d1 in 0..dij_val.shape()[1] { + *r.mut_view().get_mut([d0, d1]) = + BatchMatF64::::block_mat1x2( + dij_val.lhs.get([d0, d1]), + dij_val.rhs.get([d0, d1]), + ); + } + } + Some(r) + } + None => None, + }, + } + } + + fn get_fixed_submat( + &self, + start_r: usize, + start_c: usize, + ) -> DualBatchMatrix { + DualBatchMatrix { + real_part: self.real_part.get_fixed_submat(start_r, start_c), + dij_part: self.dij_part.clone().map(|dij_val| { + MutTensorDDRC::from_map(&dij_val.view(), |v| v.get_fixed_submat(start_r, start_c)) + }), + } + } + + fn get_col_vec(&self, start_r: usize) -> DualBatchVector { + DualBatchVector { + real_part: self.real_part.get_col_vec(start_r), + dij_part: self + .dij_part + .clone() + .map(|dij_val| MutTensorDDR::from_map(&dij_val.view(), |v| v.get_col_vec(start_r))), + } + } + + fn get_row_vec(&self, c: usize) -> DualBatchVector { + DualBatchVector { + real_part: self.real_part.get_row_vec(c), + dij_part: self + .dij_part + .clone() + .map(|dij_val| MutTensorDDR::from_map(&dij_val.view(), |v| v.get_row_vec(c))), + } + } + + fn from_real_scalar_array2(vals: [[BatchScalarF64; COLS]; ROWS]) -> Self { + DualBatchMatrix { + real_part: BatchMatF64::from_real_scalar_array2(vals), + dij_part: None, + } + } + + fn from_f64_array2(vals: [[f64; COLS]; ROWS]) -> Self { + DualBatchMatrix { + real_part: BatchMatF64::from_f64_array2(vals), + dij_part: None, + } + } + + fn set_col_vec( + &mut self, + c: usize, + v: as IsScalar>::Vector, + ) { + self.real_part.set_col_vec(c, v.real_part); + todo!(); + } + + fn to_dual(self) -> as IsScalar>::DualMatrix { + self + } + + fn select(self, mask: &Mask, other: Self) -> Self { + let maybe_dij = Self::two_dx(self.dij_part, other.dij_part); + + DualBatchMatrix { + real_part: self.real_part.select(mask, other.real_part), + dij_part: match maybe_dij { + Some(dij) => { + let mut r = + MutTensorDDRC::, ROWS, COLS>::from_shape(dij.shape()); + for i in 0..dij.shape()[0] { + for j in 0..dij.shape()[1] { + *r.get_mut([i, j]) = + dij.lhs.get([i, j]).select(mask, dij.rhs.get([i, j])); + } + } + Some(r) + } + _ => None, + }, + } + } +} + +impl Add + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchMatrix; + + fn add(self, rhs: Self) -> Self::Output { + DualBatchMatrix { + real_part: self.real_part + rhs.real_part, + dij_part: Self::binary_mm_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij, + |r_dij| *r_dij, + ), + } + } +} + +impl Sub + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchMatrix; + + fn sub(self, rhs: Self) -> Self::Output { + DualBatchMatrix { + real_part: self.real_part - rhs.real_part, + dij_part: Self::binary_mm_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij, + |r_dij| -r_dij, + ), + } + } +} + +impl Neg + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchMatrix; + + fn neg(self) -> Self::Output { + DualBatchMatrix { + real_part: -self.real_part, + dij_part: self + .dij_part + .clone() + 
.map(|dij_val| MutTensorDDRC::from_map(&dij_val.view(), |v| -v)), + } + } +} + +impl Zero + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + fn zero() -> Self { + Self::from_real_matrix(BatchMatF64::zeros()) + } + + fn is_zero(&self) -> bool { + self.real_part.is_zero() + } +} + +impl Mul> + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchVector; + + fn mul(self, rhs: DualBatchVector) -> Self::Output { + Self::Output { + real_part: self.real_part * rhs.real_part, + dij_part: Self::binary_mv_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| l_dij * rhs.real_part, + |r_dij| self.real_part * r_dij, + ), + } + } +} + +impl Debug + for DualBatchMatrix +where + LaneCount: SupportedLaneCount, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.dij_part.is_some() { + f.debug_struct("DualScalarLike") + .field("val", &self.real_part) + .field("dij_val", &self.dij_part.as_ref().unwrap().elem_view()) + .finish() + } else { + f.debug_struct("DualScalarLike") + .field("val", &self.real_part) + .finish() + } + } +} diff --git a/crates/sophus_core/src/calculus/dual/dual_batch_scalar.rs b/crates/sophus_core/src/calculus/dual/dual_batch_scalar.rs new file mode 100644 index 0000000..1d46b0f --- /dev/null +++ b/crates/sophus_core/src/calculus/dual/dual_batch_scalar.rs @@ -0,0 +1,740 @@ +use crate::linalg::BatchMatF64; +use crate::linalg::BatchScalarF64; +use crate::linalg::BatchVecF64; +use std::simd::LaneCount; +use std::simd::Mask; +use std::simd::SupportedLaneCount; +use super::dual_scalar::DualScalar; +use crate::linalg::scalar::NumberCategory; +use crate::prelude::*; +use crate::tensor::mut_tensor::InnerScalarToVec; +use crate::tensor::mut_tensor::MutTensorDD; +use approx::assert_abs_diff_eq; +use approx::AbsDiffEq; +use approx::RelativeEq; +use num_traits::One; +use num_traits::Zero; +use std::fmt::Debug; +use std::ops::Add; +use std::ops::AddAssign; +use std::ops::Div; +use std::ops::Mul; +use std::ops::Neg; +use std::ops::Sub; +use std::ops::SubAssign; +pub use crate::calculus::dual::dual_batch_matrix::DualBatchMatrix; +pub use crate::calculus::dual::dual_batch_vector::DualBatchVector; + + +/// Dual number - a real number and an infinitesimal number (batch version) +#[derive(Clone)] +pub struct DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + /// real part + pub real_part: BatchScalarF64, + + /// infinitesimal part - represents derivative + pub dij_part: Option>>, +} + +impl IsDual for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ +} + +impl IsDualScalar for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn new_with_dij(val: BatchScalarF64) -> Self { + let dij_val = MutTensorDD::from_shape_and_val([1, 1], BatchScalarF64::::ones()); + Self { + real_part: val, + dij_part: Some(dij_val), + } + } + + fn dij_val(self) -> Option>> { + self.dij_part + } + + fn vector_with_dij(val: Self::RealVector) -> Self::Vector { + DualBatchVector::::new_with_dij(val) + } + + fn matrix_with_dij( + val: Self::RealMatrix, + ) -> Self::Matrix { + DualBatchMatrix::::new_with_dij(val) + } +} + +impl IsCoreScalar for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn number_category() -> NumberCategory { + NumberCategory::Real + } +} + +impl AsRef> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn as_ref(&self) -> 
&DualBatchScalar + where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, + { + self + } +} + +impl One for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn one() -> Self { + >::from_f64(1.0) + } +} + +impl Zero for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn zero() -> Self { + >::from_f64(0.0) + } + + fn is_zero(&self) -> bool { + self.real_part == >::from_f64(0.0).real_part() + } +} + +impl DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn binary_dij< + F: FnMut(&BatchScalarF64) -> BatchScalarF64, + G: FnMut(&BatchScalarF64) -> BatchScalarF64, + >( + lhs_dx: &Option>>, + rhs_dx: &Option>>, + mut left_op: F, + mut right_op: G, + ) -> Option>> { + match (lhs_dx, rhs_dx) { + (None, None) => None, + (None, Some(rhs_dij)) => { + let out_dij = + MutTensorDD::from_map(&rhs_dij.view(), |r_dij: &BatchScalarF64| { + right_op(r_dij) + }); + Some(out_dij) + } + (Some(lhs_dij), None) => { + let out_dij = + MutTensorDD::from_map(&lhs_dij.view(), |l_dij: &BatchScalarF64| { + left_op(l_dij) + }); + Some(out_dij) + } + (Some(lhs_dij), Some(rhs_dij)) => { + let dyn_mat = MutTensorDD::from_map2( + &lhs_dij.view(), + &rhs_dij.view(), + |l_dij: &BatchScalarF64, r_dij: &BatchScalarF64| { + left_op(l_dij) + right_op(r_dij) + }, + ); + Some(dyn_mat) + } + } + } +} + +impl Neg for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + + fn neg(self) -> Self { + Self { + real_part: -self.real_part, + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |v: &BatchScalarF64| -*v); + + Some(dyn_mat) + } + None => None, + }, + } + } +} + +impl PartialEq for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn eq(&self, other: &Self) -> bool { + self.real_part == other.real_part + } +} + +impl AbsDiffEq for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Epsilon = f64; + + fn default_epsilon() -> Self::Epsilon { + f64::default_epsilon() + } + + fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool { + for i in 0..BATCH { + if !self.real_part.extract_single(i).abs_diff_eq( + &other.real_part.extract_single(i), + epsilon.extract_single(i), + ) { + return false; + } + } + true + } +} + +impl RelativeEq for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn default_max_relative() -> Self::Epsilon { + f64::default_max_relative() + } + + fn relative_eq( + &self, + other: &Self, + epsilon: Self::Epsilon, + max_relative: Self::Epsilon, + ) -> bool { + for i in 0..BATCH { + if !self.real_part.extract_single(i).relative_eq( + &other.real_part.extract_single(i), + epsilon.extract_single(i), + max_relative.extract_single(i), + ) { + return false; + } + } + true + } +} + +impl From> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn from(value: BatchScalarF64) -> Self { + Self::from_real_scalar(value) + } +} + +impl Debug for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.dij_part.is_some() { + f.debug_struct("DualScalar") + .field("val", &self.real_part) + .field("dij_val", &self.dij_part.as_ref().unwrap().elem_view()) 
+ .finish() + } else { + f.debug_struct("DualScalar") + .field("val", &self.real_part) + .finish() + } + } +} + +impl IsScalar for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Scalar = DualBatchScalar; + type RealScalar = BatchScalarF64; + type SingleScalar = DualScalar; + type DualVector = DualBatchVector; + type DualMatrix = DualBatchMatrix; + type RealVector = BatchVecF64; + type RealMatrix = BatchMatF64; + type Vector = DualBatchVector; + type Matrix = DualBatchMatrix; + + type Mask = Mask; + + fn from_real_scalar(val: BatchScalarF64) -> Self { + Self { + real_part: val, + dij_part: None, + } + } + + fn scalar_examples() -> Vec { + [1.0, 2.0, 3.0].iter().map(|&v| Self::from_f64(v)).collect() + } + + fn from_real_array(arr: [f64; BATCH]) -> Self { + Self::from_real_scalar(BatchScalarF64::::from_real_array(arr)) + } + + fn to_real_array(&self) -> [f64; BATCH] { + self.real_part.to_real_array() + } + + fn extract_single(&self, i: usize) -> Self::SingleScalar { + Self::SingleScalar { + real_part: self.real_part.extract_single(i), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + dij.extract_single(i) + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn cos(self) -> DualBatchScalar + where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, + { + Self { + real_part: self.real_part.cos(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + -*dij * self.real_part.sin() + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn signum(&self) -> Self { + Self { + real_part: self.real_part.signum(), + dij_part: None, + } + } + + fn sin(self) -> DualBatchScalar + where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, + { + Self { + real_part: self.real_part.sin(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + *dij * self.real_part.cos() + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn abs(self) -> Self { + Self { + real_part: self.real_part.abs(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + *dij * self.real_part.signum() + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn atan2(self, rhs: Self) -> Self { + let inv_sq_nrm: BatchScalarF64 = BatchScalarF64::ones() + / (self.real_part * self.real_part + rhs.real_part * rhs.real_part); + Self { + real_part: self.real_part.atan2(rhs.real_part), + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| inv_sq_nrm * ((*l_dij) * rhs.real_part), + |r_dij| -inv_sq_nrm * (self.real_part * (*r_dij)), + ), + } + } + + fn real_part(&self) -> BatchScalarF64 { + self.real_part + } + + fn sqrt(self) -> Self { + let sqrt = self.real_part.sqrt(); + Self { + real_part: sqrt, + dij_part: match self.dij_part { + Some(dij) => { + let out_dij = + MutTensorDD::from_map(&dij.view(), |dij: &BatchScalarF64| { + *dij * BatchScalarF64::::from_f64(1.0) + / (BatchScalarF64::::from_f64(2.0) * sqrt) + }); + Some(out_dij) + } + None => None, + }, + } + } + + fn to_vec(self) -> DualBatchVector<1, BATCH> { + DualBatchVector::<1, BATCH> { + real_part: self.real_part.real_part().to_vec(), + dij_part: match self.dij_part { + Some(dij) => { + let tmp = 
dij.inner_scalar_to_vec(); + Some(tmp) + } + None => None, + }, + } + } + + fn tan(self) -> Self { + Self { + real_part: self.real_part.tan(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let c = self.real_part.cos(); + let sec_squared = BatchScalarF64::::ones() / (c * c); + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + *dij * sec_squared + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn acos(self) -> Self { + Self { + real_part: self.real_part.acos(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dval = -BatchScalarF64::::ones() + / (BatchScalarF64::::ones() - self.real_part * self.real_part) + .sqrt(); + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + *dij * dval + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn asin(self) -> Self { + Self { + real_part: self.real_part.asin(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dval = BatchScalarF64::::ones() + / (BatchScalarF64::::ones() - self.real_part * self.real_part) + .sqrt(); + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + *dij * dval + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn atan(self) -> Self { + Self { + real_part: self.real_part.atan(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dval = BatchScalarF64::::ones() + / (BatchScalarF64::::ones() + self.real_part * self.real_part); + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { + *dij * dval + }); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn fract(self) -> Self { + Self { + real_part: self.real_part.fract(), + dij_part: match self.dij_part.clone() { + Some(dij_val) => { + let dyn_mat = + MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| *dij); + Some(dyn_mat) + } + None => None, + }, + } + } + + fn floor(&self) -> BatchScalarF64 { + self.real_part.floor() + } + + fn from_f64(val: f64) -> Self { + Self::from_real_scalar(BatchScalarF64::::from_f64(val)) + } + + type DualScalar = Self; + + fn ones() -> Self { + Self::from_f64(1.0) + } + + fn zeros() -> Self { + Self::from_f64(0.0) + } + + fn test_suite() { + let examples = Self::scalar_examples(); + for a in &examples { + let sin_a = a.clone().sin(); + let cos_a = a.clone().cos(); + let val = sin_a.clone() * sin_a + cos_a.clone() * cos_a; + let one = Self::ones(); + + for i in 0..BATCH { + assert_abs_diff_eq!(val.extract_single(i), one.extract_single(i)); + } + } + } + + fn less_equal(&self, rhs: &Self) -> Self::Mask { + self.real_part.less_equal(&rhs.real_part) + } + + fn to_dual(self) -> Self::DualScalar { + self + } + + fn select(self, mask: &Self::Mask, other: Self) -> Self { + Self { + real_part: self.real_part.select(mask, other.real_part), + dij_part: match (self.dij_part, other.dij_part) { + (Some(lhs), Some(rhs)) => { + let dyn_mat = MutTensorDD::from_map2( + &lhs.view(), + &rhs.view(), + |l: &BatchScalarF64, r: &BatchScalarF64| l.select(mask, *r), + ); + Some(dyn_mat) + } + _ => None, + }, + } + } + + fn greater_equal(&self, rhs: &Self) -> Self::Mask { + self.real_part.greater_equal(&rhs.real_part) + } +} + +impl AddAssign> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn add_assign(&mut self, rhs: Self) { + *self = self.clone().add(&rhs); + } +} +impl SubAssign> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn sub_assign(&mut self, rhs: 
Self) { + *self = self.clone().sub(&rhs); + } +} + +impl Add> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn add(self, rhs: Self) -> Self::Output { + self.add(&rhs) + } +} + +impl Add<&DualBatchScalar> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn add(self, rhs: &Self) -> Self::Output { + let r = self.real_part + rhs.real_part; + + Self { + real_part: r, + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij, + |r_dij| *r_dij, + ), + } + } +} + +impl Mul> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn mul(self, rhs: Self) -> Self::Output { + self.mul(&rhs) + } +} + +impl Mul<&DualBatchScalar> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn mul(self, rhs: &Self) -> Self::Output { + let r = self.real_part * rhs.real_part; + + Self { + real_part: r, + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| (*l_dij) * rhs.real_part, + |r_dij| (*r_dij) * self.real_part, + ), + } + } +} + +impl Div> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn div(self, rhs: Self) -> Self::Output { + self.div(&rhs) + } +} + +impl Div<&DualBatchScalar> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn div(self, rhs: &Self) -> Self::Output { + let rhs_inv = BatchScalarF64::::ones() / rhs.real_part; + Self { + real_part: self.real_part * rhs_inv, + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij * rhs_inv, + |r_dij| -self.real_part * (*r_dij) * rhs_inv * rhs_inv, + ), + } + } +} + +impl Sub> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn sub(self, rhs: Self) -> Self::Output { + self.sub(&rhs) + } +} + +impl Sub<&DualBatchScalar> for DualBatchScalar +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchScalar; + fn sub(self, rhs: &Self) -> Self::Output { + Self { + real_part: self.real_part - rhs.real_part, + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij, + |r_dij| -(*r_dij), + ), + } + } +} diff --git a/crates/sophus_core/src/calculus/dual/dual_batch_vector.rs b/crates/sophus_core/src/calculus/dual/dual_batch_vector.rs new file mode 100644 index 0000000..6128733 --- /dev/null +++ b/crates/sophus_core/src/calculus/dual/dual_batch_vector.rs @@ -0,0 +1,528 @@ + + +pub use crate::calculus::dual::dual_batch_scalar::DualBatchScalar; +use crate::calculus::dual::dual_vector::DijPair; +use crate::calculus::dual::DualBatchMatrix; +use crate::linalg::BatchScalarF64; +use crate::linalg::BatchVecF64; +use crate::prelude::*; +use crate::tensor::mut_tensor::InnerVecToMat; +use crate::tensor::mut_tensor::MutTensorDD; +use crate::tensor::mut_tensor::MutTensorDDR; +use approx::AbsDiffEq; +use approx::RelativeEq; +use std::fmt::Debug; +use std::ops::Add; +use std::ops::Neg; +use std::ops::Sub; +use std::simd::LaneCount; +use std::simd::Mask; +use std::simd::SupportedLaneCount; + +/// Dual vector (batch version) +#[derive(Clone, Debug)] +pub struct DualBatchVector +where + BatchScalarF64: 
IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + /// real part + pub real_part: BatchVecF64, + /// infinitesimal part - represents derivative + pub dij_part: Option, ROWS>>, +} + +impl num_traits::Zero for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + fn zero() -> Self { + DualBatchVector { + real_part: BatchVecF64::::zeros(), + dij_part: None, + } + } + + fn is_zero(&self) -> bool { + self.real_part == BatchVecF64::::zeros() + } +} + +impl IsDual for DualBatchVector where + LaneCount: SupportedLaneCount +{ +} + +impl PartialEq for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + fn eq(&self, other: &Self) -> bool { + self.real_part == other.real_part && self.dij_part == other.dij_part + } +} + +impl AbsDiffEq for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + type Epsilon = f64; + + fn default_epsilon() -> Self::Epsilon { + f64::default_epsilon() + } + + fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool { + self.real_part.abs_diff_eq(&other.real_part, epsilon) + } +} + +impl RelativeEq for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + fn default_max_relative() -> Self::Epsilon { + f64::default_max_relative() + } + + fn relative_eq( + &self, + other: &Self, + epsilon: Self::Epsilon, + max_relative: Self::Epsilon, + ) -> bool { + self.real_part + .relative_eq(&other.real_part, epsilon, max_relative) + } +} + +impl IsDualVector, ROWS, BATCH> + for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + fn new_with_dij(val: BatchVecF64) -> Self { + let mut dij_val = MutTensorDDR::, ROWS>::from_shape([ROWS, 1]); + for i in 0..ROWS { + dij_val.mut_view().get_mut([i, 0])[(i, 0)] = BatchScalarF64::::ones(); + } + + Self { + real_part: val, + dij_part: Some(dij_val), + } + } + + fn dij_val(self) -> Option, ROWS>> { + self.dij_part + } +} + +impl DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + fn binary_dij< + const R0: usize, + const R1: usize, + F: FnMut(&BatchVecF64) -> BatchVecF64, + G: FnMut(&BatchVecF64) -> BatchVecF64, + >( + lhs_dx: &Option, R0>>, + rhs_dx: &Option, R1>>, + mut left_op: F, + mut right_op: G, + ) -> Option, ROWS>> { + match (lhs_dx, rhs_dx) { + (None, None) => None, + (None, Some(rhs_dij)) => { + let out_dij = MutTensorDDR::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); + Some(out_dij) + } + (Some(lhs_dij), None) => { + let out_dij = MutTensorDDR::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); + Some(out_dij) + } + (Some(lhs_dij), Some(rhs_dij)) => { + let dyn_mat = + MutTensorDDR::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { + left_op(l_dij) + right_op(r_dij) + }); + Some(dyn_mat) + } + } + } + + fn binary_vs_dij< + const R0: usize, + F: FnMut(&BatchVecF64) -> BatchVecF64, + G: FnMut(&BatchScalarF64) -> BatchVecF64, + >( + lhs_dx: &Option, R0>>, + rhs_dx: &Option>>, + mut left_op: F, + mut right_op: G, + ) -> Option, ROWS>> { + match (lhs_dx, rhs_dx) { + (None, None) => None, + (None, Some(rhs_dij)) => { + let out_dij = MutTensorDDR::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); + Some(out_dij) + } + (Some(lhs_dij), None) => { + let out_dij = MutTensorDDR::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); + Some(out_dij) + } + (Some(lhs_dij), Some(rhs_dij)) => { + let dyn_mat = + MutTensorDDR::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { + left_op(l_dij) + right_op(r_dij) + }); + Some(dyn_mat) + } + } + } + + fn two_dx( + mut lhs_dx: Option, R0>>, + mut rhs_dx: Option, R1>>, + ) -> Option, R0, R1>> { + if lhs_dx.is_none() && 
rhs_dx.is_none() { + return None; + } + + if lhs_dx.is_some() && rhs_dx.is_some() { + assert_eq!( + lhs_dx.clone().unwrap().dims(), + rhs_dx.clone().unwrap().dims() + ); + } + + if lhs_dx.is_none() { + lhs_dx = Some(MutTensorDDR::from_shape(rhs_dx.clone().unwrap().dims())) + } else if rhs_dx.is_none() { + rhs_dx = Some(MutTensorDDR::from_shape(lhs_dx.clone().unwrap().dims())) + } + + Some(DijPair { + lhs: lhs_dx.unwrap(), + rhs: rhs_dx.unwrap(), + }) + } +} + +impl Neg for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchVector; + + fn neg(self) -> Self::Output { + DualBatchVector { + real_part: -self.real_part, + dij_part: self + .dij_part + .clone() + .map(|dij_val| MutTensorDDR::from_map(&dij_val.view(), |v| -v)), + } + } +} + +impl Sub for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchVector; + + fn sub(self, rhs: Self) -> Self::Output { + DualBatchVector { + real_part: self.real_part - rhs.real_part, + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij, + |r_dij| -r_dij, + ), + } + } +} + +impl Add for DualBatchVector +where + LaneCount: SupportedLaneCount, +{ + type Output = DualBatchVector; + + fn add(self, rhs: Self) -> Self::Output { + DualBatchVector { + real_part: self.real_part + rhs.real_part, + dij_part: Self::binary_dij( + &self.dij_part, + &rhs.dij_part, + |l_dij| *l_dij, + |r_dij| *r_dij, + ), + } + } +} + +impl IsVector, ROWS, BATCH> + for DualBatchVector +where + BatchScalarF64: IsCoreScalar, + LaneCount: SupportedLaneCount, +{ + fn from_f64(val: f64) -> Self { + DualBatchVector { + real_part: + as IsVector, ROWS, BATCH>>::from_f64( + val, + ), + dij_part: None, + } + } + + fn outer( + self, + rhs: DualBatchVector, + ) -> DualBatchMatrix { + let mut result = DualBatchMatrix::zeros(); + for i in 0..ROWS { + for j in 0..R2 { + result.set_elem([i, j], self.get_elem(i) * rhs.get_elem(j)); + } + } + result + } + + fn norm(&self) -> DualBatchScalar { + self.clone().dot(self.clone()).sqrt() + } + + fn squared_norm(&self) -> DualBatchScalar { + self.clone().dot(self.clone()) + } + + fn get_elem(&self, idx: usize) -> DualBatchScalar { + DualBatchScalar { + real_part: self.real_part[idx], + dij_part: self + .dij_part + .clone() + .map(|dij_val| MutTensorDD::from_map(&dij_val.view(), |v| v[idx])), + } + } + + fn from_array(duals: [DualBatchScalar; ROWS]) -> Self { + let mut shape = None; + let mut val_v = BatchVecF64::::zeros(); + for i in 0..duals.len() { + let d = duals.clone()[i].clone(); + + val_v[i] = d.real_part; + if d.dij_part.is_some() { + shape = Some(d.dij_part.clone().unwrap().dims()); + } + } + + if shape.is_none() { + return DualBatchVector { + real_part: val_v, + dij_part: None, + }; + } + let shape = shape.unwrap(); + + let mut r = MutTensorDDR::, ROWS>::from_shape(shape); + + for i in 0..duals.len() { + let d = duals.clone()[i].clone(); + if d.dij_part.is_some() { + for d0 in 0..shape[0] { + for d1 in 0..shape[1] { + r.mut_view().get_mut([d0, d1])[(i, 0)] = + d.dij_part.clone().unwrap().get([d0, d1]); + } + } + } + } + DualBatchVector { + real_part: val_v, + dij_part: Some(r), + } + } + + fn from_real_array(vals: [BatchScalarF64; ROWS]) -> Self { + DualBatchVector { + real_part: BatchVecF64::from_real_array(vals), + dij_part: None, + } + } + + fn from_real_vector(val: BatchVecF64) -> Self { + Self { + real_part: val, + dij_part: None, + } + } + + fn real_vector(&self) -> &BatchVecF64 { + &self.real_part + } + + fn to_mat(self) -> DualBatchMatrix { + DualBatchMatrix 
{ + real_part: self.real_part, + dij_part: self.dij_part.map(|dij| dij.inner_vec_to_mat()), + } + } + + fn block_vec2( + top_row: DualBatchVector, + bot_row: DualBatchVector, + ) -> Self { + assert_eq!(R0 + R1, ROWS); + + let maybe_dij = Self::two_dx(top_row.dij_part, bot_row.dij_part); + Self { + real_part: BatchVecF64::::block_vec2(top_row.real_part, bot_row.real_part), + dij_part: match maybe_dij { + Some(dij_val) => { + let mut r = + MutTensorDDR::, ROWS>::from_shape(dij_val.shape()); + for d0 in 0..dij_val.shape()[0] { + for d1 in 0..dij_val.shape()[1] { + *r.mut_view().get_mut([d0, d1]) = + BatchVecF64::::block_vec2( + dij_val.lhs.get([d0, d1]), + dij_val.rhs.get([d0, d1]), + ); + } + } + Some(r) + } + None => None, + }, + } + } + + fn scaled(&self, s: DualBatchScalar) -> Self { + DualBatchVector { + real_part: self.real_part * s.real_part, + dij_part: Self::binary_vs_dij( + &self.dij_part, + &s.dij_part, + |l_dij| l_dij * s.real_part, + |r_dij| self.real_part * *r_dij, + ), + } + } + + fn dot(self, rhs: Self) -> DualBatchScalar { + let mut sum = DualBatchScalar::from_f64(0.0); + + for i in 0..ROWS { + sum += self.get_elem(i) * rhs.get_elem(i); + } + + sum + } + + fn normalized(&self) -> Self { + self.clone() + .scaled(DualBatchScalar::::from_f64(1.0) / self.norm()) + } + + fn from_f64_array(vals: [f64; ROWS]) -> Self { + DualBatchVector { + real_part: BatchVecF64::from_f64_array(vals), + dij_part: None, + } + } + + fn from_scalar_array(vals: [DualBatchScalar; ROWS]) -> Self { + let mut shape = None; + let mut val_v = BatchVecF64::::zeros(); + for i in 0..vals.len() { + let d = vals.clone()[i].clone(); + + val_v[i] = d.real_part; + if d.dij_part.is_some() { + shape = Some(d.dij_part.clone().unwrap().dims()); + } + } + + if shape.is_none() { + return DualBatchVector { + real_part: val_v, + dij_part: None, + }; + } + let shape = shape.unwrap(); + + let mut r = MutTensorDDR::, ROWS>::from_shape(shape); + + for i in 0..vals.len() { + let d = vals.clone()[i].clone(); + if d.dij_part.is_some() { + for d0 in 0..shape[0] { + for d1 in 0..shape[1] { + r.mut_view().get_mut([d0, d1])[(i, 0)] = + d.dij_part.clone().unwrap().get([d0, d1]); + } + } + } + } + DualBatchVector { + real_part: val_v, + dij_part: Some(r), + } + } + + fn set_elem(&mut self, idx: usize, v: DualBatchScalar) { + self.real_part[idx] = v.real_part; + if self.dij_part.is_some() { + let dij = &mut self.dij_part.as_mut().unwrap(); + for i in 0..dij.dims()[0] { + for j in 0..dij.dims()[1] { + dij.mut_view().get_mut([i, j])[idx] = v.dij_part.clone().unwrap().get([i, j]); + } + } + } + } + + fn to_dual(self) -> as IsScalar>::DualVector { + self + } + + fn select(self, mask: &Mask, other: Self) -> Self { + let maybe_dij = Self::two_dx(self.dij_part, other.dij_part); + + Self { + real_part: IsVector::select(self.real_part, mask, other.real_part), + dij_part: match maybe_dij { + Some(dij) => { + let mut r = + MutTensorDDR::, ROWS>::from_shape(dij.shape()); + for i in 0..dij.shape()[0] { + for j in 0..dij.shape()[1] { + *r.get_mut([i, j]) = + IsVector::select(dij.lhs.get([i, j]), mask, dij.rhs.get([i, j])); + } + } + Some(r) + } + _ => None, + }, + } + } + + fn get_fixed_subvec(&self, start_r: usize) -> DualBatchVector { + DualBatchVector { + real_part: self.real_part.fixed_rows::(start_r).into(), + dij_part: self.dij_part.clone().map(|dij_val| { + MutTensorDDR::from_map(&dij_val.view(), |v| v.fixed_rows::(start_r).into()) + }), + } + } +} diff --git a/crates/sophus_core/src/calculus/dual/dual_matrix.rs 
b/crates/sophus_core/src/calculus/dual/dual_matrix.rs index a4dbcbb..b886cd4 100644 --- a/crates/sophus_core/src/calculus/dual/dual_matrix.rs +++ b/crates/sophus_core/src/calculus/dual/dual_matrix.rs @@ -1,10 +1,5 @@ -use crate::calculus::dual::DualBatchScalar; -use crate::calculus::dual::DualBatchVector; use crate::calculus::dual::DualScalar; use crate::calculus::dual::DualVector; -use crate::linalg::BatchMatF64; -use crate::linalg::BatchScalarF64; -use crate::linalg::BatchVecF64; use crate::linalg::MatF64; use crate::linalg::VecF64; use crate::prelude::*; @@ -19,9 +14,7 @@ use std::ops::Add; use std::ops::Mul; use std::ops::Neg; use std::ops::Sub; -use std::simd::LaneCount; -use std::simd::Mask; -use std::simd::SupportedLaneCount; + /// DualScalarLike matrix #[derive(Clone)] @@ -32,17 +25,19 @@ pub struct DualMatrix { pub dij_part: Option>, } -/// DualScalarLike matrix -#[derive(Clone)] -pub struct DualBatchMatrix -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - /// real part - pub real_part: BatchMatF64, - /// infinitesimal part - represents derivative - pub dij_part: Option, ROWS, COLS>>, +impl Debug for DualMatrix { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.dij_part.is_some() { + f.debug_struct("DualScalarLike") + .field("val", &self.real_part) + .field("dij_val", &self.dij_part.as_ref().unwrap().elem_view()) + .finish() + } else { + f.debug_struct("DualScalarLike") + .field("val", &self.real_part) + .finish() + } + } } impl IsSingleMatrix @@ -105,8 +100,8 @@ pub struct DijPairM< const ROWS2: usize, const COLS2: usize, > { - lhs: MutTensorDDRC, - rhs: MutTensorDDRC, + pub(crate) lhs: MutTensorDDRC, + pub(crate) rhs: MutTensorDDRC, } /// Pair of dual matrices @@ -125,7 +120,7 @@ impl< const COLS2: usize, > DijPairM { - fn shape(&self) -> [usize; 2] { + pub(crate) fn shape(&self) -> [usize; 2] { self.lhs.dims() } } @@ -656,684 +651,14 @@ impl Mul> for DualMatrix< } } -impl Debug for DualMatrix { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.dij_part.is_some() { - f.debug_struct("DualScalarLike") - .field("val", &self.real_part) - .field("dij_val", &self.dij_part.as_ref().unwrap().elem_view()) - .finish() - } else { - f.debug_struct("DualScalarLike") - .field("val", &self.real_part) - .finish() - } - } -} - -impl IsDual - for DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ -} - -impl - IsDualMatrix, ROWS, COLS, BATCH> for DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ - /// Create a new dual number - fn new_with_dij(val: BatchMatF64) -> Self { - let mut dij_val = - MutTensorDDRC::, ROWS, COLS>::from_shape([ROWS, COLS]); - for i in 0..ROWS { - for j in 0..COLS { - dij_val.mut_view().get_mut([i, j])[(i, j)] = BatchScalarF64::::from_f64(1.0); - } - } - - Self { - real_part: val, - dij_part: Some(dij_val), - } - } - - /// Get the derivative - fn dij_val(self) -> Option, ROWS, COLS>> { - self.dij_part - } -} - -impl DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ - fn binary_mm_dij< - const R0: usize, - const R1: usize, - const C0: usize, - const C1: usize, - F: FnMut(&BatchMatF64) -> BatchMatF64, - G: FnMut(&BatchMatF64) -> BatchMatF64, - >( - lhs_dx: &Option, R0, C0>>, - rhs_dx: &Option, R1, C1>>, - mut left_op: F, - mut right_op: G, - ) -> Option, ROWS, COLS>> { - match (lhs_dx, rhs_dx) { - (None, None) => None, - (None, Some(rhs_dij)) => { - let out_dij = MutTensorDDRC::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); - Some(out_dij) - } - 
(Some(lhs_dij), None) => { - let out_dij = MutTensorDDRC::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); - Some(out_dij) - } - (Some(lhs_dij), Some(rhs_dij)) => { - let dyn_mat = - MutTensorDDRC::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { - left_op(l_dij) + right_op(r_dij) - }); - Some(dyn_mat) - } - } - } - - fn binary_mv_dij< - const R0: usize, - const R1: usize, - const C0: usize, - F: FnMut(&BatchMatF64) -> BatchVecF64, - G: FnMut(&BatchVecF64) -> BatchVecF64, - >( - lhs_dx: &Option, R0, C0>>, - rhs_dx: &Option, R1>>, - mut left_op: F, - mut right_op: G, - ) -> Option, ROWS>> { - match (lhs_dx, rhs_dx) { - (None, None) => None, - (None, Some(rhs_dij)) => { - let out_dij = MutTensorDDR::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); - Some(out_dij) - } - (Some(lhs_dij), None) => { - let out_dij = MutTensorDDR::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); - Some(out_dij) - } - (Some(lhs_dij), Some(rhs_dij)) => { - let dyn_mat = - MutTensorDDR::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { - left_op(l_dij) + right_op(r_dij) - }); - Some(dyn_mat) - } - } - } - - fn binary_ms_dij< - const R0: usize, - const C0: usize, - F: FnMut(&BatchMatF64) -> BatchMatF64, - G: FnMut(&BatchScalarF64) -> BatchMatF64, - >( - lhs_dx: &Option, R0, C0>>, - rhs_dx: &Option>>, - mut left_op: F, - mut right_op: G, - ) -> Option, ROWS, COLS>> { - match (lhs_dx, rhs_dx) { - (None, None) => None, - (None, Some(rhs_dij)) => { - let out_dij = MutTensorDDRC::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); - Some(out_dij) - } - (Some(lhs_dij), None) => { - let out_dij = MutTensorDDRC::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); - Some(out_dij) - } - (Some(lhs_dij), Some(rhs_dij)) => { - let dyn_mat = - MutTensorDDRC::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { - left_op(l_dij) + right_op(r_dij) - }); - Some(dyn_mat) - } - } - } - - /// derivatives - pub fn two_dx( - mut lhs_dx: Option, R1, C1>>, - mut rhs_dx: Option, R2, C2>>, - ) -> Option, R1, C1, R2, C2>> { - if lhs_dx.is_none() && rhs_dx.is_none() { - return None; - } - - if lhs_dx.is_some() && rhs_dx.is_some() { - assert_eq!( - lhs_dx.clone().unwrap().dims(), - rhs_dx.clone().unwrap().dims() - ); - } - - if lhs_dx.is_none() { - lhs_dx = Some(MutTensorDDRC::, R1, C1>::from_shape( - rhs_dx.clone().unwrap().dims(), - )) - } else if rhs_dx.is_none() { - rhs_dx = Some(MutTensorDDRC::, R2, C2>::from_shape( - lhs_dx.clone().unwrap().dims(), - )) - } - - Some(DijPairM { - lhs: lhs_dx.unwrap(), - rhs: rhs_dx.unwrap(), - }) - } - - /// derivatives - pub fn two_dx_from_vec( - mut lhs_dx: Option, ROWS, COLS>>, - mut rhs_dx: Option, COLS>>, - ) -> Option, ROWS, COLS>> { - if lhs_dx.is_none() && rhs_dx.is_none() { - return None; - } - - if lhs_dx.is_some() && rhs_dx.is_some() { - assert_eq!( - lhs_dx.clone().unwrap().dims(), - rhs_dx.clone().unwrap().dims() - ); - } - - if lhs_dx.is_none() { - lhs_dx = Some( - MutTensorDDRC::, ROWS, COLS>::from_shape( - rhs_dx.clone().unwrap().dims(), - ), - ) - } else if rhs_dx.is_none() { - rhs_dx = Some(MutTensorDDR::, COLS>::from_shape( - lhs_dx.clone().unwrap().dims(), - )) - } - - Some(DijPairMV::, ROWS, COLS> { - lhs: lhs_dx.unwrap(), - rhs: rhs_dx.unwrap(), - }) - } -} - -impl PartialEq - for DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ - fn eq(&self, other: &Self) -> bool { - self.real_part == other.real_part && self.dij_part == other.dij_part - } -} - -impl AbsDiffEq - for DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ - type Epsilon = 
f64; - - fn default_epsilon() -> Self::Epsilon { - f64::default_epsilon() - } - - fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool { - self.real_part.abs_diff_eq(&other.real_part, epsilon) - } -} - -impl RelativeEq - for DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ - fn default_max_relative() -> Self::Epsilon { - f64::default_max_relative() - } - - fn relative_eq( - &self, - other: &Self, - epsilon: Self::Epsilon, - max_relative: Self::Epsilon, - ) -> bool { - self.real_part - .relative_eq(&other.real_part, epsilon, max_relative) - } -} - -impl - IsMatrix, ROWS, COLS, BATCH> for DualBatchMatrix -where - LaneCount: SupportedLaneCount, -{ - fn from_f64(val: f64) -> Self { - DualBatchMatrix { - real_part: BatchMatF64::::from_f64(val), - dij_part: None, - } - } - - fn set_elem(&mut self, idx: [usize; 2], val: DualBatchScalar) { - self.real_part.set_elem(idx, val.real_part); - if self.dij_part.is_some() { - let dij = &mut self.dij_part.as_mut().unwrap(); - for i in 0..dij.dims()[0] { - for j in 0..dij.dims()[1] { - dij.mut_view().get_mut([i, j])[(idx[0], idx[1])] = - val.dij_part.clone().unwrap().get([i, j]); - } - } - } - } - - fn from_scalar(val: DualBatchScalar) -> Self { - DualBatchMatrix { - real_part: BatchMatF64::::from_scalar(val.real_part), - dij_part: val.dij_part.map(|dij_val| { - MutTensorDDRC::from_map(&dij_val.view(), |v| { - BatchMatF64::::from_scalar(*v) - }) - }), - } - } - - fn mat_mul( - &self, - rhs: DualBatchMatrix, - ) -> DualBatchMatrix { - DualBatchMatrix { - real_part: self.real_part * rhs.real_part, - dij_part: DualBatchMatrix::::binary_mm_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| l_dij * rhs.real_part, - |r_dij| self.real_part * r_dij, - ), - } - } - - fn from_real_matrix(val: BatchMatF64) -> Self { - Self { - real_part: val, - dij_part: None, - } - } - - fn scaled(&self, s: DualBatchScalar) -> Self { - DualBatchMatrix { - real_part: self.real_part * s.real_part, - dij_part: DualBatchMatrix::::binary_ms_dij( - &self.dij_part, - &s.dij_part, - |l_dij| l_dij * s.real_part, - |r_dij| self.real_part * *r_dij, - ), - } - } - - fn identity() -> Self { - DualBatchMatrix::from_real_matrix(BatchMatF64::::identity()) - } - - fn get_elem(&self, idx: [usize; 2]) -> DualBatchScalar { - DualBatchScalar:: { - real_part: self.real_part.get_elem(idx), - dij_part: self - .dij_part - .clone() - .map(|dij_val| MutTensorDD::from_map(&dij_val.view(), |v| v[(idx[0], idx[1])])), - } - } - - fn from_array2(duals: [[DualBatchScalar; COLS]; ROWS]) -> Self { - let mut shape = None; - let mut val_mat = BatchMatF64::::zeros(); - for i in 0..duals.len() { - let d_rows = duals[i].clone(); - for j in 0..d_rows.len() { - let d = d_rows.clone()[j].clone(); - - val_mat[(i, j)] = d.real_part; - if d.dij_part.is_some() { - shape = Some(d.dij_part.clone().unwrap().dims()); - } - } - } - - if shape.is_none() { - return DualBatchMatrix { - real_part: val_mat, - dij_part: None, - }; - } - let shape = shape.unwrap(); - - let mut r = MutTensorDDRC::, ROWS, COLS>::from_shape(shape); - for i in 0..duals.len() { - let d_rows = duals[i].clone(); - for j in 0..d_rows.len() { - let d = d_rows.clone()[j].clone(); - if d.dij_part.is_some() { - for d0 in 0..shape[0] { - for d1 in 0..shape[1] { - r.mut_view().get_mut([d0, d1])[(i, j)] = - d.dij_part.clone().unwrap().get([d0, d1]); - } - } - } - } - } - DualBatchMatrix { - real_part: val_mat, - dij_part: Some(r), - } - } - - fn real_matrix(&self) -> &BatchMatF64 { - &self.real_part - } - - fn block_mat2x2( - top_row: ( - 
DualBatchMatrix, - DualBatchMatrix, - ), - bot_row: ( - DualBatchMatrix, - DualBatchMatrix, - ), - ) -> Self { - assert_eq!(R0 + R1, ROWS); - assert_eq!(C0 + C1, COLS); - - Self::block_mat2x1( - DualBatchMatrix::::block_mat1x2(top_row.0, top_row.1), - DualBatchMatrix::::block_mat1x2(bot_row.0, bot_row.1), - ) - } - - fn block_mat2x1( - top_row: DualBatchMatrix, - bot_row: DualBatchMatrix, - ) -> Self { - assert_eq!(R0 + R1, ROWS); - let maybe_dij = Self::two_dx(top_row.dij_part, bot_row.dij_part); - - Self { - real_part: BatchMatF64::::block_mat2x1( - top_row.real_part, - bot_row.real_part, - ), - dij_part: match maybe_dij { - Some(dij_val) => { - let mut r = MutTensorDDRC::, ROWS, COLS>::from_shape( - dij_val.shape(), - ); - for d0 in 0..dij_val.shape()[0] { - for d1 in 0..dij_val.shape()[1] { - *r.mut_view().get_mut([d0, d1]) = - BatchMatF64::::block_mat2x1( - dij_val.lhs.get([d0, d1]), - dij_val.rhs.get([d0, d1]), - ); - } - } - Some(r) - } - None => None, - }, - } - } - - fn block_mat1x2( - left_col: DualBatchMatrix, - righ_col: DualBatchMatrix, - ) -> Self { - assert_eq!(C0 + C1, COLS); - let maybe_dij = Self::two_dx(left_col.dij_part, righ_col.dij_part); - - Self { - real_part: BatchMatF64::::block_mat1x2( - left_col.real_part, - righ_col.real_part, - ), - dij_part: match maybe_dij { - Some(dij_val) => { - let mut r = MutTensorDDRC::, ROWS, COLS>::from_shape( - dij_val.shape(), - ); - for d0 in 0..dij_val.shape()[0] { - for d1 in 0..dij_val.shape()[1] { - *r.mut_view().get_mut([d0, d1]) = - BatchMatF64::::block_mat1x2( - dij_val.lhs.get([d0, d1]), - dij_val.rhs.get([d0, d1]), - ); - } - } - Some(r) - } - None => None, - }, - } - } - - fn get_fixed_submat( - &self, - start_r: usize, - start_c: usize, - ) -> DualBatchMatrix { - DualBatchMatrix { - real_part: self.real_part.get_fixed_submat(start_r, start_c), - dij_part: self.dij_part.clone().map(|dij_val| { - MutTensorDDRC::from_map(&dij_val.view(), |v| v.get_fixed_submat(start_r, start_c)) - }), - } - } - - fn get_col_vec(&self, start_r: usize) -> DualBatchVector { - DualBatchVector { - real_part: self.real_part.get_col_vec(start_r), - dij_part: self - .dij_part - .clone() - .map(|dij_val| MutTensorDDR::from_map(&dij_val.view(), |v| v.get_col_vec(start_r))), - } - } - - fn get_row_vec(&self, c: usize) -> DualBatchVector { - DualBatchVector { - real_part: self.real_part.get_row_vec(c), - dij_part: self - .dij_part - .clone() - .map(|dij_val| MutTensorDDR::from_map(&dij_val.view(), |v| v.get_row_vec(c))), - } - } - - fn from_real_scalar_array2(vals: [[BatchScalarF64; COLS]; ROWS]) -> Self { - DualBatchMatrix { - real_part: BatchMatF64::from_real_scalar_array2(vals), - dij_part: None, - } - } - - fn from_f64_array2(vals: [[f64; COLS]; ROWS]) -> Self { - DualBatchMatrix { - real_part: BatchMatF64::from_f64_array2(vals), - dij_part: None, - } - } - - fn set_col_vec( - &mut self, - c: usize, - v: as IsScalar>::Vector, - ) { - self.real_part.set_col_vec(c, v.real_part); - todo!(); - } - - fn to_dual(self) -> as IsScalar>::DualMatrix { - self - } - - fn select(self, mask: &Mask, other: Self) -> Self { - let maybe_dij = Self::two_dx(self.dij_part, other.dij_part); - - DualBatchMatrix { - real_part: self.real_part.select(mask, other.real_part), - dij_part: match maybe_dij { - Some(dij) => { - let mut r = - MutTensorDDRC::, ROWS, COLS>::from_shape(dij.shape()); - for i in 0..dij.shape()[0] { - for j in 0..dij.shape()[1] { - *r.get_mut([i, j]) = - dij.lhs.get([i, j]).select(mask, dij.rhs.get([i, j])); - } - } - Some(r) - } - _ => None, - }, - } - 
}
-}
-
-impl<const ROWS: usize, const COLS: usize, const BATCH: usize> Add
-    for DualBatchMatrix<ROWS, COLS, BATCH>
-where
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    type Output = DualBatchMatrix<ROWS, COLS, BATCH>;
-
-    fn add(self, rhs: Self) -> Self::Output {
-        DualBatchMatrix {
-            real_part: self.real_part + rhs.real_part,
-            dij_part: Self::binary_mm_dij(
-                &self.dij_part,
-                &rhs.dij_part,
-                |l_dij| *l_dij,
-                |r_dij| *r_dij,
-            ),
-        }
-    }
-}
-
-impl<const ROWS: usize, const COLS: usize, const BATCH: usize> Sub
-    for DualBatchMatrix<ROWS, COLS, BATCH>
-where
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    type Output = DualBatchMatrix<ROWS, COLS, BATCH>;
-
-    fn sub(self, rhs: Self) -> Self::Output {
-        DualBatchMatrix {
-            real_part: self.real_part - rhs.real_part,
-            dij_part: Self::binary_mm_dij(
-                &self.dij_part,
-                &rhs.dij_part,
-                |l_dij| *l_dij,
-                |r_dij| -r_dij,
-            ),
-        }
-    }
-}
-
-impl<const ROWS: usize, const COLS: usize, const BATCH: usize> Neg
-    for DualBatchMatrix<ROWS, COLS, BATCH>
-where
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    type Output = DualBatchMatrix<ROWS, COLS, BATCH>;
-
-    fn neg(self) -> Self::Output {
-        DualBatchMatrix {
-            real_part: -self.real_part,
-            dij_part: self
-                .dij_part
-                .clone()
-                .map(|dij_val| MutTensorDDRC::from_map(&dij_val.view(), |v| -v)),
-        }
-    }
-}
-
-impl<const ROWS: usize, const COLS: usize, const BATCH: usize> Zero
-    for DualBatchMatrix<ROWS, COLS, BATCH>
-where
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn zero() -> Self {
-        Self::from_real_matrix(BatchMatF64::zeros())
-    }
-
-    fn is_zero(&self) -> bool {
-        self.real_part.is_zero()
-    }
-}
-
-impl<const ROWS: usize, const COLS: usize, const BATCH: usize> Mul<DualBatchVector<COLS, BATCH>>
-    for DualBatchMatrix<ROWS, COLS, BATCH>
-where
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    type Output = DualBatchVector<ROWS, BATCH>;
-
-    fn mul(self, rhs: DualBatchVector<COLS, BATCH>) -> Self::Output {
-        Self::Output {
-            real_part: self.real_part * rhs.real_part,
-            dij_part: Self::binary_mv_dij(
-                &self.dij_part,
-                &rhs.dij_part,
-                |l_dij| l_dij * rhs.real_part,
-                |r_dij| self.real_part * r_dij,
-            ),
-        }
-    }
-}
-
-impl<const ROWS: usize, const COLS: usize, const BATCH: usize> Debug
-    for DualBatchMatrix<ROWS, COLS, BATCH>
-where
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        if self.dij_part.is_some() {
-            f.debug_struct("DualScalarLike")
-                .field("val", &self.real_part)
-                .field("dij_val", &self.dij_part.as_ref().unwrap().elem_view())
-                .finish()
-        } else {
-            f.debug_struct("DualScalarLike")
-                .field("val", &self.real_part)
-                .finish()
-        }
-    }
-}
-
 #[test]
 fn dual_matrix_tests() {
-    use crate::calculus::dual::dual_scalar::DualBatchScalar;
-    use crate::calculus::dual::dual_scalar::DualScalar;
+    use crate::calculus::dual::DualScalar;
     use crate::calculus::maps::matrix_valued_maps::MatrixValuedMapFromMatrix;
+
+    #[cfg(feature = "simd")]
+    use crate::calculus::dual::DualBatchScalar;
+    #[cfg(feature = "simd")]
     use crate::linalg::BatchScalarF64;
 
     #[cfg(test)]
@@ -1454,10 +779,14 @@ fn dual_matrix_tests() {
     }
 
     def_test_template!(f64, DualScalar, 1);
+    #[cfg(feature = "simd")]
    def_test_template!(BatchScalarF64<2>, DualBatchScalar<2>, 2);
+    #[cfg(feature = "simd")]
    def_test_template!(BatchScalarF64<4>, DualBatchScalar<4>, 4);
 
     f64::run();
+    #[cfg(feature = "simd")]
    BatchScalarF64::<2>::run();
+    #[cfg(feature = "simd")]
    BatchScalarF64::<4>::run();
 }
diff --git a/crates/sophus_core/src/calculus/dual/dual_scalar.rs b/crates/sophus_core/src/calculus/dual/dual_scalar.rs
index 711d545..16fa342 100644
--- a/crates/sophus_core/src/calculus/dual/dual_scalar.rs
+++ b/crates/sophus_core/src/calculus/dual/dual_scalar.rs
@@ -1,21 +1,19 @@
-use super::dual_matrix::DualBatchMatrix;
 use super::dual_matrix::DualMatrix;
-use super::dual_vector::DualBatchVector;
 use super::dual_vector::DualVector;
+
 use crate::linalg::scalar::NumberCategory;
-use crate::linalg::BatchMatF64;
-use crate::linalg::BatchScalarF64;
-use crate::linalg::BatchVecF64;
+
 use crate::linalg::MatF64;
 use crate::linalg::VecF64;
 use crate::prelude::*;
 use crate::tensor::mut_tensor::InnerScalarToVec;
 use crate::tensor::mut_tensor::MutTensorDD;
-use approx::assert_abs_diff_eq;
+
 use approx::AbsDiffEq;
 use approx::RelativeEq;
 use num_traits::One;
 use num_traits::Zero;
+
 use std::fmt::Debug;
 use std::ops::Add;
 use std::ops::AddAssign;
@@ -24,9 +22,6 @@ use std::ops::Mul;
 use std::ops::Neg;
 use std::ops::Sub;
 use std::ops::SubAssign;
-use std::simd::LaneCount;
-use std::simd::Mask;
-use std::simd::SupportedLaneCount;
 
 /// Trait for dual numbers
 pub trait IsDual {}
@@ -43,27 +38,6 @@ pub struct DualScalar {
 
 impl IsDual for DualScalar {}
 
-/// Dual number - a real number and an infinitesimal number (batch version)
-#[derive(Clone)]
-pub struct DualBatchScalar<const BATCH: usize>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    /// real part
-    pub real_part: BatchScalarF64<BATCH>,
-
-    /// infinitesimal part - represents derivative
-    pub dij_part: Option<MutTensorDD<BatchScalarF64<BATCH>>>,
-}
-
-impl<const BATCH: usize> IsDual for DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-}
-
 /// Trait for scalar dual numbers
 pub trait IsDualScalar<const BATCH: usize>: IsScalar<BATCH> + IsDual {
     /// Create a new dual scalar from real scalar for auto-differentiation with respect to self
@@ -633,703 +607,16 @@ impl Sub<&DualScalar> for DualScalar {
         }
     }
 }
-
-impl<const BATCH: usize> IsDualScalar<BATCH> for DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn new_with_dij(val: BatchScalarF64<BATCH>) -> Self {
-        let dij_val = MutTensorDD::from_shape_and_val([1, 1], BatchScalarF64::<BATCH>::ones());
-        Self {
-            real_part: val,
-            dij_part: Some(dij_val),
-        }
-    }
-
-    fn dij_val(self) -> Option<MutTensorDD<BatchScalarF64<BATCH>>> {
-        self.dij_part
-    }
-
-    fn vector_with_dij<const ROWS: usize>(val: Self::RealVector<ROWS>) -> Self::Vector<ROWS> {
-        DualBatchVector::<ROWS, BATCH>::new_with_dij(val)
-    }
-
-    fn matrix_with_dij<const ROWS: usize, const COLS: usize>(
-        val: Self::RealMatrix<ROWS, COLS>,
-    ) -> Self::Matrix<ROWS, COLS> {
-        DualBatchMatrix::<ROWS, COLS, BATCH>::new_with_dij(val)
-    }
-}
-
-impl<const BATCH: usize> IsCoreScalar for DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn number_category() -> NumberCategory {
-        NumberCategory::Real
-    }
-}
-
-impl<const BATCH: usize> AsRef<DualBatchScalar<BATCH>> for DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn as_ref(&self) -> &DualBatchScalar<BATCH>
-    where
-        BatchScalarF64<BATCH>: IsCoreScalar,
-        LaneCount<BATCH>: SupportedLaneCount,
-    {
-        self
-    }
-}
-
-impl<const BATCH: usize> One for DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn one() -> Self {
-        <DualBatchScalar<BATCH>>::from_f64(1.0)
-    }
-}
-
-impl<const BATCH: usize> Zero for DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn zero() -> Self {
-        <DualBatchScalar<BATCH>>::from_f64(0.0)
-    }
-
-    fn is_zero(&self) -> bool {
-        self.real_part == <DualBatchScalar<BATCH>>::from_f64(0.0).real_part()
-    }
-}
-
-impl<const BATCH: usize> DualBatchScalar<BATCH>
-where
-    BatchScalarF64<BATCH>: IsCoreScalar,
-    LaneCount<BATCH>: SupportedLaneCount,
-{
-    fn binary_dij<
-        F: FnMut(&BatchScalarF64<BATCH>) -> BatchScalarF64<BATCH>,
-        G: FnMut(&BatchScalarF64<BATCH>) -> BatchScalarF64<BATCH>,
-    >(
-        lhs_dx: &Option<MutTensorDD<BatchScalarF64<BATCH>>>,
-        rhs_dx: &Option<MutTensorDD<BatchScalarF64<BATCH>>>,
-        mut left_op: F,
-        mut right_op: G,
-    ) -> Option<MutTensorDD<BatchScalarF64<BATCH>>> {
-        match (lhs_dx, rhs_dx) {
-            (None, None) => None,
-            (None, Some(rhs_dij)) => {
-                let out_dij =
-                    MutTensorDD::from_map(&rhs_dij.view(), |r_dij: &BatchScalarF64<BATCH>| {
-                        right_op(r_dij)
-                    });
-                Some(out_dij)
-            }
-            (Some(lhs_dij), None) => {
-                let out_dij =
-                    MutTensorDD::from_map(&lhs_dij.view(), |l_dij: &BatchScalarF64<BATCH>| {
-                        left_op(l_dij)
-                    });
-                Some(out_dij)
-            }
-            (Some(lhs_dij), Some(rhs_dij)) => {
-                let dyn_mat = MutTensorDD::from_map2(
-                    &lhs_dij.view(),
-                    &rhs_dij.view(),
-                    |l_dij: &BatchScalarF64<BATCH>, r_dij: &BatchScalarF64<BATCH>| {
-                        left_op(l_dij) + right_op(r_dij)
-                    },
-                );
-                Some(dyn_mat)
-            }
-        }
-    }
-}
-
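[Editor's sketch, not part of the patch] The deleted `binary_dij` above, like its matrix and vector cousins `binary_mm_dij`, `binary_mv_dij`, and `binary_ms_dij` in dual_matrix.rs, encodes one piece of chain-rule bookkeeping: each operand's derivative tensor is an `Option`, with `None` standing for a structurally zero derivative, and a binary operation pushes each side through its own partial derivative, summing only when both sides actually carry one. A minimal self-contained sketch of that rule, with plain `f64` derivatives standing in for the `MutTensorDD` tensors and a hypothetical `binary_d` helper name:

fn binary_d(
    lhs_dx: Option<f64>,
    rhs_dx: Option<f64>,
    left_op: impl Fn(f64) -> f64,  // partial derivative w.r.t. lhs, applied to lhs' derivative
    right_op: impl Fn(f64) -> f64, // partial derivative w.r.t. rhs, applied to rhs' derivative
) -> Option<f64> {
    match (lhs_dx, rhs_dx) {
        (None, None) => None, // zero + zero stays implicit; nothing is allocated
        (None, Some(r)) => Some(right_op(r)),
        (Some(l), None) => Some(left_op(l)),
        (Some(l), Some(r)) => Some(left_op(l) + right_op(r)),
    }
}

fn main() {
    // Product rule d(a*b) = b*da + a*db, differentiating w.r.t. a,
    // so da = 1 and db is structurally zero (None):
    let (a, b) = (2.0_f64, 3.0_f64);
    assert_eq!(binary_d(Some(1.0), None, |da| b * da, |db| a * db), Some(3.0));
}

Keeping the `None` case explicit is what makes constant operands free during auto-differentiation: no derivative tensor is ever materialized for them.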
-impl Neg for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - - fn neg(self) -> Self { - Self { - real_part: -self.real_part, - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |v: &BatchScalarF64| -*v); - - Some(dyn_mat) - } - None => None, - }, - } - } -} - -impl PartialEq for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn eq(&self, other: &Self) -> bool { - self.real_part == other.real_part - } -} - -impl AbsDiffEq for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Epsilon = f64; - - fn default_epsilon() -> Self::Epsilon { - f64::default_epsilon() - } - - fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool { - for i in 0..BATCH { - if !self.real_part.extract_single(i).abs_diff_eq( - &other.real_part.extract_single(i), - epsilon.extract_single(i), - ) { - return false; - } - } - true - } -} - -impl RelativeEq for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn default_max_relative() -> Self::Epsilon { - f64::default_max_relative() - } - - fn relative_eq( - &self, - other: &Self, - epsilon: Self::Epsilon, - max_relative: Self::Epsilon, - ) -> bool { - for i in 0..BATCH { - if !self.real_part.extract_single(i).relative_eq( - &other.real_part.extract_single(i), - epsilon.extract_single(i), - max_relative.extract_single(i), - ) { - return false; - } - } - true - } -} - -impl From> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn from(value: BatchScalarF64) -> Self { - Self::from_real_scalar(value) - } -} - -impl Debug for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.dij_part.is_some() { - f.debug_struct("DualScalar") - .field("val", &self.real_part) - .field("dij_val", &self.dij_part.as_ref().unwrap().elem_view()) - .finish() - } else { - f.debug_struct("DualScalar") - .field("val", &self.real_part) - .finish() - } - } -} - -impl IsScalar for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Scalar = DualBatchScalar; - type RealScalar = BatchScalarF64; - type SingleScalar = DualScalar; - type DualVector = DualBatchVector; - type DualMatrix = DualBatchMatrix; - type RealVector = BatchVecF64; - type RealMatrix = BatchMatF64; - type Vector = DualBatchVector; - type Matrix = DualBatchMatrix; - - type Mask = Mask; - - fn from_real_scalar(val: BatchScalarF64) -> Self { - Self { - real_part: val, - dij_part: None, - } - } - - fn scalar_examples() -> Vec { - [1.0, 2.0, 3.0].iter().map(|&v| Self::from_f64(v)).collect() - } - - fn from_real_array(arr: [f64; BATCH]) -> Self { - Self::from_real_scalar(BatchScalarF64::::from_real_array(arr)) - } - - fn to_real_array(&self) -> [f64; BATCH] { - self.real_part.to_real_array() - } - - fn extract_single(&self, i: usize) -> Self::SingleScalar { - Self::SingleScalar { - real_part: self.real_part.extract_single(i), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - dij.extract_single(i) - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn cos(self) -> DualBatchScalar - where - BatchScalarF64: IsCoreScalar, - LaneCount: 
SupportedLaneCount, - { - Self { - real_part: self.real_part.cos(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - -*dij * self.real_part.sin() - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn signum(&self) -> Self { - Self { - real_part: self.real_part.signum(), - dij_part: None, - } - } - - fn sin(self) -> DualBatchScalar - where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, - { - Self { - real_part: self.real_part.sin(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - *dij * self.real_part.cos() - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn abs(self) -> Self { - Self { - real_part: self.real_part.abs(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - *dij * self.real_part.signum() - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn atan2(self, rhs: Self) -> Self { - let inv_sq_nrm: BatchScalarF64 = BatchScalarF64::ones() - / (self.real_part * self.real_part + rhs.real_part * rhs.real_part); - Self { - real_part: self.real_part.atan2(rhs.real_part), - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| inv_sq_nrm * ((*l_dij) * rhs.real_part), - |r_dij| -inv_sq_nrm * (self.real_part * (*r_dij)), - ), - } - } - - fn real_part(&self) -> BatchScalarF64 { - self.real_part - } - - fn sqrt(self) -> Self { - let sqrt = self.real_part.sqrt(); - Self { - real_part: sqrt, - dij_part: match self.dij_part { - Some(dij) => { - let out_dij = - MutTensorDD::from_map(&dij.view(), |dij: &BatchScalarF64| { - *dij * BatchScalarF64::::from_f64(1.0) - / (BatchScalarF64::::from_f64(2.0) * sqrt) - }); - Some(out_dij) - } - None => None, - }, - } - } - - fn to_vec(self) -> DualBatchVector<1, BATCH> { - DualBatchVector::<1, BATCH> { - real_part: self.real_part.real_part().to_vec(), - dij_part: match self.dij_part { - Some(dij) => { - let tmp = dij.inner_scalar_to_vec(); - Some(tmp) - } - None => None, - }, - } - } - - fn tan(self) -> Self { - Self { - real_part: self.real_part.tan(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let c = self.real_part.cos(); - let sec_squared = BatchScalarF64::::ones() / (c * c); - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - *dij * sec_squared - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn acos(self) -> Self { - Self { - real_part: self.real_part.acos(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dval = -BatchScalarF64::::ones() - / (BatchScalarF64::::ones() - self.real_part * self.real_part) - .sqrt(); - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - *dij * dval - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn asin(self) -> Self { - Self { - real_part: self.real_part.asin(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dval = BatchScalarF64::::ones() - / (BatchScalarF64::::ones() - self.real_part * self.real_part) - .sqrt(); - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - *dij * dval - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn atan(self) -> Self { - Self { - real_part: self.real_part.atan(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dval = 
BatchScalarF64::::ones() - / (BatchScalarF64::::ones() + self.real_part * self.real_part); - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| { - *dij * dval - }); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn fract(self) -> Self { - Self { - real_part: self.real_part.fract(), - dij_part: match self.dij_part.clone() { - Some(dij_val) => { - let dyn_mat = - MutTensorDD::from_map(&dij_val.view(), |dij: &BatchScalarF64| *dij); - Some(dyn_mat) - } - None => None, - }, - } - } - - fn floor(&self) -> BatchScalarF64 { - self.real_part.floor() - } - - fn from_f64(val: f64) -> Self { - Self::from_real_scalar(BatchScalarF64::::from_f64(val)) - } - - type DualScalar = Self; - - fn ones() -> Self { - Self::from_f64(1.0) - } - - fn zeros() -> Self { - Self::from_f64(0.0) - } - - fn test_suite() { - let examples = Self::scalar_examples(); - for a in &examples { - let sin_a = a.clone().sin(); - let cos_a = a.clone().cos(); - let val = sin_a.clone() * sin_a + cos_a.clone() * cos_a; - let one = Self::ones(); - - for i in 0..BATCH { - assert_abs_diff_eq!(val.extract_single(i), one.extract_single(i)); - } - } - } - - fn less_equal(&self, rhs: &Self) -> Self::Mask { - self.real_part.less_equal(&rhs.real_part) - } - - fn to_dual(self) -> Self::DualScalar { - self - } - - fn select(self, mask: &Self::Mask, other: Self) -> Self { - Self { - real_part: self.real_part.select(mask, other.real_part), - dij_part: match (self.dij_part, other.dij_part) { - (Some(lhs), Some(rhs)) => { - let dyn_mat = MutTensorDD::from_map2( - &lhs.view(), - &rhs.view(), - |l: &BatchScalarF64, r: &BatchScalarF64| l.select(mask, *r), - ); - Some(dyn_mat) - } - _ => None, - }, - } - } - - fn greater_equal(&self, rhs: &Self) -> Self::Mask { - self.real_part.greater_equal(&rhs.real_part) - } -} - -impl AddAssign> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn add_assign(&mut self, rhs: Self) { - *self = self.clone().add(&rhs); - } -} -impl SubAssign> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn sub_assign(&mut self, rhs: Self) { - *self = self.clone().sub(&rhs); - } -} - -impl Add> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn add(self, rhs: Self) -> Self::Output { - self.add(&rhs) - } -} - -impl Add<&DualBatchScalar> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn add(self, rhs: &Self) -> Self::Output { - let r = self.real_part + rhs.real_part; - - Self { - real_part: r, - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| *l_dij, - |r_dij| *r_dij, - ), - } - } -} - -impl Mul> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn mul(self, rhs: Self) -> Self::Output { - self.mul(&rhs) - } -} - -impl Mul<&DualBatchScalar> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn mul(self, rhs: &Self) -> Self::Output { - let r = self.real_part * rhs.real_part; - - Self { - real_part: r, - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| (*l_dij) * rhs.real_part, - |r_dij| (*r_dij) * self.real_part, - ), - } - } -} - -impl Div> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output 
= DualBatchScalar; - fn div(self, rhs: Self) -> Self::Output { - self.div(&rhs) - } -} - -impl Div<&DualBatchScalar> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn div(self, rhs: &Self) -> Self::Output { - let rhs_inv = BatchScalarF64::::ones() / rhs.real_part; - Self { - real_part: self.real_part * rhs_inv, - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| *l_dij * rhs_inv, - |r_dij| -self.real_part * (*r_dij) * rhs_inv * rhs_inv, - ), - } - } -} - -impl Sub> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn sub(self, rhs: Self) -> Self::Output { - self.sub(&rhs) - } -} - -impl Sub<&DualBatchScalar> for DualBatchScalar -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchScalar; - fn sub(self, rhs: &Self) -> Self::Output { - Self { - real_part: self.real_part - rhs.real_part, - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| *l_dij, - |r_dij| -(*r_dij), - ), - } - } -} - #[test] fn dual_scalar_tests() { use crate::calculus::maps::curves::ScalarValuedCurve; + #[cfg(feature = "simd")] + use crate::calculus::dual::DualBatchScalar; + #[cfg(feature = "simd")] + use crate::linalg::BatchScalarF64; + + trait DualScalarTest { fn run_dual_scalar_test(); } @@ -1469,11 +756,18 @@ fn dual_scalar_tests() { } def_dual_scalar_test_template!(1, f64, DualScalar); + #[cfg(feature = "simd")] def_dual_scalar_test_template!(2, BatchScalarF64<2>, DualBatchScalar<2>); + #[cfg(feature = "simd")] def_dual_scalar_test_template!(4, BatchScalarF64<4>, DualBatchScalar<4>); + #[cfg(feature = "simd")] def_dual_scalar_test_template!(8, BatchScalarF64<8>, DualBatchScalar<8>); + DualScalar::run_dual_scalar_test(); + #[cfg(feature = "simd")] DualBatchScalar::<2>::run_dual_scalar_test(); + #[cfg(feature = "simd")] DualBatchScalar::<4>::run_dual_scalar_test(); + #[cfg(feature = "simd")] DualBatchScalar::<8>::run_dual_scalar_test(); } diff --git a/crates/sophus_core/src/calculus/dual/dual_vector.rs b/crates/sophus_core/src/calculus/dual/dual_vector.rs index 40bdc05..02c3146 100644 --- a/crates/sophus_core/src/calculus/dual/dual_vector.rs +++ b/crates/sophus_core/src/calculus/dual/dual_vector.rs @@ -1,9 +1,5 @@ use super::dual_matrix::DualMatrix; -use super::dual_scalar::DualBatchScalar; use super::dual_scalar::DualScalar; -use crate::calculus::dual::DualBatchMatrix; -use crate::linalg::BatchScalarF64; -use crate::linalg::BatchVecF64; use crate::linalg::VecF64; use crate::prelude::*; use crate::tensor::mut_tensor::InnerVecToMat; @@ -15,9 +11,6 @@ use std::fmt::Debug; use std::ops::Add; use std::ops::Neg; use std::ops::Sub; -use std::simd::LaneCount; -use std::simd::Mask; -use std::simd::SupportedLaneCount; /// Dual vector #[derive(Clone)] @@ -45,19 +38,6 @@ pub trait IsDualVector, const ROWS: usize, const BATCH: u fn dij_val(self) -> Option>; } -/// Dual vector (batch version) -#[derive(Clone, Debug)] -pub struct DualBatchVector -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - /// real part - pub real_part: BatchVecF64, - /// infinitesimal part - represents derivative - pub dij_part: Option, ROWS>>, -} - impl IsDual for DualVector {} impl IsDualVector for DualVector { @@ -100,13 +80,13 @@ where } } -struct DijPair { - lhs: MutTensorDDR, - rhs: MutTensorDDR, +pub(crate) struct DijPair { + pub(crate) lhs: MutTensorDDR, + pub(crate) rhs: 
MutTensorDDR, } impl DijPair { - fn shape(&self) -> [usize; 2] { + pub(crate) fn shape(&self) -> [usize; 2] { self.lhs.dims() } } @@ -533,511 +513,20 @@ impl IsVector for DualVector { } } -impl num_traits::Zero for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - fn zero() -> Self { - DualBatchVector { - real_part: BatchVecF64::::zeros(), - dij_part: None, - } - } - - fn is_zero(&self) -> bool { - self.real_part == BatchVecF64::::zeros() - } -} - -impl IsDual for DualBatchVector where - LaneCount: SupportedLaneCount -{ -} - -impl PartialEq for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - fn eq(&self, other: &Self) -> bool { - self.real_part == other.real_part && self.dij_part == other.dij_part - } -} - -impl AbsDiffEq for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - type Epsilon = f64; - - fn default_epsilon() -> Self::Epsilon { - f64::default_epsilon() - } - - fn abs_diff_eq(&self, other: &Self, epsilon: Self::Epsilon) -> bool { - self.real_part.abs_diff_eq(&other.real_part, epsilon) - } -} - -impl RelativeEq for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - fn default_max_relative() -> Self::Epsilon { - f64::default_max_relative() - } - - fn relative_eq( - &self, - other: &Self, - epsilon: Self::Epsilon, - max_relative: Self::Epsilon, - ) -> bool { - self.real_part - .relative_eq(&other.real_part, epsilon, max_relative) - } -} - -impl IsDualVector, ROWS, BATCH> - for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - fn new_with_dij(val: BatchVecF64) -> Self { - let mut dij_val = MutTensorDDR::, ROWS>::from_shape([ROWS, 1]); - for i in 0..ROWS { - dij_val.mut_view().get_mut([i, 0])[(i, 0)] = BatchScalarF64::::ones(); - } - - Self { - real_part: val, - dij_part: Some(dij_val), - } - } - - fn dij_val(self) -> Option, ROWS>> { - self.dij_part - } -} - -impl DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - fn binary_dij< - const R0: usize, - const R1: usize, - F: FnMut(&BatchVecF64) -> BatchVecF64, - G: FnMut(&BatchVecF64) -> BatchVecF64, - >( - lhs_dx: &Option, R0>>, - rhs_dx: &Option, R1>>, - mut left_op: F, - mut right_op: G, - ) -> Option, ROWS>> { - match (lhs_dx, rhs_dx) { - (None, None) => None, - (None, Some(rhs_dij)) => { - let out_dij = MutTensorDDR::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); - Some(out_dij) - } - (Some(lhs_dij), None) => { - let out_dij = MutTensorDDR::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); - Some(out_dij) - } - (Some(lhs_dij), Some(rhs_dij)) => { - let dyn_mat = - MutTensorDDR::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { - left_op(l_dij) + right_op(r_dij) - }); - Some(dyn_mat) - } - } - } - - fn binary_vs_dij< - const R0: usize, - F: FnMut(&BatchVecF64) -> BatchVecF64, - G: FnMut(&BatchScalarF64) -> BatchVecF64, - >( - lhs_dx: &Option, R0>>, - rhs_dx: &Option>>, - mut left_op: F, - mut right_op: G, - ) -> Option, ROWS>> { - match (lhs_dx, rhs_dx) { - (None, None) => None, - (None, Some(rhs_dij)) => { - let out_dij = MutTensorDDR::from_map(&rhs_dij.view(), |r_dij| right_op(r_dij)); - Some(out_dij) - } - (Some(lhs_dij), None) => { - let out_dij = MutTensorDDR::from_map(&lhs_dij.view(), |l_dij| left_op(l_dij)); - Some(out_dij) - } - (Some(lhs_dij), Some(rhs_dij)) => { - let dyn_mat = - MutTensorDDR::from_map2(&lhs_dij.view(), &rhs_dij.view(), |l_dij, r_dij| { - left_op(l_dij) + right_op(r_dij) - }); - Some(dyn_mat) - } - } - } - - fn two_dx( - mut lhs_dx: Option, R0>>, - mut rhs_dx: Option, R1>>, - ) -> Option, R0, R1>> { - if lhs_dx.is_none() && 
rhs_dx.is_none() { - return None; - } - - if lhs_dx.is_some() && rhs_dx.is_some() { - assert_eq!( - lhs_dx.clone().unwrap().dims(), - rhs_dx.clone().unwrap().dims() - ); - } - - if lhs_dx.is_none() { - lhs_dx = Some(MutTensorDDR::from_shape(rhs_dx.clone().unwrap().dims())) - } else if rhs_dx.is_none() { - rhs_dx = Some(MutTensorDDR::from_shape(lhs_dx.clone().unwrap().dims())) - } - - Some(DijPair { - lhs: lhs_dx.unwrap(), - rhs: rhs_dx.unwrap(), - }) - } -} - -impl Neg for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchVector; - - fn neg(self) -> Self::Output { - DualBatchVector { - real_part: -self.real_part, - dij_part: self - .dij_part - .clone() - .map(|dij_val| MutTensorDDR::from_map(&dij_val.view(), |v| -v)), - } - } -} - -impl Sub for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchVector; - - fn sub(self, rhs: Self) -> Self::Output { - DualBatchVector { - real_part: self.real_part - rhs.real_part, - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| *l_dij, - |r_dij| -r_dij, - ), - } - } -} - -impl Add for DualBatchVector -where - LaneCount: SupportedLaneCount, -{ - type Output = DualBatchVector; - - fn add(self, rhs: Self) -> Self::Output { - DualBatchVector { - real_part: self.real_part + rhs.real_part, - dij_part: Self::binary_dij( - &self.dij_part, - &rhs.dij_part, - |l_dij| *l_dij, - |r_dij| *r_dij, - ), - } - } -} - -impl IsVector, ROWS, BATCH> - for DualBatchVector -where - BatchScalarF64: IsCoreScalar, - LaneCount: SupportedLaneCount, -{ - fn from_f64(val: f64) -> Self { - DualBatchVector { - real_part: - as IsVector, ROWS, BATCH>>::from_f64( - val, - ), - dij_part: None, - } - } - - fn outer( - self, - rhs: DualBatchVector, - ) -> DualBatchMatrix { - let mut result = DualBatchMatrix::zeros(); - for i in 0..ROWS { - for j in 0..R2 { - result.set_elem([i, j], self.get_elem(i) * rhs.get_elem(j)); - } - } - result - } - - fn norm(&self) -> DualBatchScalar { - self.clone().dot(self.clone()).sqrt() - } - - fn squared_norm(&self) -> DualBatchScalar { - self.clone().dot(self.clone()) - } - - fn get_elem(&self, idx: usize) -> DualBatchScalar { - DualBatchScalar { - real_part: self.real_part[idx], - dij_part: self - .dij_part - .clone() - .map(|dij_val| MutTensorDD::from_map(&dij_val.view(), |v| v[idx])), - } - } - - fn from_array(duals: [DualBatchScalar; ROWS]) -> Self { - let mut shape = None; - let mut val_v = BatchVecF64::::zeros(); - for i in 0..duals.len() { - let d = duals.clone()[i].clone(); - - val_v[i] = d.real_part; - if d.dij_part.is_some() { - shape = Some(d.dij_part.clone().unwrap().dims()); - } - } - - if shape.is_none() { - return DualBatchVector { - real_part: val_v, - dij_part: None, - }; - } - let shape = shape.unwrap(); - - let mut r = MutTensorDDR::, ROWS>::from_shape(shape); - - for i in 0..duals.len() { - let d = duals.clone()[i].clone(); - if d.dij_part.is_some() { - for d0 in 0..shape[0] { - for d1 in 0..shape[1] { - r.mut_view().get_mut([d0, d1])[(i, 0)] = - d.dij_part.clone().unwrap().get([d0, d1]); - } - } - } - } - DualBatchVector { - real_part: val_v, - dij_part: Some(r), - } - } - - fn from_real_array(vals: [BatchScalarF64; ROWS]) -> Self { - DualBatchVector { - real_part: BatchVecF64::from_real_array(vals), - dij_part: None, - } - } - - fn from_real_vector(val: BatchVecF64) -> Self { - Self { - real_part: val, - dij_part: None, - } - } - - fn real_vector(&self) -> &BatchVecF64 { - &self.real_part - } - - fn to_mat(self) -> DualBatchMatrix { - DualBatchMatrix 
{ - real_part: self.real_part, - dij_part: self.dij_part.map(|dij| dij.inner_vec_to_mat()), - } - } - - fn block_vec2( - top_row: DualBatchVector, - bot_row: DualBatchVector, - ) -> Self { - assert_eq!(R0 + R1, ROWS); - - let maybe_dij = Self::two_dx(top_row.dij_part, bot_row.dij_part); - Self { - real_part: BatchVecF64::::block_vec2(top_row.real_part, bot_row.real_part), - dij_part: match maybe_dij { - Some(dij_val) => { - let mut r = - MutTensorDDR::, ROWS>::from_shape(dij_val.shape()); - for d0 in 0..dij_val.shape()[0] { - for d1 in 0..dij_val.shape()[1] { - *r.mut_view().get_mut([d0, d1]) = - BatchVecF64::::block_vec2( - dij_val.lhs.get([d0, d1]), - dij_val.rhs.get([d0, d1]), - ); - } - } - Some(r) - } - None => None, - }, - } - } - - fn scaled(&self, s: DualBatchScalar) -> Self { - DualBatchVector { - real_part: self.real_part * s.real_part, - dij_part: Self::binary_vs_dij( - &self.dij_part, - &s.dij_part, - |l_dij| l_dij * s.real_part, - |r_dij| self.real_part * *r_dij, - ), - } - } - - fn dot(self, rhs: Self) -> DualBatchScalar { - let mut sum = DualBatchScalar::from_f64(0.0); - - for i in 0..ROWS { - sum += self.get_elem(i) * rhs.get_elem(i); - } - - sum - } - - fn normalized(&self) -> Self { - self.clone() - .scaled(DualBatchScalar::::from_f64(1.0) / self.norm()) - } - - fn from_f64_array(vals: [f64; ROWS]) -> Self { - DualBatchVector { - real_part: BatchVecF64::from_f64_array(vals), - dij_part: None, - } - } - - fn from_scalar_array(vals: [DualBatchScalar; ROWS]) -> Self { - let mut shape = None; - let mut val_v = BatchVecF64::::zeros(); - for i in 0..vals.len() { - let d = vals.clone()[i].clone(); - - val_v[i] = d.real_part; - if d.dij_part.is_some() { - shape = Some(d.dij_part.clone().unwrap().dims()); - } - } - - if shape.is_none() { - return DualBatchVector { - real_part: val_v, - dij_part: None, - }; - } - let shape = shape.unwrap(); - - let mut r = MutTensorDDR::, ROWS>::from_shape(shape); - - for i in 0..vals.len() { - let d = vals.clone()[i].clone(); - if d.dij_part.is_some() { - for d0 in 0..shape[0] { - for d1 in 0..shape[1] { - r.mut_view().get_mut([d0, d1])[(i, 0)] = - d.dij_part.clone().unwrap().get([d0, d1]); - } - } - } - } - DualBatchVector { - real_part: val_v, - dij_part: Some(r), - } - } - - fn set_elem(&mut self, idx: usize, v: DualBatchScalar) { - self.real_part[idx] = v.real_part; - if self.dij_part.is_some() { - let dij = &mut self.dij_part.as_mut().unwrap(); - for i in 0..dij.dims()[0] { - for j in 0..dij.dims()[1] { - dij.mut_view().get_mut([i, j])[idx] = v.dij_part.clone().unwrap().get([i, j]); - } - } - } - } - - fn to_dual(self) -> as IsScalar>::DualVector { - self - } - - fn select(self, mask: &Mask, other: Self) -> Self { - let maybe_dij = Self::two_dx(self.dij_part, other.dij_part); - - Self { - real_part: IsVector::select(self.real_part, mask, other.real_part), - dij_part: match maybe_dij { - Some(dij) => { - let mut r = - MutTensorDDR::, ROWS>::from_shape(dij.shape()); - for i in 0..dij.shape()[0] { - for j in 0..dij.shape()[1] { - *r.get_mut([i, j]) = - IsVector::select(dij.lhs.get([i, j]), mask, dij.rhs.get([i, j])); - } - } - Some(r) - } - _ => None, - }, - } - } - - fn get_fixed_subvec(&self, start_r: usize) -> DualBatchVector { - DualBatchVector { - real_part: self.real_part.fixed_rows::(start_r).into(), - dij_part: self.dij_part.clone().map(|dij_val| { - MutTensorDDR::from_map(&dij_val.view(), |v| v.fixed_rows::(start_r).into()) - }), - } - } -} - #[test] fn dual_vector_tests() { - use crate::calculus::dual::dual_scalar::DualBatchScalar; use 
crate::calculus::dual::dual_scalar::DualScalar; use crate::calculus::maps::scalar_valued_maps::ScalarValuedMapFromVector; use crate::calculus::maps::vector_valued_maps::VectorValuedMapFromVector; use crate::linalg::vector::IsVector; - use crate::linalg::BatchScalarF64; use crate::points::example_points; + #[cfg(feature = "simd")] + use crate::calculus::dual::DualBatchScalar; + #[cfg(feature = "simd")] + use crate::linalg::BatchScalarF64; + + #[cfg(test)] trait Test { fn run(); @@ -1132,10 +621,14 @@ fn dual_vector_tests() { } def_test_template!(f64, DualScalar, 1); + #[cfg(feature = "simd")] def_test_template!(BatchScalarF64<2>, DualBatchScalar<2>, 2); + #[cfg(feature = "simd")] def_test_template!(BatchScalarF64<4>, DualBatchScalar<4>, 4); f64::run(); + #[cfg(feature = "simd")] BatchScalarF64::<2>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<4>::run(); } diff --git a/crates/sophus_core/src/calculus/maps/curves.rs b/crates/sophus_core/src/calculus/maps/curves.rs index 151200f..49d83ef 100644 --- a/crates/sophus_core/src/calculus/maps/curves.rs +++ b/crates/sophus_core/src/calculus/maps/curves.rs @@ -115,9 +115,12 @@ impl, const BATCH: usize> MatrixValuedCurve { #[test] fn curve_test() { - use crate::calculus::dual::dual_scalar::DualBatchScalar; - use crate::calculus::dual::dual_scalar::DualScalar; + use crate::calculus::dual::DualScalar; use crate::linalg::scalar::IsScalar; + + #[cfg(feature = "simd")] + use crate::calculus::dual::DualBatchScalar; + #[cfg(feature = "simd")] use crate::linalg::BatchScalarF64; trait CurveTest { @@ -196,12 +199,18 @@ fn curve_test() { } def_curve_test_template!(1, f64, DualScalar); + #[cfg(feature = "simd")] def_curve_test_template!(2, BatchScalarF64<2>, DualBatchScalar<2>); + #[cfg(feature = "simd")] def_curve_test_template!(4, BatchScalarF64<4>, DualBatchScalar<4>); + #[cfg(feature = "simd")] def_curve_test_template!(8, BatchScalarF64<8>, DualBatchScalar<8>); DualScalar::run_curve_test(); + #[cfg(feature = "simd")] DualBatchScalar::<2>::run_curve_test(); + #[cfg(feature = "simd")] DualBatchScalar::<4>::run_curve_test(); + #[cfg(feature = "simd")] DualBatchScalar::<8>::run_curve_test(); } diff --git a/crates/sophus_core/src/calculus/maps/matrix_valued_maps.rs b/crates/sophus_core/src/calculus/maps/matrix_valued_maps.rs index cbbc596..f04d0a9 100644 --- a/crates/sophus_core/src/calculus/maps/matrix_valued_maps.rs +++ b/crates/sophus_core/src/calculus/maps/matrix_valued_maps.rs @@ -155,15 +155,18 @@ impl, const BATCH: usize> #[test] fn matrix_valued_map_from_vector_tests() { - use crate::calculus::dual::dual_scalar::DualBatchScalar; use crate::calculus::dual::dual_scalar::DualScalar; use crate::calculus::maps::matrix_valued_maps::MatrixValuedMapFromVector; - use crate::linalg::scalar::IsScalar; use crate::linalg::vector::IsVector; - use crate::linalg::BatchScalarF64; use crate::tensor::tensor_view::IsTensorLike; + + #[cfg(feature = "simd")] + use crate::calculus::dual::DualBatchScalar; + #[cfg(feature = "simd")] + use crate::linalg::BatchScalarF64; + #[cfg(test)] trait Test { fn run(); @@ -284,10 +287,14 @@ fn matrix_valued_map_from_vector_tests() { } def_test_template!(f64, DualScalar, 1); + #[cfg(feature = "simd")] def_test_template!(BatchScalarF64<2>, DualBatchScalar<2>, 2); + #[cfg(feature = "simd")] def_test_template!(BatchScalarF64<4>, DualBatchScalar<4>, 4); f64::run(); + #[cfg(feature = "simd")] BatchScalarF64::<2>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<4>::run(); } diff --git 
a/crates/sophus_core/src/calculus/maps/scalar_valued_maps.rs b/crates/sophus_core/src/calculus/maps/scalar_valued_maps.rs index ddee6d7..fc554bc 100644 --- a/crates/sophus_core/src/calculus/maps/scalar_valued_maps.rs +++ b/crates/sophus_core/src/calculus/maps/scalar_valued_maps.rs @@ -132,8 +132,10 @@ impl, const BATCH: usize> ScalarValuedMapFromMatrix, DualBatchScalar<2>); + #[cfg(feature = "simd")] def_scalar_valued_map_test_template!(4, BatchScalarF64<4>, DualBatchScalar<4>); + #[cfg(feature = "simd")] def_scalar_valued_map_test_template!(8, BatchScalarF64<8>, DualBatchScalar<8>); + #[cfg(feature = "simd")] def_scalar_valued_map_test_template!(16, BatchScalarF64<16>, DualBatchScalar<16>); + #[cfg(feature = "simd")] def_scalar_valued_map_test_template!(32, BatchScalarF64<32>, DualBatchScalar<32>); + #[cfg(feature = "simd")] def_scalar_valued_map_test_template!(64, BatchScalarF64<64>, DualBatchScalar<64>); f64::run(); + #[cfg(feature = "simd")] BatchScalarF64::<2>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<4>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<8>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<16>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<32>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<64>::run(); } diff --git a/crates/sophus_core/src/calculus/maps/vector_valued_maps.rs b/crates/sophus_core/src/calculus/maps/vector_valued_maps.rs index f63bf08..107789e 100644 --- a/crates/sophus_core/src/calculus/maps/vector_valued_maps.rs +++ b/crates/sophus_core/src/calculus/maps/vector_valued_maps.rs @@ -192,15 +192,19 @@ impl, const BATCH: usize> #[test] fn vector_valued_map_from_vector_tests() { - use crate::calculus::dual::dual_scalar::DualBatchScalar; use crate::calculus::dual::dual_scalar::DualScalar; use crate::calculus::maps::vector_valued_maps::VectorValuedMapFromMatrix; use crate::calculus::maps::vector_valued_maps::VectorValuedMapFromVector; use crate::linalg::vector::IsVector; - use crate::linalg::BatchScalarF64; use crate::tensor::tensor_view::IsTensorLike; + #[cfg(feature = "simd")] + use crate::linalg::BatchScalarF64; + #[cfg(feature = "simd")] + use crate::calculus::dual::DualBatchScalar; + + #[cfg(test)] trait Test { fn run(); @@ -306,10 +310,14 @@ fn vector_valued_map_from_vector_tests() { } def_test_template!(f64, DualScalar, 1); + #[cfg(feature = "simd")] def_test_template!(BatchScalarF64<2>, DualBatchScalar<2>, 2); + #[cfg(feature = "simd")] def_test_template!(BatchScalarF64<4>, DualBatchScalar<4>, 4); f64::run(); + #[cfg(feature = "simd")] BatchScalarF64::<2>::run(); + #[cfg(feature = "simd")] BatchScalarF64::<4>::run(); } diff --git a/crates/sophus_core/src/lib.rs b/crates/sophus_core/src/lib.rs index 05e99d0..ef09adf 100644 --- a/crates/sophus_core/src/lib.rs +++ b/crates/sophus_core/src/lib.rs @@ -1,11 +1,12 @@ -#![feature(portable_simd)] +#![cfg_attr(feature = "simd", feature(portable_simd))] + #![deny(missing_docs)] #![allow(clippy::needless_range_loop)] //! Core math functionality including //! - linear algebra types //! * such as [linalg::VecF64], and [linalg::MatF64] //! * batch types such as [linalg::BatchScalarF64], [linalg::BatchVecF64], -//! [linalg::BatchMatF64] +//! [linalg::BatchMatF64] - require the `simd` feature //! - tensors //! * design: dynamic tensor (ndarray) of static tensors (nalgebra) //! 
- differentiation tools
diff --git a/crates/sophus_core/src/linalg.rs b/crates/sophus_core/src/linalg.rs
index d4c1ff1..4244d53 100644
--- a/crates/sophus_core/src/linalg.rs
+++ b/crates/sophus_core/src/linalg.rs
@@ -1,22 +1,43 @@
+#[cfg(feature = "simd")]
 /// Boolean mask - generalization of bool to SIMD
+pub mod batch_mask;
+/// Bool and boolean mask traits
 pub mod bool_mask;
+#[cfg(feature = "simd")]
+/// Batch matrix types - require the `simd` feature
+pub mod batch_matrix;
 /// Matrix types
 pub mod matrix;
+#[cfg(feature = "simd")]
+/// Batch scalar types - require the `simd` feature
+pub mod batch_scalar;
 /// Scalar types
 pub mod scalar;
+#[cfg(feature = "simd")]
+/// Batch vector types - require the `simd` feature
+pub mod batch_vector;
 /// Vector types
 pub mod vector;
 
+#[cfg(feature = "simd")]
 use std::ops::Add;
+
+#[cfg(feature = "simd")]
 use std::simd::cmp::SimdPartialEq;
+#[cfg(feature = "simd")]
 use std::simd::num::SimdFloat;
+#[cfg(feature = "simd")]
 use std::simd::LaneCount;
+#[cfg(feature = "simd")]
 use std::simd::Mask;
+#[cfg(feature = "simd")]
 use std::simd::Simd;
+#[cfg(feature = "simd")]
 use std::simd::SimdElement;
+#[cfg(feature = "simd")]
 use std::simd::SupportedLaneCount;
 
 /// Static vector
@@ -25,6 +46,16 @@ pub type SVec<ScalarLike, const ROWS: usize> = nalgebra::SVector<ScalarLike, ROWS>;
 /// Static matrix
 pub type SMat<ScalarLike, const ROWS: usize, const COLS: usize> =
     nalgebra::SMatrix<ScalarLike, ROWS, COLS>;
 
+/// f32 vector
+pub type VecF32<const ROWS: usize> = nalgebra::SVector<f32, ROWS>;
+/// f64 vector
+pub type VecF64<const ROWS: usize> = nalgebra::SMatrix<f64, ROWS, 1>;
+/// f32 matrix
+pub type MatF32<const ROWS: usize, const COLS: usize> = nalgebra::SMatrix<f32, ROWS, COLS>;
+/// f64 matrix
+pub type MatF64<const ROWS: usize, const COLS: usize> = nalgebra::SMatrix<f64, ROWS, COLS>;
+
+#[cfg(feature = "simd")]
 /// Batch of scalar
 #[derive(Clone, Debug, PartialEq, Copy)]
 pub struct BatchScalar<ScalarLike: SimdElement, const BATCH: usize>(
@@ -33,31 +64,28 @@ pub struct BatchScalar<ScalarLike: SimdElement, const BATCH: usize>(
 where
     LaneCount<BATCH>: SupportedLaneCount;
 
+#[cfg(feature = "simd")]
 /// Batch of vectors
 pub type BatchVec<ScalarLike, const ROWS: usize, const BATCH: usize> =
     nalgebra::SVector<BatchScalar<ScalarLike, BATCH>, ROWS>;
 
+#[cfg(feature = "simd")]
 /// Batch of matrices
 pub type BatchMat<ScalarLike, const ROWS: usize, const COLS: usize, const BATCH: usize> =
     nalgebra::SMatrix<BatchScalar<ScalarLike, BATCH>, ROWS, COLS>;
 
-/// f32 vector
-pub type VecF32<const ROWS: usize> = nalgebra::SVector<f32, ROWS>;
-/// f64 vector
-pub type VecF64<const ROWS: usize> = nalgebra::SMatrix<f64, ROWS, 1>;
-/// f64 matrix
-pub type MatF32<const ROWS: usize, const COLS: usize> = nalgebra::SMatrix<f32, ROWS, COLS>;
-/// f64 matrix
-pub type MatF64<const ROWS: usize, const COLS: usize> = nalgebra::SMatrix<f64, ROWS, COLS>;
-
+#[cfg(feature = "simd")]
 /// batch of f64 scalars
 pub type BatchScalarF64<const BATCH: usize> = BatchScalar<f64, BATCH>;
 /// batch of f64 vectors
+#[cfg(feature = "simd")]
 pub type BatchVecF64<const ROWS: usize, const BATCH: usize> = BatchVec<f64, ROWS, BATCH>;
 /// batch of f64 matrices
+#[cfg(feature = "simd")]
 pub type BatchMatF64<const ROWS: usize, const COLS: usize, const BATCH: usize> =
     BatchMat<f64, ROWS, COLS, BATCH>;
 
+#[cfg(feature = "simd")]
 impl<ScalarLike: SimdElement, const BATCH: usize> Add for BatchScalar<ScalarLike, BATCH>
 where
     LaneCount<BATCH>: SupportedLaneCount,
@@ -70,6 +98,7 @@ where
     }
 }
 
+#[cfg(feature = "simd")]
 impl<ScalarLike: SimdElement, const BATCH: usize> num_traits::Zero
     for BatchScalar<ScalarLike, BATCH>
 where
@@ -90,11 +119,17 @@ where
 
 #[test]
 fn test_core() {
-    use crate::linalg::scalar::IsScalar;
     use approx::assert_abs_diff_eq;
 
     let vec = SVec::<f64, 3>::new(1.0, 2.0, 3.0);
     assert_abs_diff_eq!(vec, SVec::<f64, 3>::new(1.0, 2.0, 3.0));
+}
+
+#[test]
+#[cfg(feature = "simd")]
+fn test_simd_core() {
+    use crate::linalg::scalar::IsScalar;
+    use approx::assert_abs_diff_eq;
 
     let batch_scalar = BatchScalar::<f64, 4>(Simd::<f64, 4>::from_array([1.0, 2.0, 3.0, 4.0]));
     assert_abs_diff_eq!(
diff --git a/crates/sophus_core/src/linalg/batch_mask.rs b/crates/sophus_core/src/linalg/batch_mask.rs
new file mode 100644
index 0000000..2862350
--- /dev/null
+++ b/crates/sophus_core/src/linalg/batch_mask.rs
@@ -0,0 +1,35 @@
+use crate::prelude::IsBoolMask;
+use std::simd::LaneCount;
+use std::simd::Mask;
+use std::simd::MaskElement;
+use std::simd::SupportedLaneCount;
+
+impl<T, const BATCH: usize> IsBoolMask for Mask<T, BATCH>
+where
+    T: MaskElement,
+    LaneCount<BATCH>: SupportedLaneCount,
+{
+    fn all_true() -> Self {
+        Mask::from_array([true; BATCH])
+    }
+
+    fn all_false() -> Self {
+        Mask::from_array([false; BATCH])
+    }
+
+    fn all(&self) -> bool {
+        Mask::all(*self)
+    }
+
+    fn any(&self) -> bool {
+        Mask::any(*self)
+    }
+
+    fn count(&self) -> usize {
+        self.to_array().iter().filter(|x| **x).count()
+    }
+
+    fn lanes(&self) -> usize {
+        BATCH
+    }
+}
diff --git a/crates/sophus_core/src/linalg/batch_matrix.rs b/crates/sophus_core/src/linalg/batch_matrix.rs
new file mode 100644
index 0000000..cfc95d8
--- /dev/null
+++ b/crates/sophus_core/src/linalg/batch_matrix.rs
@@ -0,0 +1,148 @@
+use crate::calculus::dual::DualBatchMatrix;
+
+use crate::linalg::BatchMatF64;
+use crate::linalg::BatchScalarF64;
+use crate::linalg::BatchVecF64;
+
+use crate::prelude::*;
+
+use std::simd::LaneCount;
+use std::simd::Mask;
+use std::simd::SupportedLaneCount;
+
+impl<const ROWS: usize, const COLS: usize, const BATCH: usize>
+    IsMatrix<BatchScalarF64<BATCH>, ROWS, COLS, BATCH> for BatchMatF64<ROWS, COLS, BATCH>
+where
+    LaneCount<BATCH>: SupportedLaneCount,
+{
+    fn from_scalar(val: BatchScalarF64<BATCH>) -> Self {
+        Self::from_element(val)
+    }
+
+    fn from_real_matrix(val: Self) -> Self {
+        val
+    }
+
+    fn real_matrix(&self) -> &Self {
+        self
+    }
+
+    fn scaled(&self, v: BatchScalarF64<BATCH>) -> Self {
+        self * v
+    }
+
+    fn identity() -> Self {
+        nalgebra::SMatrix::<BatchScalarF64<BATCH>, ROWS, COLS>::identity()
+    }
+
+    fn from_array2(vals: [[BatchScalarF64<BATCH>; COLS]; ROWS]) -> Self {
+        Self::from_fn(|r, c| vals[r][c])
+    }
+
+    fn from_real_scalar_array2(vals: [[BatchScalarF64<BATCH>; COLS]; ROWS]) -> Self {
+        Self::from_fn(|r, c| vals[r][c])
+    }
+
+    fn from_f64_array2(vals: [[f64; COLS]; ROWS]) -> Self {
+        Self::from_fn(|r, c| BatchScalarF64::<BATCH>::from_f64(vals[r][c]))
+    }
+
+    fn get_elem(&self, idx: [usize; 2]) -> BatchScalarF64<BATCH> {
+        self[(idx[0], idx[1])]
+    }
+
+    fn block_mat2x1<const R0: usize, const R1: usize>(
+        top_row: BatchMatF64<R0, COLS, BATCH>,
+        bot_row: BatchMatF64<R1, COLS, BATCH>,
+    ) -> Self {
+        Self::from_fn(|r, c| {
+            if r < R0 {
+                top_row[(r, c)]
+            } else {
+                bot_row[(r - R0, c)]
+            }
+        })
+    }
+
+    fn block_mat1x2<const C0: usize, const C1: usize>(
+        left_col: BatchMatF64<ROWS, C0, BATCH>,
+        righ_col: BatchMatF64<ROWS, C1, BATCH>,
+    ) -> Self {
+        Self::from_fn(|r, c| {
+            if c < C0 {
+                left_col[(r, c)]
+            } else {
+                righ_col[(r, c - C0)]
+            }
+        })
+    }
+
+    fn block_mat2x2<const R0: usize, const R1: usize, const C0: usize, const C1: usize>(
+        top_row: (BatchMatF64<R0, C0, BATCH>, BatchMatF64<R0, C1, BATCH>),
+        bot_row: (BatchMatF64<R1, C0, BATCH>, BatchMatF64<R1, C1, BATCH>),
+    ) -> Self {
+        Self::from_fn(|r, c| {
+            if r < R0 {
+                if c < C0 {
+                    top_row.0[(r, c)]
+                } else {
+                    top_row.1[(r, c - C0)]
+                }
+            } else if c < C0 {
+                bot_row.0[(r - R0, c)]
+            } else {
+                bot_row.1[(r - R0, c - C0)]
+            }
+        })
+    }
+
+    fn mat_mul<const COLS2: usize>(
+        &self,
+        other: BatchMatF64<COLS, COLS2, BATCH>,
+    ) -> BatchMatF64<ROWS, COLS2, BATCH> {
+        self * other
+    }
+
+    fn set_col_vec(&mut self, c: usize, v: BatchVecF64<ROWS, BATCH>) {
+        self.fixed_columns_mut::<1>(c).copy_from(&v);
+    }
+
+    fn get_fixed_submat<const R: usize, const C: usize>(
+        &self,
+        start_r: usize,
+        start_c: usize,
+    ) -> BatchMatF64<R, C, BATCH> {
+        self.fixed_view::<R, C>(start_r, start_c).into()
+    }
+
+    fn get_col_vec(&self, c: usize) -> BatchVecF64<ROWS, BATCH> {
+        self.fixed_view::<ROWS, 1>(0, c).into()
+    }
+
+    fn get_row_vec(&self, r: usize) -> BatchVecF64<COLS, BATCH> {
+        self.fixed_view::<1, COLS>(r, 0).transpose()
+    }
+
+    fn from_f64(val: f64) -> Self {
+        Self::from_element(BatchScalarF64::<BATCH>::from_f64(val))
+    }
+
+    fn to_dual(self) -> <BatchScalarF64<BATCH> as IsScalar<BATCH>>::DualMatrix<ROWS, COLS> {
+        DualBatchMatrix::from_real_matrix(self)
+    }
+
+    fn select(self, mask: &Mask<i64, BATCH>, other: Self) -> Self {
+        self.zip_map(&other, |a, b| a.select(mask, b))
+    }
+
+    fn set_elem(&mut self, idx: [usize; 2], val: BatchScalarF64<BATCH>) {
+        self[(idx[0], idx[1])] = val;
+    }
+}
+
+impl<const ROWS: usize, const COLS: usize, const BATCH: usize>
+    IsRealMatrix<BatchScalarF64<BATCH>, ROWS, COLS, BATCH> for BatchMatF64<ROWS, COLS, BATCH>
+where
+    LaneCount<BATCH>: SupportedLaneCount,
+{
+}
diff --git a/crates/sophus_core/src/linalg/batch_scalar.rs b/crates/sophus_core/src/linalg/batch_scalar.rs
new file mode 100644
index 0000000..c2bdaad
---
/dev/null +++ b/crates/sophus_core/src/linalg/batch_scalar.rs @@ -0,0 +1,318 @@ +use crate::calculus::dual::DualBatchMatrix; +use crate::calculus::dual::DualBatchScalar; +use crate::calculus::dual::DualBatchVector; +use crate::linalg::scalar::IsBatchScalar; +use crate::linalg::scalar::NumberCategory; +use crate::linalg::BatchMatF64; +use crate::linalg::BatchScalar; +use crate::linalg::BatchScalarF64; +use crate::linalg::BatchVecF64; +use crate::prelude::IsCoreScalar; +use crate::prelude::*; +use approx::AbsDiffEq; +use approx::RelativeEq; +use std::fmt::Debug; +use std::ops::AddAssign; +use std::ops::Div; +use std::ops::Mul; +use std::ops::MulAssign; +use std::ops::Neg; +use std::ops::Sub; +use std::ops::SubAssign; +use std::simd::cmp::SimdPartialOrd; +use std::simd::num::SimdFloat; +use std::simd::LaneCount; +use std::simd::Mask; +use std::simd::Simd; +use std::simd::SimdElement; +use std::simd::StdFloat; +use std::simd::SupportedLaneCount; + +impl IsCoreScalar + for BatchScalar +where + LaneCount: SupportedLaneCount, + Simd: SimdFloat, + BatchScalar: + Clone + Debug + nalgebra::Scalar + num_traits::Zero + std::ops::AddAssign, +{ + fn number_category() -> NumberCategory { + NumberCategory::Real + } +} + +impl AbsDiffEq for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + type Epsilon = f64; + + fn default_epsilon() -> f64 { + f64::default_epsilon() + } + + fn abs_diff_eq(&self, other: &Self, epsilon: f64) -> bool { + for i in 0..BATCH { + if !self.0[i].abs_diff_eq(&other.0[i], epsilon) { + return false; + } + } + true + } +} + +impl RelativeEq for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + fn default_max_relative() -> Self::Epsilon { + f64::default_max_relative() + } + + fn relative_eq( + &self, + other: &Self, + epsilon: Self::Epsilon, + max_relative: Self::Epsilon, + ) -> bool { + for i in 0..BATCH { + if !self.0[i].relative_eq(&other.0[i], epsilon, max_relative) { + return false; + } + } + true + } +} + +impl AddAssign for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + } +} + +impl SubAssign for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + fn sub_assign(&mut self, rhs: Self) { + self.0 -= rhs.0; + } +} + +impl MulAssign for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + fn mul_assign(&mut self, rhs: Self) { + self.0 *= rhs.0; + } +} + +impl Neg for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + type Output = Self; + + fn neg(self) -> Self { + BatchScalarF64 { 0: -self.0 } + } +} + +impl Sub for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + type Output = Self; + + fn sub(self, rhs: Self) -> Self { + BatchScalarF64 { 0: self.0 - rhs.0 } + } +} + +impl Mul for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + type Output = Self; + + fn mul(self, rhs: Self) -> Self { + BatchScalarF64 { 0: self.0 * rhs.0 } + } +} + +impl Div for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + type Output = Self; + + fn div(self, rhs: Self) -> Self { + BatchScalarF64 { 0: self.0 / rhs.0 } + } +} + +impl num_traits::One for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + fn one() -> Self { + Self(Simd::::splat(1.0)) + } +} +impl IsRealScalar for BatchScalarF64 where + LaneCount: SupportedLaneCount +{ +} + +impl IsBatchScalar for BatchScalarF64 where + LaneCount: SupportedLaneCount +{ +} + +impl IsScalar for BatchScalarF64 +where + LaneCount: SupportedLaneCount, +{ + type Scalar = BatchScalarF64; + type RealScalar = Self; + 
type SingleScalar = f64; + type DualScalar = DualBatchScalar; + + type RealVector = BatchVecF64; + + type Vector = BatchVecF64; + type Matrix = BatchMatF64; + type RealMatrix = BatchMatF64; + + type Mask = Mask; + + fn less_equal(&self, rhs: &Self) -> Self::Mask { + self.0.simd_le(rhs.0) + } + + fn greater_equal(&self, rhs: &Self) -> Self::Mask { + self.0.simd_ge(rhs.0) + } + + fn scalar_examples() -> Vec> { + vec![ + BatchScalarF64::::from_f64(1.0), + BatchScalarF64::::from_f64(2.0), + BatchScalarF64::::from_f64(3.0), + ] + } + + fn extract_single(&self, i: usize) -> f64 { + self.0[i] + } + + fn from_real_scalar(val: BatchScalarF64) -> Self { + val + } + + fn real_part(&self) -> Self { + *self + } + + fn from_f64(val: f64) -> Self { + BatchScalarF64 { + 0: Simd::::splat(val), + } + } + + fn abs(self) -> Self { + BatchScalarF64 { + 0: SimdFloat::abs(self.0), + } + } + + fn from_real_array(arr: [f64; BATCH]) -> Self { + BatchScalarF64 { + 0: Simd::::from_array(arr), + } + } + + fn to_real_array(&self) -> [f64; BATCH] { + self.0.to_array() + } + + fn cos(self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::cos(self.0), + } + } + + fn sin(self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::sin(self.0), + } + } + + fn tan(self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::tan(self.0), + } + } + + fn acos(self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::acos(self.0), + } + } + + fn asin(self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::asin(self.0), + } + } + + fn atan(self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::atan(self.0), + } + } + + fn sqrt(self) -> Self { + BatchScalarF64 { 0: self.0.sqrt() } + } + + fn atan2(self, x: Self) -> Self { + BatchScalarF64 { + 0: sleef::Sleef::atan2(self.0, x.0), + } + } + + fn to_vec(self) -> Self::Vector<1> { + BatchVecF64::<1, BATCH>::from_scalar(self) + } + + fn fract(self) -> Self { + BatchScalarF64 { 0: self.0.fract() } + } + + fn floor(&self) -> BatchScalarF64 { + BatchScalarF64 { 0: self.0.floor() } + } + + fn signum(&self) -> Self { + BatchScalarF64 { 0: self.0.signum() } + } + + type DualVector = DualBatchVector; + + type DualMatrix = DualBatchMatrix; + + fn to_dual(self) -> Self::DualScalar { + DualBatchScalar::from_real_scalar(self) + } + + fn select(self, mask: &Self::Mask, other: Self) -> Self { + BatchScalarF64 { + 0: mask.select(self.0, other.0), + } + } +} diff --git a/crates/sophus_core/src/linalg/batch_vector.rs b/crates/sophus_core/src/linalg/batch_vector.rs new file mode 100644 index 0000000..8509356 --- /dev/null +++ b/crates/sophus_core/src/linalg/batch_vector.rs @@ -0,0 +1,119 @@ +use crate::calculus::dual::DualBatchVector; +use crate::linalg::BatchMatF64; +use crate::linalg::BatchScalarF64; +use crate::linalg::BatchVecF64; +use crate::prelude::*; +use std::simd::LaneCount; +use std::simd::Mask; +use std::simd::SupportedLaneCount; + +impl IsVector, ROWS, BATCH> + for BatchVecF64 +where + LaneCount: SupportedLaneCount, +{ + fn block_vec2( + top_row: BatchVecF64, + bot_row: BatchVecF64, + ) -> Self { + assert_eq!(ROWS, R0 + R1); + let mut m = Self::zeros(); + + m.fixed_view_mut::(0, 0).copy_from(&top_row); + m.fixed_view_mut::(R0, 0).copy_from(&bot_row); + m + } + + fn dot(self, rhs: Self) -> BatchScalarF64 { + (self.transpose() * rhs)[0] + } + + fn from_array(vals: [BatchScalarF64; ROWS]) -> Self { + Self::from_fn(|i, _| vals[i]) + } + + fn from_real_array(vals: [BatchScalarF64; ROWS]) -> Self { + Self::from_fn(|i, _| vals[i]) + } + + fn from_f64_array(vals: [f64; ROWS]) -> Self { + Self::from_fn(|i, _| 
BatchScalarF64::::from_f64(vals[i])) + } + + fn from_scalar_array(vals: [BatchScalarF64; ROWS]) -> Self { + Self::from_fn(|i, _| vals[i]) + } + + fn from_real_vector(val: BatchVecF64) -> Self { + val + } + + fn get_elem(&self, idx: usize) -> BatchScalarF64 { + self[idx] + } + + fn norm(&self) -> BatchScalarF64 { + self.squared_norm().sqrt() + } + + fn normalized(&self) -> Self { + let norm = self.norm(); + if norm == as IsScalar>::zeros() { + return *self; + } + let factor = BatchScalarF64::::ones() / norm; + self * factor + } + + fn real_vector(&self) -> &BatchVecF64 { + self + } + + fn scaled(&self, v: BatchScalarF64) -> Self { + self * v + } + + fn set_elem(&mut self, idx: usize, v: BatchScalarF64) { + self[idx] = v; + } + + fn squared_norm(&self) -> BatchScalarF64 { + let mut squared_norm = as IsScalar>::zeros(); + for i in 0..ROWS { + let val = IsVector::get_elem(self, i); + squared_norm += val * val; + } + squared_norm + } + + fn to_mat(self) -> BatchMatF64 { + self + } + + fn from_f64(val: f64) -> Self { + Self::from_element(BatchScalarF64::::from_f64(val)) + } + + fn to_dual(self) -> as IsScalar>::DualVector { + DualBatchVector::from_real_vector(self) + } + + fn outer(self, rhs: BatchVecF64) -> BatchMatF64 { + self * rhs.transpose() + } + + fn select(self, mask: &Mask, other: Self) -> Self { + self.zip_map(&other, |a, b| a.select(mask, b)) + } + + fn get_fixed_subvec(&self, start_r: usize) -> BatchVecF64 { + self.fixed_rows::(start_r).into() + } +} + +impl IsRealVector, ROWS, BATCH> + for BatchVecF64 +where + LaneCount: SupportedLaneCount, +{ +} diff --git a/crates/sophus_core/src/linalg/bool_mask.rs b/crates/sophus_core/src/linalg/bool_mask.rs index 9448909..0b8c7c3 100644 --- a/crates/sophus_core/src/linalg/bool_mask.rs +++ b/crates/sophus_core/src/linalg/bool_mask.rs @@ -1,7 +1,4 @@ -use std::simd::LaneCount; -use std::simd::Mask; -use std::simd::MaskElement; -use std::simd::SupportedLaneCount; + /// Boolean mask - generalization of boolean comparison to SIMDs pub trait IsBoolMask { @@ -51,33 +48,3 @@ impl IsBoolMask for bool { 1 } } - -impl IsBoolMask for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - fn all_true() -> Self { - Mask::from_array([true; BATCH]) - } - - fn all_false() -> Self { - Mask::from_array([false; BATCH]) - } - - fn all(&self) -> bool { - Mask::all(*self) - } - - fn any(&self) -> bool { - Mask::any(*self) - } - - fn count(&self) -> usize { - self.to_array().iter().filter(|x| **x).count() - } - - fn lanes(&self) -> usize { - BATCH - } -} diff --git a/crates/sophus_core/src/linalg/matrix.rs b/crates/sophus_core/src/linalg/matrix.rs index 6c7cd2f..61ec567 100644 --- a/crates/sophus_core/src/linalg/matrix.rs +++ b/crates/sophus_core/src/linalg/matrix.rs @@ -1,8 +1,4 @@ -use crate::calculus::dual::DualBatchMatrix; use crate::calculus::dual::DualMatrix; -use crate::linalg::BatchMatF64; -use crate::linalg::BatchScalarF64; -use crate::linalg::BatchVecF64; use crate::linalg::MatF64; use crate::linalg::VecF64; use crate::prelude::*; @@ -15,9 +11,6 @@ use std::ops::IndexMut; use std::ops::Mul; use std::ops::Neg; use std::ops::Sub; -use std::simd::LaneCount; -use std::simd::Mask; -use std::simd::SupportedLaneCount; /// Matrix trait /// - either a real (f64) or a dual number matrix @@ -303,140 +296,3 @@ impl IsMatrix for MatF self[(idx[0], idx[1])] = val; } } - -impl - IsMatrix, ROWS, COLS, BATCH> for BatchMatF64 -where - LaneCount: SupportedLaneCount, -{ - fn from_scalar(val: BatchScalarF64) -> Self { - Self::from_element(val) - } - - fn 
from_real_matrix(val: Self) -> Self { - val - } - - fn real_matrix(&self) -> &Self { - self - } - - fn scaled(&self, v: BatchScalarF64) -> Self { - self * v - } - - fn identity() -> Self { - nalgebra::SMatrix::, ROWS, COLS>::identity() - } - - fn from_array2(vals: [[BatchScalarF64; COLS]; ROWS]) -> Self { - Self::from_fn(|r, c| vals[r][c]) - } - - fn from_real_scalar_array2(vals: [[BatchScalarF64; COLS]; ROWS]) -> Self { - Self::from_fn(|r, c| vals[r][c]) - } - - fn from_f64_array2(vals: [[f64; COLS]; ROWS]) -> Self { - Self::from_fn(|r, c| BatchScalarF64::::from_f64(vals[r][c])) - } - - fn get_elem(&self, idx: [usize; 2]) -> BatchScalarF64 { - self[(idx[0], idx[1])] - } - - fn block_mat2x1( - top_row: BatchMatF64, - bot_row: BatchMatF64, - ) -> Self { - Self::from_fn(|r, c| { - if r < R0 { - top_row[(r, c)] - } else { - bot_row[(r - R0, c)] - } - }) - } - - fn block_mat1x2( - left_col: BatchMatF64, - righ_col: BatchMatF64, - ) -> Self { - Self::from_fn(|r, c| { - if c < C0 { - left_col[(r, c)] - } else { - righ_col[(r, c - C0)] - } - }) - } - - fn block_mat2x2( - top_row: (BatchMatF64, BatchMatF64), - bot_row: (BatchMatF64, BatchMatF64), - ) -> Self { - Self::from_fn(|r, c| { - if r < R0 { - if c < C0 { - top_row.0[(r, c)] - } else { - top_row.1[(r, c - C0)] - } - } else if c < C0 { - bot_row.0[(r - R0, c)] - } else { - bot_row.1[(r - R0, c - C0)] - } - }) - } - - fn mat_mul( - &self, - other: BatchMatF64, - ) -> BatchMatF64 { - self * other - } - - fn set_col_vec(&mut self, c: usize, v: BatchVecF64) { - self.fixed_columns_mut::<1>(c).copy_from(&v); - } - - fn get_fixed_submat( - &self, - start_r: usize, - start_c: usize, - ) -> BatchMatF64 { - self.fixed_view::(start_r, start_c).into() - } - - fn get_col_vec(&self, c: usize) -> BatchVecF64 { - self.fixed_view::(0, c).into() - } - - fn get_row_vec(&self, r: usize) -> BatchVecF64 { - self.fixed_view::<1, COLS>(r, 0).transpose() - } - - fn from_f64(val: f64) -> Self { - Self::from_element(BatchScalarF64::::from_f64(val)) - } - - fn to_dual(self) -> as IsScalar>::DualMatrix { - DualBatchMatrix::from_real_matrix(self) - } - - fn select(self, mask: &Mask, other: Self) -> Self { - self.zip_map(&other, |a, b| a.select(mask, b)) - } - - fn set_elem(&mut self, idx: [usize; 2], val: BatchScalarF64) { - self[(idx[0], idx[1])] = val; - } -} - -impl - IsRealMatrix, ROWS, COLS, BATCH> for BatchMatF64 -where - LaneCount: SupportedLaneCount, -{ -} diff --git a/crates/sophus_core/src/linalg/scalar.rs b/crates/sophus_core/src/linalg/scalar.rs index b9613f6..c6a1e93 100644 --- a/crates/sophus_core/src/linalg/scalar.rs +++ b/crates/sophus_core/src/linalg/scalar.rs @@ -1,13 +1,7 @@ -use crate::calculus::dual::DualBatchMatrix; -use crate::calculus::dual::DualBatchScalar; -use crate::calculus::dual::DualBatchVector; + use crate::calculus::dual::DualMatrix; use crate::calculus::dual::DualScalar; use crate::calculus::dual::DualVector; -use crate::linalg::BatchMatF64; -use crate::linalg::BatchScalar; -use crate::linalg::BatchScalarF64; -use crate::linalg::BatchVecF64; use crate::linalg::MatF64; use crate::linalg::VecF64; use crate::prelude::*; @@ -20,18 +14,10 @@ use std::ops::Add; use std::ops::AddAssign; use std::ops::Div; use std::ops::Mul; -use std::ops::MulAssign; use std::ops::Neg; use std::ops::Sub; use std::ops::SubAssign; -use std::simd::cmp::SimdPartialOrd; -use std::simd::num::SimdFloat; -use std::simd::LaneCount; -use std::simd::Mask; -use std::simd::Simd; -use std::simd::SimdElement; -use std::simd::StdFloat; -use std::simd::SupportedLaneCount; + /// 
Number category #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -73,18 +59,7 @@ def_is_tensor_scalar_single!(i64, NumberCategory::Signed); def_is_tensor_scalar_single!(f32, NumberCategory::Real); def_is_tensor_scalar_single!(f64, NumberCategory::Real); -impl IsCoreScalar - for BatchScalar -where - LaneCount: SupportedLaneCount, - Simd: SimdFloat, - BatchScalar: - Clone + Debug + nalgebra::Scalar + num_traits::Zero + std::ops::AddAssign, -{ - fn number_category() -> NumberCategory { - NumberCategory::Real - } -} + /// Scalar trait /// @@ -185,7 +160,7 @@ pub trait IsScalar: /// /// - If self is a single scalar, the array must have one element /// - If self is a batch scalar, the array must have BATCH_SIZE elements - /// - If self is a dual number, the infinitesimal part is set to zero + /// - If self is a dual number, the infinitesimal part is set to zero fn from_real_array(arr: [f64; BATCH_SIZE]) -> Self; /// creates scalar from real scalar @@ -426,282 +401,20 @@ impl IsSingleScalar for f64 { } } -impl AbsDiffEq for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - type Epsilon = f64; - fn default_epsilon() -> f64 { - f64::default_epsilon() - } - - fn abs_diff_eq(&self, other: &Self, epsilon: f64) -> bool { - for i in 0..BATCH { - if !self.0[i].abs_diff_eq(&other.0[i], epsilon) { - return false; - } - } - true - } -} - -impl RelativeEq for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - fn default_max_relative() -> Self::Epsilon { - f64::default_max_relative() - } - - fn relative_eq( - &self, - other: &Self, - epsilon: Self::Epsilon, - max_relative: Self::Epsilon, - ) -> bool { - for i in 0..BATCH { - if !self.0[i].relative_eq(&other.0[i], epsilon, max_relative) { - return false; - } - } - true - } -} - -impl AddAssign for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - fn add_assign(&mut self, rhs: Self) { - self.0 += rhs.0; - } -} - -impl SubAssign for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - fn sub_assign(&mut self, rhs: Self) { - self.0 -= rhs.0; - } -} - -impl MulAssign for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - fn mul_assign(&mut self, rhs: Self) { - self.0 *= rhs.0; - } -} - -impl Neg for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - type Output = Self; - - fn neg(self) -> Self { - BatchScalarF64 { 0: -self.0 } - } -} - -impl Sub for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - type Output = Self; - - fn sub(self, rhs: Self) -> Self { - BatchScalarF64 { 0: self.0 - rhs.0 } - } -} - -impl Mul for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - type Output = Self; - - fn mul(self, rhs: Self) -> Self { - BatchScalarF64 { 0: self.0 * rhs.0 } - } -} - -impl Div for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - type Output = Self; - - fn div(self, rhs: Self) -> Self { - BatchScalarF64 { 0: self.0 / rhs.0 } - } -} - -impl num_traits::One for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - fn one() -> Self { - Self(Simd::::splat(1.0)) - } -} -impl IsRealScalar for BatchScalarF64 where - LaneCount: SupportedLaneCount -{ -} - -impl IsBatchScalar for BatchScalarF64 where - LaneCount: SupportedLaneCount -{ -} - -impl IsScalar for BatchScalarF64 -where - LaneCount: SupportedLaneCount, -{ - type Scalar = BatchScalarF64; - type RealScalar = Self; - type SingleScalar = f64; - type DualScalar = DualBatchScalar; - - type RealVector = BatchVecF64; - - type Vector = BatchVecF64; - type Matrix = BatchMatF64; - type RealMatrix = BatchMatF64; - - type Mask = Mask; - 
- fn less_equal(&self, rhs: &Self) -> Self::Mask { - self.0.simd_le(rhs.0) - } - - fn greater_equal(&self, rhs: &Self) -> Self::Mask { - self.0.simd_ge(rhs.0) - } - - fn scalar_examples() -> Vec> { - vec![ - BatchScalarF64::::from_f64(1.0), - BatchScalarF64::::from_f64(2.0), - BatchScalarF64::::from_f64(3.0), - ] - } - - fn extract_single(&self, i: usize) -> f64 { - self.0[i] - } - - fn from_real_scalar(val: BatchScalarF64) -> Self { - val - } - - fn real_part(&self) -> Self { - *self - } - - fn from_f64(val: f64) -> Self { - BatchScalarF64 { - 0: Simd::::splat(val), - } - } - - fn abs(self) -> Self { - BatchScalarF64 { - 0: SimdFloat::abs(self.0), - } - } - - fn from_real_array(arr: [f64; BATCH]) -> Self { - BatchScalarF64 { - 0: Simd::::from_array(arr), - } - } - - fn to_real_array(&self) -> [f64; BATCH] { - self.0.to_array() - } - - fn cos(self) -> Self { - BatchScalarF64 { 0: self.0.cos() } - } - - fn sin(self) -> Self { - BatchScalarF64 { 0: self.0.sin() } - } - - fn tan(self) -> Self { - BatchScalarF64 { - 0: sleef::Sleef::tan(self.0), - } - } - - fn acos(self) -> Self { - BatchScalarF64 { - 0: sleef::Sleef::acos(self.0), - } - } - - fn asin(self) -> Self { - BatchScalarF64 { - 0: sleef::Sleef::asin(self.0), - } - } - - fn atan(self) -> Self { - BatchScalarF64 { - 0: sleef::Sleef::atan(self.0), - } - } - - fn sqrt(self) -> Self { - BatchScalarF64 { 0: self.0.sqrt() } - } - - fn atan2(self, x: Self) -> Self { - BatchScalarF64 { - 0: sleef::Sleef::atan2(self.0, x.0), - } - } - - fn to_vec(self) -> Self::Vector<1> { - BatchVecF64::<1, BATCH>::from_scalar(self) - } - - fn fract(self) -> Self { - BatchScalarF64 { 0: self.0.fract() } - } - - fn floor(&self) -> BatchScalarF64 { - BatchScalarF64 { 0: self.0.floor() } - } - - fn signum(&self) -> Self { - BatchScalarF64 { 0: self.0.signum() } - } - - type DualVector = DualBatchVector; - - type DualMatrix = DualBatchMatrix; - - fn to_dual(self) -> Self::DualScalar { - DualBatchScalar::from_real_scalar(self) - } - - fn select(self, mask: &Self::Mask, other: Self) -> Self { - BatchScalarF64 { - 0: mask.select(self.0, other.0), - } - } -} #[test] fn scalar_prop_tests() { + #[cfg(feature = "simd")] + use crate::linalg::BatchScalarF64; + + f64::test_suite(); + + #[cfg(feature = "simd")] BatchScalarF64::<2>::test_suite(); + #[cfg(feature = "simd")] BatchScalarF64::<4>::test_suite(); + #[cfg(feature = "simd")] BatchScalarF64::<8>::test_suite(); } diff --git a/crates/sophus_core/src/linalg/vector.rs b/crates/sophus_core/src/linalg/vector.rs index a2682d9..de7cb38 100644 --- a/crates/sophus_core/src/linalg/vector.rs +++ b/crates/sophus_core/src/linalg/vector.rs @@ -1,8 +1,4 @@ -use crate::calculus::dual::DualBatchVector; use crate::calculus::dual::DualVector; -use crate::linalg::BatchMatF64; -use crate::linalg::BatchScalarF64; -use crate::linalg::BatchVecF64; use crate::linalg::MatF64; use crate::linalg::VecF64; use crate::prelude::*; @@ -14,9 +10,6 @@ use std::ops::Index; use std::ops::IndexMut; use std::ops::Neg; use std::ops::Sub; -use std::simd::LaneCount; -use std::simd::Mask; -use std::simd::SupportedLaneCount; /// Vector - either a real (f64) or a dual number vector pub trait IsVector, const ROWS: usize, const BATCH_SIZE: usize>: @@ -253,114 +246,3 @@ pub fn cross, const BATCH: usize>( l0 * r1 - l1 * r0, ]) } - -impl IsVector, ROWS, BATCH> - for BatchVecF64 -where - LaneCount: SupportedLaneCount, -{ - fn block_vec2( - top_row: BatchVecF64, - bot_row: BatchVecF64, - ) -> Self { - assert_eq!(ROWS, R0 + R1); - let mut m = Self::zeros(); - - 
m.fixed_view_mut::(0, 0).copy_from(&top_row); - m.fixed_view_mut::(R0, 0).copy_from(&bot_row); - m - } - - fn dot(self, rhs: Self) -> BatchScalarF64 { - (self.transpose() * rhs)[0] - } - - fn from_array(vals: [BatchScalarF64; ROWS]) -> Self { - Self::from_fn(|i, _| vals[i]) - } - - fn from_real_array(vals: [BatchScalarF64; ROWS]) -> Self { - Self::from_fn(|i, _| vals[i]) - } - - fn from_f64_array(vals: [f64; ROWS]) -> Self { - Self::from_fn(|i, _| BatchScalarF64::::from_f64(vals[i])) - } - - fn from_scalar_array(vals: [BatchScalarF64; ROWS]) -> Self { - Self::from_fn(|i, _| vals[i]) - } - - fn from_real_vector(val: BatchVecF64) -> Self { - val - } - - fn get_elem(&self, idx: usize) -> BatchScalarF64 { - self[idx] - } - - fn norm(&self) -> BatchScalarF64 { - self.squared_norm().sqrt() - } - - fn normalized(&self) -> Self { - let norm = self.norm(); - if norm == as IsScalar>::zeros() { - return *self; - } - let factor = BatchScalarF64::::ones() / norm; - self * factor - } - - fn real_vector(&self) -> &BatchVecF64 { - self - } - - fn scaled(&self, v: BatchScalarF64) -> Self { - self * v - } - - fn set_elem(&mut self, idx: usize, v: BatchScalarF64) { - self[idx] = v; - } - - fn squared_norm(&self) -> BatchScalarF64 { - let mut squared_norm = as IsScalar>::zeros(); - for i in 0..ROWS { - let val = IsVector::get_elem(self, i); - squared_norm += val * val; - } - squared_norm - } - - fn to_mat(self) -> BatchMatF64 { - self - } - - fn from_f64(val: f64) -> Self { - Self::from_element(BatchScalarF64::::from_f64(val)) - } - - fn to_dual(self) -> as IsScalar>::DualVector { - DualBatchVector::from_real_vector(self) - } - - fn outer(self, rhs: BatchVecF64) -> BatchMatF64 { - self * rhs.transpose() - } - - fn select(self, mask: &Mask, other: Self) -> Self { - self.zip_map(&other, |a, b| a.select(mask, b)) - } - - fn get_fixed_subvec(&self, start_r: usize) -> BatchVecF64 { - self.fixed_rows::(start_r).into() - } -} - -impl IsRealVector, ROWS, BATCH> - for BatchVecF64 -where - LaneCount: SupportedLaneCount, -{ -} diff --git a/crates/sophus_core/src/tensor/element.rs b/crates/sophus_core/src/tensor/element.rs index 4ae17a4..4f22f6f 100644 --- a/crates/sophus_core/src/tensor/element.rs +++ b/crates/sophus_core/src/tensor/element.rs @@ -149,10 +149,15 @@ impl STensorFormat { #[test] fn test_elements() { + #[cfg(feature = "simd")] use crate::linalg::scalar::IsScalar; use crate::linalg::scalar::NumberCategory; + + #[cfg(feature = "simd")] use crate::linalg::BatchScalar; + #[cfg(feature = "simd")] use crate::linalg::BatchScalarF64; + #[cfg(feature = "simd")] use crate::linalg::BatchVecF64; use crate::linalg::VecF32; @@ -160,6 +165,7 @@ fn test_elements() { assert_eq!(f32::number_category(), NumberCategory::Real); assert_eq!(u32::number_category(), NumberCategory::Unsigned); assert_eq!(i32::number_category(), NumberCategory::Signed); + #[cfg(feature = "simd")] assert_eq!( BatchScalar::::number_category(), NumberCategory::Real @@ -180,8 +186,11 @@ fn test_elements() { assert_eq!(mat.scalar([1, 1]), &4.0); assert_abs_diff_eq!(mat, SMat::::new(1.0, 2.0, 3.0, 4.0)); - let batch_vec: BatchVecF64<2, 2> = - BatchVecF64::from_element(BatchScalarF64::from_real_array([1.0, 2.0])); - assert_eq!(batch_vec.scalar([0]).extract_single(0), 1.0); - assert_eq!(batch_vec.scalar([1]).extract_single(1), 2.0); + #[cfg(feature = "simd")] + { + let batch_vec: BatchVecF64<2, 2> = + BatchVecF64::from_element(BatchScalarF64::from_real_array([1.0, 2.0])); + assert_eq!(batch_vec.scalar([0]).extract_single(0), 1.0); + 
assert_eq!(batch_vec.scalar([1]).extract_single(1), 2.0); + } } diff --git a/crates/sophus_core/src/tensor/mut_tensor.rs b/crates/sophus_core/src/tensor/mut_tensor.rs index 39d6acc..222f586 100644 --- a/crates/sophus_core/src/tensor/mut_tensor.rs +++ b/crates/sophus_core/src/tensor/mut_tensor.rs @@ -438,6 +438,7 @@ mut_tensor_is_view!(5, 2, 3); #[test] fn mut_tensor_tests() { + #[cfg(feature = "simd")] use crate::linalg::BatchMatF64; { let _rank1_tensor = MutTensorD::::default(); @@ -514,6 +515,7 @@ fn mut_tensor_tests() { } //linalg + #[cfg(feature = "simd")] { let shape = [3]; diff --git a/crates/sophus_image/Cargo.toml b/crates/sophus_image/Cargo.toml index 141cea2..d559a4b 100644 --- a/crates/sophus_image/Cargo.toml +++ b/crates/sophus_image/Cargo.toml @@ -20,3 +20,6 @@ nalgebra.workspace = true ndarray.workspace = true num-traits.workspace = true png.workspace = true + +[features] +simd = ["sophus_core/simd"] diff --git a/crates/sophus_image/src/intensity_image/dyn_intensity_image.rs b/crates/sophus_image/src/intensity_image/dyn_intensity_image.rs index 46d1ac6..4001b5d 100644 --- a/crates/sophus_image/src/intensity_image/dyn_intensity_image.rs +++ b/crates/sophus_image/src/intensity_image/dyn_intensity_image.rs @@ -179,7 +179,7 @@ pub enum DynIntensityArcImage { RgbaF32(ArcImage4F32), } -/// Convert a GenMutImage to an GenArcImage +/// Convert a GenMutImage to an GenArcImage /// impl From for DynIntensityArcImage { fn from(image: DynIntensityMutImage) -> Self { diff --git a/crates/sophus_image/src/lib.rs b/crates/sophus_image/src/lib.rs index f237429..9333cd4 100644 --- a/crates/sophus_image/src/lib.rs +++ b/crates/sophus_image/src/lib.rs @@ -1,4 +1,4 @@ -#![feature(portable_simd)] +#![cfg_attr(feature = "simd", feature(portable_simd))] #![deny(missing_docs)] //! image crate - part of the sophus-rs project diff --git a/crates/sophus_lie/Cargo.toml b/crates/sophus_lie/Cargo.toml index 11bc3c6..c0888fb 100644 --- a/crates/sophus_lie/Cargo.toml +++ b/crates/sophus_lie/Cargo.toml @@ -11,9 +11,14 @@ repository.workspace = true version.workspace = true [dependencies] -sophus_core.workspace = true approx.workspace = true assertables.workspace = true nalgebra.workspace = true num-traits.workspace = true + +sophus_core = {workspace=true} + + +[features] +simd = ["sophus_core/simd"] diff --git a/crates/sophus_lie/src/factor_lie_group.rs b/crates/sophus_lie/src/factor_lie_group.rs index 9dab293..955ebdc 100644 --- a/crates/sophus_lie/src/factor_lie_group.rs +++ b/crates/sophus_lie/src/factor_lie_group.rs @@ -4,12 +4,18 @@ use crate::traits::IsRealLieFactorGroupImpl; use crate::Rotation2; use crate::Rotation3; use approx::assert_relative_eq; -use sophus_core::calculus::dual::DualBatchScalar; use sophus_core::calculus::dual::DualScalar; use sophus_core::calculus::maps::MatrixValuedMapFromVector; -use sophus_core::linalg::BatchScalarF64; use sophus_core::manifold::traits::TangentImpl; + +#[cfg(feature = "simd")] +use sophus_core::calculus::dual::DualBatchScalar; + +#[cfg(feature = "simd")] +use sophus_core::linalg::BatchScalarF64; + + impl< S: IsRealScalar, const DOF: usize, @@ -154,6 +160,7 @@ macro_rules! 
def_real_group_test_template { } def_real_group_test_template!(f64, DualScalar, Rotation2, Rotation2, 1); +#[cfg(feature = "simd")] def_real_group_test_template!( BatchScalarF64<8>, DualBatchScalar<8>, @@ -163,6 +170,7 @@ def_real_group_test_template!( ); def_real_group_test_template!(f64, DualScalar, Rotation3, Rotation3, 1); +#[cfg(feature = "simd")] def_real_group_test_template!( BatchScalarF64<8>, DualBatchScalar<8>, diff --git a/crates/sophus_lie/src/groups/isometry2.rs b/crates/sophus_lie/src/groups/isometry2.rs index 61825da..7a21157 100644 --- a/crates/sophus_lie/src/groups/isometry2.rs +++ b/crates/sophus_lie/src/groups/isometry2.rs @@ -34,15 +34,21 @@ impl, const BATCH: usize> Isometry2 { #[test] fn isometry2_prop_tests() { use crate::real_lie_group::RealLieGroupTest; - use sophus_core::calculus::dual::dual_scalar::DualBatchScalar; use sophus_core::calculus::dual::dual_scalar::DualScalar; + + #[cfg(feature = "simd")] + use sophus_core::calculus::dual::dual_batch_scalar::DualBatchScalar; + #[cfg(feature = "simd")] use sophus_core::linalg::BatchScalarF64; Isometry2::::test_suite(); + #[cfg(feature = "simd")] Isometry2::, 8>::test_suite(); Isometry2::::test_suite(); + #[cfg(feature = "simd")] Isometry2::, 8>::test_suite(); Isometry2::::run_real_tests(); + #[cfg(feature = "simd")] Isometry2::, 8>::run_real_tests(); } diff --git a/crates/sophus_lie/src/groups/isometry3.rs b/crates/sophus_lie/src/groups/isometry3.rs index e14cf50..4a650e6 100644 --- a/crates/sophus_lie/src/groups/isometry3.rs +++ b/crates/sophus_lie/src/groups/isometry3.rs @@ -33,15 +33,21 @@ impl, const BATCH: usize> Isometry3 { #[test] fn isometry3_prop_tests() { use crate::real_lie_group::RealLieGroupTest; - use sophus_core::calculus::dual::dual_scalar::DualBatchScalar; use sophus_core::calculus::dual::dual_scalar::DualScalar; + + #[cfg(feature = "simd")] + use sophus_core::calculus::dual::DualBatchScalar; + #[cfg(feature = "simd")] use sophus_core::linalg::BatchScalarF64; Isometry3::::test_suite(); + #[cfg(feature = "simd")] Isometry3::, 8>::test_suite(); Isometry3::::test_suite(); + #[cfg(feature = "simd")] Isometry3::, 8>::test_suite(); Isometry3::::run_real_tests(); + #[cfg(feature = "simd")] Isometry3::, 8>::run_real_tests(); } diff --git a/crates/sophus_lie/src/groups/rotation2.rs b/crates/sophus_lie/src/groups/rotation2.rs index 7ca396c..ea3c617 100644 --- a/crates/sophus_lie/src/groups/rotation2.rs +++ b/crates/sophus_lie/src/groups/rotation2.rs @@ -298,18 +298,28 @@ impl, const BATCH_SIZE: usize> fn rotation2_prop_tests() { use crate::factor_lie_group::RealFactorLieGroupTest; use crate::real_lie_group::RealLieGroupTest; - use sophus_core::calculus::dual::dual_scalar::DualBatchScalar; use sophus_core::calculus::dual::dual_scalar::DualScalar; + + + #[cfg(feature = "simd")] + use sophus_core::calculus::dual::DualBatchScalar; + + #[cfg(feature = "simd")] use sophus_core::linalg::BatchScalarF64; Rotation2::::test_suite(); + #[cfg(feature = "simd")] Rotation2::, 8>::test_suite(); + Rotation2::::test_suite(); + #[cfg(feature = "simd")] Rotation2::, 8>::test_suite(); Rotation2::::run_real_tests(); + #[cfg(feature = "simd")] Rotation2::, 8>::run_real_tests(); Rotation2::::run_real_factor_tests(); + #[cfg(feature = "simd")] Rotation2::, 8>::run_real_factor_tests(); } diff --git a/crates/sophus_lie/src/groups/rotation3.rs b/crates/sophus_lie/src/groups/rotation3.rs index 5554b76..bd65171 100644 --- a/crates/sophus_lie/src/groups/rotation3.rs +++ b/crates/sophus_lie/src/groups/rotation3.rs @@ -603,18 +603,24 @@ pub 
type Rotation3<S, const BATCH: usize> = LieGroup<S, 3, 4, 3, 3, BATCH, Rotation3Impl<S, BATCH>>;
 fn rotation3_prop_tests() {
     use crate::factor_lie_group::RealFactorLieGroupTest;
     use crate::real_lie_group::RealLieGroupTest;
-    use sophus_core::calculus::dual::dual_scalar::DualBatchScalar;
     use sophus_core::calculus::dual::dual_scalar::DualScalar;
+    #[cfg(feature = "simd")]
+    use sophus_core::calculus::dual::DualBatchScalar;
+    #[cfg(feature = "simd")]
     use sophus_core::linalg::BatchScalarF64;
 
     Rotation3::<f64, 1>::test_suite();
+    #[cfg(feature = "simd")]
     Rotation3::<BatchScalarF64<8>, 8>::test_suite();
     Rotation3::<DualScalar, 1>::test_suite();
+    #[cfg(feature = "simd")]
     Rotation3::<DualBatchScalar<8>, 8>::test_suite();
 
     Rotation3::<f64, 1>::run_real_tests();
+    #[cfg(feature = "simd")]
     Rotation3::<BatchScalarF64<8>, 8>::run_real_tests();
 
     Rotation3::<f64, 1>::run_real_factor_tests();
+    #[cfg(feature = "simd")]
     Rotation3::<BatchScalarF64<8>, 8>::run_real_factor_tests();
 }
diff --git a/crates/sophus_lie/src/lib.rs b/crates/sophus_lie/src/lib.rs
index 8e6e5fa..38df283 100644
--- a/crates/sophus_lie/src/lib.rs
+++ b/crates/sophus_lie/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(portable_simd)]
+#![cfg_attr(feature = "simd", feature(portable_simd))]
 #![deny(missing_docs)]
 #![allow(clippy::needless_range_loop)]
 //! Lie groups crate - part of the sophus-rs project
diff --git a/crates/sophus_lie/src/real_lie_group.rs b/crates/sophus_lie/src/real_lie_group.rs
index 2b7dbfb..13fa9e1 100644
--- a/crates/sophus_lie/src/real_lie_group.rs
+++ b/crates/sophus_lie/src/real_lie_group.rs
@@ -8,12 +8,17 @@ use crate::Rotation2;
 use crate::Rotation3;
 use approx::assert_relative_eq;
 use nalgebra::SVector;
-use sophus_core::calculus::dual::DualBatchScalar;
 use sophus_core::calculus::dual::DualScalar;
 use sophus_core::calculus::maps::MatrixValuedMapFromVector;
 use sophus_core::calculus::maps::VectorValuedMapFromMatrix;
 use sophus_core::calculus::maps::VectorValuedMapFromVector;
+
+#[cfg(feature = "simd")]
+use sophus_core::calculus::dual::dual_batch_scalar::DualBatchScalar;
+#[cfg(feature = "simd")]
 use sophus_core::linalg::BatchScalarF64;
+
+
 use std::fmt::Display;
 use std::fmt::Formatter;
@@ -513,6 +518,7 @@ macro_rules! def_real_group_test_template {
 }
 
 def_real_group_test_template!(f64, DualScalar, Rotation2<f64, 1>, Rotation2<DualScalar, 1>, 1);
+#[cfg(feature = "simd")]
 def_real_group_test_template!(
     BatchScalarF64<8>,
     DualBatchScalar<8>,
     Rotation2<BatchScalarF64<8>, 8>,
     Rotation2<DualBatchScalar<8>, 8>,
     8
 );
@@ -522,6 +528,7 @@ def_real_group_test_template!(
 def_real_group_test_template!(f64, DualScalar, Isometry2<f64, 1>, Isometry2<DualScalar, 1>, 1);
+#[cfg(feature = "simd")]
 def_real_group_test_template!(
     BatchScalarF64<8>,
     DualBatchScalar<8>,
     Isometry2<BatchScalarF64<8>, 8>,
     Isometry2<DualBatchScalar<8>, 8>,
     8
 );
@@ -531,6 +538,7 @@ def_real_group_test_template!(
 def_real_group_test_template!(f64, DualScalar, Rotation3<f64, 1>, Rotation3<DualScalar, 1>, 1);
+#[cfg(feature = "simd")]
 def_real_group_test_template!(
     BatchScalarF64<8>,
     DualBatchScalar<8>,
     Rotation3<BatchScalarF64<8>, 8>,
     Rotation3<DualBatchScalar<8>, 8>,
     8
 );
@@ -540,6 +548,7 @@ def_real_group_test_template!(
 def_real_group_test_template!(f64, DualScalar, Isometry3<f64, 1>, Isometry3<DualScalar, 1>, 1);
+#[cfg(feature = "simd")]
 def_real_group_test_template!(
     BatchScalarF64<8>,
     DualBatchScalar<8>,
     Isometry3<BatchScalarF64<8>, 8>,
     Isometry3<DualBatchScalar<8>, 8>,
     8
 );
diff --git a/crates/sophus_opt/Cargo.toml b/crates/sophus_opt/Cargo.toml
index 0db3d4c..5cda437 100644
--- a/crates/sophus_opt/Cargo.toml
+++ b/crates/sophus_opt/Cargo.toml
@@ -24,3 +24,11 @@ faer.workspace = true
 nalgebra.workspace = true
 ndarray.workspace = true
 rand.workspace = true
+
+[features]
+simd = [
+  "sophus_core/simd",
+  "sophus_lie/simd",
+  "sophus_sensor/simd",
+  "sophus_image/simd",
+]
diff --git a/crates/sophus_opt/src/cost_fn.rs b/crates/sophus_opt/src/cost_fn.rs
index 9aa4d6e..7b3e0ea 100644
--- a/crates/sophus_opt/src/cost_fn.rs
+++ b/crates/sophus_opt/src/cost_fn.rs
@@ -23,7 +23,7 @@ pub trait IsTermSignature<const N: usize> {
     fn idx_ref(&self) -> &[usize; N];
 }
 
-/// Signature of a cost function
+/// Signature of a cost function
 #[derive(Debug, Clone)]
 pub struct CostSignature<
     const NUM_ARGS: usize,
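
With the per-crate `simd` features wired up as above, downstream code can stay generic over the scalar type and only instantiate batch variants when the feature is enabled. A sketch of the idea (the `huber_weight` helper is hypothetical, not part of this diff; it only uses `IsScalar` methods shown earlier: `abs`, `less_equal`, `select`, `from_f64`):

    use sophus_core::prelude::*;

    /// Huber weight: 1.0 where |r| <= k, k/|r| otherwise - lane-wise for batch scalars.
    fn huber_weight<S: IsScalar<B>, const B: usize>(r: S, k: S) -> S {
        let abs_r = r.abs();
        let w = k.clone() / abs_r.clone();
        // select keeps `w` in lanes where the mask is true, 1.0 elsewhere
        w.select(&k.less_equal(&abs_r), S::from_f64(1.0))
    }

Instantiated with `f64` (BATCH = 1) this compiles on stable without the feature; with `BatchScalarF64<8>` it weighs eight residuals per call.
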
diff --git a/crates/sophus_opt/src/lib.rs b/crates/sophus_opt/src/lib.rs
index 2396f0a..a7b9a08 100644
--- a/crates/sophus_opt/src/lib.rs
+++ b/crates/sophus_opt/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(portable_simd)]
+#![cfg_attr(feature = "simd", feature(portable_simd))]
 #![deny(missing_docs)]
 #![allow(clippy::needless_range_loop)]
diff --git a/crates/sophus_opt/src/variables.rs b/crates/sophus_opt/src/variables.rs
index 3c9e546..8df0fb9 100644
--- a/crates/sophus_opt/src/variables.rs
+++ b/crates/sophus_opt/src/variables.rs
@@ -277,7 +277,7 @@ impl IsVarFamily for VarFamily {
         assert_eq!(
             self.start_indices.len(),
             0,
-            "Ths function must ony called once"
+            "This function must only be called once"
         );
 
         match self.get_var_kind() {
diff --git a/crates/sophus_pyo3/src/pyo3/lie_groups.rs b/crates/sophus_pyo3/src/pyo3/lie_groups.rs
index e45c193..9455176 100644
--- a/crates/sophus_pyo3/src/pyo3/lie_groups.rs
+++ b/crates/sophus_pyo3/src/pyo3/lie_groups.rs
@@ -404,14 +404,14 @@ macro_rules! augment_py_product_group_class {
 
 augment_py_product_group_class!(
     PyBaseIsometry2,
-    PyIsometry2,
-    Isometry2,
+    PyIsometry2,
+    Isometry2,
     PyRotation2,
-    "Isometry2",
+    "Isometry2",
     2);
 augment_py_product_group_class!(
     PyBaseIsometry3,
     PyIsometry3,
-    Isometry3,
-    PyRotation3,
+    Isometry3,
+    PyRotation3,
     "Isometry3",3);
diff --git a/crates/sophus_sensor/Cargo.toml b/crates/sophus_sensor/Cargo.toml
index aab90c6..a6fe699 100644
--- a/crates/sophus_sensor/Cargo.toml
+++ b/crates/sophus_sensor/Cargo.toml
@@ -19,3 +19,6 @@ assertables.workspace = true
 nalgebra.workspace = true
 ndarray.workspace = true
 num-traits.workspace = true
+
+[features]
+simd = ["sophus_core/simd", "sophus_image/simd"]
diff --git a/crates/sophus_sensor/src/lib.rs b/crates/sophus_sensor/src/lib.rs
index cea326c..867f71e 100644
--- a/crates/sophus_sensor/src/lib.rs
+++ b/crates/sophus_sensor/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(portable_simd)]
+#![cfg_attr(feature = "simd", feature(portable_simd))]
 #![deny(missing_docs)]
 //! Sensor (aka camera) crate - part of the sophus-rs project
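
Every crate root in this diff uses the same two-step gate: the unstable `portable_simd` feature is itself behind the cargo feature, so default builds work on a stable toolchain. The pattern in isolation (a sketch, not a file from this diff):

    // crate root: request the unstable feature only when `simd` is enabled
    #![cfg_attr(feature = "simd", feature(portable_simd))]

    // batch-only items and tests are gated the same way:
    #[cfg(feature = "simd")]
    #[test]
    fn batch_only_test() {
        use sophus_core::linalg::BatchScalarF64;
        use sophus_core::prelude::*;
        assert_eq!(BatchScalarF64::<2>::from_f64(1.5).extract_single(0), 1.5);
    }
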
diff --git a/crates/sophus_viewer/Cargo.toml b/crates/sophus_viewer/Cargo.toml
index 26bbc60..7c3b160 100644
--- a/crates/sophus_viewer/Cargo.toml
+++ b/crates/sophus_viewer/Cargo.toml
@@ -28,3 +28,11 @@ nalgebra.workspace = true
 ndarray.workspace = true
 tokio.workspace = true
 wgpu.workspace = true
+
+[features]
+simd = [
+  "sophus_core/simd",
+  "sophus_lie/simd",
+  "sophus_sensor/simd",
+  "sophus_image/simd",
+]
diff --git a/crates/sophus_viewer/src/lib.rs b/crates/sophus_viewer/src/lib.rs
index 7d779ec..9438296 100644
--- a/crates/sophus_viewer/src/lib.rs
+++ b/crates/sophus_viewer/src/lib.rs
@@ -1,4 +1,4 @@
-#![feature(portable_simd)]
+#![cfg_attr(feature = "simd", feature(portable_simd))]
 #![deny(missing_docs)]
 #![allow(clippy::needless_range_loop)]
diff --git a/crates/sophus_viewer/src/pixel_renderer.rs b/crates/sophus_viewer/src/pixel_renderer.rs
index 386a735..92a8512 100644
--- a/crates/sophus_viewer/src/pixel_renderer.rs
+++ b/crates/sophus_viewer/src/pixel_renderer.rs
@@ -34,14 +34,6 @@ struct OrthoCam {
     dummy1: f32,
 }
 
-/// 2D vertex
-#[repr(C)]
-#[derive(Clone, Copy, Pod, Zeroable)]
-pub struct Vertex2 {
-    _pos: [f32; 2],
-    _color: [f32; 4],
-}
-
 /// 2D line vertex
 #[repr(C)]
 #[derive(Clone, Copy, Pod, Zeroable)]
diff --git a/crates/sophus_viewer/src/pixel_renderer/line_pixel_shader.wgsl b/crates/sophus_viewer/src/pixel_renderer/line_pixel_shader.wgsl
index 8bb9244..50b8b0b 100644
--- a/crates/sophus_viewer/src/pixel_renderer/line_pixel_shader.wgsl
+++ b/crates/sophus_viewer/src/pixel_renderer/line_pixel_shader.wgsl
@@ -45,8 +45,8 @@ fn vs_main(
     }
 
     out.position = vec4(2.0 * (p.x+0.5) / uniforms.width_height.x - 1.0,
-                        2.0 - 2.0*(p.y+0.5) / uniforms.width_height.y - 1.0,
-                        0.0,
+                        2.0 - 2.0*(p.y+0.5) / uniforms.width_height.y - 1.0,
+                        0.0,
                         1.0);
     out.color = color;
diff --git a/crates/sophus_viewer/src/pixel_renderer/point_pixel_shader.wgsl b/crates/sophus_viewer/src/pixel_renderer/point_pixel_shader.wgsl
index 7626d22..bd1af7f 100644
--- a/crates/sophus_viewer/src/pixel_renderer/point_pixel_shader.wgsl
+++ b/crates/sophus_viewer/src/pixel_renderer/point_pixel_shader.wgsl
@@ -40,8 +40,8 @@ fn vs_main(
     }
 
     out.position = vec4(2.0 * (u+0.5) / uniforms.width_height.x - 1.0,
-                        2.0 - 2.0*(v+0.5) / uniforms.width_height.y - 1.0,
-                        0.0,
+                        2.0 - 2.0*(v+0.5) / uniforms.width_height.y - 1.0,
+                        0.0,
                         1.0);
     out.color = color;
diff --git a/crates/sophus_viewer/src/scene_renderer/line_scene_shader.wgsl b/crates/sophus_viewer/src/scene_renderer/line_scene_shader.wgsl
index b867f51..310096b 100644
--- a/crates/sophus_viewer/src/scene_renderer/line_scene_shader.wgsl
+++ b/crates/sophus_viewer/src/scene_renderer/line_scene_shader.wgsl
@@ -63,7 +63,7 @@ fn vs_main(
         uv = uv0 - line_half_width * n;
         z = uv_z0.z;
     }
-    
+
     // map point from pixel coordinates (Computer Vision convention) to clip space coordinates (WebGPU convention)
     out.position = pixel_and_z_to_clip(uv, z, frustum_uniforms);
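
Both pixel shaders above share the same pixel-to-clip mapping (the hunks here only touch trailing whitespace). For reference, the math transcribed to Rust (hypothetical helper; `u`, `v` are pixel coordinates with y pointing down, and `2.0 - 2.0*(v+0.5)/h - 1.0` simplifies to `1.0 - 2.0*(v+0.5)/h`):

    /// Matches the WGSL above: +0.5 targets the pixel center; y is flipped
    /// because clip space has y up while image coordinates have y down.
    fn pixel_to_clip(u: f32, v: f32, width: f32, height: f32) -> [f32; 2] {
        [
            2.0 * (u + 0.5) / width - 1.0,  // leftmost pixel center -> ~ -1
            1.0 - 2.0 * (v + 0.5) / height, // topmost pixel center -> ~ +1
        ]
    }
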
diff --git a/crates/sophus_viewer/src/scene_renderer/textured_mesh_scene_shader.wgsl b/crates/sophus_viewer/src/scene_renderer/textured_mesh_scene_shader.wgsl
index fdb8022..e3ac696 100644
--- a/crates/sophus_viewer/src/scene_renderer/textured_mesh_scene_shader.wgsl
+++ b/crates/sophus_viewer/src/scene_renderer/textured_mesh_scene_shader.wgsl
@@ -16,17 +16,17 @@ var lut_uniform: DistortionLut;
 var distortion_texture: texture_2d<f32>;
 
 @group(2) @binding(0)
-var mesh_texture: texture_2d<f32>;
+var mesh_texture: texture_2d<f32>;
 @group(2) @binding(1)
-var mesh_texture_sampler: sampler;
+var mesh_texture_sampler: sampler;
 
 @vertex
 fn vs_main(
     @location(0) position: vec3<f32>,
-    @location(1) texCoords: vec2<f32>
+    @location(1) texCoords: vec2<f32>
 ) -> VertexOut {
     var out: VertexOut;
     var uv_z = scene_point_to_distorted(position, view_uniform, frustum_uniforms, lut_uniform);
@@ -37,7 +37,7 @@ fn vs_main(
 
 @fragment
 fn fs_main(in: VertexOut) -> @location(0) vec4<f32> {
-    return textureSample(mesh_texture, mesh_texture_sampler, in.texCoords);
+    return textureSample(mesh_texture, mesh_texture_sampler, in.texCoords);
     // return vec4(1.0, 0.0, 0.0, 1.0);
 }
diff --git a/crates/sophus_viewer/src/scene_renderer/utils.wgsl b/crates/sophus_viewer/src/scene_renderer/utils.wgsl
index 83c31fe..6ce9c45 100644
--- a/crates/sophus_viewer/src/scene_renderer/utils.wgsl
+++ b/crates/sophus_viewer/src/scene_renderer/utils.wgsl
@@ -45,12 +45,12 @@ fn z1_plane_to_distorted(point_in_proj: vec3<f32>, frustum: Frustum, lut: DistortionLut) -> vec3<f32> {
     var lut_offset_y = lut.lut_offset_y;
     var lut_range_x = lut.lut_range_x;
     var lut_range_y = lut.lut_range_y;
-    
+
     let u = clamp((width - 1.0) * (point_in_proj.x -lut_offset_x) / lut_range_x, 0.0, width - 1.00001);
     let v = clamp((height - 1.0) * (point_in_proj.y -lut_offset_y) / lut_range_y, 0.0, height - 1.00001);
 
     // Manual implementation of bilinear interpolation.
-    // This is to work around apparent limitations of wgpu - such as no/limited support for
+    // This is to work around apparent limitations of wgpu - such as no/limited support for
     // sampling of f32 textures and sampling in the vertex shader.
     // TODO: Figure out how to use sampling in vertex shader or maybe undistort in fragment shader
     // (first render pinhole image to texture, then undistort in fragment shader).
@@ -81,17 +81,15 @@ fn scene_point_to_distorted(scene_point: vec3<f32>,
 
 // map point from pixel coordinates (Computer Vision convention) to clip space coordinates (WebGPU convention)
 fn pixel_and_z_to_clip(uv_z: vec2<f32>, z: f32, frustum: Frustum) -> vec4<f32> {
     var width = frustum.width;
-    var height = frustum.height;
+    var height = frustum.height;
     var near = frustum.near;
     var far = frustum.far;
     var u = uv_z.x;
     var v = uv_z.y;
-
-    return vec4(2.0 * (u / width - 0.5),
-                -2.0 * (v / height - 0.5),
+
+    return vec4(2.0 * (u / width - 0.5),
+                -2.0 * (v / height - 0.5),
                 // todo: Check whether the z value is correct.
                 (far * (z - near)) / (z * (far - near)),
                 1.0);
 }
-
-
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
deleted file mode 100644
index 5d56faf..0000000
--- a/rust-toolchain.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[toolchain]
-channel = "nightly"
diff --git a/rustfmt.toml b/rustfmt.toml
index 74c8af6..63df91e 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -1 +1 @@
-imports_granularity = "Item"
\ No newline at end of file
+imports_granularity = "Item"
diff --git a/sophus-rs.code-workspace b/sophus-rs.code-workspace
index 0fc68ee..adbf56c 100644
--- a/sophus-rs.code-workspace
+++ b/sophus-rs.code-workspace
@@ -13,5 +13,7 @@
         "rust-analyzer.imports.granularity.group": "item",
         "rust-analyzer.imports.prefix": "crate",
         "rust-analyzer.cargo.unsetTest": [],
+        "rust-analyzer.cargo.features": [
+        ],
     }
 }
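
One closing note on the `// todo: Check whether the z value is correct.` left in `pixel_and_z_to_clip`: the endpoints at least check out, since the expression maps the near plane to 0 and the far plane to 1, matching WebGPU's default depth range. A quick transcription (hypothetical helper, same formula as the WGSL):

    /// Depth term from pixel_and_z_to_clip: far*(z - near) / (z*(far - near)).
    fn z_to_clip_depth(z: f32, near: f32, far: f32) -> f32 {
        (far * (z - near)) / (z * (far - near))
    }

    #[test]
    fn depth_endpoints() {
        assert_eq!(z_to_clip_depth(0.1, 0.1, 100.0), 0.0); // near plane -> 0.0
        assert_eq!(z_to_clip_depth(100.0, 0.1, 100.0), 1.0); // far plane -> 1.0
    }
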