From b2a656b9739a8d5dcb6351f427cf731eb89e3fd2 Mon Sep 17 00:00:00 2001 From: Thomas Gazzola Date: Wed, 28 Jul 2021 16:46:26 -0700 Subject: [PATCH 1/2] Kahan 2x2 determinant computation reduces numerical imprecision. --- src/fixedSizeSquareMatrixOpsImpl.hpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/fixedSizeSquareMatrixOpsImpl.hpp b/src/fixedSizeSquareMatrixOpsImpl.hpp index f006d866..2afa8367 100644 --- a/src/fixedSizeSquareMatrixOpsImpl.hpp +++ b/src/fixedSizeSquareMatrixOpsImpl.hpp @@ -88,7 +88,18 @@ struct SquareMatrixOps< 2 > static auto determinant( MATRIX const & matrix ) { checkSizes< 2, 2 >( matrix ); - return matrix[ 0 ][ 0 ] * matrix[ 1 ][ 1 ] - matrix[ 0 ][ 1 ] * matrix[ 1 ][ 0 ]; + + // Aliases for matrix [[a, b],[c, d]] for improved readability + auto const & a = matrix[0][0]; + auto const & b = matrix[0][1]; + auto const & c = matrix[1][0]; + auto const & d = matrix[1][1]; + + // Using the more precise Kahan method to compute the 2x2 determinant. + auto const w = b * c; + auto const e = fma( -b, c, w ); + auto const f = fma( a, d, -w ); + return f + e; } /** From 96b1434af55c1f650a035a890a9625acdfd83f42 Mon Sep 17 00:00:00 2001 From: Thomas Gazzola Date: Thu, 29 Jul 2021 14:49:59 -0700 Subject: [PATCH 2/2] Defining the fma function in LvArray::math. I've built an integer dummy fallback in order to prevent integers to be cast to double (I had issues). --- src/fixedSizeSquareMatrixOpsImpl.hpp | 12 ++------ src/math.hpp | 42 +++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/fixedSizeSquareMatrixOpsImpl.hpp b/src/fixedSizeSquareMatrixOpsImpl.hpp index 2afa8367..21d86e0d 100644 --- a/src/fixedSizeSquareMatrixOpsImpl.hpp +++ b/src/fixedSizeSquareMatrixOpsImpl.hpp @@ -89,17 +89,9 @@ struct SquareMatrixOps< 2 > { checkSizes< 2, 2 >( matrix ); - // Aliases for matrix [[a, b],[c, d]] for improved readability - auto const & a = matrix[0][0]; - auto const & b = matrix[0][1]; - auto const & c = matrix[1][0]; - auto const & d = matrix[1][1]; - // Using the more precise Kahan method to compute the 2x2 determinant. - auto const w = b * c; - auto const e = fma( -b, c, w ); - auto const f = fma( a, d, -w ); - return f + e; + auto const tmp = matrix[0][1] * matrix[1][0]; + return math::fma( -matrix[0][1], matrix[1][0], tmp ) + math::fma( matrix[0][0], matrix[1][1], -tmp ); } /** diff --git a/src/math.hpp b/src/math.hpp index f832e0fa..faea4f18 100644 --- a/src/math.hpp +++ b/src/math.hpp @@ -28,7 +28,7 @@ namespace LvArray { /** - * @brief Contains protable wrappers around cmath functions and some cuda specific functions. + * @brief Contains portable wrappers around cmath functions and some cuda specific functions. */ namespace math { @@ -317,6 +317,46 @@ max( T const a, T const b ) #endif } +/** + * @return Returns @p x * @p y + @p z. + * @tparam T A floating point type. + * @param x The first number. + * @param y The second number. + * @param z The third number. + * @note fma stands for fused multiply add. + * + * The function computes the result without losing precision in any intermediate result. + * Integer arguments cast to double. + * We want to avoid that by providing a version dedicated to integers. + */ +template< typename T > +LVARRAY_HOST_DEVICE inline constexpr +std::enable_if_t< std::is_floating_point< T >::value, T > +fma( T const x, T const y, T const z ) +{ +#if defined(__CUDA_ARCH__) + return ::fma( x, y, z ); +#else + return std::fma( x, y, z ); +#endif +} + +/** + * @return Returns @p x * @p y + @p z. + * @tparam T A floating point type. + * @param x The first number. + * @param y The second number. + * @param z The third number. + * @note This is a dummy implementation for integers (in order to not cast to doubles). + */ +template< typename T > +LVARRAY_HOST_DEVICE inline constexpr +std::enable_if_t< std::is_integral< T >::value, T > +fma( T const x, T const y, T const z ) +{ + return x * y + z; +} + #if defined( LVARRAY_USE_CUDA ) /// @copydoc max( T, T )