From bd33677fa4765aa5e55e7780559ae120899d2a0d Mon Sep 17 00:00:00 2001 From: Geolm Date: Fri, 19 Jan 2024 14:49:45 -0500 Subject: [PATCH] added simd_log2 function and test --- extra/simd_math.c | 33 +++++++++++++++++++++++++++++++++ extra/simd_math.h | 3 +++ simd.h | 8 +++++++- tests/test_simd_math.h | 5 +++-- 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/extra/simd_math.c b/extra/simd_math.c index 729a350..1a67435 100644 --- a/extra/simd_math.c +++ b/extra/simd_math.c @@ -98,6 +98,39 @@ simd_vector simd_log(simd_vector x) return x; } +//---------------------------------------------------------------------------------------------------------------------- +// based on https://github.com/redorav/hlslpp/blob/master/include/hlsl%2B%2B_vector_float8.h +simd_vector simd_log2(simd_vector x) +{ + simd_vector one = simd_splat(1.f); + simd_vectori exp = simd_splat_i(0x7f800000); + simd_vectori mant = simd_splat_i(0x007fffff); + simd_vectori i = simd_cast_from_float(x); + simd_vector e = simd_convert_from_int(simd_sub_i(simd_shift_right_i(simd_and_i(i, exp), 23), simd_splat_i(127))); + simd_vector m = simd_or(simd_cast_from_int(simd_and_i(i, mant)), one); + + // minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + simd_vector p = simd_fmad(m, simd_splat(-3.4436006e-2f), simd_splat(3.1821337e-1f)); + p = simd_fmad(m, p, simd_splat(-1.2315303f)); + p = simd_fmad(m, p, simd_splat(2.5988452f)); + p = simd_fmad(m, p, simd_splat(-3.3241990f)); + p = simd_fmad(m, p, simd_splat(3.1157899f)); + + // this effectively increases the polynomial degree by one, but ensures that log2(1) == 0 + p = simd_mul(p, simd_sub(m, one)); + simd_vector result = simd_add(p, e); + + // we can't compute a logarithm beyond this value, so we'll mark it as -infinity to indicate close to 0 + simd_vector ltminus127 = simd_cmp_le(result, simd_splat(-127.f)); + result = simd_select(result, simd_splat_negative_infinity(), ltminus127); + + // Check for negative values and return NaN + simd_vector lt0 = simd_cmp_lt(x, simd_splat_zero()); + result = simd_select(result, simd_splat_nan(), lt0); + + return result; +} + //---------------------------------------------------------------------------------------------------------------------- // based on http://gruntthepeon.free.fr/ssemath/ simd_vector simd_exp(simd_vector x) diff --git a/extra/simd_math.h b/extra/simd_math.h index 5bbbb89..66b653b 100644 --- a/extra/simd_math.h +++ b/extra/simd_math.h @@ -39,6 +39,9 @@ simd_vector simd_atan2(simd_vector x, simd_vector y); // max error : 4.768371582e-07 simd_vector simd_log(simd_vector x); +// max error : 2.349250963e-07 +simd_vector simd_log2(simd_vector x); + // max error : 1.108270880e-07 simd_vector simd_exp(simd_vector x); diff --git a/simd.h b/simd.h index 8168a7a..4e40549 100644 --- a/simd.h +++ b/simd.h @@ -65,9 +65,12 @@ static inline simd_vector simd_select(simd_vector a, simd_vector b, simd_vector static inline simd_vector simd_reverse(simd_vector a) {return __builtin_shufflevector(a, a, 3, 2, 1, 0);} static inline simd_vector simd_splat(float value) {return vdupq_n_f32(value);} static inline simd_vector simd_splat_zero(void) {return vdupq_n_f32(0);} +static inline simd_vector simd_splat_nan(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0xffffffff));} +static inline simd_vector simd_splat_positive_infinity(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0x7f800000));} +static inline simd_vector simd_splat_negative_infinity(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0xff800000));} static inline simd_vector simd_sign_mask(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0x80000000));} static inline simd_vector simd_inv_sign_mask(void) {return vreinterpretq_u32_f32(vdupq_n_u32(~0x80000000));} -static inline simd_vector simd_abs_mask(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0x7FFFFFFF));} +static inline simd_vector simd_abs_mask(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0x7fffffff));} static inline simd_vector simd_min_normalized(void) {return vreinterpretq_u32_f32(vdupq_n_u32(0x00800000));} // the smallest non denormalized float number static inline simd_vector simd_inv_mant_mask(void){return vreinterpretq_u32_f32(vdupq_n_u32(~0x7f800000));} static inline simd_vector simd_mant_mask(void){return vreinterpretq_u32_f32(vdupq_n_u32(0x7f800000));} @@ -334,6 +337,9 @@ static inline simd_vector simd_select(simd_vector a, simd_vector b, simd_vector static inline simd_vector simd_reverse(simd_vector a) {return _mm256_permute_ps(_mm256_swap(a), _MM_SHUFFLE(0, 1, 2, 3));} static inline simd_vector simd_splat(float value) {return _mm256_set1_ps(value);} static inline simd_vector simd_splat_zero(void) {return _mm256_setzero_ps();} +static inline simd_vector simd_splat_nan(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0xffffffff));} +static inline simd_vector simd_splat_positive_infinity(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0x7f800000));} +static inline simd_vector simd_splat_negative_infinity(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0xff800000));} static inline simd_vector simd_sign_mask(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));} static inline simd_vector simd_inv_sign_mask(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(~0x80000000));} static inline simd_vector simd_min_normalized(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0x00800000));} // the smallest non denormalized float number diff --git a/tests/test_simd_math.h b/tests/test_simd_math.h index 8fc5476..0f3d905 100644 --- a/tests/test_simd_math.h +++ b/tests/test_simd_math.h @@ -154,10 +154,11 @@ SUITE(trigonometry) SUITE(exponentiation) { printf("."); - RUN_TESTp(generic_test, logf, simd_log, FLT_EPSILON, 1000.f, 1.e-06f, false, "simd_log"); + RUN_TESTp(generic_test, logf, simd_log, FLT_EPSILON, 1000.f, 1.e-06f, true, "simd_log"); + RUN_TESTp(generic_test, log2f, simd_log2, FLT_EPSILON, 1.e20f, 3.e-07f, true, "simd_log2"); RUN_TESTp(generic_test, expf, simd_exp, -87.f, 87.f, 1.e-06f, true, "simd_exp"); RUN_TESTp(generic_test, expf, simd_approx_exp, -87.f, 87.f, 2.e-03f, true, "simd_approx_exp"); - RUN_TESTp(generic_test, cbrtf, simd_cbrt, -100.f, 100.f, 5.e-07f, false, "simd_cbrt"); + RUN_TESTp(generic_test, cbrtf, simd_cbrt, -100.f, 100.f, 2.e-07f, true, "simd_cbrt"); } SUITE(color_space)