Skip to content

Commit

Permalink
added simd_log2 function and test
Browse files Browse the repository at this point in the history
  • Loading branch information
Geolm committed Jan 19, 2024
1 parent 8bddb42 commit bd33677
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
33 changes: 33 additions & 0 deletions extra/simd_math.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,39 @@ simd_vector simd_log(simd_vector x)
return x;
}

//----------------------------------------------------------------------------------------------------------------------
// based on https://github.com/redorav/hlslpp/blob/master/include/hlsl%2B%2B_vector_float8.h
//
// Computes the base-2 logarithm of every lane of x.
// Each input is split into its unbiased exponent e and a mantissa m in [1, 2[, so that
// log2(x) = e + log2(m); log2(m) is evaluated with a minimax polynomial.
//
// Special cases:
//   x == 0 (either sign) -> -infinity
//   x <  0               -> NaN
//   x == +infinity       -> +infinity
//   x is NaN             -> NaN
// Denormalized inputs are not rescaled: their exponent field is 0 and they are decoded as if they
// had an implicit leading one, so the result for them is inaccurate.
simd_vector simd_log2(simd_vector x)
{
    simd_vector one = simd_splat(1.f);
    simd_vectori exponent_mask = simd_splat_i(0x7f800000);
    simd_vectori mantissa_mask = simd_splat_i(0x007fffff);
    simd_vectori bits = simd_cast_from_float(x);

    // unbiased exponent as a float; the sign bit is masked out before the shift
    simd_vector e = simd_convert_from_int(simd_sub_i(simd_shift_right_i(simd_and_i(bits, exponent_mask), 23), simd_splat_i(127)));

    // mantissa bits combined with the exponent of 1.0f, which gives a value in [1, 2[
    simd_vector m = simd_or(simd_cast_from_int(simd_and_i(bits, mantissa_mask)), one);

    // minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
    simd_vector p = simd_fmad(m, simd_splat(-3.4436006e-2f), simd_splat(3.1821337e-1f));
    p = simd_fmad(m, p, simd_splat(-1.2315303f));
    p = simd_fmad(m, p, simd_splat(2.5988452f));
    p = simd_fmad(m, p, simd_splat(-3.3241990f));
    p = simd_fmad(m, p, simd_splat(3.1157899f));

    // this effectively increases the polynomial degree by one, but ensures that log2(1) == 0
    p = simd_mul(p, simd_sub(m, one));
    simd_vector result = simd_add(p, e);

    // we can't compute a logarithm beyond this value, so we'll mark it as -infinity to indicate close to 0
    simd_vector ltminus127 = simd_cmp_le(result, simd_splat(-127.f));
    result = simd_select(result, simd_splat_negative_infinity(), ltminus127);

    // an all-ones exponent field (e == 128) means x is infinity or NaN : the decomposition above is
    // meaningless for those, so pass the input through (+inf -> +inf, NaN -> NaN).
    // -infinity is turned into NaN by the negative check below.
    simd_vector inf_or_nan = simd_cmp_le(simd_splat(128.f), e);
    result = simd_select(result, x, inf_or_nan);

    // Check for negative values and return NaN
    simd_vector lt0 = simd_cmp_lt(x, simd_splat_zero());
    result = simd_select(result, simd_splat_nan(), lt0);

    return result;
}

//----------------------------------------------------------------------------------------------------------------------
// based on http://gruntthepeon.free.fr/ssemath/
simd_vector simd_exp(simd_vector x)
Expand Down
3 changes: 3 additions & 0 deletions extra/simd_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ simd_vector simd_atan2(simd_vector x, simd_vector y);
// max error : 4.768371582e-07
simd_vector simd_log(simd_vector x);

// base-2 logarithm of each lane of x; returns -infinity for x == 0 and NaN for x < 0
// max error : 2.349250963e-07
simd_vector simd_log2(simd_vector x);

// max error : 1.108270880e-07
simd_vector simd_exp(simd_vector x);

Expand Down
8 changes: 7 additions & 1 deletion simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@ static inline simd_vector simd_select(simd_vector a, simd_vector b, simd_vector
// returns the lanes of a in reverse order (lane 3 first, lane 0 last)
static inline simd_vector simd_reverse(simd_vector a) {return __builtin_shufflevector(a, a, 3, 2, 1, 0);}
static inline simd_vector simd_splat(float value) {return vdupq_n_f32(value);}
static inline simd_vector simd_splat_zero(void) {return vdupq_n_f32(0);}

// The constants below are built from their IEEE-754 bit patterns.
// vreinterpretq_f32_u32 is the intrinsic that reinterprets a uint32x4_t as a float32x4_t;
// vreinterpretq_u32_f32 goes the other way and only compiles here when the compiler allows
// lax vector conversions.
static inline simd_vector simd_splat_nan(void) {return vreinterpretq_f32_u32(vdupq_n_u32(0xffffffff));} // all bits set : exponent all ones, non-zero mantissa
static inline simd_vector simd_splat_positive_infinity(void) {return vreinterpretq_f32_u32(vdupq_n_u32(0x7f800000));}
static inline simd_vector simd_splat_negative_infinity(void) {return vreinterpretq_f32_u32(vdupq_n_u32(0xff800000));}
static inline simd_vector simd_sign_mask(void) {return vreinterpretq_f32_u32(vdupq_n_u32(0x80000000));} // only the sign bit
static inline simd_vector simd_inv_sign_mask(void) {return vreinterpretq_f32_u32(vdupq_n_u32(~0x80000000));} // every bit except the sign bit
static inline simd_vector simd_abs_mask(void) {return vreinterpretq_f32_u32(vdupq_n_u32(0x7fffffff));} // every bit except the sign bit
static inline simd_vector simd_min_normalized(void) {return vreinterpretq_f32_u32(vdupq_n_u32(0x00800000));} // the smallest non denormalized float number
// despite their names, the two masks below select (0x7f800000) or clear (~0x7f800000) the exponent bits
static inline simd_vector simd_inv_mant_mask(void){return vreinterpretq_f32_u32(vdupq_n_u32(~0x7f800000));}
static inline simd_vector simd_mant_mask(void){return vreinterpretq_f32_u32(vdupq_n_u32(0x7f800000));}
Expand Down Expand Up @@ -334,6 +337,9 @@ static inline simd_vector simd_select(simd_vector a, simd_vector b, simd_vector
// NOTE(review): _mm256_swap is defined elsewhere; presumably it swaps the two 128-bit halves so that,
// combined with the per-half permute, the eight lanes end up in reverse order — confirm.
static inline simd_vector simd_reverse(simd_vector a) {return _mm256_permute_ps(_mm256_swap(a), _MM_SHUFFLE(0, 1, 2, 3));}
// broadcasts value to every lane
static inline simd_vector simd_splat(float value) {return _mm256_set1_ps(value);}
// every lane set to 0
static inline simd_vector simd_splat_zero(void) {return _mm256_setzero_ps();}
// the constants below are built from their 32-bit IEEE-754 bit patterns and reinterpreted as floats
// all bits set : exponent all ones and non-zero mantissa, i.e. a NaN
static inline simd_vector simd_splat_nan(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0xffffffff));}
// exponent all ones, zero mantissa, sign bit clear
static inline simd_vector simd_splat_positive_infinity(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0x7f800000));}
// exponent all ones, zero mantissa, sign bit set
static inline simd_vector simd_splat_negative_infinity(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0xff800000));}
// only the sign bit is set in each lane
static inline simd_vector simd_sign_mask(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));}
// every bit except the sign bit is set in each lane
static inline simd_vector simd_inv_sign_mask(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(~0x80000000));}
static inline simd_vector simd_min_normalized(void) {return _mm256_castsi256_ps(_mm256_set1_epi32(0x00800000));} // the smallest non denormalized float number
Expand Down
5 changes: 3 additions & 2 deletions tests/test_simd_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,11 @@ SUITE(trigonometry)
// Test suite for the logarithm / exponential / cube-root functions.
// NOTE(review): RUN_TESTp and generic_test are defined elsewhere; the arguments look like
// (reference function, simd function, range start, range end, error tolerance, flag, name) — confirm.
SUITE(exponentiation)
{
printf(".");
// NOTE(review): simd_log is registered twice, differing only in the boolean flag — confirm both are intended.
RUN_TESTp(generic_test, logf, simd_log, FLT_EPSILON, 1000.f, 1.e-06f, false, "simd_log");
RUN_TESTp(generic_test, logf, simd_log, FLT_EPSILON, 1000.f, 1.e-06f, true, "simd_log");
// simd_log2 is compared against log2f over [FLT_EPSILON, 1.e20f] with a 3.e-07f tolerance
RUN_TESTp(generic_test, log2f, simd_log2, FLT_EPSILON, 1.e20f, 3.e-07f, true, "simd_log2");
RUN_TESTp(generic_test, expf, simd_exp, -87.f, 87.f, 1.e-06f, true, "simd_exp");
RUN_TESTp(generic_test, expf, simd_approx_exp, -87.f, 87.f, 2.e-03f, true, "simd_approx_exp");
// NOTE(review): simd_cbrt is registered twice, with different tolerance and flag — confirm both are intended.
RUN_TESTp(generic_test, cbrtf, simd_cbrt, -100.f, 100.f, 5.e-07f, false, "simd_cbrt");
RUN_TESTp(generic_test, cbrtf, simd_cbrt, -100.f, 100.f, 2.e-07f, true, "simd_cbrt");
}

SUITE(color_space)
Expand Down

0 comments on commit bd33677

Please sign in to comment.