Skip to content

Commit

Permalink
neon version of simd_frexp, simd_ldexp. ldexp test doesn't pass yet
Browse files Browse the repository at this point in the history
  • Loading branch information
Geolm committed Jan 15, 2024
1 parent 7323c27 commit 0a8fb06
Showing 1 changed file with 16 additions and 22 deletions.
38 changes: 16 additions & 22 deletions simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,34 +254,28 @@ static inline void simd_export_uint8(simd_vector a, simd_vector b, simd_vector c
//----------------------------------------------------------------------------------------------------------------------
// Vectorized frexp (NEON): splits every lane of x into a mantissa and a power-of-two exponent.
//   x        : input lanes
//   exponent : output, receives the extracted exponent of each lane converted to float
//   returns  : x with its exponent field replaced by 0x7e, i.e. a magnitude in [0.5, 1) for
//              normal inputs, sign preserved
// Lanes detected as zero get an exponent of 0. Denormals, infinities and NaN are not special-cased.
static inline simd_vector simd_frexp(simd_vector x, simd_vector* exponent)
{
    int32x4_t cast_float = vreinterpretq_s32_f32(x);

    // biased exponent lives in bits 23..30; vshlq_s32 with a negative count shifts right and
    // the 0xff mask discards the sign-extended bits brought in by the arithmetic shift
    int32x4_t e = vandq_s32(vshlq_s32(cast_float, vdupq_n_s32(-23)), vdupq_n_s32(0xff));

    // lanes with a zero exponent field that also compare equal to 0.0f
    // NEON comparisons return uint32x4_t masks, so they are reinterpreted explicitly
    int32x4_t equal_to_zero = vandq_s32(vreinterpretq_s32_u32(vceqq_s32(e, vdupq_n_s32(0))),
                                        vreinterpretq_s32_u32(vceqq_f32(x, simd_splat_zero())));

    // remove the bias (0x7e rather than 0x7f because the mantissa is normalized to [0.5, 1))
    // and force the exponent to 0 on zero lanes
    e = vbicq_s32(vsubq_s32(e, vdupq_n_s32(0x7e)), equal_to_zero);

    // keep sign and mantissa bits, then install the exponent field 0x7e (0x3f000000)
    cast_float = vandq_s32(cast_float, vdupq_n_s32(0x807fffff));
    cast_float = vorrq_s32(cast_float, vdupq_n_s32(0x3f000000));

    *exponent = vcvtq_f32_s32(e);

    // NOTE(review): presumably simd_select picks x where the mask is set, so zero lanes are
    // returned untouched -- confirm argument order and the mask type simd_select expects
    return simd_select(vreinterpretq_f32_s32(cast_float), x, equal_to_zero);
}

//----------------------------------------------------------------------------------------------------------------------
// Vectorized ldexp (NEON): scales every lane of x by 2^pw2 by editing the exponent field.
//   x       : input lanes
//   pw2     : power of two to apply per lane, converted to integer with vcvtq_s32_f32
//             (rounds toward zero, so it is expected to hold integral values)
//   returns : x with pw2 added to its biased exponent; the result is passed through simd_andnot
//             with the "x == 0" mask
// The new exponent is masked to 8 bits, so it wraps instead of saturating on overflow/underflow.
// NOTE(review): the commit introducing this NEON path states that the ldexp test does not pass
// yet -- the exponent wrap-around and the operand order of simd_andnot are worth checking first.
static inline simd_vector simd_ldexp(simd_vector x, simd_vector pw2)
{
    int32x4_t twofivefive = vdupq_n_s32(0xff);
    int32x4_t fl = vreinterpretq_s32_f32(x);

    // biased exponent: vshlq_s32 with a negative count shifts right, the mask keeps bits 23..30
    int32x4_t e = vandq_s32(vshlq_s32(fl, vdupq_n_s32(-23)), twofivefive);

    // add the requested power of two, keep the low 8 bits only
    e = vandq_s32(vaddq_s32(e, vcvtq_s32_f32(pw2)), twofivefive);

    // exponent field all ones; vceqq_s32 returns a uint32x4_t mask, reinterpret it explicitly
    int32x4_t is_infinity = vreinterpretq_s32_u32(vceqq_s32(e, twofivefive));

    // on those lanes only sign and exponent bits survive (mantissa cleared), others keep fl
    fl = vorrq_s32(vbicq_s32(fl, is_infinity), vandq_s32(fl, vdupq_n_s32(0xFF800000)));

    // write the new exponent over the old one, sign and mantissa are preserved
    fl = vorrq_s32(vshlq_s32(e, vdupq_n_s32(23)), vandq_s32(fl, vdupq_n_s32(0x807fffff)));

    // presumably clears the lanes where x == 0 so that zero stays zero -- confirm simd_andnot
    simd_vector equal_to_zero = simd_cmp_eq(x, simd_splat_zero());
    return simd_andnot(vreinterpretq_f32_s32(fl), equal_to_zero);
}

#else
Expand Down

0 comments on commit 0a8fb06

Please sign in to comment.