Skip to content

Commit

Permalink
Merge pull request #261 from ebassi/simd-round
Browse files Browse the repository at this point in the history
Add rounding operators for graphene_simd4f_t
  • Loading branch information
ebassi authored Jan 23, 2024
2 parents 5b31962 + 0959b6f commit d03fb3d
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 0 deletions.
2 changes: 2 additions & 0 deletions doc/graphene-sections.txt
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ graphene_simd4f_is_zero4
graphene_simd4f_is_zero3
graphene_simd4f_is_zero2
graphene_simd4f_interpolate
graphene_simd4f_ceil
graphene_simd4f_floor
<SUBSECTION Private>
graphene_simd4f_union_t
graphene_simd4i_union_t
Expand Down
127 changes: 127 additions & 0 deletions include/graphene-simd4f.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ bool graphene_simd4f_cmp_gt (const graphene_simd4f_t
GRAPHENE_AVAILABLE_IN_1_0
graphene_simd4f_t graphene_simd4f_neg (const graphene_simd4f_t s);

/* Component-wise rounding operators, available since 1.12: ceil rounds
 * every component of the vector up, floor rounds every component down,
 * to an integral value.
 */
GRAPHENE_AVAILABLE_IN_1_12
graphene_simd4f_t graphene_simd4f_ceil (const graphene_simd4f_t s);
GRAPHENE_AVAILABLE_IN_1_12
graphene_simd4f_t graphene_simd4f_floor (const graphene_simd4f_t s);

#if !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_SSE)

/* SSE2 implementation of SIMD 4f */
Expand Down Expand Up @@ -471,6 +476,34 @@ typedef GRAPHENE_ALIGN16 union {
(graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__mask.f)); \
}))

# if defined(GRAPHENE_USE_SSE4_1)
/* With SSE4.1 enabled both rounding operators map onto a single packed
 * intrinsic; the argument is expanded exactly once.
 */
# define graphene_simd4f_ceil(s) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_ceil_ps ((s)); \
  }))
# define graphene_simd4f_floor(s) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_floor_ps ((s)); \
  }))
# else
/* Without SSE4.1 each component is extracted, rounded with ceilf() or
 * floorf(), and the vector is rebuilt.
 *
 * The argument is copied into a local first, so that an expression which
 * is expensive or has side effects is evaluated only once instead of once
 * per component. The local has a macro-specific name to avoid clashing
 * with a caller variable or with another macro's temporary.
 */
# define graphene_simd4f_ceil(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __ceil_s = (s); \
    const float __ceil_x = ceilf (graphene_simd4f_get_x (__ceil_s)); \
    const float __ceil_y = ceilf (graphene_simd4f_get_y (__ceil_s)); \
    const float __ceil_z = ceilf (graphene_simd4f_get_z (__ceil_s)); \
    const float __ceil_w = ceilf (graphene_simd4f_get_w (__ceil_s)); \
    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
  }))
# define graphene_simd4f_floor(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __floor_s = (s); \
    const float __floor_x = floorf (graphene_simd4f_get_x (__floor_s)); \
    const float __floor_y = floorf (graphene_simd4f_get_y (__floor_s)); \
    const float __floor_z = floorf (graphene_simd4f_get_z (__floor_s)); \
    const float __floor_w = floorf (graphene_simd4f_get_w (__floor_s)); \
    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
  }))
# endif

/* On MSVC, we use static inlines */
# elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */

Expand Down Expand Up @@ -771,6 +804,37 @@ _simd4f_neg (const graphene_simd4f_t s)
return _mm_xor_ps (s, _mm_load_ps (__mask.f));
}

/* Map the public operator names onto the static inline helpers below. */
#define graphene_simd4f_ceil(s) _simd4f_ceil(s)
#define graphene_simd4f_floor(s) _simd4f_floor(s)

/* Round every component of @s up to an integral value.
 *
 * Uses the packed _mm_ceil_ps() intrinsic when GRAPHENE_USE_SSE4_1 is
 * defined; otherwise each component is rounded with ceilf() and the
 * vector is rebuilt.
 */
static inline graphene_simd4f_t
_simd4f_ceil (const graphene_simd4f_t s)
{
#if !defined(GRAPHENE_USE_SSE4_1)
  return graphene_simd4f_init (ceilf (graphene_simd4f_get_x (s)),
                               ceilf (graphene_simd4f_get_y (s)),
                               ceilf (graphene_simd4f_get_z (s)),
                               ceilf (graphene_simd4f_get_w (s)));
#else
  return _mm_ceil_ps (s);
#endif
}

/* Round every component of @s down to an integral value.
 *
 * Uses the packed _mm_floor_ps() intrinsic when GRAPHENE_USE_SSE4_1 is
 * defined; otherwise each component is rounded with floorf() and the
 * vector is rebuilt.
 */
static inline graphene_simd4f_t
_simd4f_floor (const graphene_simd4f_t s)
{
#if !defined(GRAPHENE_USE_SSE4_1)
  return graphene_simd4f_init (floorf (graphene_simd4f_get_x (s)),
                               floorf (graphene_simd4f_get_y (s)),
                               floorf (graphene_simd4f_get_z (s)),
                               floorf (graphene_simd4f_get_w (s)));
#else
  return _mm_floor_ps (s);
#endif
}

#else /* SSE intrinsics-not GCC or Visual Studio */

# error "Need GCC-compatible or Visual Studio compiler for SSE extensions."
Expand Down Expand Up @@ -1076,6 +1140,24 @@ typedef int graphene_simd4i_t __attribute__((vector_size (16)));
graphene_simd4f_mul (__s, __minus_one); \
}))

/* Round every component of the vector up with ceilf() and rebuild it.
 *
 * The argument is copied into a local first so that it is evaluated only
 * once, rather than once per component.
 */
# define graphene_simd4f_ceil(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __ceil_s = (s); \
    const float __ceil_x = ceilf (graphene_simd4f_get_x (__ceil_s)); \
    const float __ceil_y = ceilf (graphene_simd4f_get_y (__ceil_s)); \
    const float __ceil_z = ceilf (graphene_simd4f_get_z (__ceil_s)); \
    const float __ceil_w = ceilf (graphene_simd4f_get_w (__ceil_s)); \
    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
  }))

/* Round every component of the vector down with floorf() and rebuild it.
 *
 * The argument is copied into a local first so that it is evaluated only
 * once, rather than once per component.
 */
# define graphene_simd4f_floor(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __floor_s = (s); \
    const float __floor_x = floorf (graphene_simd4f_get_x (__floor_s)); \
    const float __floor_y = floorf (graphene_simd4f_get_y (__floor_s)); \
    const float __floor_z = floorf (graphene_simd4f_get_z (__floor_s)); \
    const float __floor_w = floorf (graphene_simd4f_get_w (__floor_s)); \
    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
  }))

#elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_ARM_NEON)

/* ARM Neon implementation of SIMD4f */
Expand Down Expand Up @@ -1398,6 +1480,24 @@ typedef float32x2_t graphene_simd2f_t;
(graphene_simd4f_t) vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask)); \
}))

/* Round every component of the vector up with ceilf() and rebuild it.
 *
 * The argument is copied into a local first so that it is evaluated only
 * once, rather than once per component.
 */
# define graphene_simd4f_ceil(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __ceil_s = (s); \
    const float __ceil_x = ceilf (graphene_simd4f_get_x (__ceil_s)); \
    const float __ceil_y = ceilf (graphene_simd4f_get_y (__ceil_s)); \
    const float __ceil_z = ceilf (graphene_simd4f_get_z (__ceil_s)); \
    const float __ceil_w = ceilf (graphene_simd4f_get_w (__ceil_s)); \
    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
  }))

/* Round every component of the vector down with floorf() and rebuild it.
 *
 * The argument is copied into a local first so that it is evaluated only
 * once, rather than once per component.
 */
# define graphene_simd4f_floor(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __floor_s = (s); \
    const float __floor_x = floorf (graphene_simd4f_get_x (__floor_s)); \
    const float __floor_y = floorf (graphene_simd4f_get_y (__floor_s)); \
    const float __floor_z = floorf (graphene_simd4f_get_z (__floor_s)); \
    const float __floor_w = floorf (graphene_simd4f_get_w (__floor_s)); \
    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
  }))

#elif defined _MSC_VER /* Visual Studio ARM */

# define graphene_simd4f_init(x,y,z,w) _simd4f_init(x,y,z,w)
Expand Down Expand Up @@ -1717,6 +1817,29 @@ _simd4f_neg (const graphene_simd4f_t s)
return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask));
}

/* Map the public operator names onto the static inline helpers below. */
# define graphene_simd4f_ceil(s) _simd4f_ceil(s)
# define graphene_simd4f_floor(s) _simd4f_floor(s)

/* Round every component of @s up to an integral value by applying ceilf()
 * to each extracted component and rebuilding the vector.
 */
static inline graphene_simd4f_t
_simd4f_ceil (const graphene_simd4f_t s)
{
  return graphene_simd4f_init (ceilf (graphene_simd4f_get_x (s)),
                               ceilf (graphene_simd4f_get_y (s)),
                               ceilf (graphene_simd4f_get_z (s)),
                               ceilf (graphene_simd4f_get_w (s)));
}

/* Round every component of @s down to an integral value by applying
 * floorf() to each extracted component and rebuilding the vector.
 */
static inline graphene_simd4f_t
_simd4f_floor (const graphene_simd4f_t s)
{
  return graphene_simd4f_init (floorf (graphene_simd4f_get_x (s)),
                               floorf (graphene_simd4f_get_y (s)),
                               floorf (graphene_simd4f_get_z (s)),
                               floorf (graphene_simd4f_get_w (s)));
}

#else /* ARM NEON intrinsics-not GCC or Visual Studio */

# error "Need GCC-compatible or Visual Studio compiler for ARM NEON extensions."
Expand Down Expand Up @@ -1829,6 +1952,10 @@ _simd4f_neg (const graphene_simd4f_t s)
(graphene_simd4f_cmp_gt ((a), (b)))
#define graphene_simd4f_neg(s) \
(graphene_simd4f_neg ((s)))
/* Forward to the functions of the same name: a function-like macro is not
 * re-expanded inside its own replacement list, so these become plain calls.
 */
#define graphene_simd4f_ceil(s) \
(graphene_simd4f_ceil ((s)))
#define graphene_simd4f_floor(s) \
(graphene_simd4f_floor ((s)))

#else
# error "Unsupported simd4f implementation."
Expand Down
44 changes: 44 additions & 0 deletions src/graphene-simd4f.c
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,38 @@ graphene_simd4f_t
return graphene_simd4f_neg (s);
}

/**
* graphene_simd4f_ceil:
* @s: a #graphene_simd4f_t
*
* Rounds each component of the vector @s up to the nearest integer value.
*
* Returns: the rounded up vector
*
* Since: 1.12
*/
graphene_simd4f_t
(graphene_simd4f_ceil) (const graphene_simd4f_t s)
{
return graphene_simd4f_ceil (s);
}

/**
* graphene_simd4f_floor:
* @s: a #graphene_simd4f_t
*
* Rounds each component of the vector @s down to the nearest integer value.
*
* Returns: the rounded down vector
*
* Since: 1.12
*/
graphene_simd4f_t
(graphene_simd4f_floor) (const graphene_simd4f_t s)
{
return graphene_simd4f_floor (s);
}

#else /* GRAPHENE_USE_SCALAR */

graphene_simd4f_t
Expand Down Expand Up @@ -1472,4 +1504,16 @@ graphene_simd4f_t
return graphene_simd4f_init (-s.x, -s.y, -s.z, -s.w);
}

/* Scalar implementation: round each field of @s up with ceilf(). */
graphene_simd4f_t
(graphene_simd4f_ceil) (const graphene_simd4f_t s)
{
  const float up_x = ceilf (s.x);
  const float up_y = ceilf (s.y);
  const float up_z = ceilf (s.z);
  const float up_w = ceilf (s.w);

  return graphene_simd4f_init (up_x, up_y, up_z, up_w);
}

/* Scalar implementation: round each field of @s down with floorf(). */
graphene_simd4f_t
(graphene_simd4f_floor) (const graphene_simd4f_t s)
{
  const float down_x = floorf (s.x);
  const float down_y = floorf (s.y);
  const float down_z = floorf (s.z);
  const float down_w = floorf (s.w);

  return graphene_simd4f_init (down_x, down_y, down_z, down_w);
}

#endif /* GRAPHENE_USE_SCALAR */
33 changes: 33 additions & 0 deletions tests/simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,36 @@ simd_operators_reciprocal (void)
NULL);
}

/* Checks that graphene_simd4f_ceil() rounds each component of a vector
 * with fractional components up to the next integer.
 */
static void
simd_operators_ceil (void)
{
  graphene_simd4f_t input = graphene_simd4f_init (1.7f, 2.4f, 3.6f, 4.2f);
  graphene_simd4f_t expected = graphene_simd4f_init (2.0f, 3.0f, 4.0f, 5.0f);
  graphene_simd4f_t rounded = graphene_simd4f_ceil (input);

  mutest_expect ("ceil() to round up to the nearest integer",
                 mutest_bool_value (graphene_simd4f_cmp_eq (rounded, expected)),
                 mutest_to_be_true,
                 NULL);
}

/* Checks that graphene_simd4f_floor() rounds each component of a vector
 * with fractional components down to the previous integer.
 */
static void
simd_operators_floor (void)
{
  graphene_simd4f_t input = graphene_simd4f_init (1.7f, 2.4f, 3.6f, 4.2f);
  graphene_simd4f_t expected = graphene_simd4f_init (1.0f, 2.0f, 3.0f, 4.0f);
  graphene_simd4f_t rounded = graphene_simd4f_floor (input);

  mutest_expect ("floor() to round down to the nearest integer",
                 mutest_bool_value (graphene_simd4f_cmp_eq (rounded, expected)),
                 mutest_to_be_true,
                 NULL);
}

static void
simd_suite (void)
{
Expand All @@ -339,6 +369,9 @@ simd_suite (void)
mutest_it ("can compute the maximum vector and scalar", simd_operators_max);

mutest_it ("can compute the reciprocal of vector", simd_operators_reciprocal);

mutest_it ("can round up vector components", simd_operators_ceil);
mutest_it ("can round down vector components", simd_operators_floor);
}

MUTEST_MAIN (
Expand Down

0 comments on commit d03fb3d

Please sign in to comment.