Merge pull request #261 from ebassi/simd-round

Add rounding operators for graphene_simd4f_t
ebassi · Jan 23, 2024 · d03fb3d · d03fb3d
2 parents 5b31962 + 0959b6f
commit d03fb3d
Show file tree

Hide file tree

Showing 4 changed files with 206 additions and 0 deletions.
diff --git a/doc/graphene-sections.txt b/doc/graphene-sections.txt
@@ -439,6 +439,8 @@ graphene_simd4f_is_zero4
 graphene_simd4f_is_zero3
 graphene_simd4f_is_zero2
 graphene_simd4f_interpolate
+graphene_simd4f_ceil
+graphene_simd4f_floor
 <SUBSECTION Private>
 graphene_simd4f_union_t
 graphene_simd4i_union_t

diff --git a/include/graphene-simd4f.h b/include/graphene-simd4f.h
@@ -174,6 +174,11 @@ bool                    graphene_simd4f_cmp_gt          (const graphene_simd4f_t
 GRAPHENE_AVAILABLE_IN_1_0
 graphene_simd4f_t       graphene_simd4f_neg             (const graphene_simd4f_t s);
 
+GRAPHENE_AVAILABLE_IN_1_12
+graphene_simd4f_t       graphene_simd4f_ceil            (const graphene_simd4f_t s);
+GRAPHENE_AVAILABLE_IN_1_12
+graphene_simd4f_t       graphene_simd4f_floor           (const graphene_simd4f_t s);
+
 #if !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_SSE)
 
 /* SSE2 implementation of SIMD 4f */
@@ -471,6 +476,34 @@ typedef GRAPHENE_ALIGN16 union {
     (graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__mask.f)); \
   }))
 
+#  if defined(GRAPHENE_USE_SSE4_1)
+/* SSE4.1 path: _mm_ceil_ps() rounds all four lanes towards +infinity */
+#   define graphene_simd4f_ceil(s) \
+  ((graphene_simd4f_t) _mm_ceil_ps ((s)))
+/* SSE4.1 path: _mm_floor_ps() rounds all four lanes towards -infinity */
+#   define graphene_simd4f_floor(s) \
+  ((graphene_simd4f_t) _mm_floor_ps ((s)))
+#  else
+/* Fallback without SSE4.1: round every lane up with ceilf().
+ * The argument is copied into a local first so that it is evaluated
+ * exactly once, even when the caller passes an expression that is
+ * expensive or has side effects.
+ */
+#   define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __ceil_s = (s); \
+    const float __ceil_x = ceilf (graphene_simd4f_get_x (__ceil_s)); \
+    const float __ceil_y = ceilf (graphene_simd4f_get_y (__ceil_s)); \
+    const float __ceil_z = ceilf (graphene_simd4f_get_z (__ceil_s)); \
+    const float __ceil_w = ceilf (graphene_simd4f_get_w (__ceil_s)); \
+    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
+  }))
+/* Fallback without SSE4.1: round every lane down with floorf().
+ * The argument is copied into a local first so that it is evaluated
+ * exactly once, even when the caller passes an expression that is
+ * expensive or has side effects.
+ */
+#   define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __floor_s = (s); \
+    const float __floor_x = floorf (graphene_simd4f_get_x (__floor_s)); \
+    const float __floor_y = floorf (graphene_simd4f_get_y (__floor_s)); \
+    const float __floor_z = floorf (graphene_simd4f_get_z (__floor_s)); \
+    const float __floor_w = floorf (graphene_simd4f_get_w (__floor_s)); \
+    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
+  }))
+#  endif
+
 /* On MSVC, we use static inlines */
 # elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */
 
@@ -771,6 +804,37 @@ _simd4f_neg (const graphene_simd4f_t s)
   return _mm_xor_ps (s, _mm_load_ps (__mask.f));
 }
 
+#define graphene_simd4f_ceil(s) _simd4f_ceil(s) /* forwards to the static inline helper defined below */
+#define graphene_simd4f_floor(s) _simd4f_floor(s) /* forwards to the static inline helper defined below */
+
+/* Rounds each of the four components of @s up to an integral value.
+ * Uses the SSE4.1 intrinsic when available, ceilf() per lane otherwise.
+ */
+static inline graphene_simd4f_t
+_simd4f_ceil (const graphene_simd4f_t s)
+{
+#if defined(GRAPHENE_USE_SSE4_1)
+  return _mm_ceil_ps (s);
+#else
+  return graphene_simd4f_init (ceilf (graphene_simd4f_get_x (s)),
+                               ceilf (graphene_simd4f_get_y (s)),
+                               ceilf (graphene_simd4f_get_z (s)),
+                               ceilf (graphene_simd4f_get_w (s)));
+#endif
+}
+
+/* Rounds each of the four components of @s down to an integral value.
+ * Uses the SSE4.1 intrinsic when available, floorf() per lane otherwise.
+ */
+static inline graphene_simd4f_t
+_simd4f_floor (const graphene_simd4f_t s)
+{
+#if defined(GRAPHENE_USE_SSE4_1)
+  return _mm_floor_ps (s);
+#else
+  return graphene_simd4f_init (floorf (graphene_simd4f_get_x (s)),
+                               floorf (graphene_simd4f_get_y (s)),
+                               floorf (graphene_simd4f_get_z (s)),
+                               floorf (graphene_simd4f_get_w (s)));
+#endif
+}
+
 #else /* SSE intrinsics-not GCC or Visual Studio */
 
 #  error "Need GCC-compatible or Visual Studio compiler for SSE extensions."
@@ -1076,6 +1140,24 @@ typedef int graphene_simd4i_t __attribute__((vector_size (16)));
     graphene_simd4f_mul (__s, __minus_one); \
   }))
 
+/* Round every lane up with ceilf(). The argument is copied into a
+ * local first so that it is evaluated exactly once, even when the
+ * caller passes an expression that is expensive or has side effects.
+ */
+# define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __ceil_s = (s); \
+    const float __ceil_x = ceilf (graphene_simd4f_get_x (__ceil_s)); \
+    const float __ceil_y = ceilf (graphene_simd4f_get_y (__ceil_s)); \
+    const float __ceil_z = ceilf (graphene_simd4f_get_z (__ceil_s)); \
+    const float __ceil_w = ceilf (graphene_simd4f_get_w (__ceil_s)); \
+    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
+  }))
+
+/* Round every lane down with floorf(). The argument is copied into a
+ * local first so that it is evaluated exactly once, even when the
+ * caller passes an expression that is expensive or has side effects.
+ */
+# define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __floor_s = (s); \
+    const float __floor_x = floorf (graphene_simd4f_get_x (__floor_s)); \
+    const float __floor_y = floorf (graphene_simd4f_get_y (__floor_s)); \
+    const float __floor_z = floorf (graphene_simd4f_get_z (__floor_s)); \
+    const float __floor_w = floorf (graphene_simd4f_get_w (__floor_s)); \
+    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
+  }))
+
 #elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_ARM_NEON)
 
 /* ARM Neon implementation of SIMD4f */
@@ -1398,6 +1480,24 @@ typedef float32x2_t graphene_simd2f_t;
     (graphene_simd4f_t) vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask)); \
   }))
 
+/* Round every lane up with ceilf(). The argument is copied into a
+ * local first so that it is evaluated exactly once, even when the
+ * caller passes an expression that is expensive or has side effects.
+ */
+# define graphene_simd4f_ceil(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __ceil_s = (s); \
+    const float __ceil_x = ceilf (graphene_simd4f_get_x (__ceil_s)); \
+    const float __ceil_y = ceilf (graphene_simd4f_get_y (__ceil_s)); \
+    const float __ceil_z = ceilf (graphene_simd4f_get_z (__ceil_s)); \
+    const float __ceil_w = ceilf (graphene_simd4f_get_w (__ceil_s)); \
+    (graphene_simd4f_t) graphene_simd4f_init (__ceil_x, __ceil_y, __ceil_z, __ceil_w); \
+  }))
+
+/* Round every lane down with floorf(). The argument is copied into a
+ * local first so that it is evaluated exactly once, even when the
+ * caller passes an expression that is expensive or has side effects.
+ */
+# define graphene_simd4f_floor(s) \
+  (__extension__ ({ \
+    const graphene_simd4f_t __floor_s = (s); \
+    const float __floor_x = floorf (graphene_simd4f_get_x (__floor_s)); \
+    const float __floor_y = floorf (graphene_simd4f_get_y (__floor_s)); \
+    const float __floor_z = floorf (graphene_simd4f_get_z (__floor_s)); \
+    const float __floor_w = floorf (graphene_simd4f_get_w (__floor_s)); \
+    (graphene_simd4f_t) graphene_simd4f_init (__floor_x, __floor_y, __floor_z, __floor_w); \
+  }))
+
 #elif defined _MSC_VER /* Visual Studio ARM */
 
 # define graphene_simd4f_init(x,y,z,w) _simd4f_init(x,y,z,w)
@@ -1717,6 +1817,29 @@ _simd4f_neg (const graphene_simd4f_t s)
   return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask));
 }
 
+# define graphene_simd4f_ceil(s) _simd4f_ceil(s) /* forwards to the static inline helper defined below */
+# define graphene_simd4f_floor(s) _simd4f_floor(s) /* forwards to the static inline helper defined below */
+
+/* Rounds each of the four components of @s up to an integral value,
+ * applying ceilf() to one lane at a time.
+ */
+static inline graphene_simd4f_t
+_simd4f_ceil (const graphene_simd4f_t s)
+{
+  return graphene_simd4f_init (ceilf (graphene_simd4f_get_x (s)),
+                               ceilf (graphene_simd4f_get_y (s)),
+                               ceilf (graphene_simd4f_get_z (s)),
+                               ceilf (graphene_simd4f_get_w (s)));
+}
+
+/* Rounds each of the four components of @s down to an integral value,
+ * applying floorf() to one lane at a time.
+ */
+static inline graphene_simd4f_t
+_simd4f_floor (const graphene_simd4f_t s)
+{
+  return graphene_simd4f_init (floorf (graphene_simd4f_get_x (s)),
+                               floorf (graphene_simd4f_get_y (s)),
+                               floorf (graphene_simd4f_get_z (s)),
+                               floorf (graphene_simd4f_get_w (s)));
+}
+
 #else /* ARM NEON intrinsics-not GCC or Visual Studio */
 
 #  error "Need GCC-compatible or Visual Studio compiler for ARM NEON extensions."
@@ -1829,6 +1952,10 @@ _simd4f_neg (const graphene_simd4f_t s)
   (graphene_simd4f_cmp_gt ((a), (b)))
 #define graphene_simd4f_neg(s) \
   (graphene_simd4f_neg ((s)))
+#define graphene_simd4f_ceil(s) \
+  (graphene_simd4f_ceil ((s))) /* expands to a call of the same-named function; a macro is not re-expanded inside its own expansion */
+#define graphene_simd4f_floor(s) \
+  (graphene_simd4f_floor ((s))) /* expands to a call of the same-named function; a macro is not re-expanded inside its own expansion */
 
 #else
 # error "Unsupported simd4f implementation."

diff --git a/src/graphene-simd4f.c b/src/graphene-simd4f.c
@@ -1041,6 +1041,38 @@ graphene_simd4f_t
   return graphene_simd4f_neg (s);
 }
 
+/**
+ * graphene_simd4f_ceil:
+ * @s: a #graphene_simd4f_t
+ *
+ * Computes the ceiling of every component of @s, i.e. replaces each
+ * component with the smallest integral value that is not less than it.
+ *
+ * Returns: a vector holding the rounded up components
+ *
+ * Since: 1.12
+ */
+graphene_simd4f_t
+(graphene_simd4f_ceil) (const graphene_simd4f_t s)
+{
+  /* The parenthesised name above keeps the macro from expanding in the
+   * definition; the call below does go through the macro.
+   */
+  const graphene_simd4f_t rounded = graphene_simd4f_ceil (s);
+
+  return rounded;
+}
+
+/**
+ * graphene_simd4f_floor:
+ * @s: a #graphene_simd4f_t
+ *
+ * Computes the floor of every component of @s, i.e. replaces each
+ * component with the largest integral value that is not greater than it.
+ *
+ * Returns: a vector holding the rounded down components
+ *
+ * Since: 1.12
+ */
+graphene_simd4f_t
+(graphene_simd4f_floor) (const graphene_simd4f_t s)
+{
+  /* The parenthesised name above keeps the macro from expanding in the
+   * definition; the call below does go through the macro.
+   */
+  const graphene_simd4f_t rounded = graphene_simd4f_floor (s);
+
+  return rounded;
+}
+
 #else /* GRAPHENE_USE_SCALAR */
 
 graphene_simd4f_t
@@ -1472,4 +1504,16 @@ graphene_simd4f_t
   return graphene_simd4f_init (-s.x, -s.y, -s.z, -s.w);
 }
 
+/* Scalar implementation: applies ceilf() to each field of @s */
+graphene_simd4f_t
+(graphene_simd4f_ceil) (const graphene_simd4f_t s)
+{
+  const float up_x = ceilf (s.x);
+  const float up_y = ceilf (s.y);
+  const float up_z = ceilf (s.z);
+  const float up_w = ceilf (s.w);
+
+  return graphene_simd4f_init (up_x, up_y, up_z, up_w);
+}
+
+/* Scalar implementation: applies floorf() to each field of @s */
+graphene_simd4f_t
+(graphene_simd4f_floor) (const graphene_simd4f_t s)
+{
+  const float down_x = floorf (s.x);
+  const float down_y = floorf (s.y);
+  const float down_z = floorf (s.z);
+  const float down_w = floorf (s.w);
+
+  return graphene_simd4f_init (down_x, down_y, down_z, down_w);
+}
+
 #endif /* GRAPHENE_USE_SCALAR */
diff --git a/tests/simd.c b/tests/simd.c
@@ -320,6 +320,36 @@ simd_operators_reciprocal (void)
                  NULL);
 }
 
+/* Checks that graphene_simd4f_ceil() rounds four fractional components up */
+static void
+simd_operators_ceil (void)
+{
+  graphene_simd4f_t input = graphene_simd4f_init (1.7f, 2.4f, 3.6f, 4.2f);
+  graphene_simd4f_t expected = graphene_simd4f_init (2.0f, 3.0f, 4.0f, 5.0f);
+  graphene_simd4f_t rounded = graphene_simd4f_ceil (input);
+
+  mutest_expect ("ceil() to round up to the nearest integer",
+                 mutest_bool_value (graphene_simd4f_cmp_eq (rounded, expected)),
+                 mutest_to_be_true,
+                 NULL);
+}
+
+/* Checks that graphene_simd4f_floor() rounds four fractional components down */
+static void
+simd_operators_floor (void)
+{
+  graphene_simd4f_t input = graphene_simd4f_init (1.7f, 2.4f, 3.6f, 4.2f);
+  graphene_simd4f_t expected = graphene_simd4f_init (1.0f, 2.0f, 3.0f, 4.0f);
+  graphene_simd4f_t rounded = graphene_simd4f_floor (input);
+
+  mutest_expect ("floor() to round down to the nearest integer",
+                 mutest_bool_value (graphene_simd4f_cmp_eq (rounded, expected)),
+                 mutest_to_be_true,
+                 NULL);
+}
+
 static void
 simd_suite (void)
 {
@@ -339,6 +369,9 @@ simd_suite (void)
   mutest_it ("can compute the maximum vector and scalar", simd_operators_max);
 
   mutest_it ("can compute the reciprocal of vector", simd_operators_reciprocal);
+
+  mutest_it ("can round up vector components", simd_operators_ceil);
+  mutest_it ("can round down vector components", simd_operators_floor);
 }
 
 MUTEST_MAIN (