[CIR][CIRGen][Builtin][Neon] Lower __builtin_neon_vrndns_f32 (llvm#858)

As title. Also introduced buildAArch64NeonCall skeleton, which is partially the counterpart of OG's EmitNeonCall. And this could be use for many other neon intrinsics. --------- Co-authored-by: Guojin He <[email protected]>
ChuanqiXu9 · Sep 19, 2024 · 01a99e6 · 01a99e6
1 parent eece5de
commit 01a99e6
Show file tree

Hide file tree

Showing 3 changed files with 103 additions and 20 deletions.
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
@@ -133,6 +133,9 @@ struct MissingFeatures {
   static bool volatileTypes() { return false; }
   static bool syncScopeID() { return false; }
 
+  // AArch64 Neon builtin related.
+  static bool buildNeonShiftVector() { return false; }
+
   // Misc
   static bool cacheRecordLayouts() { return false; }
   static bool capturedByInit() { return false; }
@@ -184,6 +187,7 @@ struct MissingFeatures {
   static bool deferredReplacements() { return false; }
   static bool shouldInstrumentFunction() { return false; }
   static bool xray() { return false; }
+  static bool buildConstrainedFPCall() { return false; }
 
   // Inline assembly
   static bool asmGoto() { return false; }

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -79,19 +79,15 @@ struct ARMVectorIntrinsicInfo {
 } // end anonymous namespace
 
 #define NEONMAP0(NameBase)                                                     \
-  { #NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0 }
+  {#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0}
 
 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
-  {                                                                            \
-    #NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
-        TypeModifier                                                           \
-  }
+  {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0,  \
+   TypeModifier}
 
 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)      \
-  {                                                                            \
-    #NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,    \
-        Intrinsic::AltLLVMIntrinsic, TypeModifier                              \
-  }
+  {#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,     \
+   Intrinsic::AltLLVMIntrinsic, TypeModifier}
 
 static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
     NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
@@ -1097,13 +1093,11 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
 #undef NEONMAP2
 
 #define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
-  {                                                                            \
-    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
-        TypeModifier                                                           \
-  }
+  {#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0,    \
+   TypeModifier}
 
 #define SVEMAP2(NameBase, TypeModifier)                                        \
-  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
+  {#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #define GET_SVE_LLVM_INTRINSIC_MAP
 #include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
@@ -1115,13 +1109,11 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #undef SVEMAP2
 
 #define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                         \
-  {                                                                            \
-    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,   \
-        TypeModifier                                                           \
-  }
+  {#NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0,    \
+   TypeModifier}
 
 #define SMEMAP2(NameBase, TypeModifier)                                        \
-  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
+  {#NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier}
 static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
 #define GET_SME_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sme_builtin_cg.inc"
@@ -1607,6 +1599,48 @@ static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID,
   }
 }
 
+mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
+                          llvm::SmallVector<mlir::Type> argTypes,
+                          llvm::SmallVector<mlir::Value, 4> args,
+                          llvm::StringRef intrinsicName, mlir::Type funcResTy,
+                          mlir::Location loc,
+                          bool isConstrainedFPIntrinsic = false,
+                          unsigned shift = 0, bool rightshift = false) {
+  // TODO: Consider removing the following unreachable when we have
+  // buildConstrainedFPCall feature implemented
+  assert(!MissingFeatures::buildConstrainedFPCall());
+  if (isConstrainedFPIntrinsic)
+    llvm_unreachable("isConstrainedFPIntrinsic NYI");
+  // TODO: Remove the following unreachable and call it in the loop once
+  // there is an implementation of buildNeonShiftVector
+  if (shift > 0)
+    llvm_unreachable("Argument shift NYI");
+
+  if (builtinID != clang::NEON::BI__builtin_neon_vrndns_f32)
+    llvm_unreachable("NYT");
+
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  for (unsigned j = 0; j < argTypes.size(); ++j) {
+    if (isConstrainedFPIntrinsic) {
+      assert(!MissingFeatures::buildConstrainedFPCall());
+    }
+    if (shift > 0 && shift == j) {
+      assert(!MissingFeatures::buildNeonShiftVector());
+    } else {
+      args[j] = builder.createBitcast(args[j], argTypes[j]);
+    }
+  }
+  if (isConstrainedFPIntrinsic) {
+    assert(!MissingFeatures::buildConstrainedFPCall());
+    return nullptr;
+  } else {
+    return builder
+        .create<mlir::cir::IntrinsicCallOp>(
+            loc, builder.getStringAttr(intrinsicName), funcResTy, args)
+        .getResult();
+  }
+}
+
 mlir::Value
 CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                         ReturnValueSlot ReturnValue,
@@ -2288,6 +2322,7 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     return V;
 
   mlir::Type VTy = Ty;
+  llvm::SmallVector<mlir::Value, 4> args;
   switch (BuiltinID) {
   default:
     return nullptr;
@@ -2394,7 +2429,11 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
     llvm_unreachable("NYI");
   }
   case NEON::BI__builtin_neon_vrndns_f32: {
-    llvm_unreachable("NYI");
+    mlir::Value arg0 = buildScalarExpr(E->getArg(0));
+    args.push_back(arg0);
+    return buildNeonCall(NEON::BI__builtin_neon_vrndns_f32, *this,
+                         {arg0.getType()}, args, "llvm.roundeven.f32",
+                         getCIRGenModule().FloatTy, getLoc(E->getExprLoc()));
   }
   case NEON::BI__builtin_neon_vrndph_f16: {
     llvm_unreachable("NYI");

diff --git a/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c b/clang/test/CIR/CodeGen/arm-neon-directed-rounding.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:            -ffreestanding -emit-cir -target-feature +neon %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:            -ffreestanding -emit-llvm -target-feature +neon %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+#include <arm_neon.h>
+
+float32_t test_vrndns_f32(float32_t a) {
+  return vrndns_f32(a);
+}
+// CIR: cir.func internal private  @vrndns_f32(%arg0: !cir.float {{.*}}) -> !cir.float
+// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.float, !cir.ptr<!cir.float> 
+// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.float>, !cir.float 
+// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.roundeven.f32" [[INTRIN_ARG]] : (!cir.float)
+// CIR: cir.return {{%.*}} : !cir.float
+
+// CIR-LABEL: test_vrndns_f32
+// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.float, !cir.ptr<!cir.float> 
+// CIR: [[FUNC_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.float>, !cir.float 
+// CIR: [[FUNC_RES:%.*]] = cir.call @vrndns_f32([[FUNC_ARG]]) : (!cir.float) -> !cir.float
+// CIR: cir.store [[FUNC_RES]], [[RET_P:%.*]] : !cir.float, !cir.ptr<!cir.float>
+// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: cir.return [[RET_VAL]] : !cir.float loc
+
+// LLVM: define dso_local float @test_vrndns_f32(float [[ARG:%.*]])
+// LLVM: store float [[ARG]], ptr [[ARG_SAVE:%.*]], align 4
+// LLVM: [[P0:%.*]] = load float, ptr [[ARG_SAVE]], align 4,
+// LLVM: store float [[P0]], ptr [[P0_SAVE:%.*]], align 4,
+// LLVM: [[INTRIN_ARG:%.*]] = load float, ptr [[P0_SAVE]], align 4,
+// LLVM: [[INTRIN_RES:%.*]] = call float @llvm.roundeven.f32(float [[INTRIN_ARG]])
+// LLVM: store float [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 4, 
+// LLVM: [[RES_COPY0:%.*]] = load float, ptr [[RES_SAVE0]], align 4,
+// LLVM: store float [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 4,
+// LLVM: [[RES_COPY1:%.*]] = load float, ptr [[RES_SAVE1]], align 4,
+// LLVM: store float [[RES_COPY1]], ptr [[RET_P:%.*]], align 4,
+// LLVM: [[RET_VAL:%.*]] = load float, ptr [[RET_P]], align 4,
+// LLVM: ret float [[RET_VAL]]