Skip to content

Commit

Permalink
[CIR][CIRGen][Builtin][Neon] Lower __builtin_neon_vrndns_f32 (llvm#858)
Browse files Browse the repository at this point in the history
As title.
Also introduced buildAArch64NeonCall skeleton, which is partially the
counterpart of OG's EmitNeonCall. And this could be use for many other
neon intrinsics.

---------

Co-authored-by: Guojin He <[email protected]>
  • Loading branch information
ghehg and ghehg authored Sep 19, 2024
1 parent eece5de commit 01a99e6
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 20 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/CIR/MissingFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ struct MissingFeatures {
static bool volatileTypes() { return false; }
static bool syncScopeID() { return false; }

// AArch64 Neon builtin related.
static bool buildNeonShiftVector() { return false; }

// Misc
static bool cacheRecordLayouts() { return false; }
static bool capturedByInit() { return false; }
Expand Down Expand Up @@ -184,6 +187,7 @@ struct MissingFeatures {
static bool deferredReplacements() { return false; }
static bool shouldInstrumentFunction() { return false; }
static bool xray() { return false; }
static bool buildConstrainedFPCall() { return false; }

// Inline assembly
static bool asmGoto() { return false; }
Expand Down
79 changes: 59 additions & 20 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,19 +79,15 @@ struct ARMVectorIntrinsicInfo {
} // end anonymous namespace

#define NEONMAP0(NameBase) \
{ #NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0 }
{#NameBase, NEON::BI__builtin_neon_##NameBase, 0, 0, 0}

#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
{ \
#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier \
}
{#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier}

#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
{ \
#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \
Intrinsic::AltLLVMIntrinsic, TypeModifier \
}
{#NameBase, NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, \
Intrinsic::AltLLVMIntrinsic, TypeModifier}

static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
Expand Down Expand Up @@ -1097,13 +1093,11 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
#undef NEONMAP2

#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
{ \
#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier \
}
{#NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier}

#define SVEMAP2(NameBase, TypeModifier) \
{ #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
{#NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier}
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
Expand All @@ -1115,13 +1109,11 @@ static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#undef SVEMAP2

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
{ \
#NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier \
}
{#NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
TypeModifier}

#define SMEMAP2(NameBase, TypeModifier) \
{ #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
{#NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier}
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
Expand Down Expand Up @@ -1607,6 +1599,48 @@ static mlir::Value buildArmLdrexNon128Intrinsic(unsigned int builtinID,
}
}

mlir::Value buildNeonCall(unsigned int builtinID, CIRGenFunction &cgf,
llvm::SmallVector<mlir::Type> argTypes,
llvm::SmallVector<mlir::Value, 4> args,
llvm::StringRef intrinsicName, mlir::Type funcResTy,
mlir::Location loc,
bool isConstrainedFPIntrinsic = false,
unsigned shift = 0, bool rightshift = false) {
// TODO: Consider removing the following unreachable when we have
// buildConstrainedFPCall feature implemented
assert(!MissingFeatures::buildConstrainedFPCall());
if (isConstrainedFPIntrinsic)
llvm_unreachable("isConstrainedFPIntrinsic NYI");
// TODO: Remove the following unreachable and call it in the loop once
// there is an implementation of buildNeonShiftVector
if (shift > 0)
llvm_unreachable("Argument shift NYI");

if (builtinID != clang::NEON::BI__builtin_neon_vrndns_f32)
llvm_unreachable("NYT");

CIRGenBuilderTy &builder = cgf.getBuilder();
for (unsigned j = 0; j < argTypes.size(); ++j) {
if (isConstrainedFPIntrinsic) {
assert(!MissingFeatures::buildConstrainedFPCall());
}
if (shift > 0 && shift == j) {
assert(!MissingFeatures::buildNeonShiftVector());
} else {
args[j] = builder.createBitcast(args[j], argTypes[j]);
}
}
if (isConstrainedFPIntrinsic) {
assert(!MissingFeatures::buildConstrainedFPCall());
return nullptr;
} else {
return builder
.create<mlir::cir::IntrinsicCallOp>(
loc, builder.getStringAttr(intrinsicName), funcResTy, args)
.getResult();
}
}

mlir::Value
CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
Expand Down Expand Up @@ -2288,6 +2322,7 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
return V;

mlir::Type VTy = Ty;
llvm::SmallVector<mlir::Value, 4> args;
switch (BuiltinID) {
default:
return nullptr;
Expand Down Expand Up @@ -2394,7 +2429,11 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm_unreachable("NYI");
}
case NEON::BI__builtin_neon_vrndns_f32: {
llvm_unreachable("NYI");
mlir::Value arg0 = buildScalarExpr(E->getArg(0));
args.push_back(arg0);
return buildNeonCall(NEON::BI__builtin_neon_vrndns_f32, *this,
{arg0.getType()}, args, "llvm.roundeven.f32",
getCIRGenModule().FloatTy, getLoc(E->getExprLoc()));
}
case NEON::BI__builtin_neon_vrndph_f16: {
llvm_unreachable("NYI");
Expand Down
40 changes: 40 additions & 0 deletions clang/test/CIR/CodeGen/arm-neon-directed-rounding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN: -ffreestanding -emit-cir -target-feature +neon %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN: -ffreestanding -emit-llvm -target-feature +neon %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>

float32_t test_vrndns_f32(float32_t a) {
return vrndns_f32(a);
}
// CIR: cir.func internal private @vrndns_f32(%arg0: !cir.float {{.*}}) -> !cir.float
// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.float, !cir.ptr<!cir.float>
// CIR: [[INTRIN_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.float>, !cir.float
// CIR: {{%.*}} = cir.llvm.intrinsic "llvm.roundeven.f32" [[INTRIN_ARG]] : (!cir.float)
// CIR: cir.return {{%.*}} : !cir.float

// CIR-LABEL: test_vrndns_f32
// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.float, !cir.ptr<!cir.float>
// CIR: [[FUNC_ARG:%.*]] = cir.load [[ARG_SAVE]] : !cir.ptr<!cir.float>, !cir.float
// CIR: [[FUNC_RES:%.*]] = cir.call @vrndns_f32([[FUNC_ARG]]) : (!cir.float) -> !cir.float
// CIR: cir.store [[FUNC_RES]], [[RET_P:%.*]] : !cir.float, !cir.ptr<!cir.float>
// CIR: [[RET_VAL:%.*]] = cir.load [[RET_P]] : !cir.ptr<!cir.float>, !cir.float
// CIR: cir.return [[RET_VAL]] : !cir.float loc

// LLVM: define dso_local float @test_vrndns_f32(float [[ARG:%.*]])
// LLVM: store float [[ARG]], ptr [[ARG_SAVE:%.*]], align 4
// LLVM: [[P0:%.*]] = load float, ptr [[ARG_SAVE]], align 4,
// LLVM: store float [[P0]], ptr [[P0_SAVE:%.*]], align 4,
// LLVM: [[INTRIN_ARG:%.*]] = load float, ptr [[P0_SAVE]], align 4,
// LLVM: [[INTRIN_RES:%.*]] = call float @llvm.roundeven.f32(float [[INTRIN_ARG]])
// LLVM: store float [[INTRIN_RES]], ptr [[RES_SAVE0:%.*]], align 4,
// LLVM: [[RES_COPY0:%.*]] = load float, ptr [[RES_SAVE0]], align 4,
// LLVM: store float [[RES_COPY0]], ptr [[RES_SAVE1:%.*]], align 4,
// LLVM: [[RES_COPY1:%.*]] = load float, ptr [[RES_SAVE1]], align 4,
// LLVM: store float [[RES_COPY1]], ptr [[RET_P:%.*]], align 4,
// LLVM: [[RET_VAL:%.*]] = load float, ptr [[RET_P]], align 4,
// LLVM: ret float [[RET_VAL]]

0 comments on commit 01a99e6

Please sign in to comment.