[Inliner] Don't count a call penalty for foldable __memcpy_chk and similar (#117876)

When the size is a suitable constant, __memcpy_chk is turned into a memcpy
that InstCombine then folds away. This patch therefore avoids counting such
calls toward the call penalty when computing inlining costs.

This is only really relevant on platforms whose headers redirect memcpy
to __memcpy_chk (such as Darwin). On platforms that use intrinsics,
memcpy and similar functions are already exempt from call penalties.
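
For illustration, a minimal IR sketch modeled on the tests added below (the
function @copy4 is made up for this example): with a constant length of 4 that
is known to fit within the destination's object size, the fortified call
simplifies to a plain memcpy and, at this size, to a load/store pair, so no
call survives optimization.

declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
declare ptr @__memcpy_chk(ptr, ptr, i64, i64)

define void @copy4(ptr %dst, ptr %src) {
  ; Constant length of 4; the object size is unknown here, so llvm.objectsize
  ; evaluates to -1 and the length check is trivially satisfied.
  %objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
  %r = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 4, i64 %objsize)
  ret void
}
; Through the standard optimization pipeline this reduces to roughly:
;   %1 = load i32, ptr %src, align 1
;   store i32 %1, ptr %dst, align 1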
citymarina authored Nov 29, 2024
1 parent fe04290 commit 8fb748b
Showing 4 changed files with 258 additions and 17 deletions.
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/InlineCost.h
@@ -318,6 +318,7 @@ std::optional<int> getInliningCostEstimate(
CallBase &Call, TargetTransformInfo &CalleeTTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);

@@ -327,6 +328,7 @@ std::optional<InlineCostFeatures> getInliningCostFeatures(
CallBase &Call, TargetTransformInfo &CalleeTTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);

86 changes: 69 additions & 17 deletions llvm/lib/Analysis/InlineCost.cpp
@@ -249,6 +249,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Getter for BlockFrequencyInfo
function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

/// Getter for TargetLibraryInfo
function_ref<const TargetLibraryInfo &(Function &)> GetTLI;

/// Profile summary information.
ProfileSummaryInfo *PSI;

@@ -433,6 +436,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool simplifyIntrinsicCallIsConstant(CallBase &CB);
bool simplifyIntrinsicCallObjectSize(CallBase &CB);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
bool isLoweredToCall(Function *F, CallBase &Call);

/// Return true if the given argument to the function being considered for
/// inlining has the given attribute set either at the call site or the
@@ -492,13 +496,15 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);

public:
CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr)
CallAnalyzer(
Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
CandidateCall(Call) {}

InlineResult analyze();
@@ -688,7 +694,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
/// FIXME: if InlineCostCallAnalyzer is derived from, this may need
/// to instantiate the derived class.
InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
GetAssumptionCache, GetBFI, PSI, ORE, false);
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
false);
if (CA.analyze().isSuccess()) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1106,10 +1113,12 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
const TargetTransformInfo &TTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
ProfileSummaryInfo *PSI = nullptr,
OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
bool IgnoreThreshold = false)
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
ORE),
ComputeFullInlineCost(OptComputeFullInlineCost ||
Params.ComputeFullInlineCost || ORE ||
isCostBenefitAnalysisEnabled()),
@@ -1228,8 +1237,8 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
InlineConstants::IndirectCallThreshold;

InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
GetAssumptionCache, GetBFI, PSI, ORE, false,
true);
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
false, true);
if (CA.analyze().isSuccess()) {
increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
CA.getCost());
@@ -1355,9 +1364,11 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
const TargetTransformInfo &TTI,
function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
CallBase &Call)
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI,
PSI) {}

const InlineCostFeatures &features() const { return Cost; }
};
@@ -2260,6 +2271,44 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
return false;
}

bool CallAnalyzer::isLoweredToCall(Function *F, CallBase &Call) {
const TargetLibraryInfo *TLI = GetTLI ? &GetTLI(*F) : nullptr;
LibFunc LF;
if (!TLI || !TLI->getLibFunc(*F, LF) || !TLI->has(LF))
return TTI.isLoweredToCall(F);

switch (LF) {
case LibFunc_memcpy_chk:
case LibFunc_memmove_chk:
case LibFunc_mempcpy_chk:
case LibFunc_memset_chk: {
// Calls to __memcpy_chk whose length is known to fit within the object
// size will eventually be replaced by inline stores. Therefore, these
// should not incur a call penalty. This is only really relevant on
// platforms whose headers redirect memcpy to __memcpy_chk (e.g. Darwin), as
// other platforms use memcpy intrinsics, which are already exempt from the
// call penalty.
auto *LenOp = dyn_cast<ConstantInt>(Call.getOperand(2));
if (!LenOp)
LenOp = dyn_cast_or_null<ConstantInt>(
SimplifiedValues.lookup(Call.getOperand(2)));
auto *ObjSizeOp = dyn_cast<ConstantInt>(Call.getOperand(3));
if (!ObjSizeOp)
ObjSizeOp = dyn_cast_or_null<ConstantInt>(
SimplifiedValues.lookup(Call.getOperand(3)));
if (LenOp && ObjSizeOp &&
LenOp->getLimitedValue() <= ObjSizeOp->getLimitedValue()) {
return false;
}
break;
}
default:
break;
}

return TTI.isLoweredToCall(F);
}

bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (!onCallBaseVisitStart(Call))
return true;
@@ -2341,7 +2390,7 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
return false;
}

if (TTI.isLoweredToCall(F)) {
if (isLoweredToCall(F, Call)) {
onLoweredCall(F, Call, IsIndirectCall);
}

@@ -2945,6 +2994,7 @@ std::optional<int> llvm::getInliningCostEstimate(
CallBase &Call, TargetTransformInfo &CalleeTTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
const InlineParams Params = {/* DefaultThreshold*/ 0,
/*HintThreshold*/ {},
@@ -2958,7 +3008,7 @@
/*EnableDeferral*/ true};

InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI, ORE, true,
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE, true,
/*IgnoreThreshold*/ true);
auto R = CA.analyze();
if (!R.isSuccess())
@@ -2970,9 +3020,10 @@ std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
CallBase &Call, TargetTransformInfo &CalleeTTI,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
ORE, *Call.getCalledFunction(), Call);
InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, GetTLI,
PSI, ORE, *Call.getCalledFunction(), Call);
auto R = CFA.analyze();
if (!R.isSuccess())
return std::nullopt;
@@ -3072,7 +3123,7 @@ InlineCost llvm::getInlineCost(
<< ")\n");

InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI, ORE);
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
InlineResult ShouldInline = CA.analyze();

LLVM_DEBUG(CA.dump());
@@ -3263,7 +3314,8 @@ InlineCostAnnotationPrinterPass::run(Function &F,
continue;
OptimizationRemarkEmitter ORE(CalledFunction);
InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI,
GetAssumptionCache, nullptr, &PSI, &ORE);
GetAssumptionCache, nullptr, nullptr, &PSI,
&ORE);
ICCA.analyze();
OS << " Analyzing call of " << CalledFunction->getName()
<< "... (caller:" << CB->getCaller()->getName() << ")\n";
92 changes: 92 additions & 0 deletions llvm/test/Transforms/Inline/AArch64/memcpy-constant-size.ll
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt %s -mtriple=arm64-apple-macosx -passes=inline -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s

declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
declare ptr @__memcpy_chk(ptr, ptr, i64, i64)
declare ptr @__memmove_chk(ptr, ptr, i64, i64)
declare ptr @__mempcpy_chk(ptr, ptr, i64, i64)
declare ptr @__memset_chk(ptr, i32, i64, i64)

define void @callee(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @callee
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]])
; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 [[OBJSIZE]])
; CHECK-NEXT: ret void
;
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize)
ret void
}

define void @caller(ptr %dst, ptr %src) {
; CHECK-LABEL: define void @caller
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false)
; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]])
; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]])
; CHECK-NEXT: ret void
;
call void @callee(ptr %dst, ptr %src, i64 4)
ret void
}

define void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @objsize_toosmall_callee
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1)
; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 1)
; CHECK-NEXT: ret void
;
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1)
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 1)
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1)
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 1)
ret void
}

define void @objsize_toosmall_caller(ptr %dst, ptr %src) {
; CHECK-LABEL: define void @objsize_toosmall_caller
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
; CHECK-NEXT: call void @objsize_toosmall_callee(ptr [[DST]], ptr [[SRC]], i64 4)
; CHECK-NEXT: ret void
;
call void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 4)
ret void
}

define void @intrinsics_callee(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @intrinsics_callee
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false)
call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false)
call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 %size, i1 false)
ret void
}

define void @intrinsics_caller(ptr %dst, ptr %src) {
; CHECK-LABEL: define void @intrinsics_caller
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false)
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 4, i1 false)
; CHECK-NEXT: ret void
;
call void @intrinsics_callee(ptr %dst, ptr %src, i64 4)
ret void
}
95 changes: 95 additions & 0 deletions llvm/test/Transforms/PhaseOrdering/AArch64/memcpy-constant-size.ll
@@ -0,0 +1,95 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt %s -mtriple=arm64-apple-macosx -passes='default<O3>' -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s

declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
declare ptr @__memcpy_chk(ptr, ptr, i64, i64)
declare ptr @__memmove_chk(ptr, ptr, i64, i64)
declare ptr @__mempcpy_chk(ptr, ptr, i64, i64)
declare ptr @__memset_chk(ptr, i32, i64, i64)

define void @callee_memcpy(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @callee_memcpy
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
ret void
}

define void @callee_memmove(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @callee_memmove
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
ret void
}

define void @callee_mempcpy(ptr %dst, ptr %src, i64 %size) {
; CHECK-LABEL: define void @callee_mempcpy
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize)
ret void
}

define void @callee_memset(ptr %dst, i64 %size) {
; CHECK-LABEL: define void @callee_memset
; CHECK-SAME: (ptr [[DST:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[SIZE]], i1 false)
; CHECK-NEXT: ret void
;
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false)
%call.mempcpy = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize)
ret void
}

define void @caller_memcpy(ptr %dst, ptr %src) {
; CHECK-LABEL: define void @caller_memcpy
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
; CHECK-NEXT: ret void
;
call void @callee_memcpy(ptr %dst, ptr %src, i64 4)
ret void
}

define void @caller_memmove(ptr %dst, ptr %src) {
; CHECK-LABEL: define void @caller_memmove
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
; CHECK-NEXT: ret void
;
call void @callee_memmove(ptr %dst, ptr %src, i64 4)
ret void
}

define void @caller_mempcpy(ptr %dst, ptr %src) {
; CHECK-LABEL: define void @caller_mempcpy
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1
; CHECK-NEXT: ret void
;
call void @callee_mempcpy(ptr %dst, ptr %src, i64 4)
ret void
}

define void @caller_memset(ptr %dst) {
; CHECK-LABEL: define void @caller_memset
; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: store i32 0, ptr [[DST]], align 1
; CHECK-NEXT: ret void
;
call void @callee_memset(ptr %dst, i64 4)
ret void
}
