-
Notifications
You must be signed in to change notification settings - Fork 12.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Inliner] Don't count a call penalty for foldable __memcpy_chk and similar (#117876)
When the size is an appropriate constant, __memcpy_chk will turn into a memcpy that gets folded away by InstCombine. Therefore this patch avoids counting these as calls for purposes of inlining costs. This is only really relevant on platforms whose headers redirect memcpy to __memcpy_chk (such as Darwin). On platforms that use intrinsics, memcpy and similar functions are already exempt from call penalties.
- Loading branch information
1 parent
fe04290
commit 8fb748b
Showing
4 changed files
with
258 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
92 changes: 92 additions & 0 deletions
92
llvm/test/Transforms/Inline/AArch64/memcpy-constant-size.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 | ||
; RUN: opt %s -mtriple=arm64-apple-macosx -passes=inline -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s | ||
|
||
declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1) | ||
declare ptr @__memcpy_chk(ptr, ptr, i64, i64) | ||
declare ptr @__memmove_chk(ptr, ptr, i64, i64) | ||
declare ptr @__mempcpy_chk(ptr, ptr, i64, i64) | ||
declare ptr @__memset_chk(ptr, i32, i64, i64) | ||
|
||
; Callee for the inline-cost test. It queries llvm.objectsize for %dst and | ||
; passes that result, together with the non-constant %size, to each of the | ||
; four checked library calls (__memcpy_chk, __memmove_chk, __mempcpy_chk, | ||
; __memset_chk). The assertions expect this body to be emitted unchanged. | ||
define void @callee(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @callee | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) { | ||
; CHECK-NEXT: [[OBJSIZE:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false) | ||
; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]]) | ||
; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]]) | ||
; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 [[OBJSIZE]]) | ||
; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 [[OBJSIZE]]) | ||
; CHECK-NEXT: ret void | ||
; | ||
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false) | ||
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize) | ||
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize) | ||
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize) | ||
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize) | ||
ret void | ||
} | ||
|
||
; Calls @callee with a constant size of 4. The assertions expect the callee | ||
; body to be inlined here (the four _chk calls appear with i64 4), although | ||
; the RUN line sets -inline-threshold=2 and -inline-call-penalty=5. Per the | ||
; commit description, foldable _chk calls with a constant size are not | ||
; counted as calls for inlining cost. | ||
define void @caller(ptr %dst, ptr %src) { | ||
; CHECK-LABEL: define void @caller | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) { | ||
; CHECK-NEXT: [[OBJSIZE_I:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[DST]], i1 false, i1 true, i1 false) | ||
; CHECK-NEXT: [[CALL_MEMCPY_I:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]]) | ||
; CHECK-NEXT: [[CALL_MEMMOVE_I:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]]) | ||
; CHECK-NEXT: [[CALL_MEMPCPY_I:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 4, i64 [[OBJSIZE_I]]) | ||
; CHECK-NEXT: [[CALL_MEMSET_I:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 4, i64 [[OBJSIZE_I]]) | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @callee(ptr %dst, ptr %src, i64 4) | ||
ret void | ||
} | ||
|
||
; Variant of @callee in which the object-size argument of every _chk call is | ||
; the constant 1 rather than an llvm.objectsize result; %size is still a | ||
; function argument. The assertions expect this body to be emitted unchanged. | ||
define void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @objsize_toosmall_callee | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) { | ||
; CHECK-NEXT: [[CALL_MEMCPY:%.*]] = call ptr @__memcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1) | ||
; CHECK-NEXT: [[CALL_MEMMOVE:%.*]] = call ptr @__memmove_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1) | ||
; CHECK-NEXT: [[CALL_MEMPCPY:%.*]] = call ptr @__mempcpy_chk(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i64 1) | ||
; CHECK-NEXT: [[CALL_MEMSET:%.*]] = call ptr @__memset_chk(ptr [[DST]], i32 0, i64 [[SIZE]], i64 1) | ||
; CHECK-NEXT: ret void | ||
; | ||
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1) | ||
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 1) | ||
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 1) | ||
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 1) | ||
ret void | ||
} | ||
|
||
; Calls @objsize_toosmall_callee with a constant size of 4. The assertions | ||
; expect the call to remain in place, i.e. the callee is NOT inlined. | ||
; NOTE(review): presumably the size 4 exceeds the object size 1, so the _chk | ||
; calls are not foldable and still incur the call penalty - confirm against | ||
; the inline cost analysis. | ||
define void @objsize_toosmall_caller(ptr %dst, ptr %src) { | ||
; CHECK-LABEL: define void @objsize_toosmall_caller | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) { | ||
; CHECK-NEXT: call void @objsize_toosmall_callee(ptr [[DST]], ptr [[SRC]], i64 4) | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @objsize_toosmall_callee(ptr %dst, ptr %src, i64 4) | ||
ret void | ||
} | ||
|
||
; Callee that uses the llvm.memcpy, llvm.memmove and llvm.memset intrinsics | ||
; directly (no _chk library calls), each with the non-constant %size. The | ||
; assertions expect this body to be emitted unchanged. | ||
define void @intrinsics_callee(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @intrinsics_callee | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) { | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false) | ||
call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 false) | ||
call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 %size, i1 false) | ||
ret void | ||
} | ||
|
||
; Calls @intrinsics_callee with a constant size of 4. The assertions expect | ||
; the three intrinsic calls to be inlined here with i64 4. Per the commit | ||
; description, memory intrinsics were already exempt from call penalties. | ||
define void @intrinsics_caller(ptr %dst, ptr %src) { | ||
; CHECK-LABEL: define void @intrinsics_caller | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) { | ||
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false) | ||
; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 4, i1 false) | ||
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr [[DST]], i8 0, i64 4, i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @intrinsics_callee(ptr %dst, ptr %src, i64 4) | ||
ret void | ||
} |
95 changes: 95 additions & 0 deletions
95
llvm/test/Transforms/PhaseOrdering/AArch64/memcpy-constant-size.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 | ||
; RUN: opt %s -mtriple=arm64-apple-macosx -passes='default<O3>' -inline-threshold=2 -inline-call-penalty=5 -S | FileCheck %s | ||
|
||
declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1) | ||
declare ptr @__memcpy_chk(ptr, ptr, i64, i64) | ||
declare ptr @__memmove_chk(ptr, ptr, i64, i64) | ||
declare ptr @__mempcpy_chk(ptr, ptr, i64, i64) | ||
declare ptr @__memset_chk(ptr, i32, i64, i64) | ||
|
||
; Calls __memcpy_chk with a non-constant %size and the llvm.objectsize of | ||
; %dst. After the O3 pipeline named on the RUN line, the assertions expect | ||
; the checked call to have become a plain llvm.memcpy intrinsic tail call. | ||
define void @callee_memcpy(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @callee_memcpy | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false) | ||
%call.memcpy = call ptr @__memcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize) | ||
ret void | ||
} | ||
|
||
; Calls __memmove_chk with a non-constant %size and the llvm.objectsize of | ||
; %dst. After the O3 pipeline, the assertions expect the checked call to | ||
; have become a plain llvm.memmove intrinsic tail call. | ||
define void @callee_memmove(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @callee_memmove | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { | ||
; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false) | ||
%call.memmove = call ptr @__memmove_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize) | ||
ret void | ||
} | ||
|
||
; Calls __mempcpy_chk with a non-constant %size and the llvm.objectsize of | ||
; %dst; the returned pointer is unused. After the O3 pipeline, the | ||
; assertions expect a plain llvm.memcpy intrinsic tail call in its place. | ||
define void @callee_mempcpy(ptr %dst, ptr %src, i64 %size) { | ||
; CHECK-LABEL: define void @callee_mempcpy | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR1]] { | ||
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST]], ptr align 1 [[SRC]], i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false) | ||
%call.mempcpy = call ptr @__mempcpy_chk(ptr %dst, ptr %src, i64 %size, i64 %objsize) | ||
ret void | ||
} | ||
|
||
; Calls __memset_chk (fill value 0) with a non-constant %size and the | ||
; llvm.objectsize of %dst; the returned pointer is unused. After the O3 | ||
; pipeline, the assertions expect a plain llvm.memset intrinsic tail call. | ||
define void @callee_memset(ptr %dst, i64 %size) { | ||
; CHECK-LABEL: define void @callee_memset | ||
; CHECK-SAME: (ptr [[DST:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 [[DST]], i8 0, i64 [[SIZE]], i1 false) | ||
; CHECK-NEXT: ret void | ||
; | ||
%objsize = call i64 @llvm.objectsize.i64.p0(ptr %dst, i1 false, i1 true, i1 false) | ||
%call.memset = call ptr @__memset_chk(ptr %dst, i32 0, i64 %size, i64 %objsize) | ||
ret void | ||
} | ||
|
||
; Calls @callee_memcpy with a constant size of 4. After the O3 pipeline, the | ||
; assertions expect no call at all: the copy appears as a single i32 load | ||
; from %src and i32 store to %dst, both with align 1. | ||
define void @caller_memcpy(ptr %dst, ptr %src) { | ||
; CHECK-LABEL: define void @caller_memcpy | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr nocapture readonly [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1 | ||
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1 | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @callee_memcpy(ptr %dst, ptr %src, i64 4) | ||
ret void | ||
} | ||
|
||
; Calls @callee_memmove with a constant size of 4. After the O3 pipeline, | ||
; the assertions expect no call at all: the move appears as a single i32 | ||
; load from %src and i32 store to %dst, both with align 1. | ||
define void @caller_memmove(ptr %dst, ptr %src) { | ||
; CHECK-LABEL: define void @caller_memmove | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] { | ||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1 | ||
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1 | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @callee_memmove(ptr %dst, ptr %src, i64 4) | ||
ret void | ||
} | ||
|
||
; Calls @callee_mempcpy with a constant size of 4. After the O3 pipeline, | ||
; the assertions expect no call at all: the copy appears as a single i32 | ||
; load from %src and i32 store to %dst, both with align 1. | ||
define void @caller_mempcpy(ptr %dst, ptr %src) { | ||
; CHECK-LABEL: define void @caller_mempcpy | ||
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]]) local_unnamed_addr #[[ATTR1]] { | ||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 1 | ||
; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 1 | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @callee_mempcpy(ptr %dst, ptr %src, i64 4) | ||
ret void | ||
} | ||
|
||
; Calls @callee_memset with a constant size of 4. After the O3 pipeline, the | ||
; assertions expect no call at all: the fill appears as a single store of | ||
; i32 0 to %dst with align 1. | ||
define void @caller_memset(ptr %dst) { | ||
; CHECK-LABEL: define void @caller_memset | ||
; CHECK-SAME: (ptr [[DST:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
; CHECK-NEXT: store i32 0, ptr [[DST]], align 1 | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @callee_memset(ptr %dst, i64 4) | ||
ret void | ||
} |