Skip to content

Commit

Permalink
Use musttail for variadic method thunks when possible
Browse files Browse the repository at this point in the history
This avoids cloning variadic virtual methods when the target supports
musttail and the return type is not covariant. I think we never
implemented this previously because it doesn't handle the covariant
case. But, in the MS ABI, there are some cases where vtable thunks must
be emitted even when the variadic method defintion is not available, so
it looks like we need to implement this. Do it for both ABIs, since it's
a nice size improvement and simplification for Itanium.

Emit an error when emitting thunks for variadic methods with a covariant
return type. This case is essentially not implementable unless the ABI
provides a way to perfectly forward variadic arguments without a tail
call.

Fixes PR43173.

Differential Revision: https://reviews.llvm.org/D67028

llvm-svn: 371269
  • Loading branch information
rnk committed Sep 6, 2019
1 parent c177919 commit 28328c3
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 19 deletions.
52 changes: 41 additions & 11 deletions clang/lib/CodeGen/CGVTables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,15 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
llvm::Function *BaseFn = cast<llvm::Function>(Callee);

// Cloning can't work if we don't have a definition. The Microsoft ABI may
// require thunks when a definition is not available. Emit an error in these
// cases.
if (!MD->isDefined()) {
CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments");
return Fn;
}
assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method");

// Clone to thunk.
llvm::ValueToValueMapTy VMap;

Expand Down Expand Up @@ -201,6 +210,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
Builder.SetInsertPoint(&*ThisStore);
llvm::Value *AdjustedThisPtr =
CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This);
AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr,
ThisStore->getOperand(0)->getType());
ThisStore->setOperand(0, AdjustedThisPtr);

if (!Thunk.Return.isEmpty()) {
Expand Down Expand Up @@ -291,14 +302,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee,
*this, LoadCXXThisAddress(), Thunk->This)
: LoadCXXThis();

if (CurFnInfo->usesInAlloca() || IsUnprototyped) {
// We don't handle return adjusting thunks, because they require us to call
// the copy constructor. For now, fall through and pretend the return
// adjustment was empty so we don't crash.
// If perfect forwarding is required a variadic method, a method using
// inalloca, or an unprototyped thunk, use musttail. Emit an error if this
// thunk requires a return adjustment, since that is impossible with musttail.
if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) {
if (Thunk && !Thunk->Return.isEmpty()) {
if (IsUnprototyped)
CGM.ErrorUnsupported(
MD, "return-adjusting thunk with incomplete parameter type");
else if (CurFnInfo->isVariadic())
llvm_unreachable("shouldn't try to emit musttail return-adjusting "
"thunks for variadic functions");
else
CGM.ErrorUnsupported(
MD, "non-trivial argument copy for return-adjusting thunk");
Expand Down Expand Up @@ -549,16 +563,32 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,

CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn);

// Thunks for variadic methods are special because in general variadic
// arguments cannot be perferctly forwarded. In the general case, clang
// implements such thunks by cloning the original function body. However, for
// thunks with no return adjustment on targets that support musttail, we can
// use musttail to perfectly forward the variadic arguments.
bool ShouldCloneVarArgs = false;
if (!IsUnprototyped && ThunkFn->isVarArg()) {
// Varargs thunks are special; we can't just generate a call because
// we can't copy the varargs. Our implementation is rather
// expensive/sucky at the moment, so don't generate the thunk unless
// we have to.
// FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly.
ShouldCloneVarArgs = true;
if (TI.Return.isEmpty()) {
switch (CGM.getTriple().getArch()) {
case llvm::Triple::x86_64:
case llvm::Triple::x86:
case llvm::Triple::aarch64:
ShouldCloneVarArgs = false;
break;
default:
break;
}
}
}

if (ShouldCloneVarArgs) {
if (UseAvailableExternallyLinkage)
return ThunkFn;
ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD,
TI);
ThunkFn =
CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI);
} else {
// Normal thunk body generation.
CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped);
Expand Down
5 changes: 3 additions & 2 deletions clang/test/CodeGenCXX/linetable-virtual-variadic.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
// Sparc64 is used because AArch64 and X86_64 would both use musttail.
// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-tables-only %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple sparc64-linux-gnu -emit-llvm -debug-info-kind=line-directives-only %s -o - | FileCheck %s
// Crasher for PR22929.
class Base {
virtual void VariadicFunction(...);
Expand Down
13 changes: 13 additions & 0 deletions clang/test/CodeGenCXX/ms-thunks-variadic-return.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %clang_cc1 -fno-rtti-data -triple x86_64-windows-msvc -emit-llvm-only %s -verify

// Verify that we error out on this return adjusting thunk that we can't emit.

struct A {
virtual A *clone(const char *f, ...) = 0;
};
struct B : virtual A {
// expected-error@+1 2 {{cannot compile this return-adjusting thunk with variadic arguments yet}}
B *clone(const char *f, ...) override;
};
struct C : B { int c; };
C c;
133 changes: 127 additions & 6 deletions clang/test/CodeGenCXX/thunks.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT %s
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-NONOPT --check-prefix=CHECK-DBG %s
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes | FileCheck --check-prefix=CHECK --check-prefix=CHECK-OPT %s
// Sparc64 doesn't support musttail (yet), so it uses method cloning for
// variadic thunks. Use it for testing.
// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT %s
// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -debug-info-kind=standalone -dwarf-version=5 -munwind-tables -emit-llvm -o - \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-NONOPT,CHECK-DBG %s
// RUN: %clang_cc1 %s -triple=sparc64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-CLONE,CHECK-OPT %s

// Test x86_64, which uses musttail for variadic thunks.
// RUN: %clang_cc1 %s -triple=x86_64-pc-linux-gnu -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
// RUN: | FileCheck --check-prefixes=CHECK,CHECK-TAIL,CHECK-OPT %s

// Finally, reuse these tests for the MS ABI.
// RUN: %clang_cc1 %s -triple=x86_64-windows-msvc -munwind-tables -emit-llvm -o - -O1 -disable-llvm-passes \
// RUN: | FileCheck --check-prefixes=WIN64 %s


namespace Test1 {

Expand All @@ -23,6 +37,11 @@ struct C : A, B {
// CHECK-LABEL: define void @_ZThn8_N5Test11C1fEv(
// CHECK-DBG-NOT: dbg.declare
// CHECK: ret void
//
// WIN64-LABEL: define dso_local void @"?f@C@Test1@@UEAAXXZ"(
// WIN64-LABEL: define linkonce_odr dso_local void @"?f@C@Test1@@W7EAAXXZ"(
// WIN64: getelementptr i8, i8* {{.*}}, i32 -8
// WIN64: ret void
void C::f() { }

}
Expand All @@ -45,6 +64,10 @@ struct B : virtual A {
// CHECK: ret void
void B::f() { }

// No thunk is used for this case in the MS ABI.
// WIN64-LABEL: define dso_local void @"?f@B@Test2@@UEAAXXZ"(
// WIN64-NOT: define {{.*}} void @"?f@B@Test2

}

namespace Test3 {
Expand All @@ -65,6 +88,7 @@ struct B : A {
};

// CHECK: define %{{.*}}* @_ZTch0_v0_n24_N5Test31B1fEv(
// WIN64: define weak_odr dso_local %{{.*}} @"?f@B@Test3@@QEAAPEAUV1@2@XZ"(
V2 *B::f() { return 0; }

}
Expand Down Expand Up @@ -92,6 +116,10 @@ struct __attribute__((visibility("protected"))) C : A, B {
// CHECK: ret void
void C::f() { }

// Visibility doesn't matter on COFF, but whatever. We could add an ELF test
// mode later.
// WIN64-LABEL: define protected void @"?f@C@Test4@@UEAAXXZ"(
// WIN64-LABEL: define linkonce_odr protected void @"?f@C@Test4@@W7EAAXXZ"(
}

// Check that the thunk gets internal linkage.
Expand Down Expand Up @@ -119,6 +147,8 @@ namespace Test4B {
c.f();
}
}
// Not sure why this isn't delayed like in Itanium.
// WIN64-LABEL: define internal void @"?f@C@?A0xAEF74CE7@Test4B@@UEAAXXZ"(

namespace Test5 {

Expand All @@ -134,6 +164,7 @@ struct B : virtual A {
void f(B b) {
b.f();
}
// No thunk in MS ABI in this case.
}

namespace Test6 {
Expand Down Expand Up @@ -178,6 +209,10 @@ namespace Test6 {
// CHECK: {{call void @_ZN5Test66Thunks1fEv.*sret}}
// CHECK: ret void
X Thunks::f() { return X(); }

// WIN64-LABEL: define linkonce_odr dso_local void @"?f@Thunks@Test6@@WBA@EAA?AUX@2@XZ"({{.*}} sret %{{.*}})
// WIN64-NOT: memcpy
// WIN64: tail call void @"?f@Thunks@Test6@@UEAA?AUX@2@XZ"({{.*}} sret %{{.*}})
}

namespace Test7 {
Expand Down Expand Up @@ -224,6 +259,8 @@ namespace Test7 {
// CHECK-NOT: memcpy
// CHECK: ret void
void testD() { D d; }

// MS C++ ABI doesn't use a thunk, so this case isn't interesting.
}

namespace Test8 {
Expand All @@ -241,6 +278,8 @@ namespace Test8 {
// CHECK-NOT: memcpy
// CHECK: ret void
void C::bar(NonPOD var) {}

// MS C++ ABI doesn't use a thunk, so this case isn't interesting.
}

// PR7241: Emitting thunks for a method shouldn't require the vtable for
Expand Down Expand Up @@ -287,6 +326,16 @@ namespace Test11 {
// CHECK: define {{.*}} @_ZTch0_v0_n32_N6Test111C1fEv(
// CHECK-DBG-NOT: dbg.declare
// CHECK: ret

// WIN64-LABEL: define dso_local %{{.*}}* @"?f@C@Test11@@UEAAPEAU12@XZ"(i8*

// WIN64-LABEL: define weak_odr dso_local %{{.*}}* @"?f@C@Test11@@QEAAPEAUA@2@XZ"(i8*
// WIN64: call %{{.*}}* @"?f@C@Test11@@UEAAPEAU12@XZ"(i8* %{{.*}})
//
// Match the vbtable return adjustment.
// WIN64: load i32*, i32** %{{[^,]*}}, align 8
// WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
// WIN64: load i32, i32* %{{[^,]*}}, align 4
}

// Varargs thunk test.
Expand All @@ -301,7 +350,8 @@ namespace Test12 {
virtual void c();
virtual C* f(int x, ...);
};
C* C::f(int x, ...) { return this; }
C* makeC();
C* C::f(int x, ...) { return makeC(); }

// C::f
// CHECK: define {{.*}} @_ZN6Test121C1fEiz
Expand All @@ -312,6 +362,28 @@ namespace Test12 {
// CHECK-DBG-NOT: dbg.declare
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -8
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8

// The vtable layout goes:
// C vtable in A:
// - f impl, no adjustment
// C vtable in B:
// - f thunk 2, covariant, clone
// - f thunk 2, musttail this adjust to impl
// FIXME: The weak_odr linkage is probably not necessary and just an artifact
// of Itanium ABI details.
// WIN64-LABEL: define dso_local {{.*}} @"?f@C@Test12@@UEAAPEAU12@HZZ"(
// WIN64: call %{{.*}}* @"?makeC@Test12@@YAPEAUC@1@XZ"()
//
// This thunk needs return adjustment, clone.
// WIN64-LABEL: define weak_odr dso_local {{.*}} @"?f@C@Test12@@W7EAAPEAUB@2@HZZ"(
// WIN64: call %{{.*}}* @"?makeC@Test12@@YAPEAUC@1@XZ"()
// WIN64: getelementptr inbounds i8, i8* %{{.*}}, i32 8
//
// Musttail call back to the A implementation after this adjustment from B to A.
// WIN64-LABEL: define linkonce_odr dso_local %{{.*}}* @"?f@C@Test12@@W7EAAPEAU12@HZZ"(
// WIN64: getelementptr i8, i8* %{{[^,]*}}, i32 -8
// WIN64: musttail call {{.*}} @"?f@C@Test12@@UEAAPEAU12@HZZ"(
C c;
}

// PR13832
Expand Down Expand Up @@ -339,6 +411,17 @@ namespace Test13 {
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 -24
// CHECK: getelementptr inbounds i8, i8* {{.*}}, i64 8
// CHECK: ret %"struct.Test13::D"*

// WIN64-LABEL: define weak_odr dso_local dereferenceable(32) %"struct.Test13::D"* @"?foo1@D@Test13@@$4PPPPPPPE@A@EAAAEAUB1@2@XZ"(
// This adjustment.
// WIN64: getelementptr inbounds i8, i8* {{.*}}, i64 -12
// Call implementation.
// WIN64: call {{.*}} @"?foo1@D@Test13@@UEAAAEAU12@XZ"(i8* {{.*}})
// Virtual + nonvirtual return adjustment.
// WIN64: load i32*, i32** %{{[^,]*}}, align 8
// WIN64: getelementptr inbounds i32, i32* %{{[^,]*}}, i32 1
// WIN64: load i32, i32* %{{[^,]*}}, align 4
// WIN64: getelementptr inbounds i8, i8* %{{[^,]*}}, i32 %{{[^,]*}}
}

namespace Test14 {
Expand Down Expand Up @@ -374,9 +457,16 @@ namespace Test15 {
void C::c() {}

// C::c
// CHECK: declare void @_ZN6Test151C1fEiz
// CHECK-CLONE: declare void @_ZN6Test151C1fEiz
// non-virtual thunk to C::f
// CHECK: declare void @_ZThn8_N6Test151C1fEiz
// CHECK-CLONE: declare void @_ZThn8_N6Test151C1fEiz

// If we have musttail, then we emit the thunk as available_externally.
// CHECK-TAIL: declare void @_ZN6Test151C1fEiz
// CHECK-TAIL: define available_externally void @_ZThn8_N6Test151C1fEiz({{.*}})
// CHECK-TAIL: musttail call void (%"struct.Test15::C"*, i32, ...) @_ZN6Test151C1fEiz({{.*}}, ...)

// MS C++ ABI doesn't use a thunk, so this case isn't interesting.
}

namespace Test16 {
Expand All @@ -398,6 +488,33 @@ D::~D() {}
// CHECK: ret void
}

namespace Test17 {
class A {
virtual void f(const char *, ...);
};
class B {
virtual void f(const char *, ...);
};
class C : A, B {
virtual void anchor();
void f(const char *, ...) override;
};
// Key method and object anchor vtable for Itanium and MSVC.
void C::anchor() {}
C c;

// CHECK-CLONE-LABEL: declare void @_ZThn8_N6Test171C1fEPKcz(

// CHECK-TAIL-LABEL: define available_externally void @_ZThn8_N6Test171C1fEPKcz(
// CHECK-TAIL: getelementptr inbounds i8, i8* %{{.*}}, i64 -8
// CHECK-TAIL: musttail call {{.*}} @_ZN6Test171C1fEPKcz({{.*}}, ...)

// MSVC-LABEL: define linkonce_odr dso_local void @"?f@C@Test17@@G7EAAXPEBDZZ"
// MSVC-SAME: (%"class.Test17::C"* %this, i8* %[[ARG:[^,]+]], ...)
// MSVC: getelementptr i8, i8* %{{.*}}, i32 -8
// MSVC: musttail call void (%"class.Test17::C"*, i8*, ...) @"?f@C@Test17@@EEAAXPEBDZZ"(%"class.Test17::C"* %{{.*}}, i8* %[[ARG]], ...)
}

/**** The following has to go at the end of the file ****/

// checking without opt
Expand All @@ -421,5 +538,9 @@ D::~D() {}
// CHECK-OPT-LABEL: define linkonce_odr void @_ZN6Test101C3fooEv
// CHECK-OPT-LABEL: define linkonce_odr void @_ZThn8_N6Test101C3fooEv

// This is from Test10:
// WIN64-LABEL: define linkonce_odr dso_local void @"?foo@C@Test10@@UEAAXXZ"(
// WIN64-LABEL: define linkonce_odr dso_local void @"?foo@C@Test10@@W7EAAXXZ"(

// CHECK-NONOPT: attributes [[NUW]] = { noinline nounwind optnone uwtable{{.*}} }
// CHECK-OPT: attributes [[NUW]] = { nounwind uwtable{{.*}} }

0 comments on commit 28328c3

Please sign in to comment.