Implement X86 code generation for musttail

Currently, musttail codegen is relying on sibcall optimization, and reporting a fatal error if fails. Sibcall optimization fails when stack arguments need to be modified, which is insufficient for musttail. The logic for moving arguments in memory safely is already implemented for GuaranteedTailCallOpt. This change merely arranges for musttail calls to use it. No functional change for GuaranteedTailCallOpt. Reviewers: espindola Differential Revision: http://reviews.llvm.org/D3493 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207598 91177308-0d34-0410-b5e6-96231b3b80d8
author: Reid Kleckner <reid@kleckner.net> 2014-04-29 23:55:41 +0000
committer: Reid Kleckner <reid@kleckner.net> 2014-04-29 23:55:41 +0000
commit: 9902128e2a6450644fb4caf929ff0e3a2d6b0704 (patch)
tree: fe4b2e756fc7e604d69a318ded6e929c211133f6 /test
parent: fc288c83394800c2eb3e6627cd52f72d5f997b4a (diff)
download: llvm-9902128e2a6450644fb4caf929ff0e3a2d6b0704.tar.gz
llvm-9902128e2a6450644fb4caf929ff0e3a2d6b0704.tar.bz2
llvm-9902128e2a6450644fb4caf929ff0e3a2d6b0704.tar.xz
3 files changed, 226 insertions, 4 deletions
diff --git a/test/CodeGen/X86/musttail-indirect.ll b/test/CodeGen/X86/musttail-indirect.ll
new file mode 100644
index 0000000000..9d21b5ea5d
--- /dev/null
+++ b/test/CodeGen/X86/musttail-indirect.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=i686-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-win32 -O0 | FileCheck %s
+
+; IR simplified from the following C++ snippet compiled for i686-windows-msvc:
+
+; struct A { A(); ~A(); int a; };
+;
+; struct B {
+;   virtual int  f(int);
+;   virtual int  g(A, int, A);
+;   virtual void h(A, int, A);
+;   virtual A    i(A, int, A);
+;   virtual A    j(int);
+; };
+;
+; int  (B::*mp_f)(int)       = &B::f;
+; int  (B::*mp_g)(A, int, A) = &B::g;
+; void (B::*mp_h)(A, int, A) = &B::h;
+; A    (B::*mp_i)(A, int, A) = &B::i;
+; A    (B::*mp_j)(int)       = &B::j;
+
+; Each member pointer creates a thunk.  The ones with inalloca are required to
+; tail calls by the ABI, even at O0.
+
+%struct.B = type { i32 (...)** }
+%struct.A = type { i32 }
+
+; CHECK-LABEL: f_thunk:
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc i32 @f_thunk(%struct.B* %this, i32) {
+entry:
+  %1 = bitcast %struct.B* %this to i32 (%struct.B*, i32)***
+  %vtable = load i32 (%struct.B*, i32)*** %1
+  %2 = load i32 (%struct.B*, i32)** %vtable
+  %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, i32 %0)
+  ret i32 %3
+}
+
+; Inalloca thunks shouldn't require any stores to the stack.
+; CHECK-LABEL: g_thunk:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc i32 @g_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
+entry:
+  %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
+  %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+  %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 1
+  %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+  %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
+  ret i32 %3
+}
+
+; CHECK-LABEL: h_thunk:
+; CHECK: jmpl
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK-NOT: ret
+define x86_thiscallcc void @h_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
+entry:
+  %1 = bitcast %struct.B* %this to void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
+  %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+  %vfn = getelementptr inbounds void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 2
+  %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+  musttail call x86_thiscallcc void %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
+  ret void
+}
+
+; CHECK-LABEL: i_thunk:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc %struct.A* @i_thunk(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca) {
+entry:
+  %1 = bitcast %struct.B* %this to %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)***
+  %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1
+  %vfn = getelementptr inbounds %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vtable, i32 3
+  %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn
+  %3 = musttail call x86_thiscallcc %struct.A* %2(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca %0)
+  ret %struct.A* %3
+}
+
+; CHECK-LABEL: j_thunk:
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_thiscallcc void @j_thunk(%struct.A* noalias sret %agg.result, %struct.B* %this, i32) {
+entry:
+  %1 = bitcast %struct.B* %this to void (%struct.A*, %struct.B*, i32)***
+  %vtable = load void (%struct.A*, %struct.B*, i32)*** %1
+  %vfn = getelementptr inbounds void (%struct.A*, %struct.B*, i32)** %vtable, i32 4
+  %2 = load void (%struct.A*, %struct.B*, i32)** %vfn
+  musttail call x86_thiscallcc void %2(%struct.A* sret %agg.result, %struct.B* %this, i32 %0)
+  ret void
+}
+
+; CHECK-LABEL: _stdcall_thunk@8:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_stdcallcc i32 @stdcall_thunk(<{ %struct.B*, %struct.A }>* inalloca) {
+entry:
+  %this_ptr = getelementptr inbounds <{ %struct.B*, %struct.A }>* %0, i32 0, i32 0
+  %this = load %struct.B** %this_ptr
+  %1 = bitcast %struct.B* %this to i32 (<{ %struct.B*, %struct.A }>*)***
+  %vtable = load i32 (<{ %struct.B*, %struct.A }>*)*** %1
+  %vfn = getelementptr inbounds i32 (<{ %struct.B*, %struct.A }>*)** %vtable, i32 1
+  %2 = load i32 (<{ %struct.B*, %struct.A }>*)** %vfn
+  %3 = musttail call x86_stdcallcc i32 %2(<{ %struct.B*, %struct.A }>* inalloca %0)
+  ret i32 %3
+}
+
+; CHECK-LABEL: @fastcall_thunk@8:
+; CHECK-NOT: mov %{{.*}}, {{.*(.*esp.*)}}
+; CHECK: jmpl
+; CHECK-NOT: ret
+define x86_fastcallcc i32 @fastcall_thunk(%struct.B* inreg %this, <{ %struct.A }>* inalloca) {
+entry:
+  %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A }>*)***
+  %vtable = load i32 (%struct.B*, <{ %struct.A }>*)*** %1
+  %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A }>*)** %vtable, i32 1
+  %2 = load i32 (%struct.B*, <{ %struct.A }>*)** %vfn
+  %3 = musttail call x86_fastcallcc i32 %2(%struct.B* inreg %this, <{ %struct.A }>* inalloca %0)
+  ret i32 %3
+}
diff --git a/test/CodeGen/X86/musttail-thiscall.ll b/test/CodeGen/X86/musttail-thiscall.ll
new file mode 100644
index 0000000000..8ea12482e5
--- /dev/null
+++ b/test/CodeGen/X86/musttail-thiscall.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+
+; CHECK-LABEL: t1:
+; CHECK: jmp {{_?}}t1_callee
+define x86_thiscallcc void @t1(i8* %this) {
+  %adj = getelementptr i8* %this, i32 4
+  musttail call x86_thiscallcc void @t1_callee(i8* %adj)
+  ret void
+}
+declare x86_thiscallcc void @t1_callee(i8* %this)
+
+; CHECK-LABEL: t2:
+; CHECK: jmp {{_?}}t2_callee
+define x86_thiscallcc i32 @t2(i8* %this, i32 %a) {
+  %adj = getelementptr i8* %this, i32 4
+  %rv = musttail call x86_thiscallcc i32 @t2_callee(i8* %adj, i32 %a)
+  ret i32 %rv
+}
+declare x86_thiscallcc i32 @t2_callee(i8* %this, i32 %a)
+
+; CHECK-LABEL: t3:
+; CHECK: jmp {{_?}}t3_callee
+define x86_thiscallcc i8* @t3(i8* %this, <{ i8*, i32 }>* inalloca %args) {
+  %adj = getelementptr i8* %this, i32 4
+  %a_ptr = getelementptr <{ i8*, i32 }>* %args, i32 0, i32 1
+  store i32 0, i32* %a_ptr
+  %rv = musttail call x86_thiscallcc i8* @t3_callee(i8* %adj, <{ i8*, i32 }>* inalloca %args)
+  ret i8* %rv
+}
+declare x86_thiscallcc i8* @t3_callee(i8* %this, <{ i8*, i32 }>* inalloca %args);
diff --git a/test/CodeGen/X86/musttail.ll b/test/CodeGen/X86/musttail.ll
index 75b217f994..ca5d3119cf 100644
--- a/test/CodeGen/X86/musttail.ll
+++ b/test/CodeGen/X86/musttail.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -march=x86 < %s | FileCheck %s
-
-; FIXME: Eliminate this tail call at -O0, since musttail is a correctness
-; requirement.
-; RUN: not llc -march=x86 -O0 < %s
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+; RUN: llc -march=x86 -disable-tail-calls < %s | FileCheck %s
 
 declare void @t1_callee(i8*)
 define void @t1(i32* %a) {
@@ -21,3 +19,72 @@ define i32* @t2() {
   %w = bitcast i8* %v to i32*
   ret i32* %w
 }
+
+; Complex frame layout: stack realignment with dynamic alloca.
+define void @t3(i32 %n) alignstack(32) nounwind {
+entry:
+; CHECK: t3:
+; CHECK: pushl %ebp
+; CHECK: pushl %esi
+; CHECK: andl $-32, %esp
+; CHECK: movl %esp, %esi
+; CHECK: popl %esi
+; CHECK: popl %ebp
+; CHECK-NEXT: jmp {{_?}}t3_callee
+  %a = alloca i8, i32 %n
+  call void @capture(i8* %a)
+  musttail call void @t3_callee(i32 %n) nounwind
+  ret void
+}
+
+declare void @capture(i8*)
+declare void @t3_callee(i32)
+
+; Test that we actually copy in and out stack arguments that aren't forwarded
+; without modification.
+define i32 @t4({}* %fn, i32 %n, i32 %r) {
+; CHECK-LABEL: t4:
+; CHECK: incl %[[r:.*]]
+; CHECK: decl %[[n:.*]]
+; CHECK: movl %[[r]], {{[0-9]+}}(%esp)
+; CHECK: movl %[[n]], {{[0-9]+}}(%esp)
+; CHECK: jmpl *%{{.*}}
+
+entry:
+  %r1 = add i32 %r, 1
+  %n1 = sub i32 %n, 1
+  %fn_cast = bitcast {}* %fn to i32 ({}*, i32, i32)*
+  %r2 = musttail call i32 %fn_cast({}* %fn, i32 %n1, i32 %r1)
+  ret i32 %r2
+}
+
+; Combine the complex stack frame with the parameter modification.
+define i32 @t5({}* %fn, i32 %n, i32 %r) alignstack(32) {
+; CHECK-LABEL: t5:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: pushl %esi
+; 	Align the stack.
+; CHECK: andl $-32, %esp
+; CHECK: movl %esp, %esi
+; 	Modify the args.
+; CHECK: incl %[[r:.*]]
+; CHECK: decl %[[n:.*]]
+; 	Store them through ebp, since that's the only stable arg pointer.
+; CHECK: movl %[[r]], {{[0-9]+}}(%ebp)
+; CHECK: movl %[[n]], {{[0-9]+}}(%ebp)
+; 	Epilogue.
+; CHECK: leal {{[-0-9]+}}(%ebp), %esp
+; CHECK: popl %esi
+; CHECK: popl %ebp
+; CHECK: jmpl *%{{.*}}
+
+entry:
+  %a = alloca i8, i32 %n
+  call void @capture(i8* %a)
+  %r1 = add i32 %r, 1
+  %n1 = sub i32 %n, 1
+  %fn_cast = bitcast {}* %fn to i32 ({}*, i32, i32)*
+  %r2 = musttail call i32 %fn_cast({}* %fn, i32 %n1, i32 %r1)
+  ret i32 %r2
+}
author	Reid Kleckner <reid@kleckner.net>	2014-04-29 23:55:41 +0000
committer	Reid Kleckner <reid@kleckner.net>	2014-04-29 23:55:41 +0000
commit	9902128e2a6450644fb4caf929ff0e3a2d6b0704 (patch)
tree	fe4b2e756fc7e604d69a318ded6e929c211133f6 /test
parent	fc288c83394800c2eb3e6627cd52f72d5f997b4a (diff)
download	llvm-9902128e2a6450644fb4caf929ff0e3a2d6b0704.tar.gz llvm-9902128e2a6450644fb4caf929ff0e3a2d6b0704.tar.bz2 llvm-9902128e2a6450644fb4caf929ff0e3a2d6b0704.tar.xz