summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/CodeGenerator.rst4
-rw-r--r--docs/LangRef.html27
-rw-r--r--include/llvm/CallingConv.h4
-rw-r--r--lib/Target/X86/X86CallingConv.td48
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp9
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp11
-rw-r--r--test/CodeGen/X86/hipe-cc.ll77
-rw-r--r--test/CodeGen/X86/hipe-cc64.ll87
8 files changed, 254 insertions, 13 deletions
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 21af969d42..104b848ebf 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -1982,8 +1982,8 @@ Tail call optimization
Tail call optimization, callee reusing the stack of the caller, is currently
supported on x86/x86-64 and PowerPC. It is performed if:
-* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC
- call convention).
+* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
+ calling convention) or ``cc 11`` (HiPE calling convention).
* The call is a tail call - in tail position (ret immediately follows call and
ret uses value of call or is void).
diff --git a/docs/LangRef.html b/docs/LangRef.html
index b4dd976306..b1ed4e6e48 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -729,10 +729,10 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
target to use whatever tricks it wants to produce fast code for the
target, without having to conform to an externally specified ABI
(Application Binary Interface).
- <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
- when this or the GHC convention is used.</a> This calling convention
- does not support varargs and requires the prototype of all callees to
- exactly match the prototype of the function definition.</dd>
+ <a href="CodeGenerator.html#id80">Tail calls can only be optimized
+ when this, the GHC or the HiPE convention is used.</a> This calling
+ convention does not support varargs and requires the prototype of all
+ callees to exactly match the prototype of the function definition.</dd>
<dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
<dd>This calling convention attempts to make code in the caller as efficient
@@ -749,7 +749,7 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
disabling callee save registers. This calling convention should not be
used lightly but only for specific situations such as an alternative to
the <em>register pinning</em> performance technique often used when
- implementing functional programming languages.At the moment only X86
+ implementing functional programming languages. At the moment only X86
supports this convention and it has the following limitations:
<ul>
<li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
@@ -758,10 +758,25 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
6 floating point parameters.</li>
</ul>
This calling convention supports
- <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
+ <a href="CodeGenerator.html#id80">tail call optimization</a> but
requires both the caller and callee are using it.
</dd>
+ <dt><b>"<tt>cc <em>11</em></tt>" - The HiPE calling convention</b>:</dt>
+ <dd>This calling convention has been implemented specifically for use by the
+ <a href="http://www.it.uu.se/research/group/hipe/">High-Performance Erlang
+ (HiPE)</a> compiler, <em>the</em> native code compiler of the
+ <a href="http://www.erlang.org/download.shtml">Ericsson's Open Source
+ Erlang/OTP system</a>. It uses more registers for argument passing than
+ the ordinary C calling convention and defines no callee-saved registers.
+ The calling convention properly supports
+ <a href="CodeGenerator.html#id80">tail call optimization</a> but requires
+ that both the caller and the callee use it. It uses a <em>register
+ pinning</em> mechanism, similar to GHC's convention, for keeping
+ frequently accessed runtime components pinned to specific hardware
+ registers. At the moment only X86 supports this convention (both 32 and 64
+ bit).</dd>
+
<dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
<dd>Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific calling
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index 053f4eb326..699cea331c 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -47,6 +47,10 @@ namespace CallingConv {
// GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
GHC = 10,
+ // HiPE - Calling convention used by the High-Performance Erlang Compiler
+ // (HiPE).
+ HiPE = 11,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 6786756c7f..947e7d7849 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -103,6 +103,15 @@ def RetCC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<RetCC_X86Common>
]>;
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_HiPE : CallingConv<[
+ // Promote all types to i32
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -123,17 +132,30 @@ def RetCC_X86_Win64_C : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
+// X86-64 HiPE return-value convention.
+def RetCC_X86_64_HiPE : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: HP, P, VAL1, VAL2
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
+]>;
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // If HiPE, use RetCC_X86_32_HiPE.
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
]>;
// This is the root return-value convention for the X86-64 backend.
def RetCC_X86_64 : CallingConv<[
+ // HiPE uses RetCC_X86_64_HiPE
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -291,6 +313,18 @@ def CC_X86_64_GHC : CallingConv<[
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
+def CC_X86_64_HiPE : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3
+ CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -422,6 +456,18 @@ def CC_X86_32_GHC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
+def CC_X86_32_HiPE : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2
+ CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -432,6 +478,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -440,6 +487,7 @@ def CC_X86_32 : CallingConv<[
// This is the root argument convention for the X86-64 backend.
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
+ CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 80dd9ef613..3192a43bdb 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1822,7 +1822,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -1909,7 +1910,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2254,7 +2255,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -3119,6 +3120,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
return TailCallOpt;
case CallingConv::GHC:
return TailCallOpt;
+ case CallingConv::HiPE:
+ return TailCallOpt;
}
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 73ac747742..11f2d7aa70 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -190,6 +190,11 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &X86::GR64_TCW64RegClass;
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_TCRegClass;
+
+ const Function *F = MF.getFunction();
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
return &X86::GR32_TCRegClass;
}
}
@@ -230,6 +235,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
bool oclBiCall = false;
+ bool hipeCall = false;
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (MF) {
@@ -237,9 +243,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const Function *F = MF->getFunction();
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
+ hipeCall = (F ? F->getCallingConv() == CallingConv::HiPE : false);
}
- if (ghcCall)
+ if (ghcCall || hipeCall)
return CSR_NoRegs_SaveList;
if (oclBiCall) {
if (HasAVX && IsWin64)
@@ -273,7 +280,7 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (!HasAVX && !IsWin64 && Is64Bit)
return CSR_64_Intel_OCL_BI_RegMask;
}
- if (CC == CallingConv::GHC)
+ if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll
new file mode 100644
index 0000000000..0de4491529
--- /dev/null
+++ b/test/CodeGen/X86/hipe-cc.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu | FileCheck %s
+
+; Check the HiPE calling convention works (x86-32)
+
+define void @zap(i32 %a, i32 %b) nounwind {
+entry:
+ ; CHECK: movl 40(%esp), %eax
+ ; CHECK-NEXT: movl 44(%esp), %edx
+ ; CHECK-NEXT: movl $8, %ecx
+ ; CHECK-NEXT: calll addfour
+ %0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8)
+ %res = extractvalue {i32, i32, i32} %0, 2
+
+ ; CHECK: movl %eax, 16(%esp)
+ ; CHECK-NEXT: movl $2, 12(%esp)
+ ; CHECK-NEXT: movl $1, 8(%esp)
+ ; CHECK: calll foo
+ tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind
+ ret void
+}
+
+define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ ; CHECK: addl %edx, %eax
+ ; CHECK-NEXT: addl %ecx, %eax
+ %0 = add i32 %x, %y
+ %1 = add i32 %0, %z
+
+ ; CHECK: ret
+ %res = insertvalue {i32, i32, i32} undef, i32 %1, 2
+ ret {i32, i32, i32} %res
+}
+
+define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind {
+entry:
+ ; CHECK: movl %esi, 16(%esp)
+ ; CHECK-NEXT: movl %ebp, 12(%esp)
+ ; CHECK-NEXT: movl %eax, 8(%esp)
+ ; CHECK-NEXT: movl %edx, 4(%esp)
+ ; CHECK-NEXT: movl %ecx, (%esp)
+ %hp_var = alloca i32
+ %p_var = alloca i32
+ %arg0_var = alloca i32
+ %arg1_var = alloca i32
+ %arg2_var = alloca i32
+ store i32 %hp, i32* %hp_var
+ store i32 %p, i32* %p_var
+ store i32 %arg0, i32* %arg0_var
+ store i32 %arg1, i32* %arg1_var
+ store i32 %arg2, i32* %arg2_var
+
+ ; CHECK: movl 4(%esp), %edx
+ ; CHECK-NEXT: movl 8(%esp), %eax
+ ; CHECK-NEXT: movl 12(%esp), %ebp
+ ; CHECK-NEXT: movl 16(%esp), %esi
+ %0 = load i32* %hp_var
+ %1 = load i32* %p_var
+ %2 = load i32* %arg0_var
+ %3 = load i32* %arg1_var
+ %4 = load i32* %arg2_var
+ ; CHECK: jmp bar
+ tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind
+ ret void
+}
+
+define cc 11 void @baz() nounwind {
+ %tmp_clos = load i32* @clos
+ %tmp_clos2 = inttoptr i32 %tmp_clos to i32*
+ %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)*
+ ; CHECK: movl $42, %eax
+ ; CHECK-NEXT: jmpl *clos
+ tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind
+ ret void
+}
+
+@clos = external constant i32
+declare cc 11 void @bar(i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll
new file mode 100644
index 0000000000..6354a4c1ec
--- /dev/null
+++ b/test/CodeGen/X86/hipe-cc64.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Check the HiPE calling convention works (x86-64)
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+ ; CHECK: movq %rsi, %rax
+ ; CHECK-NEXT: movq %rdi, %rsi
+ ; CHECK-NEXT: movq %rax, %rdx
+ ; CHECK-NEXT: movl $8, %ecx
+ ; CHECK-NEXT: movl $9, %r8d
+ ; CHECK-NEXT: callq addfour
+ %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
+ %res = extractvalue {i64, i64, i64} %0, 2
+
+ ; CHECK: movl $1, %edx
+ ; CHECK-NEXT: movl $2, %ecx
+ ; CHECK-NEXT: movl $3, %r8d
+ ; CHECK-NEXT: movq %rax, %r9
+ ; CHECK: callq foo
+ tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind
+ ret void
+}
+
+define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
+entry:
+ ; CHECK: leaq (%rsi,%rdx), %rax
+ ; CHECK-NEXT: addq %rcx, %rax
+ ; CHECK-NEXT: addq %r8, %rax
+ %0 = add i64 %x, %y
+ %1 = add i64 %0, %z
+ %2 = add i64 %1, %w
+
+ ; CHECK: ret
+ %res = insertvalue {i64, i64, i64} undef, i64 %2, 2
+ ret {i64, i64, i64} %res
+}
+
+define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
+entry:
+ ; CHECK: movq %r15, 40(%rsp)
+ ; CHECK-NEXT: movq %rbp, 32(%rsp)
+ ; CHECK-NEXT: movq %rsi, 24(%rsp)
+ ; CHECK-NEXT: movq %rdx, 16(%rsp)
+ ; CHECK-NEXT: movq %rcx, 8(%rsp)
+ ; CHECK-NEXT: movq %r8, (%rsp)
+ %hp_var = alloca i64
+ %p_var = alloca i64
+ %arg0_var = alloca i64
+ %arg1_var = alloca i64
+ %arg2_var = alloca i64
+ %arg3_var = alloca i64
+ store i64 %hp, i64* %hp_var
+ store i64 %p, i64* %p_var
+ store i64 %arg0, i64* %arg0_var
+ store i64 %arg1, i64* %arg1_var
+ store i64 %arg2, i64* %arg2_var
+ store i64 %arg3, i64* %arg3_var
+
+ ; CHECK: movq 8(%rsp), %rcx
+ ; CHECK-NEXT: movq 16(%rsp), %rdx
+ ; CHECK-NEXT: movq 24(%rsp), %rsi
+ ; CHECK-NEXT: movq 32(%rsp), %rbp
+ ; CHECK-NEXT: movq 40(%rsp), %r15
+ %0 = load i64* %hp_var
+ %1 = load i64* %p_var
+ %2 = load i64* %arg0_var
+ %3 = load i64* %arg1_var
+ %4 = load i64* %arg2_var
+ %5 = load i64* %arg3_var
+ ; CHECK: jmp bar
+ tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
+ ret void
+}
+
+define cc 11 void @baz() nounwind {
+ %tmp_clos = load i64* @clos
+ %tmp_clos2 = inttoptr i64 %tmp_clos to i64*
+ %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
+ ; CHECK: movl $42, %esi
+ ; CHECK-NEXT: jmpq *(%rax)
+ tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind
+ ret void
+}
+
+@clos = external constant i64
+declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)