summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/LangRef.rst5
-rw-r--r--include/llvm/Support/CallSite.h5
-rw-r--r--include/llvm/Target/TargetCallingConv.h5
-rw-r--r--include/llvm/Target/TargetLowering.h10
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp1
-rw-r--r--lib/IR/Mangler.cpp4
-rw-r--r--lib/Target/X86/X86FastISel.cpp4
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp29
-rw-r--r--test/CodeGen/X86/inalloca-ctor.ll34
-rw-r--r--test/CodeGen/X86/inalloca-invoke.ll54
-rw-r--r--test/CodeGen/X86/inalloca-stdcall.ll26
-rw-r--r--test/CodeGen/X86/inalloca.ll65
13 files changed, 251 insertions, 17 deletions
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 62ce112d57..07e94a9f84 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -792,7 +792,10 @@ Currently, only the following parameter attributes are defined:
An argument allocation may be used by a call at most once because
the call may deallocate it. The ``inalloca`` attribute cannot be
used in conjunction with other attributes that affect argument
- storage, like ``inreg``, ``nest``, ``sret``, or ``byval``.
+ storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
+ ``inalloca`` attribute also disables LLVM's implicit lowering of
+ large aggregate return values, which means that frontend authors
+ must lower them with ``sret`` pointers.
When the call site is reached, the argument allocation must have
been the most recent stack allocation that is still live, or the
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index d2c47b53ed..f4324cb7ba 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -268,9 +268,10 @@ public:
paramHasAttr(ArgNo + 1, Attribute::InAlloca);
}
- /// @brief Determine if there are any inalloca arguments.
+ /// @brief Determine if there are is an inalloca argument. Only the last
+ /// argument can have the inalloca attribute.
bool hasInAllocaArgument() const {
- return getAttributes().hasAttrSomewhere(Attribute::InAlloca);
+ return paramHasAttr(arg_size(), Attribute::InAlloca);
}
bool doesNotAccessMemory(unsigned ArgNo) const {
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index 9cc52a591b..758a9c9ca8 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -42,6 +42,8 @@ namespace ISD {
static const uint64_t ByValAlignOffs = 7;
static const uint64_t Split = 1ULL<<11;
static const uint64_t SplitOffs = 11;
+ static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca
+ static const uint64_t InAllocaOffs = 12;
static const uint64_t OrigAlign = 0x1FULL<<27;
static const uint64_t OrigAlignOffs = 27;
static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size
@@ -68,6 +70,9 @@ namespace ISD {
bool isByVal() const { return Flags & ByVal; }
void setByVal() { Flags |= One << ByValOffs; }
+ bool isInAlloca() const { return Flags & InAlloca; }
+ void setInAlloca() { Flags |= One << InAllocaOffs; }
+
bool isNest() const { return Flags & Nest; }
void setNest() { Flags |= One << NestOffs; }
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index e9d87291aa..b5c7e43858 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -609,8 +609,9 @@ public:
return getValueType(Ty, AllowUnknown).getSimpleVT();
}
- /// Return the desired alignment for ByVal aggregate function arguments in the
- /// caller parameter area. This is the actual alignment, not its logarithm.
+ /// Return the desired alignment for ByVal or InAlloca aggregate function
+ /// arguments in the caller parameter area. This is the actual alignment, not
+ /// its logarithm.
virtual unsigned getByValTypeAlignment(Type *Ty) const;
/// Return the type of registers that this ValueType will eventually require.
@@ -1965,12 +1966,13 @@ public:
bool isSRet : 1;
bool isNest : 1;
bool isByVal : 1;
+ bool isInAlloca : 1;
bool isReturned : 1;
uint16_t Alignment;
ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
- isSRet(false), isNest(false), isByVal(false), isReturned(false),
- Alignment(0) { }
+ isSRet(false), isNest(false), isByVal(false), isInAlloca(false),
+ isReturned(false), Alignment(0) { }
void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx);
};
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c177038102..55e96a4dea 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5434,6 +5434,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
+ assert(!CS.hasInAllocaArgument() &&
+ "sret demotion is incompatible with inalloca");
uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
@@ -7142,8 +7144,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInReg();
if (Args[i].isSRet)
Flags.setSRet();
- if (Args[i].isByVal) {
+ if (Args[i].isByVal)
Flags.setByVal();
+ if (Args[i].isInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].isByVal || Args[i].isInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
@@ -7362,8 +7374,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setInReg();
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
- if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
Flags.setByVal();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Flags.isByVal() || Flags.isInAlloca()) {
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6052a48640..82e5ae8534 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -75,6 +75,7 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
Alignment = CS->getParamAlignment(AttrIdx);
}
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index 5a099738d5..bc1362dc80 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -65,8 +65,8 @@ static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
AI != AE; ++AI) {
Type *Ty = AI->getType();
- // 'Dereference' type in case of byval parameter attribute
- if (AI->hasByValAttr())
+ // 'Dereference' type in case of byval or inalloca parameter attribute.
+ if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
// Size should be aligned to DWORD boundary
ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index be6f13899c..9b842b74e4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1910,6 +1910,10 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (isVarArg && isWin64)
return false;
+ // Don't know about inalloca yet.
+ if (CS.hasInAllocaArgument())
+ return false;
+
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
TM.Options.GuaranteedTailCallOpt))
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1b04cf46fc..bfe93877e4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2584,9 +2584,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
X86Info->setTCReturnAddrDelta(FPDiff);
}
+ unsigned NumBytesToPush = NumBytes;
+ unsigned NumBytesToPop = NumBytes;
+
+ // If we have an inalloca argument, all stack space has already been allocated
+ // for us and be right at the top of the stack. We don't support multiple
+ // arguments passed in memory when using inalloca.
+ if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
+ NumBytesToPush = 0;
+ assert(ArgLocs.back().getLocMemOffset() == 0 &&
+ "an inalloca argument must be the only memory argument");
+ }
+
if (!IsSibcall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(
+ Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
@@ -2603,10 +2615,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ // Skip inalloca arguments, they have already been written.
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (Flags.isInAlloca())
+ continue;
+
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
@@ -2873,8 +2889,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytesToPop, true),
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
@@ -2931,7 +2948,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesToPop, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPop,
true),
InFlag, dl);
diff --git a/test/CodeGen/X86/inalloca-ctor.ll b/test/CodeGen/X86/inalloca-ctor.ll
new file mode 100644
index 0000000000..f81e9675b7
--- /dev/null
+++ b/test/CodeGen/X86/inalloca-ctor.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Foo = type { i32, i32 }
+
+%frame = type { %Foo, i32, %Foo }
+
+declare void @f(%frame* inalloca %a)
+
+declare void @Foo_ctor(%Foo* %this)
+
+define void @g() {
+entry:
+ %args = alloca %frame, inalloca
+ %c = getelementptr %frame* %args, i32 0, i32 2
+; CHECK: movl $20, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %esp,
+ call void @Foo_ctor(%Foo* %c)
+; CHECK: leal 12(%{{.*}}),
+; CHECK: subl $4, %esp
+; CHECK: calll _Foo_ctor
+; CHECK: addl $4, %esp
+ %b = getelementptr %frame* %args, i32 0, i32 1
+ store i32 42, i32* %b
+; CHECK: movl $42,
+ %a = getelementptr %frame* %args, i32 0, i32 0
+ call void @Foo_ctor(%Foo* %a)
+; CHECK: subl $4, %esp
+; CHECK: calll _Foo_ctor
+; CHECK: addl $4, %esp
+ call void @f(%frame* inalloca %args)
+; CHECK: calll _f
+ ret void
+}
diff --git a/test/CodeGen/X86/inalloca-invoke.ll b/test/CodeGen/X86/inalloca-invoke.ll
new file mode 100644
index 0000000000..ac530ca525
--- /dev/null
+++ b/test/CodeGen/X86/inalloca-invoke.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Iter = type { i32, i32, i32 }
+
+%frame.reverse = type { %Iter, %Iter }
+
+declare void @llvm.stackrestore(i8*)
+declare i8* @llvm.stacksave()
+declare void @begin(%Iter* sret)
+declare void @plus(%Iter* sret, %Iter*, i32)
+declare void @reverse(%frame.reverse* inalloca align 4)
+
+define i32 @main() {
+ %temp.lvalue = alloca %Iter
+ br label %blah
+
+blah:
+ %inalloca.save = call i8* @llvm.stacksave()
+ %rev_args = alloca %frame.reverse, inalloca, align 4
+ %beg = getelementptr %frame.reverse* %rev_args, i32 0, i32 0
+ %end = getelementptr %frame.reverse* %rev_args, i32 0, i32 1
+
+; CHECK: calll __chkstk
+; CHECK: movl %[[beg:[^,]*]], %esp
+; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
+
+ call void @begin(%Iter* sret %temp.lvalue)
+; CHECK: calll _begin
+
+ invoke void @plus(%Iter* sret %end, %Iter* %temp.lvalue, i32 4)
+ to label %invoke.cont unwind label %lpad
+
+; Uses end as sret param.
+; CHECK: movl %[[end]], (%esp)
+; CHECK: calll _plus
+
+invoke.cont:
+ call void @begin(%Iter* sret %beg)
+
+; CHECK: movl %[[beg]],
+; CHECK: calll _begin
+
+ invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args)
+ to label %invoke.cont5 unwind label %lpad
+
+invoke.cont5: ; preds = %invoke.cont
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret i32 0
+
+lpad: ; preds = %invoke.cont, %entry
+ %lp = landingpad { i8*, i32 } personality i8* null
+ cleanup
+ unreachable
+}
diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll
new file mode 100644
index 0000000000..93ac451a50
--- /dev/null
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Foo = type { i32, i32 }
+
+declare x86_stdcallcc void @f(%Foo* inalloca %a)
+declare x86_stdcallcc void @i(i32 %a)
+
+define void @g() {
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call x86_stdcallcc void @f(%Foo* inalloca %b)
+; CHECK: calll _f@8
+; CHECK-NOT: %esp
+; CHECK: subl $4, %esp
+; CHECK: calll _i@4
+ call x86_stdcallcc void @i(i32 0)
+ ret void
+}
diff --git a/test/CodeGen/X86/inalloca.ll b/test/CodeGen/X86/inalloca.ll
new file mode 100644
index 0000000000..84d694a1fb
--- /dev/null
+++ b/test/CodeGen/X86/inalloca.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+%Foo = type { i32, i32 }
+
+declare void @f(%Foo* inalloca %b)
+
+define void @a() {
+; CHECK-LABEL: _a:
+entry:
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call void @f(%Foo* inalloca %b)
+; CHECK: calll _f
+ ret void
+}
+
+declare void @inreg_with_inalloca(i32 inreg %a, %Foo* inalloca %b)
+
+define void @b() {
+; CHECK-LABEL: _b:
+entry:
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
+; CHECK: movl $1, %eax
+; CHECK: calll _inreg_with_inalloca
+ ret void
+}
+
+declare x86_thiscallcc void @thiscall_with_inalloca(i8* %a, %Foo* inalloca %b)
+
+define void @c() {
+; CHECK-LABEL: _c:
+entry:
+ %b = alloca %Foo, inalloca
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: movl %[[REG:[^,]*]], %esp
+ %f1 = getelementptr %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ store i32 13, i32* %f1
+ store i32 42, i32* %f2
+; CHECK: movl $13, (%[[REG]])
+; CHECK: movl $42, 4(%[[REG]])
+ call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
+; CHECK: xorl %ecx, %ecx
+; CHECK: calll _thiscall_with_inalloca
+ ret void
+}