-rw-r--r--  include/llvm/CodeGen/CallingConvLower.h                          97
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp                                   2
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp                               135
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h                                   2
-rw-r--r--  test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll        73
-rw-r--r--  test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll      48
-rw-r--r--  test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll     45
7 files changed, 360 insertions, 42 deletions
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index c035e0777c..fa9d60f0d4 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -163,8 +163,56 @@ private:
unsigned StackOffset;
SmallVector<uint32_t, 16> UsedRegs;
- unsigned FirstByValReg;
- bool FirstByValRegValid;
+
+ // ByValInfo and SmallVector<ByValInfo, 4> ByValRegs:
+ //
+  // The ByValRegs vector (of ByValInfo instances) tracks byval registers, i.e.
+  // byval parameters that are stored in general purpose registers.
+  //
+  // For 4-byte stack alignment, the instance index corresponds to the byval
+  // parameter's position among the formal arguments. Assume we have some
+  // "struct_type" with size = 4 bytes; then, for the function "foo":
+  //
+  // i32 foo(i32 %p, %struct_type* %r, i32 %s, %struct_type* %t)
+  //
+  // ByValRegs[0] describes how "%r" is stored (Begin == r1, End == r2),
+  // ByValRegs[1] describes how "%t" is stored (Begin == r3, End == r4).
+  //
+  // In case of 8-byte stack alignment, ByValRegs may also contain information
+  // about wasted registers. In the function shown above, r3 would be wasted
+  // according to the AAPCS rules, and in that case ByValRegs[1].Waste would be
+  // "true". The ByValRegs vector size would still be 2, while "%t" goes to the
+  // stack and therefore isn't described in ByValRegs.
+  //
+  // Intended use of this collection:
+  // 1. Initially ByValRegs is empty and InRegsParamsProceed is 0.
+  // 2. HandleByVal fills ByValRegs.
+  // 3. Argument analysis (LowerFormalArguments, for example): after each byval
+  //    argument is analyzed, InRegsParamsProceed is incremented.
+ struct ByValInfo {
+ ByValInfo(unsigned B, unsigned E, bool IsWaste = false) :
+ Begin(B), End(E), Waste(IsWaste) {}
+    // First register allocated for the current parameter.
+    unsigned Begin;
+
+    // One past the last register allocated for the current parameter.
+    unsigned End;
+
+    // Indicates that the current range of registers doesn't belong to any
+    // parameter; it was wasted due to stack alignment rules.
+ // For more information see:
+ // AAPCS, 5.5 Parameter Passing, Stage C, C.3.
+ bool Waste;
+ };
+  SmallVector<ByValInfo, 4> ByValRegs;
+
+  // InRegsParamsProceed - the number of ByValRegs entries that have been
+  // processed so far during argument analysis.
+ unsigned InRegsParamsProceed;
protected:
ParmContext CallOrPrologue;
@@ -306,12 +354,45 @@ public:
MVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
- // First GPR that carries part of a byval aggregate that's split
- // between registers and memory.
- unsigned getFirstByValReg() const { return FirstByValRegValid ? FirstByValReg : 0; }
- void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; }
- void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; }
- bool isFirstByValRegValid() const { return FirstByValRegValid; }
+  // Returns the number of byval arguments that are to be stored (even
+  // partially) in registers.
+ unsigned getInRegsParamsCount() const { return ByValRegs.size(); }
+
+  // Returns the number of byval in-regs arguments processed so far.
+ unsigned getInRegsParamsProceed() const { return InRegsParamsProceed; }
+
+  // Get information about the N-th byval parameter that is stored in
+  // registers. Here "InRegsParamRecordIndex" is N.
+ void getInRegsParamInfo(unsigned InRegsParamRecordIndex,
+ unsigned& BeginReg, unsigned& EndReg) const {
+ assert(InRegsParamRecordIndex < ByValRegs.size() &&
+ "Wrong ByVal parameter index");
+
+ const ByValInfo& info = ByValRegs[InRegsParamRecordIndex];
+ BeginReg = info.Begin;
+ EndReg = info.End;
+ }
+
+  // Add information about a parameter that is kept in registers.
+ void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd) {
+ ByValRegs.push_back(ByValInfo(RegBegin, RegEnd));
+ }
+
+  // Advances either to the next byval parameter (excluding "waste" records)
+  // or to the end of the collection.
+  // Returns false if the end is reached.
+ bool nextInRegsParam() {
+ unsigned e = ByValRegs.size();
+ if (InRegsParamsProceed < e)
+ ++InRegsParamsProceed;
+ return InRegsParamsProceed < e;
+ }
+
+ // Clear byval registers tracking info.
+ void clearByValRegsInfo() {
+ InRegsParamsProceed = 0;
+ ByValRegs.clear();
+ }
ParmContext getCallOrPrologue() const { return CallOrPrologue; }
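
A minimal, self-contained sketch of the traversal described in the header comment above. This is illustrative only: it models the bookkeeping with a local mini-struct instead of the real CCState, and the register numbers follow the hypothetical "foo" example (each 4-byte struct occupies one GPR).

// Standalone model of the ByValRegs bookkeeping; not LLVM code.
#include <cassert>
#include <cstdio>
#include <vector>

struct ByValInfo { unsigned Begin, End; bool Waste; };

struct MiniCCState {
  std::vector<ByValInfo> ByValRegs;
  unsigned InRegsParamsProceed = 0;

  void addInRegsParamInfo(unsigned B, unsigned E) {
    ByValRegs.push_back({B, E, false});
  }
  unsigned getInRegsParamsCount() const { return ByValRegs.size(); }
  unsigned getInRegsParamsProceed() const { return InRegsParamsProceed; }
  void getInRegsParamInfo(unsigned Idx, unsigned &B, unsigned &E) const {
    assert(Idx < ByValRegs.size() && "Wrong ByVal parameter index");
    B = ByValRegs[Idx].Begin;
    E = ByValRegs[Idx].End;
  }
  bool nextInRegsParam() {
    unsigned e = ByValRegs.size();
    if (InRegsParamsProceed < e)
      ++InRegsParamsProceed;
    return InRegsParamsProceed < e;
  }
};

int main() {
  MiniCCState State;
  // Step 2 of the use-case: HandleByVal fills ByValRegs ("%r" -> r1, "%t" -> r3).
  State.addInRegsParamInfo(1, 2);
  State.addInRegsParamInfo(3, 4);
  // Step 3: argument analysis walks the records and advances InRegsParamsProceed.
  while (State.getInRegsParamsProceed() < State.getInRegsParamsCount()) {
    unsigned B, E;
    State.getInRegsParamInfo(State.getInRegsParamsProceed(), B, E);
    std::printf("byval param kept in GPRs [r%u, r%u)\n", B, E);
    State.nextInRegsParam();
  }
  return 0;
}
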
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index f1d4ace922..75f4b96e3b 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
// No stack is used.
StackOffset = 0;
- clearFirstByValReg();
+ clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9475f1b5a0..0f7beb1e3b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1481,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
- if (CCInfo.isFirstByValRegValid()) {
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
- for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1493,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
- offset = ARM::R4 - CCInfo.getFirstByValReg();
- CCInfo.clearFirstByValReg();
+
+      // If the parameter size exceeds the register area, the "offset" value
+      // lets us compute the stack slot for the remaining part properly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() - 4*offset > 0) {
+ if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1740,9 +1751,24 @@ ARMTargetLowering::HandleByVal(
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((!State->isFirstByValRegValid()) &&
- (!Subtarget->isAAPCS_ABI() || State->getNextStackOffset() == 0) &&
- (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+  // For in-prologue parameter handling, we also introduce a stack offset
+  // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+  // This behaviour is outside the AAPCS rules (5.5 Parameter Passing) for how
+  // the NSAA ("next stacked argument address") should be evaluated.
+  // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+  // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset();
+ if (State->getCallOrPrologue() != Call) {
+ for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+ unsigned RB, RE;
+ State->getInRegsParamInfo(i, RB, RE);
+      assert(NSAAOffset >= (RE-RB)*4 &&
+             "Stack offset for byval regs is not introduced anymore?");
+ NSAAOffset -= (RE-RB)*4;
+ }
+ }
+ if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1750,22 +1776,45 @@ ARMTargetLowering::HandleByVal(
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
- State->setFirstByValReg(reg);
+ unsigned excess = 4 * (ARM::R4 - reg);
+
+      // Special case when NSAA != SP and the parameter size is greater than
+      // the size of all remaining GPR regs. In that case we can't split the
+      // parameter, we must send it entirely to the stack. We must also set
+      // the NCRN to R4, i.e. waste all remaining registers.
+ if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+ return;
+ }
+
+      // The first register for the byval parameter is the first register that
+      // wasn't allocated before this method call, i.e. "reg".
+      // If the parameter is small enough to fit in the range [reg, r4), then
+      // the end (one past the last) register is reg + param-size-in-regs;
+      // otherwise the parameter is split between registers and the stack, and
+      // the end register is r4 in that case.
+ unsigned ByValRegBegin = reg;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4;
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+      // Note: the first register was already allocated at the beginning of
+      // this function, so allocate the remaining registers we need.
+ for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs, 4);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+        // Make the remaining size 0 when the whole structure can be
+        // stored in registers.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
- // Confiscate any remaining parameter registers to preclude their
- // assignment to subsequent parameters.
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
}
/// MatchingStackOffset - Return true if the given stack call argument is
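
To make the NSAA adjustment and the split-vs-stack decision in HandleByVal above concrete, here is a tiny standalone arithmetic sketch. The numbers are hypothetical (an AAPCS prologue with byval params already in r1-r3), r0..r4 are represented as 0..4, and this is not LLVM code.

#include <cstdio>

int main() {
  // Prologue case: two byval params already sit in r1-r2 and r3, so CCState's
  // next stack offset includes 3*4 = 12 bytes that really live in registers.
  unsigned NextStackOffset = 20;
  unsigned ByValRegsBytes = 12;
  unsigned NSAAOffset = NextStackOffset - ByValRegsBytes; // real NSAA offset: 8

  unsigned R4 = 4;
  unsigned reg = 3;                  // next core register to allocate is r3
  unsigned size = 16;                // byval aggregate of 16 bytes
  unsigned excess = 4 * (R4 - reg);  // bytes still available in GPRs: 4

  if (NSAAOffset != 0 && size > excess)
    std::printf("NSAA != SP and the aggregate doesn't fit: pass it entirely on "
                "the stack and waste the remaining GPRs\n");
  else
    std::printf("split: regs [r%u, r%u), %u bytes left for the stack\n",
                reg, size < excess ? reg + size / 4 : R4,
                size < excess ? 0u : size - excess);
  return 0;
}
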
@@ -2580,13 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned InRegsParamRecordIdx,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
- if (CCInfo.isFirstByValRegValid())
- NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
- else {
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ NumGPRs = REnd - RBegin;
+ } else {
unsigned int firstUnalloced;
firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
sizeof(GPRArgRegs) /
@@ -2611,6 +2663,7 @@ int
ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
bool ForceMutable) const {
@@ -2629,24 +2682,26 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex;
- if (CCInfo.isFirstByValRegValid())
- firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
- else {
+ unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ firstRegToSaveIndex = RBegin - ARM::R0;
+ lastRegToSaveIndex = REnd - ARM::R0;
+ } else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ lastRegToSaveIndex = 4;
}
unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, ArgRegsSize, ArgRegsSaveSize);
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize);
// Store any by-val regs to their spots on the stack so that they may be
  // loaded by dereferencing the result of formal parameter pointer or va_next.
// Note: once stack area for byval/varargs registers
// was initialized, it can't be initialized again.
- if (!AFI->getArgRegsSaveSize() && ArgRegsSaveSize) {
-
- AFI->setArgRegsSaveSize(ArgRegsSaveSize);
+ if (ArgRegsSaveSize) {
int FrameIndex = MFI->CreateFixedObject(
ArgRegsSaveSize,
@@ -2655,7 +2710,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+ ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2672,6 +2728,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
@@ -2696,7 +2755,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
  // If there are no regs to be stored, just point the address after the last
  // argument passed via the stack.
int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, 0, 0, ArgOffset, ForceMutable);
+ StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
+ 0, ArgOffset, ForceMutable);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -2727,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
+
+  // Initially ArgRegsSaveSize is zero.
+  // Then we increase this value each time we meet a byval parameter.
+  // We also increase this value in the case of a varargs function.
+ AFI->setArgRegsSaveSize(0);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
@@ -2824,12 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ CCInfo.nextInRegsParam();
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 46b8438676..426010e295 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -477,6 +477,7 @@ namespace llvm {
int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
bool ForceMutable) const;
@@ -487,6 +488,7 @@ namespace llvm {
bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned InRegsParamRecordIdx,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize) const;
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
new file mode 100644
index 0000000000..4a5ca9db0e
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -0,0 +1,73 @@
+;PR15293: ARM codegen ice - expected larger existing stack allocation
+;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+;CHECK: foo:
+;CHECK: sub sp, sp, #8
+;CHECK: push {r11, lr}
+;CHECK: str r0, [sp, #12]
+;CHECK: add r0, sp, #12
+;CHECK: bl fooUseParam
+;CHECK: pop {r11, lr}
+;CHECK: add sp, sp, #8
+;CHECK: mov pc, lr
+
+;CHECK: foo2:
+;CHECK: sub sp, sp, #16
+;CHECK: push {r11, lr}
+;CHECK: str r0, [sp, #12]
+;CHECK: add r0, sp, #12
+;CHECK: str r2, [sp, #16]
+;CHECK: bl fooUseParam
+;CHECK: add r0, sp, #16
+;CHECK: bl fooUseParam
+;CHECK: pop {r11, lr}
+;CHECK: add sp, sp, #16
+;CHECK: mov pc, lr
+
+;CHECK: doFoo:
+;CHECK: push {r11, lr}
+;CHECK: ldr r0,
+;CHECK: ldr r0, [r0]
+;CHECK: bl foo
+;CHECK: pop {r11, lr}
+;CHECK: mov pc, lr
+
+
+;CHECK: doFoo2:
+;CHECK: push {r11, lr}
+;CHECK: ldr r0,
+;CHECK: mov r1, #0
+;CHECK: ldr r0, [r0]
+;CHECK: mov r2, r0
+;CHECK: bl foo2
+;CHECK: pop {r11, lr}
+;CHECK: mov pc, lr
+
+
+%artz = type { i32 }
+@static_val = constant %artz { i32 777 }
+
+declare void @fooUseParam(%artz* )
+
+define void @foo(%artz* byval %s) {
+ call void @fooUseParam(%artz* %s)
+ ret void
+}
+
+define void @foo2(%artz* byval %s, i32 %p, %artz* byval %s2) {
+ call void @fooUseParam(%artz* %s)
+ call void @fooUseParam(%artz* %s2)
+ ret void
+}
+
+
+define void @doFoo() {
+ call void @foo(%artz* byval @static_val)
+ ret void
+}
+
+define void @doFoo2() {
+ call void @foo2(%artz* byval @static_val, i32 0, %artz* byval @static_val)
+ ret void
+}
+
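
For the doFoo2 call above, a rough walk-through of why the old call-site logic asserted and the new logic doesn't. This is a standalone sketch with r0..r4 mapped to 0..4; it is not LLVM code, only the arithmetic behind PR15293.

#include <cstdio>

int main() {
  unsigned R4 = 4;
  unsigned reg = 2;                  // second byval lands in r2 (r0: first byval, r1: i32 0)
  unsigned size = 4;                 // sizeof(%artz)
  unsigned excess = 4 * (R4 - reg);  // 8 bytes of GPRs still available

  // Old call-site logic: assert(size >= excess) -> 4 >= 8 fails ("expected
  // larger existing stack allocation"), i.e. the PR15293 ICE.
  // New logic: the aggregate fits entirely in registers, so nothing remains
  // to be passed on the stack.
  unsigned remained = size < excess ? 0 : size - excess;
  std::printf("bytes passed on the stack: %u\n", remained); // prints 0
  return 0;
}
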
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
new file mode 100644
index 0000000000..6db71fed95
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -0,0 +1,48 @@
+;Check AAPCS, 5.5 Parameter Passing, C4 and C5 rules.
+;Check the case when NSAA != 0, NCRN < R4, and NCRN+ParamSize < R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888}
+
+declare void @fooUseStruct(%st_t*)
+
+define void @foo(double %vfp0, ; --> D0, NSAA=SP
+ double %vfp1, ; --> D1, NSAA=SP
+ double %vfp2, ; --> D2, NSAA=SP
+ double %vfp3, ; --> D3, NSAA=SP
+ double %vfp4, ; --> D4, NSAA=SP
+ double %vfp5, ; --> D5, NSAA=SP
+ double %vfp6, ; --> D6, NSAA=SP
+ double %vfp7, ; --> D7, NSAA=SP
+ double %vfp8, ; --> SP, NSAA=SP+8 (!)
+ i32 %p0, ; --> R0, NSAA=SP+8
+ %st_t* byval %p1, ; --> R1, R2, NSAA=SP+8
+ i32 %p2, ; --> R3, NSAA=SP+8
+ i32 %p3) #0 { ; --> SP+4, NSAA=SP+12
+entry:
+ ;CHECK: sub sp, #8
+ ;CHECK: push.w {r11, lr}
+ ;CHECK: add r0, sp, #16
+ ;CHECK: str r2, [sp, #20]
+ ;CHECK: str r1, [sp, #16]
+ ;CHECK: bl fooUseStruct
+ call void @fooUseStruct(%st_t* %p1)
+ ret void
+}
+
+define void @doFoo() {
+entry:
+ call void @foo(double 23.0,
+ double 23.1,
+ double 23.2,
+ double 23.3,
+ double 23.4,
+ double 23.5,
+ double 23.6,
+ double 23.7,
+ double 23.8,
+ i32 0, %st_t* byval @static_val, i32 1, i32 2)
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
new file mode 100644
index 0000000000..212bbc2ee9
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
@@ -0,0 +1,45 @@
+;Check AAPCS, 5.5 Parameter Passing, C4 and C5 rules.
+;Check the case when NSAA != 0, NCRN < R4, and NCRN+ParamSize > R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32, i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888, i32 787, i32 878}
+
+define void @foo(double %vfp0, ; --> D0, NSAA=SP
+ double %vfp1, ; --> D1, NSAA=SP
+ double %vfp2, ; --> D2, NSAA=SP
+ double %vfp3, ; --> D3, NSAA=SP
+ double %vfp4, ; --> D4, NSAA=SP
+ double %vfp5, ; --> D5, NSAA=SP
+ double %vfp6, ; --> D6, NSAA=SP
+ double %vfp7, ; --> D7, NSAA=SP
+ double %vfp8, ; --> SP, NSAA=SP+8 (!)
+ i32 %p0, ; --> R0, NSAA=SP+8
+ %st_t* byval %p1, ; --> SP+8, 4 words NSAA=SP+24
+ i32 %p2) #0 { ; --> SP+24, NSAA=SP+24
+
+entry:
+ ;CHECK: push.w {r11, lr}
+ ;CHECK: ldr r0, [sp, #32]
+ ;CHECK: bl fooUseI32
+ call void @fooUseI32(i32 %p2)
+ ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() {
+entry:
+ call void @foo(double 23.0,
+ double 23.1,
+ double 23.2,
+ double 23.3,
+ double 23.4,
+ double 23.5,
+ double 23.6,
+ double 23.7,
+ double 23.8,
+ i32 0, %st_t* byval @static_val, i32 1)
+ ret void
+}
+