summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-08-10 06:26:49 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-08-10 06:26:49 +0000
commitc9aed19747608b7688a64f2f382a008889f8e57d (patch)
tree4b02044afb655cdcef591720814532993649b82c
parent94f7950e4d68399954d4ee651ffa6674b87ff299 (diff)
downloadllvm-c9aed19747608b7688a64f2f382a008889f8e57d.tar.gz
llvm-c9aed19747608b7688a64f2f382a008889f8e57d.tar.bz2
llvm-c9aed19747608b7688a64f2f382a008889f8e57d.tar.xz
Fix ARM hasFP() semantics. It should return true whenever the FP register is
reserved, i.e. not available for general allocation. This eliminates all the extra checks for Darwin. This change also fixes the use of FP to access frame indices in leaf functions and cleans up some confusing code in epilogue emission. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110655 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp84
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp9
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h11
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td16
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp11
-rw-r--r--test/CodeGen/Thumb/large-stack.ll29
6 files changed, 90 insertions, 70 deletions
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ee1a82e737..c8cc8706f6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -177,7 +177,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
Reserved.set(ARM::FPSCR);
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
Reserved.set(FramePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
@@ -194,7 +194,7 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@@ -511,7 +511,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -540,7 +540,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -610,6 +610,10 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
@@ -683,6 +687,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -708,7 +713,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
- if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@@ -860,8 +868,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
- (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
- estimateRSStackSizeLimit(MF)))
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
@@ -881,9 +890,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -976,7 +983,7 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
@@ -1546,7 +1553,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@@ -1565,7 +1573,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -1577,11 +1585,14 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP)
+ AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
@@ -1614,6 +1625,8 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
+
+ AFI->setShouldRestoreSPFromFP(true);
}
}
@@ -1669,34 +1682,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- bool HasFP = hasFP(MF);
- if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (HasFP ||
- AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()) {
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index db550b1c42..c3d60ad9e6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -742,14 +742,15 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
case ARM::tGPRRegClassID:
- return 5 - FPDiff;
- case ARM::GPRRegClassID:
- return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
case ARM::DPRRegClassID:
return 32 - 10;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7e57a1ca55..514c26b4da 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
/// LRSpilledForFarJump - True if the LR register has been for spilled to
/// enable far jump.
bool LRSpilledForFarJump;
@@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 5e17175b97..2b72739c30 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -294,8 +294,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
if (Subtarget.isThumb1Only()) {
I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
if (Subtarget.isTargetDarwin()) {
@@ -312,8 +311,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
}
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
}];
}
@@ -403,8 +401,7 @@ def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
if (Subtarget.isThumb1Only()) {
I = THUMB_rGPRAO + (sizeof(THUMB_rGPRAO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
if (Subtarget.isTargetDarwin()) {
@@ -421,8 +418,7 @@ def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
I = ARM_rGPRAO_1 + (sizeof(ARM_rGPRAO_1)/sizeof(unsigned));
}
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
}];
}
@@ -449,11 +445,9 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
tGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
tGPRClass::iterator I =
THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ return RI->hasFP(MF) ? I-1 : I;
}
}];
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 9ae3145389..50328e3760 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -742,11 +742,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
+ AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
@@ -764,10 +764,9 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -828,7 +827,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- if (hasFP(MF)) {
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index 02de36af1c..b05e6bf604 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,20 +1,35 @@
-; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
define void @test1() {
+; CHECK: test1:
+; CHECK: sub sp, #256
+; CHECK: add sp, #256
%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
define void @test2() {
+; CHECK: test2:
+; CHECK: ldr r0, LCPI
+; CHECK: add sp, r0
+; CHECK: mov sp, r7
+; CHECK: sub sp, #4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
define i32 @test3() {
- %retval = alloca i32, align 4
- %tmp = alloca i32, align 4
- %a = alloca [805306369 x i8], align 16
- store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
- ret i32 %tmp1
+; CHECK: test3:
+; CHECK: ldr r1, LCPI
+; CHECK: add sp, r1
+; CHECK: ldr r1, LCPI
+; CHECK: add r1, sp
+; CHECK: mov sp, r7
+; CHECK: sub sp, #4
+ %retval = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %a = alloca [805306369 x i8], align 16
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
}