diff options
-rw-r--r-- | include/llvm/CodeGen/MachineFrameInfo.h | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 51 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.cpp | 61 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.h | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/alloca-align-rounding-32.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/X86/alloca-align-rounding.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/X86/force-align-stack-alloca.ll | 21 |
7 files changed, 139 insertions, 19 deletions
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 8b958e437e..78898a4a69 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -215,6 +215,10 @@ class MachineFrameInfo { /// just allocate them normally. bool UseLocalStackAllocationBlock; + /// After the stack pointer has been restored from the base pointer we + /// use a cached adjustment. Currently only used for x86. + int64_t BPAdj; + public: explicit MachineFrameInfo(const TargetFrameLowering &tfi) : TFI(tfi) { StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0; @@ -230,6 +234,7 @@ public: LocalFrameSize = 0; LocalFrameMaxAlign = 0; UseLocalStackAllocationBlock = false; + BPAdj = 0; } /// hasStackObjects - Return true if there are any stack objects in this @@ -538,6 +543,16 @@ public: void setCalleeSavedInfoValid(bool v) { CSIValid = v; } + /// setBasePtrStackAdjustment - If we're restoring the stack pointer from the + /// base pointer, due to dynamic stack realignment + VLAs, we cache the + /// number of bytes initially allocated for the stack frame. In obscure + /// cases (e.g., tail calls with byval argument and no stack protector), the + /// stack gets adjusted outside of the prolog, but these shouldn't be + /// considered when restoring from the base pointer. Currently, this is only + /// needed for x86. + void setBasePtrStackAdjustment(int64_t adj) { BPAdj = adj; } + int64_t getBasePtrStackAdjustment() const { return BPAdj; } + /// getPristineRegs - Return a set of physical registers that are pristine on /// entry to the MBB. 
/// diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 7e7f364504..21ad062eda 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -650,6 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); + unsigned BasePtr = RegInfo->getBaseRegister(); DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -913,6 +914,20 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, UseLEA, TII, *RegInfo); + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (RegInfo->hasBasePointer(MF)) { + // Update the base pointer with the current stack pointer. + unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; + BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + + MFI->setBasePtrStackAdjustment(NumBytes); + } + if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); @@ -960,6 +975,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); + unsigned BasePtr = RegInfo->getBaseRegister(); switch (RetOpcode) { default: @@ -1029,6 +1045,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes || MFI->hasVarSizedObjects()) mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); + // Restore the SP from the BP, if necessary. 
+ if (RegInfo->hasBasePointer(MF)) { + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), + StackPtr).addReg(BasePtr); + + // When restoring from the BP we must use a cached SP adjustment. + NumBytes = MFI->getBasePtrStackAdjustment(); + } + // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was // realigned. @@ -1147,7 +1172,16 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); - if (RegInfo->needsStackRealignment(MF)) { + if (RegInfo->hasBasePointer(MF)) { + assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); + if (FI < 0) { + // Skip the saved EBP. + return Offset + RegInfo->getSlotSize(); + } else { + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); + return Offset + StackSize; + } + } else if (RegInfo->needsStackRealignment(MF)) { if (FI < 0) { // Skip the saved EBP. return Offset + RegInfo->getSlotSize(); @@ -1178,9 +1212,14 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); // We can't calculate offset from frame pointer if the stack is realigned, - // so enforce usage of stack pointer. - FrameReg = (RegInfo->needsStackRealignment(MF)) ? - RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF); + // so enforce usage of stack/base pointer. The base pointer is used when we + // have dynamic allocas in addition to dynamic realignment. 
+ if (RegInfo->hasBasePointer(MF)) + FrameReg = RegInfo->getBaseRegister(); + else if (RegInfo->needsStackRealignment(MF)) + FrameReg = RegInfo->getStackRegister(); + else + FrameReg = RegInfo->getFrameRegister(MF); return getFrameIndexOffset(MF, FI); } @@ -1317,6 +1356,10 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, "Slot for EBP register must be last in order to be found!"); (void)FrameIdx; } + + // Spill the BasePtr if it's used. + if (RegInfo->hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); } static bool diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index b22a086b24..acf53f81ff 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,6 +50,10 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); +cl::opt<bool> +EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() @@ -68,10 +72,12 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, SlotSize = 8; StackPtr = X86::RSP; FramePtr = X86::RBP; + BasePtr = X86::RBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; + BasePtr = X86::EBX; } } @@ -290,6 +296,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*I); } + // Set the base-pointer register and its aliases as reserved if needed. 
+ if (hasBasePointer(MF)) { + CallingConv::ID CC = MF.getFunction()->getCallingConv(); + const uint32_t* RegMask = getCallPreservedMask(CC); + if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) + report_fatal_error( + "Stack realignment in presence of dynamic allocas is not supported with" + " this calling convention."); + + Reserved.set(getBaseRegister()); + for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I) + Reserved.set(*I); + } + // Mark the segment registers as reserved. Reserved.set(X86::CS); Reserved.set(X86::SS); @@ -340,10 +360,36 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// +bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + if (!EnableBasePointer) + return false; + + // When we need stack realignment and there are dynamic allocas, we can't + // reference off of the stack pointer, so we reserve a base pointer. + if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + return true; + + return false; +} + bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (MF.getTarget().Options.RealignStack && - !MFI->hasVarSizedObjects()); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + if (!MF.getTarget().Options.RealignStack) + return false; + + // Stack realignment requires a frame pointer. If we already started + // register allocation with frame pointer elimination, it is too late now. + if (!MRI->canReserveReg(FramePtr)) + return false; + + // If a base pointer is necessary, check that it isn't too late to reserve + // it. 
+ if (MFI->hasVarSizedObjects()) + return MRI->canReserveReg(BasePtr); + return true; } bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { @@ -353,13 +399,6 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); - // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas. - // FIXME: It's more complicated than this... - if (0 && requiresRealignment && MFI->hasVarSizedObjects()) - report_fatal_error( - "Stack realignment in presence of dynamic allocas is not supported"); - // If we've requested that we force align the stack do so now. if (ForceStackAlign) return canRealignStack(MF); @@ -499,7 +538,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; - if (needsStackRealignment(MF)) + if (hasBasePointer(MF)) + BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); + else if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else if (AfterFPPop) BasePtr = StackPtr; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index ee69842b10..1bc32cbb78 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -50,6 +50,11 @@ private: /// unsigned FramePtr; + /// BasePtr - X86 physical register used as a base ptr in complex stack + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to + /// variable size stack objects. + unsigned BasePtr; + public: X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii); @@ -106,6 +111,8 @@ public: /// register scavenger to determine what registers are free. 
BitVector getReservedRegs(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; + bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; @@ -123,6 +130,7 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } + unsigned getBaseRegister() const { return BasePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll index 8a8b044d14..a45284e10c 100644 --- a/test/CodeGen/X86/alloca-align-rounding-32.ll +++ b/test/CodeGen/X86/alloca-align-rounding-32.ll @@ -15,5 +15,6 @@ define void @foo2(i32 %h) { call void @bar(<2 x i64>* %p) ret void ; CHECK: foo2 +; CHECK: andl $-32, %esp ; CHECK: andl $-32, %eax } diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 7bc880625c..3d76fb0aa2 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -15,5 +15,6 @@ define void @foo2(i64 %h) { call void @bar(<2 x i64>* %p) ret void ; CHECK: foo2 +; CHECK: andq $-32, %rsp ; CHECK: andq $-32, %rax } diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll index 48f963f58e..ecef781f88 100644 --- a/test/CodeGen/X86/force-align-stack-alloca.ll +++ b/test/CodeGen/X86/force-align-stack-alloca.ll @@ -17,10 +17,15 @@ entry: define i64 @g(i32 %i) nounwind { ; CHECK: g: -; CHECK: pushl +; CHECK: pushl %ebp ; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: andl $-32, %esp ; CHECK-NEXT: pushl -; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: pushl +; CHECK-NEXT: subl $24, %esp +; +; Now setup the base pointer (%ebx). +; CHECK-NEXT: movl %esp, %ebx ; CHECK-NOT: {{[^ ,]*}}, %esp ; ; The next adjustment of the stack is due to the alloca. 
@@ -41,12 +46,18 @@ define i64 @g(i32 %i) nounwind { ; CHECK-NEXT: addl $32, %esp ; CHECK-NOT: {{[^ ,]*}}, %esp ; -; Finally we nede to restore %esp from %ebp, the alloca prevents us from -; restoring it directly. +; Restore %esp from %ebx (base pointer) so we can pop the callee-saved +; registers. This is the state prior to the allocation of VLAs. ; CHECK-NOT: popl -; CHECK: leal -4(%ebp), %esp +; CHECK: movl %ebx, %esp +; CHECK-NEXT: addl $24, %esp ; CHECK-NEXT: popl ; CHECK-NEXT: popl +; +; Finally we need to restore %esp from %ebp due to dynamic stack +; realignment. +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp ; CHECK-NEXT: ret entry: |