summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/X86/X86FrameLowering.cpp 100
-rw-r--r-- test/CodeGen/X86/dynamic-allocas-VLAs.ll 23
-rw-r--r-- test/CodeGen/X86/force-align-stack-alloca.ll 16
-rw-r--r-- test/CodeGen/X86/pr11468.ll 33
4 files changed, 95 insertions, 77 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index bf0ba09e23..2775736717 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -722,10 +722,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- if (RegInfo->needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
-
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ if (RegInfo->needsStackRealignment(MF)) {
+ // Callee-saved registers are pushed on stack before the stack
+ // is realigned.
+ FrameSize -= X86FI->getCalleeSavedFrameSize();
+ NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+ } else {
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ }
// Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
@@ -782,19 +786,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(FramePtr);
-
- // Realign stack
- if (RegInfo->needsStackRealignment(MF)) {
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
- .addReg(StackPtr)
- .addImm(-MaxAlign)
- .setMIFlag(MachineInstr::FrameSetup);
-
- // The EFLAGS implicit def is dead.
- MI->getOperand(3).setIsDead();
- }
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
@@ -824,6 +815,27 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
+ // Realign stack after we pushed callee-saved registers (so that we'll be
+ // able to calculate their offsets from the frame pointer).
+
+ // NOTE: We push the registers before realigning the stack, so
+ // vector callee-saved (xmm) registers may be saved without proper
+ // alignment this way. However, these registers are currently saved in
+ // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
+ // this shouldn't be a problem.
+ if (RegInfo->needsStackRealignment(MF)) {
+ assert(HasFP && "There should be a frame pointer if stack is realigned.");
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
+ .addReg(StackPtr)
+ .addImm(-MaxAlign)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+
DL = MBB.findDebugLoc(MBBI);
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -975,7 +987,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- unsigned BasePtr = RegInfo->getBaseRegister();
switch (RetOpcode) {
default:
@@ -1013,10 +1024,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- if (RegInfo->needsStackRealignment(MF))
- FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
-
- NumBytes = FrameSize - CSSize;
+ if (RegInfo->needsStackRealignment(MF)) {
+ // Callee-saved registers were pushed on stack before the stack
+ // was realigned.
+ FrameSize -= CSSize;
+ NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+ } else {
+ NumBytes = FrameSize - CSSize;
+ }
// Pop EBP.
BuildMI(MBB, MBBI, DL,
@@ -1026,7 +1041,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
// Skip the callee-saved pop instructions.
- MachineBasicBlock::iterator LastCSPop = MBBI;
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
@@ -1037,6 +1051,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
+ MachineBasicBlock::iterator FirstCSPop = MBBI;
DL = MBBI->getDebugLoc();
@@ -1045,40 +1060,19 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes || MFI->hasVarSizedObjects())
mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
- // Restore the SP from the BP, if necessary.
- if (RegInfo->hasBasePointer(MF)) {
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(BasePtr);
-
- // When restoring from the BP we must use a cached SP adjustment.
- NumBytes = X86FI->getBasePtrStackAdjustment();
- }
-
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
// realigned.
- if (RegInfo->needsStackRealignment(MF)) {
- // We cannot use LEA here, because stack pointer was realigned. We need to
- // deallocate local frame back.
- if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
- *RegInfo);
- MBBI = prior(LastCSPop);
- }
-
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
- StackPtr).addReg(FramePtr);
- } else if (MFI->hasVarSizedObjects()) {
- if (CSSize) {
- unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
- MachineInstr *MI =
- addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
- MBB.insert(MBBI, MI);
+ if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ if (RegInfo->needsStackRealignment(MF))
+ MBBI = FirstCSPop;
+ if (CSSize != 0) {
+ unsigned Opc = getLEArOpcode(Is64Bit);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
} else {
- BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(FramePtr);
}
} else if (NumBytes) {
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index c7970d491e..54ae39b711 100644
--- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -85,20 +85,19 @@ entry:
; CHECK: _t4
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
; CHECK: pushq %r14
; CHECK: pushq %rbx
-; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx
;
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
; CHECK: callq _t4_helper
;
-; CHECK: addq $[[STACKADJ]], %rsp
+; CHECK: leaq -16(%rbp), %rsp
; CHECK: popq %rbx
; CHECK: popq %r14
-; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}
@@ -176,19 +175,17 @@ entry:
; CHECK: _t7
; CHECK: pushq %rbp
; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
; CHECK: pushq %rbx
-; CHECK: subq $[[ADJ:[0-9]+]], %rsp
+; CHECK: andq $-32, %rsp
+; CHECK: subq ${{[0-9]+}}, %rsp
; CHECK: movq %rsp, %rbx
; Stack adjustment for byval
; CHECK: subq {{.*}}, %rsp
; CHECK: callq _bar
; CHECK-NOT: addq {{.*}}, %rsp
-; CHECK: movq %rbx, %rsp
-; CHECK: addq $[[ADJ]], %rsp
+; CHECK: leaq -8(%rbp), %rsp
; CHECK: popq %rbx
-; CHECK: movq %rbp, %rsp
; CHECK: popq %rbp
}
@@ -229,14 +226,12 @@ entry:
; FORCE-ALIGN: _t9
; FORCE-ALIGN: pushq %rbp
; FORCE-ALIGN: movq %rsp, %rbp
-; FORCE-ALIGN: andq $-32, %rsp
; FORCE-ALIGN: pushq %rbx
-; FORCE-ALIGN: subq $24, %rsp
+; FORCE-ALIGN: andq $-32, %rsp
+; FORCE-ALIGN: subq $32, %rsp
; FORCE-ALIGN: movq %rsp, %rbx
-; FORCE-ALIGN: movq %rbx, %rsp
-; FORCE-ALIGN: addq $24, %rsp
+; FORCE-ALIGN: leaq -8(%rbp), %rsp
; FORCE-ALIGN: popq %rbx
-; FORCE-ALIGN: movq %rbp, %rsp
; FORCE-ALIGN: popq %rbp
}
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
index ecef781f88..6d44559437 100644
--- a/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -19,10 +19,10 @@ define i64 @g(i32 %i) nounwind {
; CHECK: g:
; CHECK: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
-; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: pushl
; CHECK-NEXT: pushl
-; CHECK-NEXT: subl $24, %esp
+; CHECK-NEXT: andl $-32, %esp
+; CHECK-NEXT: subl $32, %esp
;
; Now setup the base pointer (%ebx).
; CHECK-NEXT: movl %esp, %ebx
@@ -46,17 +46,13 @@ define i64 @g(i32 %i) nounwind {
; CHECK-NEXT: addl $32, %esp
; CHECK-NOT: {{[^ ,]*}}, %esp
;
-; Restore %esp from %ebx (base pointer) so we can pop the callee-saved
-; registers. This is the state prior to the allocation of VLAs.
+; Restore %esp from %ebp (frame pointer) and subtract the size of the
+; callee-saved register area so that the registers can be popped.
+; This is the state prior to stack realignment and the allocation of VLAs.
; CHECK-NOT: popl
-; CHECK: movl %ebx, %esp
-; CHECK-NEXT: addl $24, %esp
+; CHECK: leal -8(%ebp), %esp
; CHECK-NEXT: popl
; CHECK-NEXT: popl
-;
-; Finally we need to restore %esp from %ebp due to dynamic stack
-; realignment.
-; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/pr11468.ll b/test/CodeGen/X86/pr11468.ll
new file mode 100644
index 0000000000..f7e9adb4a2
--- /dev/null
+++ b/test/CodeGen/X86/pr11468.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; PR11468
+
+define void @f(i64 %sz) uwtable {
+entry:
+ %a = alloca i32, align 32
+ store volatile i32 0, i32* %a, align 32
+ ; force r14 to be pushed onto the stack
+ call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
+ ret void
+
+; CHECK: _f
+; CHECK: pushq %rbp
+; CHECK: .cfi_offset %rbp, -16
+; CHECK: movq %rsp, %rbp
+; CHECK: .cfi_def_cfa_register %rbp
+
+; We first push the register onto the stack and then realign the stack,
+; so that the .cfi_offset value is correct
+; CHECK: pushq %r14
+; CHECK: andq $-32, %rsp
+; CHECK: .cfi_offset %r14, -24
+
+; Restore %rsp from %rbp and subtract the total size of saved registers.
+; CHECK: leaq -8(%rbp), %rsp
+
+; Pop saved registers.
+; CHECK: popq %r14
+; CHECK: popq %rbp
+}
+
+!0 = metadata !{i32 125}
+