author    Chad Rosier <mcrosier@apple.com>    2012-11-26 22:55:05 +0000
committer Chad Rosier <mcrosier@apple.com>    2012-11-26 22:55:05 +0000
commit    1243922fc1a1e3d2681ed9e78503eeabd875ba93 (patch)
tree      e10f57435c2c026887a1f67c516df20c98a83ddb
parent    bb8ddc7e4ff1f06c7289865bbf9e1fa4632d1c8e (diff)
Remove the X86 Maximal Stack Alignment Check pass as it is no longer necessary.
This pass was conservative in that it always reserved the frame pointer to enable dynamic stack realignment, which allowed the register allocator (RA) to use aligned spills for vector registers even when spills were not necessary. The RA has since been improved to use unaligned spills when necessary. The new behavior is to realign the stack if the frame pointer is already reserved for some other reason, but not to reserve the frame pointer just because a function contains vector virtual registers.

Part of rdar://12719844

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168627 91177308-0d34-0410-b5e6-96231b3b80d8
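For illustration only, here is a minimal standalone sketch of the old heuristic next to the new policy the message describes. The types and names are hypothetical stand-ins, not the actual LLVM API; the real removed pass appears verbatim in the X86RegisterInfo.cpp hunk below.

    #include <vector>

    // Hypothetical stand-ins for the real LLVM types.
    struct RegClass { unsigned Alignment; };
    struct FunctionState {
      std::vector<RegClass> VRegClasses;  // register class of each virtual reg
      unsigned StackAlignment;            // alignment the ABI already guarantees
      bool FramePointerReserved;          // FP reserved for some other reason
    };

    // Old behavior (the removed pass): reserve the frame pointer whenever any
    // virtual register's class needs more alignment than the stack guarantees,
    // so its spill slots could be realigned and accessed with aligned moves.
    bool oldForceFramePointer(const FunctionState &F) {
      for (const RegClass &RC : F.VRegClasses)
        if (RC.Alignment > F.StackAlignment)
          return true;
      return false;
    }

    // New behavior: never reserve the frame pointer just because vector virtual
    // registers exist; realign only if the FP is already reserved anyway, and
    // otherwise let the register allocator emit unaligned spills (vmovups).
    bool newRealignStack(const FunctionState &F) {
      return F.FramePointerReserved;
    }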
-rw-r--r--  lib/Target/X86/X86.h                      |  6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp        | 43
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp       |  1
-rw-r--r--  test/CodeGen/X86/avx-intel-ocl.ll         | 32
-rw-r--r--  test/CodeGen/X86/dynamic-allocas-VLAs.ll  | 12
5 files changed, 19 insertions(+), 75 deletions(-)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index dce5b4d2b0..1e7b98d94f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -63,12 +63,6 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass
-/// which determines whether the frame pointer register should be
-/// reserved in case dynamic stack alignment is later required.
-///
-FunctionPass *createX86MaxStackAlignmentHeuristicPass();
-
} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 11f2d7aa70..48f5ae176a 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -789,46 +789,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
}
}
}
-
-namespace {
- struct MSAH : public MachineFunctionPass {
- static char ID;
- MSAH() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- const X86TargetMachine *TM =
- static_cast<const X86TargetMachine *>(&MF.getTarget());
- const TargetFrameLowering *TFI = TM->getFrameLowering();
- MachineRegisterInfo &RI = MF.getRegInfo();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = TFI->getStackAlignment();
-
- // Be over-conservative: scan over all vreg defs and find whether vector
- // registers are used. If yes, there is a possibility that vector register
- // will be spilled and thus require dynamic stack realignment.
- for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
- FuncInfo->setForceFramePointer(true);
- return true;
- }
- }
- // Nothing to do
- return false;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Check";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAH::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 158f9dc066..7ee9408f8b 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -171,7 +171,6 @@ bool X86PassConfig::addInstSelector() {
}
bool X86PassConfig::addPreRegAlloc() {
- addPass(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 4b7835b190..0fec9658d6 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -87,23 +87,23 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
-; X64: vmovaps {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rbp) ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
; X64: call
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
-; X64: vmovaps {{.*}}(%rbp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
ret <16 x float> %c
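The FileCheck changes above swap vmovaps for vmovups and %rbp-relative for %rsp-relative addressing. As a rough illustration of why that is safe (my own example, not from the patch): the aligned AVX move faults on a misaligned address, while the unaligned form accepts any address, so spill slots no longer require a realigned frame.

    #include <immintrin.h>

    // _mm256_load_ps compiles to vmovaps and raises #GP if p is not 32-byte
    // aligned; _mm256_loadu_ps compiles to vmovups and works for any p. Using
    // the unaligned form for spills removes the need to realign the stack.
    __m256 load_any(const float *p) {
      return _mm256_loadu_ps(p);   // safe regardless of p's alignment
    }

    __m256 load_aligned_only(const float *p) {
      return _mm256_load_ps(p);    // p must be 32-byte aligned
    }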
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index c5e47facf3..9405f76cbe 100644
--- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -103,7 +103,7 @@ entry:
declare void @t4_helper(i32*, i32*, <8 x float>*)
-; Dynamic realignment + Spill
+; Spilling an AVX register shouldn't cause dynamic realignment
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
%a = alloca i32, align 4
@@ -116,21 +116,15 @@ entry:
ret i32 %add
; CHECK: _t5
-; CHECK: pushq %rbp
-; CHECK: movq %rsp, %rbp
-; CHECK: andq $-32, %rsp
; CHECK: subq ${{[0-9]+}}, %rsp
;
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
-; CHECK: vmovaps [[AVXREG]], (%rsp)
+; CHECK: vmovups [[AVXREG]], (%rsp)
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK: callq _t5_helper1
-; CHECK: vmovaps (%rsp), %ymm0
+; CHECK: vmovups (%rsp), %ymm0
; CHECK: callq _t5_helper2
; CHECK: movl {{[0-9]+}}(%rsp), %eax
-;
-; CHECK: movq %rbp, %rsp
-; CHECK: popq %rbp
}
declare void @t5_helper1(i32*)
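A closing note on the t5 test: the deleted CHECK lines are exactly the dynamic-realignment prologue and epilogue (pushq %rbp; movq %rsp, %rbp; andq $-32, %rsp ... movq %rbp, %rsp; popq %rbp). The sketch below is my own example, not from the test suite; it shows a case that still legitimately triggers realignment, for contrast with a plain AVX spill, which after this patch no longer does.

    #include <immintrin.h>

    void helper(float *p);

    // A local whose address is handed out with an explicit 32-byte alignment
    // requirement must actually be aligned, so the compiler still emits the
    // realignment prologue for it. A __m256 merely spilled across a call, by
    // contrast, is now stored with an unaligned move off %rsp instead.
    void still_realigns(float *f) {
      alignas(32) float buf[8];   // address escapes, so it must be aligned
      helper(buf);
    }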