summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNAKAMURA Takumi <geek4civic@gmail.com>2014-06-25 12:41:52 +0000
committerNAKAMURA Takumi <geek4civic@gmail.com>2014-06-25 12:41:52 +0000
commitb720a3d15c0bcd9353237427e1da2e2fee1cb516 (patch)
tree99c46b916e82d9c5f0be01510c8d368d331bbef6
parent2fc4d9923f951f331a84f7da0579ceb2cf42c472 (diff)
downloadllvm-b720a3d15c0bcd9353237427e1da2e2fee1cb516.tar.gz
llvm-b720a3d15c0bcd9353237427e1da2e2fee1cb516.tar.bz2
llvm-b720a3d15c0bcd9353237427e1da2e2fee1cb516.tar.xz
Re-apply r211399, "Generate native unwind info on Win64" with a fix to ignore SEH pseudo ops in X86 JIT emitter.
-- This patch enables LLVM to emit Win64-native unwind info rather than DWARF CFI. It handles all corner cases (I hope), including stack realignment. Because the unwind info is not flexible enough to describe stack frames with a gap of unknown size in the middle, such as the one caused by stack realignment, I modified register spilling code to place all spills into the fixed frame slots, so that they can be accessed relative to the frame pointer. Patch by Vadim Chugunov! Reviewed By: rnk Differential Revision: http://reviews.llvm.org/D4081 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211691 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h3
-rw-r--r--include/llvm/MC/MCAsmInfo.h1
-rw-r--r--include/llvm/Target/TargetFrameLowering.h13
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp13
-rw-r--r--lib/CodeGen/MachineFunction.cpp15
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp87
-rw-r--r--lib/MC/MCObjectFileInfo.cpp15
-rw-r--r--lib/MC/MCStreamer.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp12
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp10
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp349
-rw-r--r--lib/Target/X86/X86FrameLowering.h9
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp5
-rw-r--r--lib/Target/X86/X86InstrCompiler.td20
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp37
-rw-r--r--test/CodeGen/X86/2007-05-05-Personality.ll6
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll4
-rw-r--r--test/CodeGen/X86/avx-intel-ocl.ll62
-rw-r--r--test/CodeGen/X86/gcc_except_table.ll10
-rw-r--r--test/CodeGen/X86/win64_eh.ll170
20 files changed, 636 insertions, 207 deletions
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index bd0ea11911..c51f8fe03b 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -484,6 +484,9 @@ public:
///
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
+ /// CreateFixedSpillStackObject - Create a spill slot at a fixed location
+ /// on the stack. Returns an index with a negative value.
+ int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset);
/// isFixedObjectIndex - Returns true if the specified index corresponds to a
/// fixed stack object.
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 4c0624521d..764c1cb8ce 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -481,6 +481,7 @@ public:
bool isExceptionHandlingDwarf() const {
return (ExceptionsType == ExceptionHandling::DwarfCFI ||
ExceptionsType == ExceptionHandling::ARM ||
+ // Win64 handler data still uses DWARF LSDA encoding.
ExceptionsType == ExceptionHandling::Win64);
}
bool doesDwarfUseRelocationsAcrossSections() const {
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index 7c42e23a11..bfddd06017 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -93,6 +93,19 @@ public:
/// stack pointer.
virtual bool isFPCloseToIncomingSP() const { return true; }
+ /// assignCalleeSavedSpillSlots - Allows target to override spill slot
+ /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should
+ /// assign frame slots to all CSI entries and return true. If this method
+ /// returns false, spill slots will be assigned using generic implementation.
+ /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of
+ /// CSI.
+ virtual bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ return false;
+ }
+
/// getCalleeSavedSpillSlots - This method returns a pointer to an array of
/// pairs, that contains an entry for each callee saved register that must be
/// spilled to a particular stack location if it is spilled.
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 4768a43e9a..89bf89fdd7 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -77,9 +77,9 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality)
return;
- MCSymbol *GCCHandlerSym =
- Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
- Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
+ Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, true, true);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
Asm->getFunctionNumber()));
@@ -98,15 +98,8 @@ void Win64Exception::endFunction(const MachineFunction *) {
MMI->TidyLandingPads();
if (shouldEmitPersonality) {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
- const MCSymbol *Sym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
-
Asm->OutStreamer.PushSection();
Asm->OutStreamer.EmitWin64EHHandlerData();
- Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
- 4);
emitExceptionTable();
Asm->OutStreamer.PopSection();
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index d432f3deb0..6138aef4ad 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -578,6 +578,21 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
return -++NumFixedObjects;
}
+/// CreateFixedSpillStackObject - Create a spill slot at a fixed location
+/// on the stack. Returns an index with a negative value.
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+ int64_t SPOffset) {
+ unsigned StackAlign = getFrameLowering()->getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+ !RealignOption,
+ Align, getFrameLowering()->getStackAlignment());
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
+ /*Immutable*/ true,
+ /*isSS*/ true,
+ /*Alloca*/ nullptr));
+ return -++NumFixedObjects;
+}
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 1ba2c7418f..b98d210e9d 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
}
}
- if (CSI.empty())
- return; // Early exit if no callee saved registers are modified!
-
- unsigned NumFixedSpillSlots;
- const TargetFrameLowering::SpillSlot *FixedSpillSlots =
- TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+ // If target doesn't implement this, use generic code.
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+ I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
- // Now that we know which registers need to be saved and restored, allocate
- // stack slots for them.
- for (std::vector<CalleeSavedInfo>::iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ }
- int FrameIdx;
- if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
I->setFrameIdx(FrameIdx);
- continue;
}
-
- // Check to see if this physreg must be spilled to a particular stack slot
- // on this target.
- const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
- while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
- FixedSlot->Reg != Reg)
- ++FixedSlot;
-
- if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
- // Nope, just spill it anywhere convenient.
- unsigned Align = RC->getAlignment();
- unsigned StackAlign = TFI->getStackAlignment();
-
- // We may not be able to satisfy the desired alignment specification of
- // the TargetRegisterClass if the stack alignment is smaller. Use the
- // min.
- Align = std::min(Align, StackAlign);
- FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
- if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
- if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
- } else {
- // Spill it to the stack where we must.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
- }
-
- I->setFrameIdx(FrameIdx);
}
MFI->setCalleeSavedInfo(CSI);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index b602656e0e..d490ef30b6 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -649,11 +649,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// though it contains relocatable pointers. In PIC mode, this is probably a
// big runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
- LSDASection =
- Ctx->getCOFFSection(".gcc_except_table",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ assert(T.isOSWindows() && "Windows is the only supported COFF target");
+ if (T.getArch() == Triple::x86_64) {
+ // On Windows 64 with SEH, the LSDA is emitted into the .xdata section
+ LSDASection = 0;
+ } else {
+ LSDASection = Ctx->getCOFFSection(".gcc_except_table",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ }
// Debug info.
COFFDebugSymbolsSection =
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 08f8066652..8e675e5f4e 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -504,6 +504,8 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
+ if (Offset > 240)
+ report_fatal_error("Frame offset must be less than or equal to 240!");
MCSymbol *Label = getContext().CreateTempSymbol();
MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset);
EmitLabel(Label);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 3f80343f01..d2e8caf728 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -143,8 +143,11 @@ getNonexecutableStackSection(MCContext &Ctx) const {
void X86MCAsmInfoMicrosoft::anchor() { }
X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
+ PointerSize = 8;
+ ExceptionsType = ExceptionHandling::Win64;
+ }
AssemblerDialect = AsmWriterFlavor;
@@ -158,17 +161,18 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
void X86MCAsmInfoGNUCOFF::anchor() { }
X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
+ assert(Triple.isOSWindows() && "Windows is the only supported COFF target");
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PointerSize = 8;
+ ExceptionsType = ExceptionHandling::Win64;
+ } else {
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
- // Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfCFI;
-
UseIntegratedAssembler = true;
}
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index b275a9cc3e..a3ae7ee315 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1131,6 +1131,16 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
break;
+
+ case X86::SEH_PushReg:
+ case X86::SEH_SaveReg:
+ case X86::SEH_SaveXMM:
+ case X86::SEH_StackAlloc:
+ case X86::SEH_SetFrame:
+ case X86::SEH_PushFrame:
+ case X86::SEH_EndPrologue:
+ break;
+
case X86::MOVPC32r: {
// This emits the "call" portion of this pseudo instruction.
MCE.emitByte(BaseOpcode);
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index acdede753d..886ee1ef0e 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -305,9 +306,10 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
-void X86FrameLowering::emitCalleeSavedFrameMoves(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned FramePtr) const {
+void
+X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
@@ -318,53 +320,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
if (CSI.empty()) return;
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth = -RegInfo->getSlotSize();
-
- // FIXME: This is dirty hack. The code itself is pretty mess right now.
- // It should be rewritten from scratch and generalized sometimes.
-
- // Determine maximum offset (minimum due to stack growth).
- int64_t MaxOffset = 0;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I)
- MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(I->getFrameIdx()));
-
// Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
- Offset = MaxOffset - Offset + saveAreaOffset;
-
- // Don't output a new machine move if we're re-saving the frame
- // pointer. This happens when the PrologEpilogInserter has inserted an extra
- // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
- // generates one when frame pointers are used. If we generate a "machine
- // move" for this extra "PUSH", the linker will lose track of the fact that
- // the frame pointer should have the value of the first "PUSH" when it's
- // trying to unwind.
- //
- // FIXME: This looks inelegant. It's possibly correct, but it's covering up
- // another bug. I.e., one where we generate a prolog like this:
- //
- // pushl %ebp
- // movl %esp, %ebp
- // pushl %ebp
- // pushl %esi
- // ...
- //
- // The immediate re-push of EBP is unnecessary. At the least, it's an
- // optimization bug. EBP can be used as a scratch register in certain
- // cases, but probably not when we have a frame pointer.
- if (HasFP && FramePtr == Reg)
- continue;
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex =
@@ -396,6 +356,84 @@ static bool usesTheStack(const MachineFunction &MF) {
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
+
+/*
+ Here's a gist of what gets emitted:
+
+ ; Establish frame pointer, if needed
+ [if needs FP]
+ push %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ .seh_pushreg %rpb
+ mov %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+
+ ; Spill general-purpose registers
+ [for all callee-saved GPRs]
+ pushq %<reg>
+ [if not needs FP]
+ .cfi_def_cfa_offset (offset from RETADDR)
+ .seh_pushreg %<reg>
+
+ ; If the required stack alignment > default stack alignment
+ ; rsp needs to be re-aligned. This creates a "re-alignment gap"
+ ; of unknown size in the stack frame.
+ [if stack needs re-alignment]
+ and $MASK, %rsp
+
+ ; Allocate space for locals
+ [if target is Windows and allocated space > 4096 bytes]
+ ; Windows needs special care for allocations larger
+ ; than one page.
+ mov $NNN, %rax
+ call ___chkstk_ms/___chkstk
+ sub %rax, %rsp
+ [else]
+ sub $NNN, %rsp
+
+ [if needs FP]
+ .seh_stackalloc (size of XMM spill slots)
+ .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
+ [else]
+ .seh_stackalloc NNN
+
+ ; Spill XMMs
+ ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
+ ; they may get spilled on any platform, if the current function
+ ; calls @llvm.eh.unwind.init
+ [if needs FP]
+ [for all callee-saved XMM registers]
+ movaps %<xmm reg>, -MMM(%rbp)
+ [for all callee-saved XMM registers]
+ .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
+ ; i.e. the offset relative to (%rbp - SEHFrameOffset)
+ [else]
+ [for all callee-saved XMM registers]
+ movaps %<xmm reg>, KKK(%rsp)
+ [for all callee-saved XMM registers]
+ .seh_savexmm %<xmm reg>, KKK
+
+ .seh_endprologue
+
+ [if needs base pointer]
+ mov %rsp, %rbx
+
+ ; Emit CFI info
+ [if needs FP]
+ [for all callee-saved registers]
+ .cfi_offset %<reg>, (offset from %rbp)
+ [else]
+ .cfi_def_cfa_offset (offset from RETADDR)
+ [for all callee-saved registers]
+ .cfi_offset %<reg>, (offset from %rsp)
+
+ Notes:
+ - .seh directives are emitted only for Windows 64 ABI
+ - .cfi directives are emitted for all other ABIs
+ - for 32-bit code, substitute %e?? registers for %r??
+*/
+
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -406,8 +444,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- Fn->needsUnwindTableEntry();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
@@ -415,6 +451,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
bool IsWin64 = STI.isTargetWin64();
+ bool IsSEH =
+ MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::Win64; // Not necessarily synonymous with IsWin64.
+ bool NeedsWin64SEH = IsSEH && Fn->needsUnwindTableEntry();
+ bool NeedsDwarfCFI =
+ !IsSEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
@@ -512,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(FramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
- if (needsFrameMoves) {
+ if (NeedsDwarfCFI) {
// Mark the place where EBP/RBP was saved.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
@@ -530,13 +572,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
+ if (NeedsWin64SEH) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+ .addImm(FramePtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
- if (needsFrameMoves) {
+ if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
@@ -546,9 +594,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I)
+ // Mark the FramePtr as live-in in every block.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
I->addLiveIn(FramePtr);
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
@@ -562,10 +609,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
- MBBI->setFlag(MachineInstr::FrameSetup);
+ unsigned Reg = MBBI->getOperand(0).getReg();
++MBBI;
- if (!HasFP && needsFrameMoves) {
+ if (!HasFP && NeedsDwarfCFI) {
// Mark callee-saved push instruction.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
@@ -575,16 +622,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
StackOffset += stackGrowth;
}
+
+ if (NeedsWin64SEH) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
+ MachineInstr::FrameSetup);
+ }
}
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
-
- // NOTE: We push the registers before realigning the stack, so
- // vector callee-saved (xmm) registers may be saved w/o proper
- // alignment in this way. However, currently these regs are saved in
- // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
- // this shouldn't be a problem.
if (RegInfo->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
MachineInstr *MI =
@@ -683,23 +729,85 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
- } else if (NumBytes)
+ } else if (NumBytes) {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
+ }
+
+ int SEHFrameOffset = 0;
+ if (NeedsWin64SEH) {
+ if (HasFP) {
+ // We need to set frame base offset low enough such that all saved
+ // register offsets would be positive relative to it, but we can't
+ // just use NumBytes, because .seh_setframe offset must be <=240.
+ // So we pretend to have only allocated enough space to spill the
+ // non-volatile registers.
+ // We don't care about the rest of stack allocation, because unwinder
+ // will restore SP to (BP - SEHFrameOffset)
+ for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
+ int offset = MFI->getObjectOffset(Info.getFrameIdx());
+ SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+ }
+ SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant
+
+ // This only needs to account for XMM spill slots, GPR slots
+ // are covered by .seh_pushreg's emitted above.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(SEHFrameOffset - X86FI->getCalleeSavedFrameSize())
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
+ .addImm(FramePtr)
+ .addImm(SEHFrameOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ // SP will be the base register for restoring XMMs
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
+ // Skip the rest of register spilling code
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ // Emit SEH info for non-GPRs
+ if (NeedsWin64SEH) {
+ for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
+ unsigned Reg = Info.getReg();
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
+ assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
+
+ int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
+ Offset += SEHFrameOffset;
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
+ .addImm(Reg)
+ .addImm(Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF)) {
- // Update the frame pointer with the current stack pointer.
+ // Update the base pointer with the current stack pointer.
unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
// Mark end of stack pointer adjustment.
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -714,7 +822,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Emit DWARF info specifying the offsets of the callee-saved registers.
if (PushedRegs)
- emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr);
+ emitCalleeSavedFrameMoves(MBB, MBBI, DL);
}
}
@@ -974,24 +1082,79 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return getFrameIndexOffset(MF, FI);
}
-bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
+bool X86FrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ unsigned SlotSize = RegInfo->getSlotSize();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
+ unsigned CalleeSavedFrameSize = 0;
+ int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
+
+ if (hasFP(MF)) {
+ // emitPrologue always spills frame register the first thing.
+ SpillSlotOffset -= SlotSize;
+ MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+
+ // Since emitPrologue and emitEpilogue will handle spilling and restoring of
+ // the frame register, we can delete it from CSI list and not have to worry
+ // about avoiding it later.
+ unsigned FPReg = RegInfo->getFrameRegister(MF);
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ if (CSI[i].getReg() == FPReg) {
+ CSI.erase(CSI.begin() + i);
+ break;
+ }
+ }
+ }
+
+ // Assign slots for GPRs. It increases frame size.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
+ continue;
+
+ SpillSlotOffset -= SlotSize;
+ CalleeSavedFrameSize += SlotSize;
+
+ int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+
+ // Assign slots for XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ // ensure alignment
+ SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+ // spill into slot
+ SpillSlotOffset -= RC->getSize();
+ int SlotIndex =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ MFI->ensureMaxAlignment(RC->getAlignment());
+ }
+ return true;
+}
+
+bool X86FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
- const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
- unsigned SlotSize = RegInfo->getSlotSize();
- unsigned FPReg = TRI->getFrameRegister(MF);
- unsigned CalleeFrameSize = 0;
-
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Push GPRs. It increases frame size.
@@ -1003,19 +1166,13 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- if (Reg == FPReg)
- // X86RegisterInfo::emitPrologue will handle spilling of frame register.
- continue;
- CalleeFrameSize += SlotSize;
+
BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
}
- X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
-
// Make XMM regs spilled. X86 does not have ability of push/pop XMM.
// It can be done by spilling XMMs to stack frame.
- // Note that only Win64 ABI might spill XMMs.
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (X86::GR64RegClass.contains(Reg) ||
@@ -1027,6 +1184,9 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
TRI);
+ --MI;
+ MI->setFlag(MachineInstr::FrameSetup);
+ ++MI;
}
return true;
@@ -1057,16 +1217,13 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
// POP GPRs.
- unsigned FPReg = TRI->getFrameRegister(MF);
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
if (!X86::GR64RegClass.contains(Reg) &&
!X86::GR32RegClass.contains(Reg))
continue;
- if (Reg == FPReg)
- // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
- continue;
+
BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
}
return true;
@@ -1097,22 +1254,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
TailCallReturnAddrDelta - SlotSize, true);
}
- if (hasFP(MF)) {
- assert((TailCallReturnAddrDelta <= 0) &&
- "The Delta should always be zero or negative");
- const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
-
- // Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MFI->CreateFixedObject(SlotSize,
- -(int)SlotSize +
- TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta,
- true);
- assert(FrameIdx == MFI->getObjectIndexBegin() &&
- "Slot for EBP register must be last in order to be found!");
- (void)FrameIdx;
- }
-
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 5c43c1488d..5ad3d4dc2e 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -27,8 +27,8 @@ public:
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned FramePtr) const;
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -42,6 +42,11 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 714fc0ccbd..4105b7377c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -605,9 +605,8 @@ void X86TargetLowering::resetOperationActions() {
}
// FIXME - use subtarget debug flags
- if (!Subtarget->isTargetDarwin() &&
- !Subtarget->isTargetELF() &&
- !Subtarget->isTargetCygMing()) {
+ if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 496a9cb761..77a876df02 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -197,6 +197,26 @@ let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
}
//===----------------------------------------------------------------------===//
+// Pseudo instructions used by unwind info.
+//
+let isPseudo = 1 in {
+ def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
+ "#SEH_PushReg $reg", []>;
+ def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+ "#SEH_SaveReg $reg, $dst", []>;
+ def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+ "#SEH_SaveXMM $reg, $dst", []>;
+ def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
+ "#SEH_StackAlloc $size", []>;
+ def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
+ "#SEH_SetFrame $reg, $offset", []>;
+ def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
+ "#SEH_PushFrame $mode", []>;
+ def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
+ "#SEH_EndPrologue", []>;
+}
+
+//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 0190080b93..3314c641c9 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -779,6 +780,9 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -883,6 +887,39 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(X86::R10)
.addReg(X86::RAX));
return;
+
+ case X86::SEH_PushReg:
+ OutStreamer.EmitWin64EHPushReg(
+ RI->getSEHRegNum(MI->getOperand(0).getImm()));
+ return;
+
+ case X86::SEH_SaveReg:
+ OutStreamer.EmitWin64EHSaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_SaveXMM:
+ OutStreamer.EmitWin64EHSaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_StackAlloc:
+ OutStreamer.EmitWin64EHAllocStack(MI->getOperand(0).getImm());
+ return;
+
+ case X86::SEH_SetFrame:
+ OutStreamer.EmitWin64EHSetFrame(
+ RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_PushFrame:
+ OutStreamer.EmitWin64EHPushFrame(MI->getOperand(0).getImm());
+ return;
+
+ case X86::SEH_EndPrologue:
+ OutStreamer.EmitWin64EHEndProlog();
+ return;
}
MCInst TmpInst;
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index 5b8fe72b5d..b99c58c6e4 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -1,12 +1,14 @@
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN
; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN
; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64
; LIN: .cfi_personality 0, __gnat_eh_personality
; LIN: .cfi_lsda 0, .Lexception0
; WIN: .cfi_personality 0, ___gnat_eh_personality
; WIN: .cfi_lsda 0, Lexception0
+; WIN64: .seh_handler __gnat_eh_personality
+; WIN64: .seh_handlerdata
@error = external global i8
@@ -15,7 +17,7 @@ entry:
invoke void @raise()
to label %eh_then unwind label %unwind
-unwind: ; preds = %entry
+unwind: ; preds = %entry
%eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
catch i8* @error
%eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 1259cf47b2..dfb98bb1ab 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
; CHECK: subq $40, %rsp
-; CHECK: movaps %xmm8, (%rsp)
-; CHECK: movaps %xmm7, 16(%rsp)
+; CHECK: movaps %xmm8, 16(%rsp)
+; CHECK: movaps %xmm7, (%rsp)
define i32 @a() nounwind {
entry:
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 7337815a39..3e051bff76 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -7,21 +7,21 @@ declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
declare i32 @func_int(i32, i32)
-; WIN64: testf16_inp
+; WIN64-LABEL: testf16_inp
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
; WIN64: leaq {{.*}}(%rsp), %rcx
; WIN64: call
; WIN64: ret
-; X32: testf16_inp
+; X32-LABEL: testf16_inp
; X32: movl %eax, (%esp)
; X32: vaddps {{.*}}, {{%ymm[0-1]}}
; X32: vaddps {{.*}}, {{%ymm[0-1]}}
; X32: call
; X32: ret
-; X64: testf16_inp
+; X64-LABEL: testf16_inp
; X64: vaddps {{.*}}, {{%ymm[0-1]}}
; X64: vaddps {{.*}}, {{%ymm[0-1]}}
; X64: leaq {{.*}}(%rsp), %rdi
@@ -41,14 +41,14 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
;test calling conventions - preserved registers
; preserved ymm6-ymm15
-; WIN64: testf16_regs
+; WIN64-LABEL: testf16_regs
; WIN64: call
; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; WIN64: ret
; preserved ymm8-ymm15
-; X64: testf16_regs
+; X64-LABEL: testf16_regs
; X64: call
; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
@@ -65,28 +65,30 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
}
; test calling conventions - prolog and epilog
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill
+; WIN64-LABEL: test_prolog_epilog
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill
; WIN64: call
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
-
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload
+
+; X64-LABEL: test_prolog_epilog
; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill
@@ -111,12 +113,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
; test functions with integer parameters
; pass parameters on stack for 32-bit platform
+; X32-LABEL: test_int
; X32: movl {{.*}}, 4(%esp)
; X32: movl {{.*}}, (%esp)
; X32: call
; X32: addl {{.*}}, %eax
; pass parameters in registers for 64-bit platform
+; X64-LABEL: test_int
; X64: leal {{.*}}, %edi
; X64: movl {{.*}}, %esi
; X64: call
@@ -128,21 +132,21 @@ define i32 @test_int(i32 %a, i32 %b) nounwind {
ret i32 %c
}
-; WIN64: test_float4
+; WIN64-LABEL: test_float4
; WIN64-NOT: vzeroupper
; WIN64: call
; WIN64-NOT: vzeroupper
; WIN64: call
; WIN64: ret
-; X64: test_float4
+; X64-LABEL: test_float4
; X64-NOT: vzeroupper
; X64: call
; X64-NOT: vzeroupper
; X64: call
; X64: ret
-; X32: test_float4
+; X32-LABEL: test_float4
; X32: vzeroupper
; X32: call
; X32: vzeroupper
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index 8c328ec58f..a732eb1efb 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -13,14 +13,14 @@ define i32 @main() uwtable optsize ssp {
; APPLE: GCC_except_table0:
; APPLE: Lexception0:
-; MINGW64: .cfi_startproc
-; MINGW64: .cfi_personality 0, __gxx_personality_v0
-; MINGW64: .cfi_lsda 0, .Lexception0
-; MINGW64: .cfi_def_cfa_offset 16
+; MINGW64: .seh_proc
+; MINGW64: .seh_handler __gxx_personality_v0
+; MINGW64: .seh_setframe 5, 0
; MINGW64: callq _Unwind_Resume
-; MINGW64: .cfi_endproc
+; MINGW64: .seh_handlerdata
; MINGW64: GCC_except_table0:
; MINGW64: Lexception0:
+; MINGW64: .seh_endproc
; MINGW32: .cfi_startproc
; MINGW32: .cfi_personality 0, ___gxx_personality_v0
diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll
new file mode 100644
index 0000000000..f1f874eb2f
--- /dev/null
+++ b/test/CodeGen/X86/win64_eh.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
+
+; Check function without prolog
+define void @foo0() uwtable {
+entry:
+ ret void
+}
+; WIN64-LABEL: foo0:
+; WIN64: .seh_proc foo0
+; WIN64: .seh_endprologue
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a small stack allocation
+define void @foo1() uwtable {
+entry:
+ %baz = alloca [2000 x i16], align 2
+ ret void
+}
+; WIN64-LABEL: foo1:
+; WIN64: .seh_proc foo1
+; WIN64: subq $4000, %rsp
+; WIN64: .seh_stackalloc 4000
+; WIN64: .seh_endprologue
+; WIN64: addq $4000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a stack allocation requiring call to __chkstk/___chkstk_ms
+define void @foo2() uwtable {
+entry:
+ %baz = alloca [4000 x i16], align 2
+ ret void
+}
+; WIN64-LABEL: foo2:
+; WIN64: .seh_proc foo2
+; WIN64: movabsq $8000, %rax
+; WIN64: callq {{__chkstk|___chkstk_ms}}
+; WIN64: subq %rax, %rsp
+; WIN64: .seh_stackalloc 8000
+; WIN64: .seh_endprologue
+; WIN64: addq $8000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Checks stack push
+define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
+entry:
+ %a = alloca i32
+ %b = alloca i32
+ %c = alloca i32
+ %d = alloca i32
+ %e = alloca i32
+ %f = alloca i32
+ store i32 %a_arg, i32* %a
+ store i32 %b_arg, i32* %b
+ store i32 %c_arg, i32* %c
+ store i32 %d_arg, i32* %d
+ store i32 %e_arg, i32* %e
+ store i32 %f_arg, i32* %f
+ %tmp = load i32* %a
+ %tmp1 = mul i32 %tmp, 2
+ %tmp2 = load i32* %b
+ %tmp3 = mul i32 %tmp2, 3
+ %tmp4 = add i32 %tmp1, %tmp3
+ %tmp5 = load i32* %c
+ %tmp6 = mul i32 %tmp5, 5
+ %tmp7 = add i32 %tmp4, %tmp6
+ %tmp8 = load i32* %d
+ %tmp9 = mul i32 %tmp8, 7
+ %tmp10 = add i32 %tmp7, %tmp9
+ %tmp11 = load i32* %e
+ %tmp12 = mul i32 %tmp11, 11
+ %tmp13 = add i32 %tmp10, %tmp12
+ %tmp14 = load i32* %f
+ %tmp15 = mul i32 %tmp14, 13
+ %tmp16 = add i32 %tmp13, %tmp15
+ ret i32 %tmp16
+}
+; WIN64-LABEL: foo3:
+; WIN64: .seh_proc foo3
+; WIN64: pushq %rsi
+; WIN64: .seh_pushreg 6
+; WIN64: subq $24, %rsp
+; WIN64: .seh_stackalloc 24
+; WIN64: .seh_endprologue
+; WIN64: addq $24, %rsp
+; WIN64: popq %rsi
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Check emission of eh handler and handler data
+declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*)
+declare void @_d_eh_resume_unwind(i8*)
+
+declare i32 @bar()
+
+define i32 @foo4() #0 {
+entry:
+ %step = alloca i32, align 4
+ store i32 0, i32* %step
+ %tmp = load i32* %step
+
+ %tmp1 = invoke i32 @bar()
+ to label %finally unwind label %landingpad
+
+finally:
+ store i32 1, i32* %step
+ br label %endtryfinally
+
+landingpad:
+ %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality
+ cleanup
+ %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0
+ store i32 2, i32* %step
+ call void @_d_eh_resume_unwind(i8* %tmp3)
+ unreachable
+
+endtryfinally:
+ %tmp10 = load i32* %step
+ ret i32 %tmp10
+}
+; WIN64-LABEL: foo4:
+; WIN64: .seh_proc foo4
+; WIN64: .seh_handler _d_eh_personality, @unwind, @except
+; WIN64: subq $56, %rsp
+; WIN64: .seh_stackalloc 56
+; WIN64: .seh_endprologue
+; WIN64: addq $56, %rsp
+; WIN64: ret
+; WIN64: .seh_handlerdata
+; WIN64: .seh_endproc
+
+
+; Check stack re-alignment and xmm spilling
+define void @foo5() uwtable {
+entry:
+ %s = alloca i32, align 64
+ call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"()
+ ret void
+}
+; WIN64-LABEL: foo5:
+; WIN64: .seh_proc foo5
+; WIN64: pushq %rbp
+; WIN64: .seh_pushreg 5
+; WIN64: movq %rsp, %rbp
+; WIN64: pushq %rdi
+; WIN64: .seh_pushreg 7
+; WIN64: pushq %rbx
+; WIN64: .seh_pushreg 3
+; WIN64: andq $-64, %rsp
+; WIN64: subq $128, %rsp
+; WIN64: .seh_stackalloc 48
+; WIN64: .seh_setframe 5, 64
+; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill
+; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill
+; WIN64: .seh_savexmm 6, 16
+; WIN64: .seh_savexmm 7, 32
+; WIN64: .seh_endprologue
+; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload
+; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload
+; WIN64: leaq -16(%rbp), %rsp
+; WIN64: popq %rbx
+; WIN64: popq %rdi
+; WIN64: popq %rbp
+; WIN64: retq
+; WIN64: .seh_endproc