summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnton Korobeynikov <asl@math.spbu.ru>2009-08-03 08:12:53 +0000
committerAnton Korobeynikov <asl@math.spbu.ru>2009-08-03 08:12:53 +0000
commitcf6b739d3d4921dc9fc6908ec2009055c0927125 (patch)
treec369c882028da54d8252ec3e61309e1fe2e89018
parent3e4c41a84a2c0f055e2bfef48a66b5890bcfd5e5 (diff)
downloadllvm-cf6b739d3d4921dc9fc6908ec2009055c0927125.tar.gz
llvm-cf6b739d3d4921dc9fc6908ec2009055c0927125.tar.bz2
llvm-cf6b739d3d4921dc9fc6908ec2009055c0927125.tar.xz
Unbreak Win64 CC. Step one: honour register save area, fix some alignment and provide a different set of call-clobbered registers.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77962 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86CallingConv.td8
-rw-r--r--lib/Target/X86/X86CompilationCallback_Win64.asm56
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp5
-rw-r--r--lib/Target/X86/X86Instr64bit.td39
-rw-r--r--lib/Target/X86/X86InstrInfo.td2
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp36
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp4
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll2
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll6
9 files changed, 100 insertions, 58 deletions
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index e9fcbd5a48..25da8f8e41 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -201,8 +201,8 @@ def CC_X86_Win64_C : CallingConv<[
[XMM0, XMM1, XMM2, XMM3]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 16-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
@@ -211,8 +211,8 @@ def CC_X86_Win64_C : CallingConv<[
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
- // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
index 8002f98765..a11c5c3ce4 100644
--- a/lib/Target/X86/X86CompilationCallback_Win64.asm
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -15,52 +15,52 @@ extrn X86CompilationCallback2: PROC
.code
X86CompilationCallback proc
+ ; Save all int arg registers into register spill area.
+ mov [rsp+ 8], rcx
+ mov [rsp+16], rdx
+ mov [rsp+24], r8
+ mov [rsp+32], r9
+
push rbp
- ; Save RSP
+ ; Save RSP.
mov rbp, rsp
- ; Save all int arg registers
- push rcx
- push rdx
- push r8
- push r9
-
; Align stack on 16-byte boundary.
and rsp, -16
- ; Save all XMM arg registers
- sub rsp, 64
- movaps [rsp], xmm0
- movaps [rsp+16], xmm1
- movaps [rsp+32], xmm2
- movaps [rsp+48], xmm3
+ ; Save all XMM arg registers. Also allocate reg spill area.
+ sub rsp, 96
+ movaps [rsp +32], xmm0
+ movaps [rsp+16+32], xmm1
+ movaps [rsp+32+32], xmm2
+ movaps [rsp+48+32], xmm3
; JIT callee
- ; Pass prev frame and return address
+ ; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
call X86CompilationCallback2
- ; Restore all XMM arg registers
- movaps xmm3, [rsp+48]
- movaps xmm2, [rsp+32]
- movaps xmm1, [rsp+16]
- movaps xmm0, [rsp]
+ ; Restore all XMM arg registers.
+ movaps xmm3, [rsp+48+32]
+ movaps xmm2, [rsp+32+32]
+ movaps xmm1, [rsp+16+32]
+ movaps xmm0, [rsp +32]
- ; Restore RSP
+ ; Restore RSP.
mov rsp, rbp
- ; Restore all int arg registers
- sub rsp, 32
- pop r9
- pop r8
- pop rdx
- pop rcx
-
- ; Restore RBP
+ ; Restore RBP.
pop rbp
+
+ ; Restore all int arg registers.
+ mov r9, [rsp+32]
+ mov r8, [rsp+24]
+ mov rdx, [rsp+16]
+ mov rcx, [rsp+ 8]
+
ret
X86CompilationCallback endp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 36ea94b71e..b151349688 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1228,7 +1228,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
MVT::v2i64, InFlag).getValue(1);
Val = Chain.getValue(0);
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
- Val, DAG.getConstant(0, MVT::i64));
+ Val, DAG.getConstant(0, MVT::i64));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
MVT::i64, InFlag).getValue(1);
@@ -1628,8 +1628,9 @@ X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
const CCValAssign &VA,
SDValue Chain,
SDValue Arg, ISD::ArgFlagsTy Flags) {
+ const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
DebugLoc dl = TheCall->getDebugLoc();
- unsigned LocMemOffset = VA.getLocMemOffset();
+ unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal()) {
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 5935745777..c8712d4a48 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -128,13 +128,37 @@ let isCall = 1 in
def CALL64pcrel32 : Ii32<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NotWin64]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call\t{*}$dst", [(X86call GR64:$dst)]>;
+ "call\t{*}$dst", [(X86call GR64:$dst)]>,
+ Requires<[NotWin64]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+ "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+ Requires<[NotWin64]>;
}
+ // FIXME: We need to teach codegen about single list of call-clobbered registers.
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+ Uses = [RSP] in {
+ def WINCALL64pcrel32 : I<0xE8, RawFrm,
+ (outs), (ins i64i32imm:$dst, variable_ops),
+ "call\t${dst:call}", [(X86call imm:$dst)]>,
+ Requires<[IsWin64]>;
+ def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call\t{*}$dst",
+ [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+ def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call\t{*}$dst",
+ [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>;
+ }
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@@ -1495,9 +1519,14 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>;
+ (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>;
+ (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
(CALL64pcrel32 tglobaladdr:$dst)>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 02ddf2583e..7fb78f57b9 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -254,6 +254,8 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 3fa53733c0..5817bf0ac6 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -484,15 +484,18 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
int
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
- int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
- uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
if (needsStackRealignment(MF)) {
if (FI < 0)
// Skip the saved EBP
Offset += SlotSize;
else {
- unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+ unsigned Align = MFI->getObjectAlignment(FI);
assert( (-(Offset + StackSize)) % Align == 0);
Align = 0;
return Offset + StackSize;
@@ -622,14 +625,14 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
- MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
- calculateMaxStackAlignment(FFI));
+ unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
+ calculateMaxStackAlignment(MFI));
- FFI->setMaxAlignment(MaxAlign);
+ MFI->setMaxAlignment(MaxAlign);
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -643,18 +646,18 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// ...
// }
// [EBP]
- MF.getFrameInfo()->
- CreateFixedObject(-TailCallReturnAddrDelta,
- (-1*SlotSize)+TailCallReturnAddrDelta);
+ MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1*SlotSize)+TailCallReturnAddrDelta);
}
+
if (hasFP(MF)) {
assert((TailCallReturnAddrDelta <= 0) &&
"The Delta should always be zero or negative");
// Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- (int)SlotSize * -2+
- TailCallReturnAddrDelta);
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ int FrameIdx = MFI->CreateFixedObject(SlotSize,
+ (int)SlotSize * -2+
+ TailCallReturnAddrDelta);
+ assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
}
@@ -887,6 +890,11 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
StackSize = std::max(MinSize,
StackSize > 128 ? StackSize - 128 : 0);
MFI->setStackSize(StackSize);
+ } else if (Subtarget->isTargetWin64()) {
+ // We need to always allocate 32 bytes as register spill area.
+ // FIXME: we might reuse these 32 bytes for leaf functions.
+ StackSize += 32;
+ MFI->setStackSize(StackSize);
}
// Insert stack pointer adjustment for later moving of return addr. Only
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 43340f5124..704c7d3a88 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -64,7 +64,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
- Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+ Subtarget.getStackAlignment(),
+ (Subtarget.isTargetWin64() ? -40 :
+ (Subtarget.is64Bit() ? -8 : -4))),
InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index c628b8affd..a96fcb2e83 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+; RUN: llvm-as < %s | llc | grep "subq.*\\\$40, \\\%rsp"
target triple = "x86_64-mingw64"
define x86_fp80 @a(i64 %x) nounwind readnone {
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 33d797297b..7dfb65a23f 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llc -o %t1 -f
-; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
-; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
+; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
target triple = "x86_64-mingw64"
define i32 @a() nounwind {