summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafael Espindola <rafael.espindola@gmail.com>2012-01-11 19:00:37 +0000
committerRafael Espindola <rafael.espindola@gmail.com>2012-01-11 19:00:37 +0000
commit2028b793e1fd1a8dd4d99b0b7c9972865d5e806a (patch)
tree7ef8a689686717facb1837bc11b510c2ee1f9f79
parent7692ce9e810ed1707da46faf20c84f1ffd54bc55 (diff)
downloadllvm-2028b793e1fd1a8dd4d99b0b7c9972865d5e806a.tar.gz
llvm-2028b793e1fd1a8dd4d99b0b7c9972865d5e806a.tar.bz2
llvm-2028b793e1fd1a8dd4d99b0b7c9972865d5e806a.tar.xz
Support segmented stacks on mac.
This uses TLS slot 90, which actually belongs to JavaScriptCore. We only support frames with static size Patch by Brian Anderson. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147960 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp83
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp3
-rw-r--r--test/CodeGen/X86/segmented-stacks.ll279
3 files changed, 273 insertions, 92 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 4386762c85..4cda76c0a4 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1298,10 +1298,15 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
+
+/// GetScratchRegister - Get a register for performing work in the segmented
+/// stack prologue. Depending on platform and the properties of the function
+/// either one or two registers will be needed. Set primary to true for
+/// the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
if (Is64Bit) {
- return X86::R11;
+ return Primary ? X86::R11 : X86::R12;
} else {
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
@@ -1313,13 +1318,13 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
"nested function.");
return -1;
} else {
- return X86::EAX;
+ return Primary ? X86::EAX : X86::ECX;
}
} else {
if (IsNested)
- return X86::EDX;
+ return Primary ? X86::EDX : X86::EAX;
else
- return X86::ECX;
+ return Primary ? X86::ECX : X86::EAX;
}
}
}
@@ -1339,14 +1344,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux())
- report_fatal_error("Segmented stacks supported only on linux.");
+ if (!ST->isTargetLinux() && !ST->isTargetDarwin())
+ report_fatal_error("Segmented stacks supported only on linux and darwin.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -1377,12 +1382,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// prologue.
StackSize = MFI->getStackSize();
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- TlsReg = X86::FS;
- TlsOffset = 0x70;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ }
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::RSP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
@@ -1392,16 +1406,55 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
- TlsOffset = 0x30;
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::ESP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ if (ST->isTargetLinux()) {
+ TlsOffset = 0x30;
+
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (ST->isTargetDarwin()) {
+ TlsOffset = 0x48 + 90*4;
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
+
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index d73a3dd7f3..b8002d57eb 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -102,9 +102,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType = FloatABI::Hard;
-
- if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
}
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 3ba18cffcc..6e91d00ac6 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
; We used to crash with filetype=obj
; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
@@ -13,25 +17,46 @@ define void @test_basic() {
call void @dummy_use (i32* %mem, i32 10)
ret void
-; X32: test_basic:
+; X32-Linux: test_basic:
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB0_2
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB0_2
-; X32: pushl $0
-; X32-NEXT: pushl $60
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
-; X64: test_basic:
+; X64-Linux: test_basic:
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB0_2
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB0_2
-; X64: movabsq $40, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_basic:
+
+; X32-Darwin: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja LBB0_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_basic:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB0_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
@@ -40,23 +65,42 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
%result = add i32 %other, %addend
ret i32 %result
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB1_2
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB1_2
+
+; X32-Linux: pushl $4
+; X32-Linux-NEXT: pushl $0
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB1_2
+
+; X64-Linux: movq %r10, %rax
+; X64-Linux-NEXT: movabsq $0, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+; X64-Linux-NEXT: movq %rax, %r10
-; X32: pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja LBB1_2
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB1_2
+; X32-Darwin: pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64-NEXT: movq %rax, %r10
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB1_2
+
+; X64-Darwin: movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
}
@@ -65,23 +109,42 @@ define void @test_large() {
call void @dummy_use (i32* %mem, i32 0)
ret void
-; X32: leal -40012(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
-; X32-NEXT: ja .LBB2_2
+; X32-Linux: leal -40012(%esp), %ecx
+; X32-Linux-NEXT: cmpl %gs:48, %ecx
+; X32-Linux-NEXT: ja .LBB2_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB2_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja LBB2_2
-; X32: pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
-; X64-NEXT: ja .LBB2_2
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB2_2
-; X64: movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
@@ -90,25 +153,46 @@ define fastcc void @test_fastcc() {
call void @dummy_use (i32* %mem, i32 10)
ret void
-; X32: test_fastcc:
+; X32-Linux: test_fastcc:
+
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB3_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc:
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB3_2
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB3_2
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
-; X32: pushl $0
-; X32-NEXT: pushl $60
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc:
-; X64: test_fastcc:
+; X32-Darwin: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja LBB3_2
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB3_2
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movabsq $40, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: test_fastcc:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB3_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
@@ -117,25 +201,72 @@ define fastcc void @test_fastcc_large() {
call void @dummy_use (i32* %mem, i32 0)
ret void
-; X32: test_fastcc_large:
+; X32-Linux: test_fastcc_large:
+
+; X32-Linux: leal -40012(%esp), %eax
+; X32-Linux-NEXT: cmpl %gs:48, %eax
+; X32-Linux-NEXT: ja .LBB4_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc_large:
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB4_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_fastcc_large:
+
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja LBB4_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_fastcc_large:
+
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB4_2
+
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 %a)
+ ret void
-; X32: leal -40012(%esp), %eax
-; X32-NEXT: cmpl %gs:48, %eax
-; X32-NEXT: ja .LBB4_2
+; This is testing that the Mac implementation preserves ecx
-; X32: pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc_large_with_ecx_arg:
-; X64: test_fastcc_large:
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja LBB5_2
-; X64: leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
-; X64-NEXT: ja .LBB4_2
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
}