summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSaleem Abdulrasool <compnerd@compnerd.org>2014-06-09 20:18:42 +0000
committerSaleem Abdulrasool <compnerd@compnerd.org>2014-06-09 20:18:42 +0000
commitc90ddb186967525c41456565152d7cd898616bda (patch)
tree568c491d221518a3d71bd81bc426d179c2058f9b
parent8a0c2d17f5a26bde5e080abd88587aba76b6a325 (diff)
downloadllvm-c90ddb186967525c41456565152d7cd898616bda.tar.gz
llvm-c90ddb186967525c41456565152d7cd898616bda.tar.bz2
llvm-c90ddb186967525c41456565152d7cd898616bda.tar.xz
ARM: add VLA extension for WoA Itanium ABI
The armv7-windows-itanium environment is nearly identical to the MSVC ABI. It has a few divergences, mostly revolving around the use of the Itanium ABI for C++. VLA support is one of the extensions that are amongst the set of the extensions. This adds support for proper VLA emission for this environment. This is somewhat similar to the handling for __chkstk emission on X86 and the large stack frame emission for ARM. The invocation style for chkstk is still controlled via the -mcmodel flag to clang. Make an explicit note that this is an extension. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210489 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--docs/Extensions.rst14
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp107
-rw-r--r--lib/Target/ARM/ARMISelLowering.h6
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td13
-rw-r--r--test/CodeGen/ARM/Windows/vla.ll31
5 files changed, 170 insertions, 1 deletions
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
index cd489c0e54..271c08598b 100644
--- a/docs/Extensions.rst
+++ b/docs/Extensions.rst
@@ -195,3 +195,17 @@ range via a slight deviation. It will generate an indirect jump as follows:
blx r12
sub.w sp, sp, r4
+Variable Length Arrays
+^^^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2012) does not permit the
+emission of Variable Length Arrays (VLAs).
+
+The Windows ARM Itanium ABI extends the base ABI by adding support for emitting
+a dynamic stack allocation. When emitting a variable stack allocation, a call
+to ``__chkstk`` is emitted unconditionally to ensure that guard pages are setup
+properly. The emission of this stack probe emission is handled similar to the
+standard stack probe emission.
+
+The MSVC environment does not emit code for VLAs currently.
+
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index dbcd423431..64270dade2 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setExceptionSelectorRegister(ARM::R1);
}
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
@@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+ case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
@@ -6214,6 +6220,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ llvm_unreachable("Don't know how to custom lower this!");
}
}
@@ -7113,6 +7123,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetMachine &TM = getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(Subtarget->isTargetWindows() &&
+ "__chkstk is only supported on Windows");
+ assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+ // __chkstk takes the number of words to allocate on the stack in R4, and
+ // returns the stack adjustment in number of bytes in R4. This will not
+ // clober any other registers (other than the obvious lr).
+ //
+ // Although, technically, IP should be considered a register which may be
+ // clobbered, the call itself will not touch it. Windows on ARM is a pure
+ // thumb-2 environment, so there is no interworking required. As a result, we
+ // do not expect a veneer to be emitted by the linker, clobbering IP.
+ //
+ // Each module recieves its own copy of __chkstk, so no import thunk is
+ // required, again, ensuring that IP is not clobbered.
+ //
+ // Finally, although some linkers may theoretically provide a trampoline for
+ // out of range calls (which is quite common due to a 32M range limitation of
+ // branches for Thumb), we can generate the long-call version via
+ // -mcmodel=large, alleviating the need for the trampoline which may clobber
+ // IP.
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault: {
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+ .addExternalSymbol("__chkstk");
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ }
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -7361,6 +7438,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
+ case ARM::WIN__CHKSTK:
+ return EmitLowered__chkstk(MI, BB);
}
}
@@ -10481,6 +10560,32 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
return CallInfo.first;
}
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "unsupported target platform");
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Flag;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+
+ SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+ Chain = NewSP.getValue(1);
+
+ SDValue Ops[2] = { NewSP, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index c15305c5e6..1ace0f330f 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -95,6 +95,8 @@ namespace llvm {
PRELOAD, // Preload
+ WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
@@ -470,6 +472,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -578,6 +581,9 @@ namespace llvm {
MachineBasicBlock *EmitStructByval(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 718d5da9d0..b44a19b01c 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let Inst{11-0} = a;
}
+// Dynamic stack allocation yields a _chkstk for Windows targets. These calls
+// are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having separate instruction are extra unmodelled effects
+// (compared to ordinary calls) like stack pointer change.
+
+def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+ def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
diff --git a/test/CodeGen/ARM/Windows/vla.ll b/test/CodeGen/ARM/Windows/vla.ll
new file mode 100644
index 0000000000..56901dee0d
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/vla.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-SMALL-CODE
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -code-model=large -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-LARGE-CODE
+; RUN: llc -mtriple=thumbv7-windows-msvc -mcpu=cortex-a9 -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-MSVC
+
+define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) {
+entry:
+ %vla = alloca i8, i32 %sz, align 1
+ %arrayidx = getelementptr inbounds i8* %vla, i32 %idx
+ %0 = load volatile i8* %arrayidx, align 1
+ ret i8 %0
+}
+
+; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #7
+; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
+; CHECK-SMALL-CODE: bl __chkstk
+; CHECK-SMALL-CODE: sub.w sp, sp, r4
+
+; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #7
+; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
+; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
+; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
+; CHECK-LARGE-CODE: blx [[IP]]
+; CHECK-LARGE-CODE: sub.w sp, sp, r4
+
+; CHECK-MSVC-NOT: __chkstk
+