diff options
author | Tim Northover <tnorthover@apple.com> | 2014-04-03 11:44:58 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2014-04-03 11:44:58 +0000 |
commit | badb1377291e99cea122b64ee62fa0382e9ee737 (patch) | |
tree | fb4307171ce495484cea427864e5dc163d159b32 /lib/Target/ARM/ARMISelDAGToDAG.cpp | |
parent | 37e5cfa4aae0dd693ab0c35ff78d37f5ddfe177d (diff) | |
download | llvm-badb1377291e99cea122b64ee62fa0382e9ee737.tar.gz llvm-badb1377291e99cea122b64ee62fa0382e9ee737.tar.bz2 llvm-badb1377291e99cea122b64ee62fa0382e9ee737.tar.xz |
ARM: expand atomic ldrex/strex loops in IR
Previously, ATOMIC_LOAD_WHATEVER nodes were expanded at MachineInstr
emission time, and the code doing so had grown to be extremely large
and involved, to account for the subtly different code needed for the
various flavours (8/16/32/64 bit, cmpxchg/add/minmax).
Moving this transformation into the IR clears up the code
substantially, and makes future optimisations much easier:
1. An atomicrmw followed by a use of the *new* value can be more
efficient. With the expansion done as an IR pass, simple CSE could
handle this efficiently.
2. Making use of cmpxchg success/failure orderings only has to be done
in one (simpler) place.
3. The common "cmpxchg; did we store?" idiom can be exposed to
optimisation.
I intend to gradually improve this situation within the ARM backend
and make sure there are no hidden issues before moving the code out
into CodeGen to be shared with at least ARM64/AArch64 (though I think
PPC & Mips could benefit too).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205525 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM/ARMISelDAGToDAG.cpp')
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 113 |
1 files changed, 0 insertions, 113 deletions
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 970c63342c..70e11c50e6 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -252,8 +252,6 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -2411,38 +2409,6 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16,unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) { - Op = Op64; - VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); - } else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector<SDValue, 6> Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); -} - SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); @@ -3320,85 +3286,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - - case ISD::ATOMIC_LOAD: - if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) - return SelectAtomic(N, 0, 0, 
0, ARM::ATOMIC_LOAD_I64); - else - break; - - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_ADD_I8, - ARM::ATOMIC_LOAD_ADD_I16, - ARM::ATOMIC_LOAD_ADD_I32, - ARM::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_SUB_I8, - ARM::ATOMIC_LOAD_SUB_I16, - ARM::ATOMIC_LOAD_SUB_I32, - ARM::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_AND_I8, - ARM::ATOMIC_LOAD_AND_I16, - ARM::ATOMIC_LOAD_AND_I32, - ARM::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_OR_I8, - ARM::ATOMIC_LOAD_OR_I16, - ARM::ATOMIC_LOAD_OR_I32, - ARM::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_XOR_I8, - ARM::ATOMIC_LOAD_XOR_I16, - ARM::ATOMIC_LOAD_XOR_I32, - ARM::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_NAND_I8, - ARM::ATOMIC_LOAD_NAND_I16, - ARM::ATOMIC_LOAD_NAND_I32, - ARM::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_MIN_I8, - ARM::ATOMIC_LOAD_MIN_I16, - ARM::ATOMIC_LOAD_MIN_I32, - ARM::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_MAX_I8, - ARM::ATOMIC_LOAD_MAX_I16, - ARM::ATOMIC_LOAD_MAX_I32, - ARM::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_UMIN_I8, - ARM::ATOMIC_LOAD_UMIN_I16, - ARM::ATOMIC_LOAD_UMIN_I32, - ARM::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_UMAX_I8, - ARM::ATOMIC_LOAD_UMAX_I16, - ARM::ATOMIC_LOAD_UMAX_I32, - ARM::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(N, - ARM::ATOMIC_SWAP_I8, - ARM::ATOMIC_SWAP_I16, - ARM::ATOMIC_SWAP_I32, - ARM::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(N, - ARM::ATOMIC_CMP_SWAP_I8, - ARM::ATOMIC_CMP_SWAP_I16, - ARM::ATOMIC_CMP_SWAP_I32, - 
ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N); |