summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARM.h3
-rw-r--r--lib/Target/ARM/ARMAtomicExpandPass.cpp397
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp113
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp750
-rw-r--r--lib/Target/ARM/ARMISelLowering.h23
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td203
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp4
-rw-r--r--lib/Target/ARM/CMakeLists.txt1
8 files changed, 408 insertions, 1086 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index cd589f5901..4412b45e5b 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -27,6 +27,7 @@ class JITCodeEmitter;
class MachineInstr;
class MCInst;
class TargetLowering;
+class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -48,6 +49,8 @@ FunctionPass *createThumb2SizeReductionPass();
/// \brief Creates an ARM-specific Target Transformation Info pass.
ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM);
+
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/Target/ARM/ARMAtomicExpandPass.cpp
new file mode 100644
index 0000000000..33cdda5d6e
--- /dev/null
+++ b/lib/Target/ARM/ARMAtomicExpandPass.cpp
@@ -0,0 +1,397 @@
+//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions with
+// appropriate (intrinsic-based) ldrex/strex loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-atomic-expand"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ class ARMAtomicExpandPass : public FunctionPass {
+ const TargetLowering *TLI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit ARMAtomicExpandPass(const TargetMachine *TM = 0)
+ : FunctionPass(ID), TLI(TM->getTargetLowering()) {}
+
+ bool runOnFunction(Function &F) override;
+ bool expandAtomicInsts(Function &F);
+
+ bool expandAtomicLoad(LoadInst *LI);
+ bool expandAtomicStore(StoreInst *LI);
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+
+ AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+ void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+
+ /// Perform a load-linked operation on Addr, returning a "Value *" with the
+ /// corresponding pointee type. This may entail some non-trivial operations
+ /// to truncate or reconstruct illegal types since intrinsics must be legal
+ Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord);
+
+ /// Perform a store-conditional operation to Addr. Return the status of the
+ /// store: 0 if the it succeeded, non-zero otherwise.
+ Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
+ AtomicOrdering Ord);
+
+ /// Return true if the given (atomic) instruction should be expanded by this
+ /// pass.
+ bool shouldExpandAtomic(Instruction *Inst);
+ };
+}
+
+char ARMAtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) {
+ return new ARMAtomicExpandPass(TM);
+}
+
+bool ARMAtomicExpandPass::runOnFunction(Function &F) {
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (BasicBlock &BB : F)
+ for (Instruction &Inst : BB) {
+ if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
+ (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
+ (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+ AtomicInsts.push_back(&Inst);
+ }
+
+ bool MadeChange = false;
+ for (Instruction *Inst : AtomicInsts) {
+ if (!shouldExpandAtomic(Inst))
+ continue;
+
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ MadeChange |= expandAtomicRMW(AI);
+ else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ MadeChange |= expandAtomicCmpXchg(CI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ MadeChange |= expandAtomicLoad(LI);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ MadeChange |= expandAtomicStore(SI);
+ else
+ llvm_unreachable("Unknown atomic instruction");
+ }
+
+ return MadeChange;
+}
+
+bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
+ // Load instructions don't actually need a leading fence, even in the
+ // SequentiallyConsistent case.
+ AtomicOrdering MemOpOrder =
+ TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+
+ // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
+ // an ldrexd (A3.5.3).
+ IRBuilder<> Builder(LI);
+ Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+
+ insertTrailingFence(Builder, LI->getOrdering());
+
+ LI->replaceAllUsesWith(Val);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+ // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
+ // we need a loop and the entire instruction is essentially an "atomicrmw
+ // xchg" that ignores the value loaded.
+ IRBuilder<> Builder(SI);
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), SI->getOrdering());
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ return expandAtomicRMW(AI);
+}
+
+bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+ AtomicOrdering Order = AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // fence?
+ // atomicrmw.start:
+ // %loaded = @load.linked(%addr)
+ // %new = some_op iN %loaded, %incr
+ // %stored = @store_conditional(%new, %addr)
+ // %try_again = icmp i32 ne %stored, 0
+ // br i1 %try_again, label %loop, label %atomicrmw.end
+ // atomicrmw.end:
+ // fence?
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
+
+ Value *NewVal;
+ switch (AI->getOperation()) {
+ case AtomicRMWInst::Xchg:
+ NewVal = AI->getValOperand();
+ break;
+ case AtomicRMWInst::Add:
+ NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Sub:
+ NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::And:
+ NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Nand:
+ NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
+ "new");
+ break;
+ case AtomicRMWInst::Or:
+ NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Xor:
+ NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+
+ Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ insertTrailingFence(Builder, Order);
+
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ AtomicOrdering Order = CI->getSuccessOrdering();
+ Value *Addr = CI->getPointerOperand();
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // fence?
+ // cmpxchg.start:
+ // %loaded = @load.linked(%addr)
+ // %should_store = icmp eq %loaded, %desired
+ // br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end
+ // cmpxchg.trystore:
+ // %stored = @store_conditional(%new, %addr)
+ // %try_again = icmp i32 ne %stored, 0
+ // br i1 %try_again, label %loop, label %cmpxchg.end
+ // cmpxchg.end:
+ // fence?
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ BasicBlock *TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+
+ // This grabs the DebugLoc from CI
+ IRBuilder<> Builder(CI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore =
+ Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ Value *StoreSuccess =
+ storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ // Finally, make sure later instructions don't get reordered with a fence if
+ // necessary.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ insertTrailingFence(Builder, Order);
+
+ CI->replaceAllUsesWith(Loaded);
+ CI->eraseFromParent();
+
+ return true;
+}
+
+Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall2(
+ Strex, Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr);
+}
+
+AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord) {
+ if (!TLI->getInsertFencesForAtomic())
+ return Ord;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ Builder.CreateFence(Release);
+
+ // The exclusive operations don't need any barrier if we're adding separate
+ // fences.
+ return Monotonic;
+}
+
+void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord) {
+ if (!TLI->getInsertFencesForAtomic())
+ return;
+
+ if (Ord == Acquire || Ord == AcquireRelease)
+ Builder.CreateFence(Acquire);
+ else if (Ord == SequentiallyConsistent)
+ Builder.CreateFence(SequentiallyConsistent);
+}
+
+bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) {
+ // Loads and stores less than 64-bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong:
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getType()->getPrimitiveSizeInBits() == 64;
+
+ // For the real atomic operations, we have ldrex/strex up to 64 bits.
+ return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 970c63342c..70e11c50e6 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -252,8 +252,6 @@ private:
SDNode *SelectConcatVector(SDNode *N);
- SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
-
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
@@ -2411,38 +2409,6 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
-SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
- unsigned Op16,unsigned Op32,
- unsigned Op64) {
- // Mostly direct translation to the given operations, except that we preserve
- // the AtomicOrdering for use later on.
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
- EVT VT = AN->getMemoryVT();
-
- unsigned Op;
- SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
- if (VT == MVT::i8)
- Op = Op8;
- else if (VT == MVT::i16)
- Op = Op16;
- else if (VT == MVT::i32)
- Op = Op32;
- else if (VT == MVT::i64) {
- Op = Op64;
- VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
- } else
- llvm_unreachable("Unexpected atomic operation");
-
- SmallVector<SDValue, 6> Ops;
- for (unsigned i = 1; i < AN->getNumOperands(); ++i)
- Ops.push_back(AN->getOperand(i));
-
- Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
- Ops.push_back(AN->getOperand(0)); // Chain moves to the end
-
- return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
-}
-
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
@@ -3320,85 +3286,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
-
- case ISD::ATOMIC_LOAD:
- if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
- return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
- else
- break;
-
- case ISD::ATOMIC_LOAD_ADD:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_ADD_I8,
- ARM::ATOMIC_LOAD_ADD_I16,
- ARM::ATOMIC_LOAD_ADD_I32,
- ARM::ATOMIC_LOAD_ADD_I64);
- case ISD::ATOMIC_LOAD_SUB:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_SUB_I8,
- ARM::ATOMIC_LOAD_SUB_I16,
- ARM::ATOMIC_LOAD_SUB_I32,
- ARM::ATOMIC_LOAD_SUB_I64);
- case ISD::ATOMIC_LOAD_AND:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_AND_I8,
- ARM::ATOMIC_LOAD_AND_I16,
- ARM::ATOMIC_LOAD_AND_I32,
- ARM::ATOMIC_LOAD_AND_I64);
- case ISD::ATOMIC_LOAD_OR:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_OR_I8,
- ARM::ATOMIC_LOAD_OR_I16,
- ARM::ATOMIC_LOAD_OR_I32,
- ARM::ATOMIC_LOAD_OR_I64);
- case ISD::ATOMIC_LOAD_XOR:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_XOR_I8,
- ARM::ATOMIC_LOAD_XOR_I16,
- ARM::ATOMIC_LOAD_XOR_I32,
- ARM::ATOMIC_LOAD_XOR_I64);
- case ISD::ATOMIC_LOAD_NAND:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_NAND_I8,
- ARM::ATOMIC_LOAD_NAND_I16,
- ARM::ATOMIC_LOAD_NAND_I32,
- ARM::ATOMIC_LOAD_NAND_I64);
- case ISD::ATOMIC_LOAD_MIN:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_MIN_I8,
- ARM::ATOMIC_LOAD_MIN_I16,
- ARM::ATOMIC_LOAD_MIN_I32,
- ARM::ATOMIC_LOAD_MIN_I64);
- case ISD::ATOMIC_LOAD_MAX:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_MAX_I8,
- ARM::ATOMIC_LOAD_MAX_I16,
- ARM::ATOMIC_LOAD_MAX_I32,
- ARM::ATOMIC_LOAD_MAX_I64);
- case ISD::ATOMIC_LOAD_UMIN:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_UMIN_I8,
- ARM::ATOMIC_LOAD_UMIN_I16,
- ARM::ATOMIC_LOAD_UMIN_I32,
- ARM::ATOMIC_LOAD_UMIN_I64);
- case ISD::ATOMIC_LOAD_UMAX:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_UMAX_I8,
- ARM::ATOMIC_LOAD_UMAX_I16,
- ARM::ATOMIC_LOAD_UMAX_I32,
- ARM::ATOMIC_LOAD_UMAX_I64);
- case ISD::ATOMIC_SWAP:
- return SelectAtomic(N,
- ARM::ATOMIC_SWAP_I8,
- ARM::ATOMIC_SWAP_I16,
- ARM::ATOMIC_SWAP_I32,
- ARM::ATOMIC_SWAP_I64);
- case ISD::ATOMIC_CMP_SWAP:
- return SelectAtomic(N,
- ARM::ATOMIC_CMP_SWAP_I8,
- ARM::ATOMIC_CMP_SWAP_I16,
- ARM::ATOMIC_CMP_SWAP_I32,
- ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 737007aa4c..310d845db1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -737,28 +737,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
- // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
- // handled normally.
+ // ATOMIC_FENCE needs custom lowering; the others should have been expanded
+ // to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- // Custom lowering for 64-bit ops
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops()) {
// Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
}
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
@@ -913,44 +901,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
-static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
- bool isThumb2, unsigned &LdrOpc,
- unsigned &StrOpc) {
- static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
- {ARM::LDREXH, ARM::t2LDREXH},
- {ARM::LDREX, ARM::t2LDREX},
- {ARM::LDREXD, ARM::t2LDREXD}};
- static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
- {ARM::LDAEXH, ARM::t2LDAEXH},
- {ARM::LDAEX, ARM::t2LDAEX},
- {ARM::LDAEXD, ARM::t2LDAEXD}};
- static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
- {ARM::STREXH, ARM::t2STREXH},
- {ARM::STREX, ARM::t2STREX},
- {ARM::STREXD, ARM::t2STREXD}};
- static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
- {ARM::STLEXH, ARM::t2STLEXH},
- {ARM::STLEX, ARM::t2STLEX},
- {ARM::STLEXD, ARM::t2STLEXD}};
-
- const unsigned (*LoadOps)[2], (*StoreOps)[2];
- if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- LoadOps = LoadAcqs;
- else
- LoadOps = LoadBares;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- StoreOps = StoreRels;
- else
- StoreOps = StoreBares;
-
- assert(isPowerOf2_32(Size) && Size <= 8 &&
- "unsupported size for atomic binary op!");
-
- LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
- StrOpc = StoreOps[Log2_32(Size)][isThumb2];
-}
-
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
@@ -6028,35 +5978,6 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-static void
-ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) {
- SDLoc dl(Node);
- assert (Node->getValueType(0) == MVT::i64 &&
- "Only know how to expand i64 atomics");
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(0)); // Chain
- Ops.push_back(Node->getOperand(1)); // Ptr
- for(unsigned i=2; i<Node->getNumOperands(); i++) {
- // Low part
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(i), DAG.getIntPtrConstant(0)));
- // High part
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(i), DAG.getIntPtrConstant(1)));
- }
- SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result = DAG.getAtomic(
- Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
- cast<MemSDNode>(Node)->getMemOperand(), AN->getSuccessOrdering(),
- AN->getFailureOrdering(), AN->getSynchScope());
- SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
- Results.push_back(Result.getValue(2));
-}
-
static void ReplaceREADCYCLECOUNTER(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
@@ -6174,22 +6095,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
- case ISD::ATOMIC_STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG);
- return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -6199,531 +6104,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
// ARM Scheduler Hooks
//===----------------------------------------------------------------------===//
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned oldval = MI->getOperand(2).getReg();
- unsigned newval = MI->getOperand(3).getReg();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
-
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
- MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineFunction *MF = BB->getParent();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It; // insert the new blocks after the current block
-
- MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loop1MBB);
- MF->insert(It, loop2MBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // thisMBB:
- // ...
- // fallthrough --> loop1MBB
- BB->addSuccessor(loop1MBB);
-
- // loop1MBB:
- // ldrex dest, [ptr]
- // cmp dest, oldval
- // bne exitMBB
- BB = loop1MBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(dest).addReg(oldval));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(loop2MBB);
- BB->addSuccessor(exitMBB);
-
- // loop2MBB:
- // strex scratch, newval, [ptr]
- // cmp scratch, #0
- // bne loop1MBB
- BB = loop2MBB;
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode) const {
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrex dest, ptr
- // <binop> scratch2, dest, incr
- // strex scratch, scratch2, ptr
- // cmp scratch, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- if (BinOpcode) {
- // operand order needs to go the other way for NAND
- if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(incr).addReg(dest)).addReg(0);
- else
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(dest).addReg(incr)).addReg(0);
- }
-
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- bool signExtend,
- ARMCC::CondCodes Cond) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- unsigned oldval = dest;
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc, extendOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
- case 1:
- extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
- break;
- case 2:
- extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
- break;
- case 4:
- extendOpc = 0;
- break;
- }
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned scratch2 = MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrex dest, ptr
- // (sign extend dest, if required)
- // cmp dest, incr
- // cmov.cond scratch2, incr, dest
- // strex scratch, scratch2, ptr
- // cmp scratch, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
-
- // Sign extend the value, if necessary.
- if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
- : &ARM::GPRnopcRegClass);
- if (!isThumb2)
- MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
- .addReg(dest)
- .addImm(0));
- }
-
- // Build compare and cmov instructions.
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(oldval).addReg(incr));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
-
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Op1, unsigned Op2,
- bool NeedsCarry, bool IsCmpxchg,
- bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned destlo = MI->getOperand(0).getReg();
- unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
- AtomicOrdering Ord =
- static_cast<AtomicOrdering>(MI->getOperand(IsCmpxchg ? 7 : 5).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *contBB = 0, *cont2BB = 0;
- if (IsCmpxchg || IsMinMax)
- contBB = MF->CreateMachineBasicBlock(LLVM_BB);
- if (IsCmpxchg)
- cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-
- MF->insert(It, loopMBB);
- if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
- if (IsCmpxchg) MF->insert(It, cont2BB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned storesuccess = MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrexd r2, r3, ptr
- // <binopa> r0, r2, incr
- // <binopb> r1, r3, incr
- // strexd storesuccess, r0, r1, ptr
- // cmp storesuccess, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
-
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(GPRPair0, RegState::Define)
- .addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
- }
-
- unsigned StoreLo, StoreHi;
- if (IsCmpxchg) {
- // Add early exit
- for (unsigned i = 0; i < 2; i++) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
- ARM::CMPrr))
- .addReg(i == 0 ? destlo : desthi)
- .addReg(i == 0 ? vallo : valhi));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(i == 0 ? contBB : cont2BB);
- BB = (i == 0 ? contBB : cont2BB);
- }
-
- // Copy to physregs for strexd
- StoreLo = MI->getOperand(5).getReg();
- StoreHi = MI->getOperand(6).getReg();
- } else if (Op1) {
- // Perform binary operation
- unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
- .addReg(destlo).addReg(vallo))
- .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
- .addReg(desthi).addReg(valhi))
- .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
-
- StoreLo = tmpRegLo;
- StoreHi = tmpRegHi;
- } else {
- // Copy to physregs for strexd
- StoreLo = vallo;
- StoreHi = valhi;
- }
- if (IsMinMax) {
- // Compare and branch to exit block.
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(contBB);
- BB = contBB;
- StoreLo = vallo;
- StoreHi = valhi;
- }
-
- // Store
- if (isThumb2) {
- MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
- } else {
- // Marshal a pair...
- unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(StoreLo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
- .addReg(r1)
- .addReg(StoreHi)
- .addImm(ARM::gsub_1);
-
- // ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(StorePair).addReg(ptr));
- }
- // Cmp+jump
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(storesuccess).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
-
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- unsigned destlo = MI->getOperand(0).getReg();
- unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- }
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
-
- if (isThumb2) {
- MIB.addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr);
-
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
-
- // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
- }
- AddDefaultPred(MIB);
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -7654,130 +7034,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent();
return BB;
}
- case ARM::ATOMIC_LOAD_ADD_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
-
- case ARM::ATOMIC_LOAD_AND_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
-
- case ARM::ATOMIC_LOAD_OR_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
-
- case ARM::ATOMIC_LOAD_XOR_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
-
- case ARM::ATOMIC_LOAD_NAND_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
-
- case ARM::ATOMIC_LOAD_SUB_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
-
- case ARM::ATOMIC_LOAD_MIN_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MIN_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MIN_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
-
- case ARM::ATOMIC_LOAD_MAX_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
- case ARM::ATOMIC_LOAD_MAX_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
- case ARM::ATOMIC_LOAD_MAX_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
-
- case ARM::ATOMIC_LOAD_UMIN_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMIN_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMIN_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
-
- case ARM::ATOMIC_LOAD_UMAX_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
- case ARM::ATOMIC_LOAD_UMAX_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
- case ARM::ATOMIC_LOAD_UMAX_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
-
- case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
- case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
- case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
-
- case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
- case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
- case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
-
- case ARM::ATOMIC_LOAD_I64:
- return EmitAtomicLoad64(MI, BB);
-
- case ARM::ATOMIC_LOAD_ADD_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
- isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
- /*NeedsCarry*/ true);
- case ARM::ATOMIC_LOAD_SUB_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true);
- case ARM::ATOMIC_LOAD_OR_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
- isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_XOR_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
- isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_AND_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
- isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_SWAP_I64:
- return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMIC_CMP_SWAP_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMIC_LOAD_MIN_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MAX_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMIC_LOAD_UMIN_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMAX_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::HS);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 022945f625..f33e6db9d7 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -548,29 +548,6 @@ namespace llvm {
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
- MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const;
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- unsigned BinOpcode) const;
- MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Op1,
- unsigned Op2,
- bool NeedsCarry = false,
- bool IsCmpxchg = false,
- bool IsMinMax = false,
- ARMCC::CondCodes CC = ARMCC::AL) const;
- MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- bool signExtend,
- ARMCC::CondCodes Cond) const;
- MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index dfcc11edcd..1f09c9f07d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -4336,209 +4336,6 @@ let usesCustomInserter = 1, Defs = [CPSR] in {
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-
-// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
-// (64-bit pseudos use a hand-written selection code).
- let mayLoad = 1, mayStore = 1 in {
- def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_ADD_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
- GPR:$set1, GPR:$set2, i32imm:$ordering),
- NoItinerary, []>;
- }
- let mayLoad = 1 in
- def ATOMIC_LOAD_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, i32imm:$ordering),
- NoItinerary, []>;
}
let usesCustomInserter = 1 in {
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6a6d7ed01c..4ae539a1bc 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -226,6 +226,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool ARMPassConfig::addPreISel() {
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
+ addPass(createARMAtomicExpandPass(TM));
+
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9b5fa75fe2..8e14883913 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
A15SDOptimizer.cpp
ARMAsmPrinter.cpp
+ ARMAtomicExpandPass.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp