-rw-r--r--  lib/Target/ARM/ARM.h                       3
-rw-r--r--  lib/Target/ARM/ARMAtomicExpandPass.cpp   397
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp       113
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp       750
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h          23
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td           203
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp        4
-rw-r--r--  lib/Target/ARM/CMakeLists.txt              1
-rw-r--r--  test/CodeGen/ARM/atomic-64bit.ll         155
-rw-r--r--  test/CodeGen/ARM/atomic-ops-v8.ll        420
-rw-r--r--  test/CodeGen/ARM/atomicrmw_minmax.ll       2
11 files changed, 752 insertions(+), 1319 deletions(-)
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index cd589f5901..4412b45e5b 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -27,6 +27,7 @@ class JITCodeEmitter;
class MachineInstr;
class MCInst;
class TargetLowering;
+class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -48,6 +49,8 @@ FunctionPass *createThumb2SizeReductionPass();
/// \brief Creates an ARM-specific Target Transformation Info pass.
ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
+FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM);
+
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/Target/ARM/ARMAtomicExpandPass.cpp
new file mode 100644
index 0000000000..33cdda5d6e
--- /dev/null
+++ b/lib/Target/ARM/ARMAtomicExpandPass.cpp
@@ -0,0 +1,397 @@
+//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions with
+// appropriate (intrinsic-based) ldrex/strex loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-atomic-expand"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+namespace {
+ class ARMAtomicExpandPass : public FunctionPass {
+ const TargetLowering *TLI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit ARMAtomicExpandPass(const TargetMachine *TM = 0)
+ : FunctionPass(ID), TLI(TM->getTargetLowering()) {}
+
+ bool runOnFunction(Function &F) override;
+ bool expandAtomicInsts(Function &F);
+
+ bool expandAtomicLoad(LoadInst *LI);
+ bool expandAtomicStore(StoreInst *SI);
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+
+ AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+ void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+
+ /// Perform a load-linked operation on Addr, returning a "Value *" with the
+ /// corresponding pointee type. This may entail some non-trivial operations
+ /// to truncate or reconstruct illegal types, since intrinsics must be legal.
+ Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord);
+
+ /// Perform a store-conditional operation to Addr. Return the status of the
+ /// store: 0 if it succeeded, non-zero otherwise.
+ Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
+ AtomicOrdering Ord);
+
+ /// Return true if the given (atomic) instruction should be expanded by this
+ /// pass.
+ bool shouldExpandAtomic(Instruction *Inst);
+ };
+}
+
+char ARMAtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) {
+ return new ARMAtomicExpandPass(TM);
+}
+
+bool ARMAtomicExpandPass::runOnFunction(Function &F) {
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (BasicBlock &BB : F)
+ for (Instruction &Inst : BB) {
+ if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
+ (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
+ (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+ AtomicInsts.push_back(&Inst);
+ }
+
+ bool MadeChange = false;
+ for (Instruction *Inst : AtomicInsts) {
+ if (!shouldExpandAtomic(Inst))
+ continue;
+
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ MadeChange |= expandAtomicRMW(AI);
+ else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ MadeChange |= expandAtomicCmpXchg(CI);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ MadeChange |= expandAtomicLoad(LI);
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ MadeChange |= expandAtomicStore(SI);
+ else
+ llvm_unreachable("Unknown atomic instruction");
+ }
+
+ return MadeChange;
+}
+
+bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
+ // Load instructions don't actually need a leading fence, even in the
+ // SequentiallyConsistent case.
+ AtomicOrdering MemOpOrder =
+ TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+
+ // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
+ // an ldrexd (A3.5.3).
+ IRBuilder<> Builder(LI);
+ Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+
+ insertTrailingFence(Builder, LI->getOrdering());
+
+ LI->replaceAllUsesWith(Val);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+ // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
+ // we need a loop and the entire instruction is essentially an "atomicrmw
+ // xchg" that ignores the value loaded.
+ IRBuilder<> Builder(SI);
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), SI->getOrdering());
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ return expandAtomicRMW(AI);
+}
+
+bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+ AtomicOrdering Order = AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // fence?
+ // atomicrmw.start:
+ // %loaded = @load.linked(%addr)
+ // %new = some_op iN %loaded, %incr
+ // %stored = @store_conditional(%new, %addr)
+ //     %try_again = icmp ne i32 %stored, 0
+ //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
+ // atomicrmw.end:
+ // fence?
+ // [...]
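+ //
+ // As a concrete (illustrative) sketch, on a pre-v8 target where explicit
+ // fences are inserted, "atomicrmw add i32* %p, i32 1 seq_cst" becomes:
+ //     fence release
+ //   atomicrmw.start:
+ //     %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %p)
+ //     %new = add i32 %loaded, 1
+ //     %stored = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %p)
+ //     %try_again = icmp ne i32 %stored, 0
+ //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
+ //   atomicrmw.end:
+ //     fence seq_cst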
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
+
+ Value *NewVal;
+ switch (AI->getOperation()) {
+ case AtomicRMWInst::Xchg:
+ NewVal = AI->getValOperand();
+ break;
+ case AtomicRMWInst::Add:
+ NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Sub:
+ NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::And:
+ NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Nand:
+ NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
+ "new");
+ break;
+ case AtomicRMWInst::Or:
+ NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Xor:
+ NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
+ NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
+ break;
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+
+ Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ insertTrailingFence(Builder, Order);
+
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ AtomicOrdering Order = CI->getSuccessOrdering();
+ Value *Addr = CI->getPointerOperand();
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // fence?
+ // cmpxchg.start:
+ // %loaded = @load.linked(%addr)
+ //     %should_store = icmp eq iN %loaded, %desired
+ // br i1 %should_store, label %cmpxchg.trystore, label %cmpxchg.end
+ // cmpxchg.trystore:
+ // %stored = @store_conditional(%new, %addr)
+ // %try_again = icmp i32 ne %stored, 0
+ // br i1 %try_again, label %loop, label %cmpxchg.end
+ // cmpxchg.end:
+ // fence?
+ // [...]
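+ //
+ // Illustrative behaviour for "cmpxchg i32* %p, i32 %old, i32 %new seq_cst":
+ // the loop ldrex-loads %p and compares against %old; only on a match does
+ // it attempt the strex (retrying from cmpxchg.start on failure), while a
+ // mismatch exits straight to cmpxchg.end without storing anything.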
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ BasicBlock *TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+
+ // This grabs the DebugLoc from CI.
+ IRBuilder<> Builder(CI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore =
+ Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, ExitBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ Value *StoreSuccess =
+ storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ // Finally, make sure later instructions don't get reordered with a fence if
+ // necessary.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ insertTrailingFence(Builder, Order);
+
+ CI->replaceAllUsesWith(Loaded);
+ CI->eraseFromParent();
+
+ return true;
+}
+
+Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
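+ //
+ // Illustratively, an i64 load produces IR along these lines (names assumed):
+ //     %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr)
+ //     %lo = extractvalue { i32, i32 } %lohi, 0
+ //     %hi = extractvalue { i32, i32 } %lohi, 1
+ //     %lo64 = zext i32 %lo to i64
+ //     %hi64 = zext i32 %hi to i64
+ //     %shl = shl i64 %hi64, 32
+ //     %val64 = or i64 %lo64, %shl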
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
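+ //
+ // Illustratively (names assumed):
+ //     %lo = trunc i64 %val to i32
+ //     %shr = lshr i64 %val, 32
+ //     %hi = trunc i64 %shr to i32
+ //     %status = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %addr)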
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall2(
+ Strex, Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr);
+}
+
+AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord) {
+ if (!TLI->getInsertFencesForAtomic())
+ return Ord;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ Builder.CreateFence(Release);
+
+ // The exclusive operations don't need any barrier if we're adding separate
+ // fences.
+ return Monotonic;
+}
+
+void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
+ AtomicOrdering Ord) {
+ if (!TLI->getInsertFencesForAtomic())
+ return;
+
+ if (Ord == Acquire || Ord == AcquireRelease)
+ Builder.CreateFence(Acquire);
+ else if (Ord == SequentiallyConsistent)
+ Builder.CreateFence(SequentiallyConsistent);
+}
+
+bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) {
+ // Loads and stores smaller than 64 bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong:
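+ // (Illustratively: an atomic i64 store is expanded here, an i32 one is left
+ // alone, and an i128 one would fall back to the generic libcall path.)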
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getType()->getPrimitiveSizeInBits() == 64;
+
+ // For the real atomic operations, we have ldrex/strex up to 64 bits.
+ return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 970c63342c..70e11c50e6 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -252,8 +252,6 @@ private:
SDNode *SelectConcatVector(SDNode *N);
- SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
-
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
@@ -2411,38 +2409,6 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
-SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
- unsigned Op16,unsigned Op32,
- unsigned Op64) {
- // Mostly direct translation to the given operations, except that we preserve
- // the AtomicOrdering for use later on.
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
- EVT VT = AN->getMemoryVT();
-
- unsigned Op;
- SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
- if (VT == MVT::i8)
- Op = Op8;
- else if (VT == MVT::i16)
- Op = Op16;
- else if (VT == MVT::i32)
- Op = Op32;
- else if (VT == MVT::i64) {
- Op = Op64;
- VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
- } else
- llvm_unreachable("Unexpected atomic operation");
-
- SmallVector<SDValue, 6> Ops;
- for (unsigned i = 1; i < AN->getNumOperands(); ++i)
- Ops.push_back(AN->getOperand(i));
-
- Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
- Ops.push_back(AN->getOperand(0)); // Chain moves to the end
-
- return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
-}
-
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
@@ -3320,85 +3286,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
-
- case ISD::ATOMIC_LOAD:
- if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
- return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
- else
- break;
-
- case ISD::ATOMIC_LOAD_ADD:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_ADD_I8,
- ARM::ATOMIC_LOAD_ADD_I16,
- ARM::ATOMIC_LOAD_ADD_I32,
- ARM::ATOMIC_LOAD_ADD_I64);
- case ISD::ATOMIC_LOAD_SUB:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_SUB_I8,
- ARM::ATOMIC_LOAD_SUB_I16,
- ARM::ATOMIC_LOAD_SUB_I32,
- ARM::ATOMIC_LOAD_SUB_I64);
- case ISD::ATOMIC_LOAD_AND:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_AND_I8,
- ARM::ATOMIC_LOAD_AND_I16,
- ARM::ATOMIC_LOAD_AND_I32,
- ARM::ATOMIC_LOAD_AND_I64);
- case ISD::ATOMIC_LOAD_OR:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_OR_I8,
- ARM::ATOMIC_LOAD_OR_I16,
- ARM::ATOMIC_LOAD_OR_I32,
- ARM::ATOMIC_LOAD_OR_I64);
- case ISD::ATOMIC_LOAD_XOR:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_XOR_I8,
- ARM::ATOMIC_LOAD_XOR_I16,
- ARM::ATOMIC_LOAD_XOR_I32,
- ARM::ATOMIC_LOAD_XOR_I64);
- case ISD::ATOMIC_LOAD_NAND:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_NAND_I8,
- ARM::ATOMIC_LOAD_NAND_I16,
- ARM::ATOMIC_LOAD_NAND_I32,
- ARM::ATOMIC_LOAD_NAND_I64);
- case ISD::ATOMIC_LOAD_MIN:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_MIN_I8,
- ARM::ATOMIC_LOAD_MIN_I16,
- ARM::ATOMIC_LOAD_MIN_I32,
- ARM::ATOMIC_LOAD_MIN_I64);
- case ISD::ATOMIC_LOAD_MAX:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_MAX_I8,
- ARM::ATOMIC_LOAD_MAX_I16,
- ARM::ATOMIC_LOAD_MAX_I32,
- ARM::ATOMIC_LOAD_MAX_I64);
- case ISD::ATOMIC_LOAD_UMIN:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_UMIN_I8,
- ARM::ATOMIC_LOAD_UMIN_I16,
- ARM::ATOMIC_LOAD_UMIN_I32,
- ARM::ATOMIC_LOAD_UMIN_I64);
- case ISD::ATOMIC_LOAD_UMAX:
- return SelectAtomic(N,
- ARM::ATOMIC_LOAD_UMAX_I8,
- ARM::ATOMIC_LOAD_UMAX_I16,
- ARM::ATOMIC_LOAD_UMAX_I32,
- ARM::ATOMIC_LOAD_UMAX_I64);
- case ISD::ATOMIC_SWAP:
- return SelectAtomic(N,
- ARM::ATOMIC_SWAP_I8,
- ARM::ATOMIC_SWAP_I16,
- ARM::ATOMIC_SWAP_I32,
- ARM::ATOMIC_SWAP_I64);
- case ISD::ATOMIC_CMP_SWAP:
- return SelectAtomic(N,
- ARM::ATOMIC_CMP_SWAP_I8,
- ARM::ATOMIC_CMP_SWAP_I16,
- ARM::ATOMIC_CMP_SWAP_I32,
- ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 737007aa4c..310d845db1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -737,28 +737,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
- // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
- // handled normally.
+ // ATOMIC_FENCE needs custom lowering; the others should have been expanded
+ // to ldrex/strex loops already.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- // Custom lowering for 64-bit ops
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
// On v8, we have particularly efficient implementations of atomic fences
// if they can be combined with nearby atomic loads and stores.
if (!Subtarget->hasV8Ops()) {
// Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
setInsertFencesForAtomic(true);
}
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
@@ -913,44 +901,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
-static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
- bool isThumb2, unsigned &LdrOpc,
- unsigned &StrOpc) {
- static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
- {ARM::LDREXH, ARM::t2LDREXH},
- {ARM::LDREX, ARM::t2LDREX},
- {ARM::LDREXD, ARM::t2LDREXD}};
- static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
- {ARM::LDAEXH, ARM::t2LDAEXH},
- {ARM::LDAEX, ARM::t2LDAEX},
- {ARM::LDAEXD, ARM::t2LDAEXD}};
- static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
- {ARM::STREXH, ARM::t2STREXH},
- {ARM::STREX, ARM::t2STREX},
- {ARM::STREXD, ARM::t2STREXD}};
- static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
- {ARM::STLEXH, ARM::t2STLEXH},
- {ARM::STLEX, ARM::t2STLEX},
- {ARM::STLEXD, ARM::t2STLEXD}};
-
- const unsigned (*LoadOps)[2], (*StoreOps)[2];
- if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- LoadOps = LoadAcqs;
- else
- LoadOps = LoadBares;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- StoreOps = StoreRels;
- else
- StoreOps = StoreBares;
-
- assert(isPowerOf2_32(Size) && Size <= 8 &&
- "unsupported size for atomic binary op!");
-
- LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
- StrOpc = StoreOps[Log2_32(Size)][isThumb2];
-}
-
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -6028,35 +5978,6 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-static void
-ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) {
- SDLoc dl(Node);
- assert (Node->getValueType(0) == MVT::i64 &&
- "Only know how to expand i64 atomics");
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(0)); // Chain
- Ops.push_back(Node->getOperand(1)); // Ptr
- for(unsigned i=2; i<Node->getNumOperands(); i++) {
- // Low part
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(i), DAG.getIntPtrConstant(0)));
- // High part
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(i), DAG.getIntPtrConstant(1)));
- }
- SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result = DAG.getAtomic(
- Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
- cast<MemSDNode>(Node)->getMemOperand(), AN->getSuccessOrdering(),
- AN->getFailureOrdering(), AN->getSynchScope());
- SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
- Results.push_back(Result.getValue(2));
-}
-
static void ReplaceREADCYCLECOUNTER(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
@@ -6174,22 +6095,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
- case ISD::ATOMIC_STORE:
- case ISD::ATOMIC_LOAD:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG);
- return;
}
if (Res.getNode())
Results.push_back(Res);
@@ -6199,531 +6104,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
// ARM Scheduler Hooks
//===----------------------------------------------------------------------===//
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned oldval = MI->getOperand(2).getReg();
- unsigned newval = MI->getOperand(3).getReg();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass);
-
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
- MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineFunction *MF = BB->getParent();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It; // insert the new blocks after the current block
-
- MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loop1MBB);
- MF->insert(It, loop2MBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // thisMBB:
- // ...
- // fallthrough --> loop1MBB
- BB->addSuccessor(loop1MBB);
-
- // loop1MBB:
- // ldrex dest, [ptr]
- // cmp dest, oldval
- // bne exitMBB
- BB = loop1MBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(dest).addReg(oldval));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(loop2MBB);
- BB->addSuccessor(exitMBB);
-
- // loop2MBB:
- // strex scratch, newval, [ptr]
- // cmp scratch, #0
- // bne loop1MBB
- BB = loop2MBB;
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode) const {
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrex dest, ptr
- // <binop> scratch2, dest, incr
- // strex scratch, scratch2, ptr
- // cmp scratch, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- if (BinOpcode) {
- // operand order needs to go the other way for NAND
- if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(incr).addReg(dest)).addReg(0);
- else
- AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
- addReg(dest).addReg(incr)).addReg(0);
- }
-
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- bool signExtend,
- ARMCC::CondCodes Cond) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- unsigned oldval = dest;
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc, extendOpc;
- getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
- case 1:
- extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
- break;
- case 2:
- extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
- break;
- case 4:
- extendOpc = 0;
- break;
- }
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::rGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned scratch2 = MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrex dest, ptr
- // (sign extend dest, if required)
- // cmp dest, incr
- // cmov.cond scratch2, incr, dest
- // strex scratch, scratch2, ptr
- // cmp scratch, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (ldrOpc == ARM::t2LDREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
-
- // Sign extend the value, if necessary.
- if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
- : &ARM::GPRnopcRegClass);
- if (!isThumb2)
- MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
- .addReg(dest)
- .addImm(0));
- }
-
- // Build compare and cmov instructions.
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(oldval).addReg(incr));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
- .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
-
- MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- if (strOpc == ARM::t2STREX)
- MIB.addImm(0);
- AddDefaultPred(MIB);
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(scratch).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Op1, unsigned Op2,
- bool NeedsCarry, bool IsCmpxchg,
- bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned destlo = MI->getOperand(0).getReg();
- unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
- AtomicOrdering Ord =
- static_cast<AtomicOrdering>(MI->getOperand(IsCmpxchg ? 7 : 5).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
- }
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *contBB = 0, *cont2BB = 0;
- if (IsCmpxchg || IsMinMax)
- contBB = MF->CreateMachineBasicBlock(LLVM_BB);
- if (IsCmpxchg)
- cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-
- MF->insert(It, loopMBB);
- if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
- if (IsCmpxchg) MF->insert(It, cont2BB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- unsigned storesuccess = MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldrexd r2, r3, ptr
- // <binopa> r0, r2, incr
- // <binopb> r1, r3, incr
- // strexd storesuccess, r0, r1, ptr
- // cmp storesuccess, #0
- // bne- loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
-
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(GPRPair0, RegState::Define)
- .addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
- }
-
- unsigned StoreLo, StoreHi;
- if (IsCmpxchg) {
- // Add early exit
- for (unsigned i = 0; i < 2; i++) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
- ARM::CMPrr))
- .addReg(i == 0 ? destlo : desthi)
- .addReg(i == 0 ? vallo : valhi));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(i == 0 ? contBB : cont2BB);
- BB = (i == 0 ? contBB : cont2BB);
- }
-
- // Copy to physregs for strexd
- StoreLo = MI->getOperand(5).getReg();
- StoreHi = MI->getOperand(6).getReg();
- } else if (Op1) {
- // Perform binary operation
- unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
- .addReg(destlo).addReg(vallo))
- .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
- unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
- .addReg(desthi).addReg(valhi))
- .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
-
- StoreLo = tmpRegLo;
- StoreHi = tmpRegHi;
- } else {
- // Copy to physregs for strexd
- StoreLo = vallo;
- StoreHi = valhi;
- }
- if (IsMinMax) {
- // Compare and branch to exit block.
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
- BB->addSuccessor(exitMBB);
- BB->addSuccessor(contBB);
- BB = contBB;
- StoreLo = vallo;
- StoreHi = valhi;
- }
-
- // Store
- if (isThumb2) {
- MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
- } else {
- // Marshal a pair...
- unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(StoreLo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
- .addReg(r1)
- .addReg(StoreHi)
- .addImm(ARM::gsub_1);
-
- // ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(StorePair).addReg(ptr));
- }
- // Cmp+jump
- AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(storesuccess).addImm(0));
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
-
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- unsigned destlo = MI->getOperand(0).getReg();
- unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- if (isThumb2) {
- MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
- MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
- MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
- }
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
-
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
-
- if (isThumb2) {
- MIB.addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr);
-
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
-
- // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
- }
- AddDefaultPred(MIB);
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -7654,130 +7034,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent();
return BB;
}
- case ARM::ATOMIC_LOAD_ADD_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
- case ARM::ATOMIC_LOAD_ADD_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
-
- case ARM::ATOMIC_LOAD_AND_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_LOAD_AND_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
-
- case ARM::ATOMIC_LOAD_OR_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_OR_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
-
- case ARM::ATOMIC_LOAD_XOR_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_XOR_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
-
- case ARM::ATOMIC_LOAD_NAND_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
- case ARM::ATOMIC_LOAD_NAND_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
-
- case ARM::ATOMIC_LOAD_SUB_I8:
- return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I16:
- return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
- case ARM::ATOMIC_LOAD_SUB_I32:
- return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
-
- case ARM::ATOMIC_LOAD_MIN_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MIN_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MIN_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
-
- case ARM::ATOMIC_LOAD_MAX_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
- case ARM::ATOMIC_LOAD_MAX_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
- case ARM::ATOMIC_LOAD_MAX_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
-
- case ARM::ATOMIC_LOAD_UMIN_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMIN_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMIN_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
-
- case ARM::ATOMIC_LOAD_UMAX_I8:
- return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
- case ARM::ATOMIC_LOAD_UMAX_I16:
- return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
- case ARM::ATOMIC_LOAD_UMAX_I32:
- return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
-
- case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
- case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
- case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
-
- case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
- case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
- case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
-
- case ARM::ATOMIC_LOAD_I64:
- return EmitAtomicLoad64(MI, BB);
-
- case ARM::ATOMIC_LOAD_ADD_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
- isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
- /*NeedsCarry*/ true);
- case ARM::ATOMIC_LOAD_SUB_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true);
- case ARM::ATOMIC_LOAD_OR_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
- isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMIC_LOAD_XOR_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
- isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMIC_LOAD_AND_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
- isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMIC_SWAP_I64:
- return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMIC_CMP_SWAP_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMIC_LOAD_MIN_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMIC_LOAD_MAX_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMIC_LOAD_UMIN_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMIC_LOAD_UMAX_I64:
- return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
- isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
- /*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::HS);
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 022945f625..f33e6db9d7 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -548,29 +548,6 @@ namespace llvm {
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
- MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const;
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- unsigned BinOpcode) const;
- MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Op1,
- unsigned Op2,
- bool NeedsCarry = false,
- bool IsCmpxchg = false,
- bool IsMinMax = false,
- ARMCC::CondCodes CC = ARMCC::AL) const;
- MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- bool signExtend,
- ARMCC::CondCodes Cond) const;
- MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index dfcc11edcd..1f09c9f07d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -4336,209 +4336,6 @@ let usesCustomInserter = 1, Defs = [CPSR] in {
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-
-// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
-// (64-bit pseudos use a hand-written selection code).
- let mayLoad = 1, mayStore = 1 in {
- def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst),
- (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_ADD_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_SUB_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_AND_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_OR_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_XOR_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_NAND_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MIN_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_MAX_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_SWAP_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
- NoItinerary, []>;
- def ATOMIC_CMP_SWAP_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
- GPR:$set1, GPR:$set2, i32imm:$ordering),
- NoItinerary, []>;
- }
- let mayLoad = 1 in
- def ATOMIC_LOAD_I64 : PseudoInst<
- (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, i32imm:$ordering),
- NoItinerary, []>;
}
let usesCustomInserter = 1 in {
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6a6d7ed01c..4ae539a1bc 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -226,6 +226,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool ARMPassConfig::addPreISel() {
+ const ARMSubtarget *Subtarget = &getARMSubtarget();
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
+ addPass(createARMAtomicExpandPass(TM));
+
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
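For context: the pass wired in above runs before instruction selection (and only for
subtargets with data barriers that are not Thumb1-only), rewriting atomic instructions
into explicit exclusive-access loops built from the llvm.arm.ldrex and llvm.arm.strex
intrinsics. A minimal sketch of the IR shape this produces for a monotonic
"atomicrmw add" on i32 — block and value names are illustrative, not the pass's actual
output, and monotonic ordering is assumed so no fences appear:

define i32 @atomicrmw_add_sketch(i32* %ptr, i32 %incr) {
entry:
  br label %loop

loop:
  ; load-linked: read the current value and take the exclusive reservation
  %old = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
  %new = add i32 %old, %incr
  ; store-conditional: returns 0 on success, 1 if the reservation was lost
  %status = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %ptr)
  %tryagain = icmp ne i32 %status, 0
  br i1 %tryagain, label %loop, label %end

end:
  ret i32 %old
}

declare i32 @llvm.arm.ldrex.p0i32(i32*)
declare i32 @llvm.arm.strex.p0i32(i32, i32*)

Stronger orderings are handled by placing dmb fences around the loop or, on ARMv8, by
selecting the acquire/release ldaex/stlex forms — which is what the test updates below
check for.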
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9b5fa75fe2..8e14883913 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
A15SDOptimizer.cpp
ARMAsmPrinter.cpp
+ ARMAtomicExpandPass.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 7cf45ebde7..a881d5fd71 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -55,8 +55,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-LABEL: test3:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: and [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -65,8 +65,8 @@ define i64 @test3(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test3:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: and.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -80,8 +80,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-LABEL: test4:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: orr [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -90,8 +90,8 @@ define i64 @test4(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test4:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -105,8 +105,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-LABEL: test5:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
-; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
+; CHECK-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]]
+; CHECK-DAG: eor [[REG4:(r[0-9]?[13579])]], [[REG2]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -115,8 +115,8 @@ define i64 @test5(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test5:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
-; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB-DAG: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -151,8 +151,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-LABEL: test7:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: cmp [[REG1]]
-; CHECK: cmpeq [[REG2]]
+; CHECK-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], r1
+; CHECK-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
+; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: bne
; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
@@ -162,9 +163,9 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-THUMB-LABEL: test7:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: cmp [[REG1]]
-; CHECK-THUMB: it eq
-; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
+; CHECK-THUMB-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
+; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB: bne
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
@@ -216,9 +217,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK-LABEL: test10:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: blt
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwls [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwle [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -227,9 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test10:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: blt
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movls.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movle [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -243,9 +262,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK-LABEL: test11:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: blo
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwls [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwls [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -255,9 +283,18 @@ define i64 @test11(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test11:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: blo
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movls.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movls [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -271,9 +308,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK-LABEL: test12:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bge
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwhi [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwgt [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -282,9 +328,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test12:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: bge
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movgt [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
@@ -298,9 +353,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK-LABEL: test13:
; CHECK: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
-; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
-; CHECK: bhs
+; CHECK: mov [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK: mov [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2
+; CHECK: cmp [[REG1]], r1
+; CHECK: movwhi [[CARRY_LO]], #1
+; CHECK: cmp [[REG2]], r2
+; CHECK: movwhi [[CARRY_HI]], #1
+; CHECK: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK: cmp [[CARRY_HI]], #0
+; CHECK: movne [[OUT_HI]], [[REG2]]
+; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1
+; CHECK: movne [[OUT_LO]], [[REG1]]
; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK: cmp
; CHECK: bne
@@ -309,9 +373,18 @@ define i64 @test13(i64* %ptr, i64 %val) {
; CHECK-THUMB-LABEL: test13:
; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
-; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
-; CHECK-THUMB: bhs
+; CHECK-THUMB: mov.w [[CARRY_LO:[a-z0-9]+]], #0
+; CHECK-THUMB: movs [[CARRY_HI:[a-z0-9]+]], #0
+; CHECK-THUMB: cmp [[REG1]], r2
+; CHECK-THUMB: movhi.w [[CARRY_LO]], #1
+; CHECK-THUMB: cmp [[REG2]], r3
+; CHECK-THUMB: movhi [[CARRY_HI]], #1
+; CHECK-THUMB: moveq [[CARRY_HI]], [[CARRY_LO]]
+; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3
+; CHECK-THUMB: cmp [[CARRY_HI]], #0
+; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2
+; CHECK-THUMB: movne [[OUT_HI]], [[REG2]]
+; CHECK-THUMB: movne [[OUT_LO]], [[REG1]]
; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
; CHECK-THUMB: cmp
; CHECK-THUMB: bne
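A note on the new compare sequences above: for 64-bit cmpxchg the expansion now performs
an IR-level "icmp eq i64" on the value reconstructed from the two halves returned by
ldrexd, and that i64 equality lowers to the eor/eor/orrs pattern the updated checks
expect, replacing the old cmp/cmpeq pair. A rough sketch of the loop, under the
assumption that the pass uses the paired-register llvm.arm.ldrexd/strexd intrinsics
(names and block structure are illustrative):

define i64 @cmpxchg_i64_sketch(i64* %ptr, i64 %wanted, i64 %new) {
entry:
  %addr = bitcast i64* %ptr to i8*
  br label %loop

loop:
  ; ldrexd returns the low and high halves of the current 64-bit value
  %pair = call { i32, i32 } @llvm.arm.ldrexd(i8* %addr)
  %lo = extractvalue { i32, i32 } %pair, 0
  %hi = extractvalue { i32, i32 } %pair, 1
  %lo64 = zext i32 %lo to i64
  %hi64 = zext i32 %hi to i64
  %hi64.shifted = shl i64 %hi64, 32
  %old = or i64 %lo64, %hi64.shifted
  ; this i64 comparison is what becomes eor + eor + orrs after isel
  %should_store = icmp eq i64 %old, %wanted
  br i1 %should_store, label %try_store, label %end

try_store:
  %new.lo = trunc i64 %new to i32
  %new.shifted = lshr i64 %new, 32
  %new.hi = trunc i64 %new.shifted to i32
  %status = call i32 @llvm.arm.strexd(i32 %new.lo, i32 %new.hi, i8* %addr)
  %tryagain = icmp ne i32 %status, 0
  br i1 %tryagain, label %loop, label %end

end:
  ret i64 %old
}

declare { i32, i32 } @llvm.arm.ldrexd(i8*)
declare i32 @llvm.arm.strexd(i32, i32, i8*)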
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 87e76a48c5..1ca78bfd1e 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-THUMB
@var8 = global i8 0
@var16 = global i16 0
@@ -15,7 +15,7 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -38,7 +38,7 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -61,7 +61,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -75,7 +75,7 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+define void @test_atomic_load_add_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i64:
%old = atomicrmw add i64* @var64, i64 %offset monotonic
; CHECK-NOT: dmb
@@ -84,10 +84,10 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: adds{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
@@ -95,9 +95,9 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
@@ -109,7 +109,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -132,7 +132,7 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -155,7 +155,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -169,7 +169,7 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+define void @test_atomic_load_sub_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i64:
%old = atomicrmw sub i64* @var64, i64 %offset seq_cst
; CHECK-NOT: dmb
@@ -178,10 +178,10 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: subs{{(\.w)?}} [[NEW1:r[0-9]+|lr]], r[[OLD1]], r0
; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
@@ -189,9 +189,9 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
@@ -203,7 +203,7 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -226,7 +226,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -249,7 +249,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -263,7 +263,7 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+define void @test_atomic_load_and_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_and_i64:
%old = atomicrmw and i64* @var64, i64 %offset acquire
; CHECK-NOT: dmb
@@ -272,20 +272,20 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: and{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
@@ -297,7 +297,7 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -320,7 +320,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -343,7 +343,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -357,7 +357,7 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+define void @test_atomic_load_or_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64:
%old = atomicrmw or i64* @var64, i64 %offset release
; CHECK-NOT: dmb
@@ -366,20 +366,20 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: orr{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
@@ -391,7 +391,7 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -414,7 +414,7 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -437,7 +437,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
@@ -451,7 +451,7 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+define void @test_atomic_load_xor_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i64:
%old = atomicrmw xor i64* @var64, i64 %offset monotonic
; CHECK-NOT: dmb
@@ -460,20 +460,20 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-DAG: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-DAG: eor{{(\.w)?}} [[NEW2:r[0-9]+|lr]], r[[OLD2]], r1
+; CHECK: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK: strd r[[OLD1]], r[[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
@@ -485,7 +485,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -507,7 +507,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -529,7 +529,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
@@ -542,7 +542,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+define void @test_atomic_load_xchg_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i64:
%old = atomicrmw xchg i64* @var64, i64 %offset acquire
; CHECK-NOT: dmb
@@ -551,7 +551,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
@@ -560,28 +560,28 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
-define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_min_i8(i8 signext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i8:
%old = atomicrmw min i8* @var8, i8 %offset acquire
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-DAG: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK-DAG: movt [[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it ge
-; CHECK: movge r[[OLDX]], r0
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it le
+; CHECK: movle r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -591,23 +591,23 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
ret i8 %old
}
-define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_min_i16(i16 signext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i16:
%old = atomicrmw min i16* @var16, i16 %offset release
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it ge
-; CHECK: movge r[[OLDX]], r0
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it le
+; CHECK: movle r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -626,13 +626,13 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lt
-; CHECK: movlt r[[NEW]], r[[OLD]]
+; Thumb mode: it le
+; CHECK: movle r[[NEW]], r[[OLD]]
; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -643,7 +643,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+define void @test_atomic_load_min_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64:
%old = atomicrmw min i64* @var64, i64 %offset seq_cst
; CHECK-NOT: dmb
@@ -652,41 +652,50 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: blt .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwls [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwle [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
-define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_max_i8(i8 signext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8:
%old = atomicrmw max i8* @var8, i8 %offset seq_cst
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it le
-; CHECK: movle r[[OLDX]], r0
-; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; Thumb mode: it gt
+; CHECK: movgt r[[OLDX]], r[[OLD]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -696,7 +705,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
ret i8 %old
}
-define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_max_i16(i16 signext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16:
%old = atomicrmw max i16* @var16, i16 %offset acquire
; CHECK-NOT: dmb
@@ -705,13 +714,13 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp r[[OLDX]], r0
-; Thumb mode: it le
-; CHECK: movle r[[OLDX]], r0
+; Thumb mode: it gt
+; CHECK: movgt r[[OLDX]], r[[OLD]]
; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -731,7 +740,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
@@ -748,7 +757,7 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+define void @test_atomic_load_max_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64:
%old = atomicrmw max i64* @var64, i64 %offset monotonic
; CHECK-NOT: dmb
@@ -757,41 +766,50 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: bge .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwhi [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwgt [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
-define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_umin_i8(i8 zeroext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i8:
%old = atomicrmw umin i8* @var8, i8 %offset monotonic
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK: movlo r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ls
+; CHECK: movls r[[NEW:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -801,23 +819,23 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
ret i8 %old
}
-define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_umin_i16(i16 zeroext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i16:
%old = atomicrmw umin i16* @var16, i16 %offset acquire
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK: movlo r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ls
+; CHECK: movls r[[NEW:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -836,13 +854,13 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
; CHECK-NEXT: cmp r[[OLD]], r0
-; Thumb mode: it lo
-; CHECK: movlo r[[NEW]], r[[OLD]]
+; Thumb mode: it ls
+; CHECK: movls r[[NEW]], r[[OLD]]
; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
@@ -853,50 +871,59 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i64:
- %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+ %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: blo .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwls [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwls [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
-define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+define i8 @test_atomic_load_umax_i8(i8 zeroext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i8:
%old = atomicrmw umax i8* @var8, i8 %offset acq_rel
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var8
+; CHECK: movt [[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
; Thumb mode: it hi
; CHECK: movhi r[[NEW]], r[[OLD]]
-; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -906,23 +933,23 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
ret i8 %old
}
-define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+define i16 @test_atomic_load_umax_i16(i16 zeroext %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i16:
%old = atomicrmw umax i16* @var16, i16 %offset monotonic
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var16
+; CHECK: movt [[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
; Thumb mode: it hi
; CHECK: movhi r[[NEW]], r[[OLD]]
-; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -941,7 +968,7 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
@@ -958,34 +985,43 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
ret i32 %old
}
-define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i64:
- %old = atomicrmw umax i64* @var64, i64 %offset release
+ %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
-; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
-; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3
-; CHECK-NEXT: BB#2:
-; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: mov [[HICARRY:r[0-9]+|lr]], #0
+; CHECK-ARM: cmp [[OLD1]], r0
+; CHECK-ARM: movwhi [[LOCARRY]], #1
+; CHECK-ARM: cmp [[OLD2]], r1
+; CHECK-ARM: movwhi [[HICARRY]], #1
+; CHECK-ARM: moveq [[HICARRY]], [[LOCARRY]]
+; CHECK-ARM: cmp [[HICARRY]], #0
+; CHECK-ARM: mov [[MINHI:r[0-9]+]], r1
+; CHECK-ARM: movne [[MINHI]], [[OLD2]]
+; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0
+; CHECK-ARM: movne [[MINLO]], [[OLD1]]
+; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]]
+; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD1]]
-; CHECK-NEXT: mov r1, r[[OLD2]]
- ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
-define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8:
%old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
; CHECK-NOT: dmb
@@ -994,14 +1030,15 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var8
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
-; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r1, {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -1011,7 +1048,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
ret i8 %old
}
-define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i16:
%old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst seq_cst
; CHECK-NOT: dmb
@@ -1020,14 +1057,15 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var16
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]]
+; CHECK-NEXT: cmp r[[OLDX]], r0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
-; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -1046,14 +1084,14 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var32
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
; CHECK-NEXT: cmp r[[OLD]], r0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
-; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -1063,7 +1101,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
ret i32 %old
}
-define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i64:
%old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic monotonic
; CHECK-NOT: dmb
@@ -1072,24 +1110,24 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
+; CHECK: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp [[OLD1]], r0
-; Thumb mode: it eq
-; CHECK: cmpeq [[OLD2]], r1
+; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
+; CHECK-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
+; CHECK: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r2, r3 is a reasonable guess.
-; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
+; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, [[OLD1]]
-; CHECK-NEXT: mov r1, [[OLD2]]
- ret i64 %old
+; CHECK-ARM: strd [[OLD1]], [[OLD2]], [r[[ADDR]]]
+ store i64 %old, i64* @var64
+ ret void
}
define i8 @test_atomic_load_monotonic_i8() nounwind {
@@ -1303,13 +1341,13 @@ define void @test_atomic_store_release_i64(i64 %val) nounwind {
store atomic i64 %val, i64* @var64 release, align 8
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
-; CHECK: movt r[[ADDR]], :upper16:var64
+; CHECK: movw [[ADDR:r[0-9]+|lr]], :lower16:var64
+; CHECK: movt [[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1:
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, {{.*}}[[ADDR]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
; CHECK-NOT: dmb
@@ -1337,7 +1375,7 @@ atomic_ver:
; The key point here is that the second dmb isn't immediately followed by the
; simple_ver basic block, which LLVM attempted to do when DMB had been marked
; with isBarrier. For now, look for something that looks like "somewhere".
-; CHECK-NEXT: mov
+; CHECK-NEXT: {{mov|bx}}
somewhere:
%combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
ret i32 %combined
diff --git a/test/CodeGen/ARM/atomicrmw_minmax.ll b/test/CodeGen/ARM/atomicrmw_minmax.ll
index 5befc228e0..1a7092a430 100644
--- a/test/CodeGen/ARM/atomicrmw_minmax.ll
+++ b/test/CodeGen/ARM/atomicrmw_minmax.ll
@@ -15,7 +15,7 @@ define i32 @min(i8 %ctx, i32* %ptr, i32 %val)
{
; CHECK: ldrex
; CHECK: cmp [[old:r[0-9]*]], [[val:r[0-9]*]]
-; CHECK: movlo {{r[0-9]*}}, [[old]]
+; CHECK: movls {{r[0-9]*}}, [[old]]
%old = atomicrmw umin i32* %ptr, i32 %val monotonic
ret i32 %old
}