From 90dd89ed81d3d872fb85d5bb87f28f95d0bf0e50 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 17 Apr 2014 20:00:33 +0000 Subject: ARM64: switch to IR-based atomic operations. Goodbye code! (Game: spot the bug fixed by the change). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 82 ---- lib/Target/ARM64/ARM64ISelLowering.cpp | 734 ++++----------------------------- lib/Target/ARM64/ARM64ISelLowering.h | 22 +- lib/Target/ARM64/ARM64InstrAtomics.td | 65 --- 4 files changed, 91 insertions(+), 812 deletions(-) (limited to 'lib/Target') diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp index 9e5b5af0d9..8ef13e781b 100644 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp @@ -157,9 +157,6 @@ public: SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node); SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32, - unsigned Op64); - SDNode *SelectBitfieldExtractOp(SDNode *N); SDNode *SelectBitfieldInsertOp(SDNode *N); @@ -1138,37 +1135,6 @@ SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, return St; } -SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16, unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. - AtomicSDNode *AN = cast(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) - Op = Op64; - else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, - &Ops[0], Ops.size()); -} - static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, @@ -1829,54 +1795,6 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { return I; break; - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8, - ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32, - ARM64::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8, - ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32, - ARM64::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8, - ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32, - ARM64::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8, - ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32, - ARM64::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8, - ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32, - ARM64::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16, - ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8, - 
ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32, - ARM64::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8, - ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32, - ARM64::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16, - ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16, - ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16, - ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8, - ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32, - ARM64::ATOMIC_CMP_SWAP_I64); - case ISD::LOAD: { // Try to select as an indexed load. Fall through to normal processing // if we can't. diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 794bd02942..1bb2198c1e 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -222,26 +222,6 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - // 128-bit atomics - setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom); - // These are surprisingly difficult. The only single-copy atomic 128-bit - // instruction on AArch64 is stxp (when it succeeds). So a store can safely - // become a simple swap, but a load can only be determined to have been atomic - // if storing the same value back succeeds. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand); - // Variable arguments. 
setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Custom); @@ -706,437 +686,6 @@ const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const { } } -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, unsigned &StrOpc) { - static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW, - ARM64::LDXRX, ARM64::LDXPX }; - static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW, - ARM64::LDAXRX, ARM64::LDAXPX }; - static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW, - ARM64::STXRX, ARM64::STXPX }; - static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW, - ARM64::STLXRX, ARM64::STLXPX }; - - unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 16 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm()); - unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - // FIXME: We currently always generate a seq_cst operation; we should - // be able to relax this in some cases. - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr)) - .addReg(Size == 8 ? 
ARM64::XZR : ARM64::WZR, RegState::Define) - .addReg(dest) - .addReg(oldval); - BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned scratch2 = - (!BinOpcode) - ? incr - : RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass - : &ARM64::GPR32RegClass); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // scratch2, dest, incr - // stxr scratch, scratch2, ptr - // cbnz scratch, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr) - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr); - } - - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo, - unsigned BinOpcodeHi) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = IncrLo, ScratchHi = IncrHi; - if (BinOpcodeLo) { - assert(BinOpcodeHi && "Expect neither or both opcodes to be defined"); - ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - } - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // ScratchLo, DestLo, IncrLo - // ScratchHi, DestHi, IncrHi - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - if (BinOpcodeLo) { - // operand order needs to go the other way for NAND - if (BinOpcodeLo == ARM64::BICXrr) { - std::swap(IncrLo, DestLo); - std::swap(IncrHi, DestHi); - } - - BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg( - IncrHi); - } - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI, - MachineBasicBlock *BB) const { - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned OldValLo = MI->getOperand(3).getReg(); - unsigned OldValHi = MI->getOperand(4).getReg(); - unsigned NewValLo = MI->getOperand(5).getReg(); - unsigned NewValHi = MI->getOperand(6).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(7).getImm()); - unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, Loop1MBB); - MF->insert(It, Loop2MBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // ThisMBB: - // ... - // fallthrough --> Loop1MBB - BB->addSuccessor(Loop1MBB); - - // Loop1MBB: - // ldxp DestLo, DestHi, [Ptr] - // cmp DestLo, OldValLo - // sbc xzr, DestHi, OldValHi - // bne ExitMBB - BB = Loop1MBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - OldValLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - OldValHi); - - BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB); - BB->addSuccessor(Loop2MBB); - BB->addSuccessor(ExitMBB); - - // Loop2MBB: - // stxp ScratchRes, NewValLo, NewValHi, [Ptr] - // cbnz ScratchRes, Loop1MBB - BB = Loop2MBB; - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(NewValLo) - .addReg(NewValHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB); - BB->addSuccessor(Loop1MBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // cmp ScratchLo, DestLo, IncrLo - // sbc xzr, ScratchHi, DestHi, IncrHi - // csel ScratchLo, DestLo, IncrLo, - // csel ScratchHi, DestHi, IncrHi, - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - IncrHi); - - BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo) - .addReg(DestLo) - .addReg(IncrLo) - .addImm(CondCode); - BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi) - .addReg(DestHi) - .addReg(IncrHi) - .addImm(CondCode); - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - MachineBasicBlock * ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -1209,106 +758,6 @@ ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, assert(0 && "Unexpected instruction for custom inserter!"); break; - case ARM64::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr); - case ARM64::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr); - case ARM64::ATOMIC_LOAD_ADD_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr); - - case ARM64::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr); - case ARM64::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr); - case ARM64::ATOMIC_LOAD_AND_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr); - - case ARM64::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr); - case ARM64::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr); - case ARM64::ATOMIC_LOAD_OR_I128: - return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr); - - case ARM64::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr); - case ARM64::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr); - case ARM64::ATOMIC_LOAD_XOR_I128: - return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr); - - case ARM64::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr); - case ARM64::ATOMIC_LOAD_NAND_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr); - case ARM64::ATOMIC_LOAD_NAND_I128: - return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr); - - case ARM64::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr); - case ARM64::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr); - case ARM64::ATOMIC_LOAD_SUB_I128: - return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr); - - case ARM64::ATOMIC_LOAD_MIN_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::LT); - - case ARM64::ATOMIC_LOAD_MAX_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::GT); - - case ARM64::ATOMIC_LOAD_UMIN_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::CC); - - case ARM64::ATOMIC_LOAD_UMAX_I128: - return EmitAtomicMinMax128(MI, BB, ARM64CC::HI); - - case ARM64::ATOMIC_SWAP_I8: - return EmitAtomicBinary(MI, BB, 1, 0); - case ARM64::ATOMIC_SWAP_I16: - return EmitAtomicBinary(MI, BB, 2, 0); - case ARM64::ATOMIC_SWAP_I32: 
- return EmitAtomicBinary(MI, BB, 4, 0); - case ARM64::ATOMIC_SWAP_I64: - return EmitAtomicBinary(MI, BB, 8, 0); - case ARM64::ATOMIC_SWAP_I128: - return EmitAtomicBinary128(MI, BB, 0, 0); - - case ARM64::ATOMIC_CMP_SWAP_I8: - return EmitAtomicCmpSwap(MI, BB, 1); - case ARM64::ATOMIC_CMP_SWAP_I16: - return EmitAtomicCmpSwap(MI, BB, 2); - case ARM64::ATOMIC_CMP_SWAP_I32: - return EmitAtomicCmpSwap(MI, BB, 4); - case ARM64::ATOMIC_CMP_SWAP_I64: - return EmitAtomicCmpSwap(MI, BB, 8); - case ARM64::ATOMIC_CMP_SWAP_I128: - return EmitAtomicCmpSwap128(MI, BB); - case ARM64::F128CSEL: return EmitF128CSEL(MI, BB); @@ -7476,113 +6925,12 @@ bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -/// The only 128-bit atomic operation is an stxp that succeeds. In particular -/// neither ldp nor ldxp are atomic. So the canonical sequence for an atomic -/// load is: -/// loop: -/// ldxp x0, x1, [x8] -/// stxp w2, x0, x1, [x8] -/// cbnz w2, loop -/// If the stxp succeeds then the ldxp managed to get both halves without an -/// intervening stxp from a different thread and the read was atomic. -static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) { - SDLoc DL(N); - AtomicSDNode *AN = cast(N); - EVT VT = AN->getMemoryVT(); - SDValue Zero = DAG.getConstant(0, VT); - - // FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing - // scheme very well. Given the complexity of what we're already generating, an - // extra couple of ORRs probably won't make much difference. - SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(), - N->getOperand(0), N->getOperand(1), Zero, - AN->getMemOperand(), AN->getOrdering(), - AN->getSynchScope()); - - Results.push_back(Result.getValue(0)); // Value - Results.push_back(Result.getValue(1)); // Chain -} - -static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG, unsigned NewOp) { - SDLoc DL(N); - AtomicOrdering Ordering = cast(N)->getOrdering(); - assert(N->getValueType(0) == MVT::i128 && - "Only know how to expand i128 atomics"); - - SmallVector Ops; - Ops.push_back(N->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) { - // Low part of Val2 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, - N->getOperand(3), DAG.getIntPtrConstant(1))); - } - - Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32)); - Ops.push_back(N->getOperand(0)); // Chain - - SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other); - SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops); - SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2)); - Results.push_back(SDValue(Result, 2)); -} - void ARM64TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this"); - case ISD::ATOMIC_LOAD: - ReplaceATOMIC_LOAD_128(N, Results, DAG); - return; - case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_128(N, Results, DAG, 
ARM64::ATOMIC_LOAD_ADD_I128);
-    return;
-  case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128);
-    return;
-  case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128);
-    return;
-  case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128);
-    return;
-  case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128);
-    return;
-  case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128);
-    return;
-  case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128);
-    return;
-  case ISD::ATOMIC_LOAD_MIN:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128);
-    return;
-  case ISD::ATOMIC_LOAD_MAX:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128);
-    return;
-  case ISD::ATOMIC_LOAD_UMIN:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128);
-    return;
-  case ISD::ATOMIC_LOAD_UMAX:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128);
-    return;
-  case ISD::ATOMIC_CMP_SWAP:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128);
-    return;
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:
     assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -7590,3 +6938,85 @@ void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
 }
+
+bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+  // Loads and stores less than 128-bits are already atomic; ones above that
+  // are doomed anyway, so defer to the default libcall and blame the OS when
+  // things go wrong:
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
+  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+    return LI->getType()->getPrimitiveSizeInBits() == 128;
+
+  // For the real atomic operations, we have ldxr/stxr up to 128 bits.
+  return Inst->getType()->getPrimitiveSizeInBits() <= 128;
+}
+
+Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+                                           AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+  bool IsAcquire =
+      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
+  // intrinsic must return {i64, i64} and we have to recombine them into a
+  // single i128 here.
+  if (ValTy->getPrimitiveSizeInBits() == 128) {
+    Intrinsic::ID Int =
+        IsAcquire ? Intrinsic::arm64_ldaxp : Intrinsic::arm64_ldxp;
+    Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
+
+    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+    return Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
+  }
+
+  Type *Tys[] = { Addr->getType() };
+  Intrinsic::ID Int =
+      IsAcquire ? Intrinsic::arm64_ldaxr : Intrinsic::arm64_ldxr;
+  Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateTruncOrBitCast(
+      Builder.CreateCall(Ldxr, Addr),
+      cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
+                                                 Value *Val, Value *Addr,
+                                                 AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  bool IsRelease =
+      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since the intrinsics must have legal type, the i128 intrinsics take two
+  // parameters: "i64, i64". We must marshal Val into the appropriate form
+  // before the call.
+  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
+    Intrinsic::ID Int =
+        IsRelease ? Intrinsic::arm64_stlxp : Intrinsic::arm64_stxp;
+    Function *Stxr = Intrinsic::getDeclaration(M, Int);
+    Type *Int64Ty = Type::getInt64Ty(M->getContext());
+
+    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
+    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
+  }
+
+  Intrinsic::ID Int =
+      IsRelease ? Intrinsic::arm64_stlxr : Intrinsic::arm64_stxr;
+  Type *Tys[] = { Addr->getType() };
+  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateCall2(
+      Stxr, Builder.CreateZExtOrBitCast(
+                Val, Stxr->getFunctionType()->getParamType(0)),
+      Addr);
+}
diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/ARM64/ARM64ISelLowering.h
index db91d6c3cb..2f853b4110 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.h
+++ b/lib/Target/ARM64/ARM64ISelLowering.h
@@ -233,19 +233,6 @@ public:
 
   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
-  MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
-                                      unsigned Size, unsigned BinOpcode) const;
-  MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
-                                       unsigned Size) const;
-  MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned BinOpcodeLo,
-                                         unsigned BinOpcodeHi) const;
-  MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
-                                          MachineBasicBlock *BB) const;
-  MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned CondCode) const;
   MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
                                   MachineBasicBlock *BB) const;
 
@@ -293,9 +280,18 @@ public:
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
+  /// \brief Returns true if it is beneficial to convert a load of a constant
+  /// to just the constant itself.
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override;
 
+  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+                        AtomicOrdering Ord) const override;
+  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+                              Value *Addr, AtomicOrdering Ord) const override;
+
+  bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+
 private:
   /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
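A note on the i128 marshalling in the two hooks above: because the ldxp/stxp intrinsics traffic in an {i64, i64} pair rather than an i128, the value is split with trunc/lshr before the store-exclusive and recombined with zext/shl/or after the load-exclusive. A standalone IRBuilder sketch of that round trip follows; the helper names are invented for illustration and are not part of the patch.

// Illustrative only: the i128 <-> {i64, i64} marshalling used around the
// ldxp/stxp intrinsics, as free functions over an IRBuilder.
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include <utility>

using namespace llvm;

// Recombine the two halves returned by ldxp/ldaxp into a single i128 value.
static Value *combineHalves(IRBuilder<> &B, Value *Lo, Value *Hi) {
  Type *I128 = B.getIntNTy(128);
  Lo = B.CreateZExt(Lo, I128, "lo64");
  Hi = B.CreateZExt(Hi, I128, "hi64");
  // val = lo | (hi << 64)
  return B.CreateOr(Lo, B.CreateShl(Hi, ConstantInt::get(I128, 64)), "val128");
}

// Split an i128 value into the two i64 halves expected by stxp/stlxp.
static std::pair<Value *, Value *> splitHalves(IRBuilder<> &B, Value *Val) {
  Type *I64 = B.getInt64Ty();
  Value *Lo = B.CreateTrunc(Val, I64, "lo");
  Value *Hi = B.CreateTrunc(B.CreateLShr(Val, 64), I64, "hi");
  return std::make_pair(Lo, Hi);
}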
diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td
index 296f8d216f..989e7a2e74 100644
--- a/lib/Target/ARM64/ARM64InstrAtomics.td
+++ b/lib/Target/ARM64/ARM64InstrAtomics.td
@@ -140,71 +140,6 @@ def : Pat<(relaxed_store am_indexed64:$ptr, GPR64:$val),
 def : Pat<(relaxed_store am_unscaled64:$ptr, GPR64:$val),
           (STURXi GPR64:$val, am_unscaled64:$ptr)>;
 
-//===----------------------------------
-// Atomic read-modify-write operations
-//===----------------------------------
-
-// More complicated operations need lots of C++ support, so we just create
-// skeletons here for the C++ code to refer to.
-
-let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
-multiclass AtomicSizes {
-  def _I8 : Pseudo<(outs GPR32:$dst),
-                   (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I16 : Pseudo<(outs GPR32:$dst),
-                    (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I32 : Pseudo<(outs GPR32:$dst),
-                    (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I64 : Pseudo<(outs GPR64:$dst),
-                    (ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
-  def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
-                     (ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
-                      i32imm:$ordering), []>;
-}
-}
-
-defm ATOMIC_LOAD_ADD : AtomicSizes;
-defm ATOMIC_LOAD_SUB : AtomicSizes;
-defm ATOMIC_LOAD_AND : AtomicSizes;
-defm ATOMIC_LOAD_OR : AtomicSizes;
-defm ATOMIC_LOAD_XOR : AtomicSizes;
-defm ATOMIC_LOAD_NAND : AtomicSizes;
-defm ATOMIC_SWAP : AtomicSizes;
-let Defs = [CPSR] in {
-  // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN : AtomicSizes;
-  defm ATOMIC_LOAD_MAX : AtomicSizes;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes;
-}
-
-class AtomicCmpSwap<RegisterClass GPRData>
-  : Pseudo<(outs GPRData:$dst),
-           (ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
-            i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [CPSR];
-}
-
-def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
-
-def ATOMIC_CMP_SWAP_I128
-  : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
-           (ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
-            GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [CPSR];
-}
-
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
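For context, the new hooks are meant to be driven by an IR-level expansion pass rather than by the custom SelectionDAG/MachineInstr lowering deleted above: the pass rewrites each atomic operation into a load-linked/store-conditional loop in IR and lets ordinary instruction selection pick ldxr/stxr. Below is a minimal sketch of such an expansion for an atomicrmw add, assuming a TargetLowering reference exposing the two hooks added here and 2014-era include paths. This is not the in-tree pass; the function name and structure are illustrative only.

// Hypothetical sketch: expand an atomicrmw add into an ldxr/stxr retry loop
// using the emitLoadLinked/emitStoreConditional hooks from this patch.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLowering.h"

using namespace llvm;

static bool expandAtomicRMWAdd(AtomicRMWInst *AI, const TargetLowering &TLI) {
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  AtomicOrdering Order = AI->getOrdering();

  // Split the block at the atomicrmw so the loop has an exit to branch to.
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
  BB->getTerminator()->setSuccessor(0, LoopBB);

  IRBuilder<> Builder(LoopBB);
  // ldxr/ldaxr of the current value.
  Value *Loaded = TLI.emitLoadLinked(Builder, AI->getPointerOperand(), Order);
  // The read-modify-write step; here just an add.
  Value *NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
  // stxr/stlxr returns non-zero on failure, so retry until it succeeds.
  Value *Failed =
      TLI.emitStoreConditional(Builder, NewVal, AI->getPointerOperand(), Order);
  Value *TryAgain =
      Builder.CreateICmpNE(Failed, ConstantInt::get(Failed->getType(), 0));
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  // The atomicrmw's result is the value observed before the update.
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

The conditional branch retries until the store-exclusive succeeds, which is the same loop the removed custom inserters built by hand at the MachineInstr level; doing it in IR lets later optimizations and ordinary ISel see through it.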