-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp         | 991
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h           |  41
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td         |   9
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp            |   4
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h              |   3
-rw-r--r-- | test/CodeGen/X86/2010-01-08-Atomic64Bug.ll |  13
-rw-r--r-- | test/CodeGen/X86/atomic16.ll               | 250
-rw-r--r-- | test/CodeGen/X86/atomic32.ll               | 250
-rw-r--r-- | test/CodeGen/X86/atomic64.ll               | 216
-rw-r--r-- | test/CodeGen/X86/atomic6432.ll             | 209
-rw-r--r-- | test/CodeGen/X86/atomic8.ll                | 251
-rw-r--r-- | test/CodeGen/X86/atomic_op.ll              |  11
-rw-r--r-- | test/CodeGen/X86/pr13458.ll                |  14
13 files changed, 1709 insertions, 553 deletions
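This patch folds the three per-operation custom inserters (EmitAtomicBitwiseWithCustomInserter, EmitAtomicBit6432WithCustomInserter, and EmitAtomicMinMaxWithCustomInserter) into two unified ones, EmitAtomicLoadArith and EmitAtomicLoadArith6432, which expand every pseudo atomic-load-op into a compare-exchange retry loop. As a rough sketch of the emitted sequence (illustrative only, not taken from the patch: the labels and register assignments below are hypothetical, since the real choices are made by register allocation), an atomicrmw and on an i32 global becomes a loop of this shape:

        movl     sc32, %eax        # thisMBB: EAX = LOAD [addr]
.LBB0_1:                           # mainMBB: the retry loop
        movl     %eax, %ecx        # copy the accumulator; it is used twice
        andl     %esi, %ecx        # t1 = OP val, EAX
        lock
        cmpxchgl %ecx, sc32        # if [addr] == EAX, store t1;
                                   # else reload [addr] into EAX
        jne      .LBB0_1           # lost the race: retry with the fresh EAX
                                   # sinkMBB: the old value is left in EAX

The 64-bit form on a 32-bit target follows the same pattern with LCMPXCHG8B, splitting the value across EDX:EAX (current) and ECX:EBX (desired):

        movl      sc64, %eax       # thisMBB: load the low half
        movl      sc64+4, %edx     #          load the high half
.LBB1_1:                           # mainMBB
        movl      %eax, %ebx
        andl      %esi, %ebx       # low half of the operation
        movl      %edx, %ecx
        andl      %edi, %ecx       # high half of the operation
        lock
        cmpxchg8b sc64             # swap in ECX:EBX if EDX:EAX still matches
        jne       .LBB1_1
                                   # sinkMBB: the old value is in EDX:EAX

For min/max/umin/umax the OP step is a CMP followed by the matching CMOV (or the CMOV_GR16/CMOV_GR32 pseudo, lowered through EmitLoweredSelect, when the target has no native CMOV), and NAND appends a NOT to the AND.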
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index baa83c6511..6130603bce 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11911,385 +11911,498 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, //===----------------------------------------------------------------------===// // private utility function + +// Get CMPXCHG opcode for the specified data type. +static unsigned getCmpXChgOpcode(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i8: return X86::LCMPXCHG8; + case MVT::i16: return X86::LCMPXCHG16; + case MVT::i32: return X86::LCMPXCHG32; + case MVT::i64: return X86::LCMPXCHG64; + default: + break; + } + llvm_unreachable("Invalid operand size!"); +} + +// Get LOAD opcode for the specified data type. +static unsigned getLoadOpcode(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i8: return X86::MOV8rm; + case MVT::i16: return X86::MOV16rm; + case MVT::i32: return X86::MOV32rm; + case MVT::i64: return X86::MOV64rm; + default: + break; + } + llvm_unreachable("Invalid operand size!"); +} + +// Get opcode of the non-atomic one from the specified atomic instruction. +static unsigned getNonAtomicOpcode(unsigned Opc) { + switch (Opc) { + case X86::ATOMAND8: return X86::AND8rr; + case X86::ATOMAND16: return X86::AND16rr; + case X86::ATOMAND32: return X86::AND32rr; + case X86::ATOMAND64: return X86::AND64rr; + case X86::ATOMOR8: return X86::OR8rr; + case X86::ATOMOR16: return X86::OR16rr; + case X86::ATOMOR32: return X86::OR32rr; + case X86::ATOMOR64: return X86::OR64rr; + case X86::ATOMXOR8: return X86::XOR8rr; + case X86::ATOMXOR16: return X86::XOR16rr; + case X86::ATOMXOR32: return X86::XOR32rr; + case X86::ATOMXOR64: return X86::XOR64rr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} + +// Get opcode of the non-atomic one from the specified atomic instruction with +// extra opcode. +static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc, + unsigned &ExtraOpc) { + switch (Opc) { + case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr; + case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr; + case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr; + case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr; + case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr; + case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr; + case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr; + case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr; + case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr; + case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr; + case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr; + case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr; + case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr; + case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr; + case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr; + case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} + +// Get opcode of the non-atomic one from the specified atomic instruction for +// 64-bit data type on 32-bit target. 
+static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { + switch (Opc) { + case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr; + case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr; + case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr; + case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; + case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; + case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} + +// Get opcode of the non-atomic one from the specified atomic instruction for +// 64-bit data type on 32-bit target with extra opcode. +static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc, + unsigned &HiOpc, + unsigned &ExtraOpc) { + switch (Opc) { + case X86::ATOMNAND6432: + ExtraOpc = X86::NOT32r; + HiOpc = X86::AND32rr; + return X86::AND32rr; + } + llvm_unreachable("Unhandled atomic-load-op opcode!"); +} + +// Get pseudo CMOV opcode from the specified data type. +static unsigned getPseudoCMOVOpc(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i16: return X86::CMOV_GR16; + case MVT::i32: return X86::CMOV_GR32; + default: + break; + } + llvm_unreachable("Unknown CMOV opcode!"); +} + +// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions. +// They will be translated into a spin-loop or compare-exchange loop from +// +// ... +// dst = atomic-fetch-op MI.addr, MI.val +// ... +// +// to +// +// ... +// EAX = LOAD MI.addr +// loop: +// t1 = OP MI.val, EAX +// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] +// JNE loop +// sink: +// dst = EAX +// ... MachineBasicBlock * -X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, - MachineBasicBlock *MBB, - unsigned regOpc, - unsigned immOpc, - unsigned LoadOpc, - unsigned CXchgOpc, - unsigned notOpc, - unsigned EAXreg, - const TargetRegisterClass *RC, - bool Invert) const { - // For the atomic bitwise operator, we generate - // thisMBB: - // newMBB: - // ld t1 = [bitinstr.addr] - // op t2 = t1, [bitinstr.val] - // not t3 = t2 (if Invert) - // mov EAX = t1 - // lcs dest = [bitinstr.addr], t3 [EAX is implicit] - // bz newMBB - // fallthrough -->nextMBB +X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, + MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction::iterator MBBIter = MBB; - ++MBBIter; + DebugLoc DL = MI->getDebugLoc(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 && + "Unexpected number of operands"); + + assert(MI->hasOneMemOperand() && + "Expected atomic-load-op to have one memoperand"); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg, SrcReg; + unsigned MemOpndSlot; + + unsigned CurOp = 0; + + DstReg = MI->getOperand(CurOp++).getReg(); + MemOpndSlot = CurOp; + CurOp += X86::AddrNumOperands; + SrcReg = MI->getOperand(CurOp++).getReg(); + + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + EVT VT = *RC->vt_begin(); + unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT); + + unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); + unsigned LOADOpc = getLoadOpcode(VT); + + 
// For the atomic load-arith operator, we generate + // + // thisMBB: + // EAX = LOAD [MI.addr] + // mainMBB: + // t1 = OP MI.val, EAX + // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] + // JNE mainMBB + // sinkMBB: - /// First build the CFG - MachineFunction *F = MBB->getParent(); MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, newMBB); - F->insert(MBBIter, nextMBB); - - // Transfer the remainder of thisMBB and its successor edges to nextMBB. - nextMBB->splice(nextMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(bInstr)), - thisMBB->end()); - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - // Update thisMBB to fall through to newMBB - thisMBB->addSuccessor(newMBB); - - // newMBB jumps to itself and fall through to nextMBB - newMBB->addSuccessor(nextMBB); - newMBB->addSuccessor(newMBB); - - // Insert instructions into newMBB based on incoming instruction - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 && - "unexpected number of operands"); - DebugLoc dl = bInstr->getDebugLoc(); - MachineOperand& destOper = bInstr->getOperand(0); - MachineOperand* argOpers[2 + X86::AddrNumOperands]; - int numArgs = bInstr->getNumOperands() - 1; - for (int i=0; i < numArgs; ++i) - argOpers[i] = &bInstr->getOperand(i+1); - - // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] - int valArgIndx = lastAddrIndx + 1; - - unsigned t1 = F->getRegInfo().createVirtualRegister(RC); - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); - assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && - "invalid operand"); - if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2); - else - MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2); - MIB.addReg(t1); - (*MIB).addOperand(*argOpers[valArgIndx]); + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); - unsigned t3 = F->getRegInfo().createVirtualRegister(RC); - if (Invert) { - MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2); + // thisMBB: + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + + thisMBB->addSuccessor(mainMBB); + + // mainMBB: + MachineBasicBlock *origMainMBB = mainMBB; + mainMBB->addLiveIn(AccPhyReg); + + // Copy AccPhyReg as it is used more than once. 
+ unsigned AccReg = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg) + .addReg(AccPhyReg); + + unsigned t1 = MRI.createVirtualRegister(RC); + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic-load-op opcode!"); + case X86::ATOMAND8: + case X86::ATOMAND16: + case X86::ATOMAND32: + case X86::ATOMAND64: + case X86::ATOMOR8: + case X86::ATOMOR16: + case X86::ATOMOR32: + case X86::ATOMOR64: + case X86::ATOMXOR8: + case X86::ATOMXOR16: + case X86::ATOMXOR32: + case X86::ATOMXOR64: { + unsigned ARITHOpc = getNonAtomicOpcode(Opc); + BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg) + .addReg(AccReg); + break; + } + case X86::ATOMNAND8: + case X86::ATOMNAND16: + case X86::ATOMNAND32: + case X86::ATOMNAND64: { + unsigned t2 = MRI.createVirtualRegister(RC); + unsigned NOTOpc; + unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc); + BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg) + .addReg(AccReg); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2); + break; + } + case X86::ATOMMAX16: + case X86::ATOMMAX32: + case X86::ATOMMAX64: + case X86::ATOMMIN16: + case X86::ATOMMIN32: + case X86::ATOMMIN64: + case X86::ATOMUMAX16: + case X86::ATOMUMAX32: + case X86::ATOMUMAX64: + case X86::ATOMUMIN16: + case X86::ATOMUMIN32: + case X86::ATOMUMIN64: { + unsigned CMPOpc; + unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc); + + BuildMI(mainMBB, DL, TII->get(CMPOpc)) + .addReg(SrcReg) + .addReg(AccReg); + + if (Subtarget->hasCMov()) { + // Native support + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1) + .addReg(SrcReg) + .addReg(AccReg); + } else { + // Use pseudo select and lower them. + assert((VT == MVT::i16 || VT == MVT::i32) && + "Invalid atomic-load-op transformation!"); + unsigned SelOpc = getPseudoCMOVOpc(VT); + X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc); + assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!"); + MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1) + .addReg(SrcReg).addReg(AccReg) + .addImm(CC); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + break; + } } - else - t3 = t2; - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); + // Copy AccPhyReg back from virtual register. + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg) + .addReg(AccReg); + + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); MIB.addReg(t1); + MIB.setMemRefs(MMOBegin, MMOEnd); - MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - MIB.addReg(t3); - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).setMemRefs(bInstr->memoperands_begin(), - bInstr->memoperands_end()); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); - MIB.addReg(EAXreg); + mainMBB->addSuccessor(origMainMBB); + mainMBB->addSuccessor(sinkMBB); - // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); + // sinkMBB: + sinkMBB->addLiveIn(AccPhyReg); - bInstr->eraseFromParent(); // The pseudo instruction is gone now. - return nextMBB; + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(TargetOpcode::COPY), DstReg) + .addReg(AccPhyReg); + + MI->eraseFromParent(); + return sinkMBB; } -// private utility function: 64 bit atomics on 32 bit host. 
+// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic +// instructions. They will be translated into a spin-loop or compare-exchange +// loop from +// +// ... +// dst = atomic-fetch-op MI.addr, MI.val +// ... +// +// to +// +// ... +// EAX = LOAD [MI.addr + 0] +// EDX = LOAD [MI.addr + 4] +// loop: +// EBX = OP MI.val.lo, EAX +// ECX = OP MI.val.hi, EDX +// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] +// JNE loop +// sink: +// dst = EDX:EAX +// ... MachineBasicBlock * -X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, - MachineBasicBlock *MBB, - unsigned regOpcL, - unsigned regOpcH, - unsigned immOpcL, - unsigned immOpcH, - bool Invert) const { - // For the atomic bitwise operator, we generate - // thisMBB (instructions are in pairs, except cmpxchg8b) - // ld t1,t2 = [bitinstr.addr] - // newMBB: - // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) - // op t5, t6 <- out1, out2, [bitinstr.val] - // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) - // neg t7, t8 < t5, t6 (if Invert) - // mov ECX, EBX <- t5, t6 - // mov EAX, EDX <- t1, t2 - // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] - // mov t3, t4 <- EAX, EDX - // bz newMBB - // result in out1, out2 - // fallthrough -->nextMBB - - const TargetRegisterClass *RC = &X86::GR32RegClass; - const unsigned LoadOpc = X86::MOV32rm; - const unsigned NotOpc = X86::NOT32r; +X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, + MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction::iterator MBBIter = MBB; - ++MBBIter; + DebugLoc DL = MI->getDebugLoc(); - /// First build the CFG - MachineFunction *F = MBB->getParent(); - MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, newMBB); - F->insert(MBBIter, nextMBB); - - // Transfer the remainder of thisMBB and its successor edges to nextMBB. - nextMBB->splice(nextMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(bInstr)), - thisMBB->end()); - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - // Update thisMBB to fall through to newMBB - thisMBB->addSuccessor(newMBB); - - // newMBB jumps to itself and fall through to nextMBB - newMBB->addSuccessor(nextMBB); - newMBB->addSuccessor(newMBB); - - DebugLoc dl = bInstr->getDebugLoc(); - // Insert instructions into newMBB based on incoming instruction - // There are 8 "real" operands plus 9 implicit def/uses, ignored here. - assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 && - "unexpected number of operands"); - MachineOperand& dest1Oper = bInstr->getOperand(0); - MachineOperand& dest2Oper = bInstr->getOperand(1); - MachineOperand* argOpers[2 + X86::AddrNumOperands]; - for (int i=0; i < 2 + X86::AddrNumOperands; ++i) { - argOpers[i] = &bInstr->getOperand(i+2); - - // We use some of the operands multiple times, so conservatively just - // clear any kill flags that might be present. - if (argOpers[i]->isReg() && argOpers[i]->isUse()) - argOpers[i]->setIsKill(false); - } - - // x86 address has 5 operands: base, index, scale, displacement, and segment. 
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] - - unsigned t1 = F->getRegInfo().createVirtualRegister(RC); - MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2); - // add 4 to displacement. - for (int i=0; i <= lastAddrIndx-2; ++i) - (*MIB).addOperand(*argOpers[i]); - MachineOperand newOp3 = *(argOpers[3]); - if (newOp3.isImm()) - newOp3.setImm(newOp3.getImm()+4); - else - newOp3.setOffset(newOp3.getOffset()+4); - (*MIB).addOperand(newOp3); - (*MIB).addOperand(*argOpers[lastAddrIndx]); - - // t3/4 are defined later, at the bottom of the loop - unsigned t3 = F->getRegInfo().createVirtualRegister(RC); - unsigned t4 = F->getRegInfo().createVirtualRegister(RC); - BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg()) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB); - BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg()) - .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); - - // The subsequent operations should be using the destination registers of - // the PHI instructions. - t1 = dest1Oper.getReg(); - t2 = dest2Oper.getReg(); - - int valArgIndx = lastAddrIndx + 1; - assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && - "invalid operand"); - unsigned t5 = F->getRegInfo().createVirtualRegister(RC); - unsigned t6 = F->getRegInfo().createVirtualRegister(RC); - if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5); - else - MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5); - if (regOpcL != X86::MOV32rr) - MIB.addReg(t1); - (*MIB).addOperand(*argOpers[valArgIndx]); - assert(argOpers[valArgIndx + 1]->isReg() == - argOpers[valArgIndx]->isReg()); - assert(argOpers[valArgIndx + 1]->isImm() == - argOpers[valArgIndx]->isImm()); - if (argOpers[valArgIndx + 1]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6); - else - MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6); - if (regOpcH != X86::MOV32rr) - MIB.addReg(t2); - (*MIB).addOperand(*argOpers[valArgIndx + 1]); - - unsigned t7, t8; - if (Invert) { - t7 = F->getRegInfo().createVirtualRegister(RC); - t8 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6); - } else { - t7 = t5; - t8 = t6; - } + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); - MIB.addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); - MIB.addReg(t2); + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); - MIB.addReg(t7); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); - MIB.addReg(t8); + assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && + "Unexpected number of operands"); - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); + assert(MI->hasOneMemOperand() && + "Expected atomic-load-op32 to have one memoperand"); - assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).setMemRefs(bInstr->memoperands_begin(), - bInstr->memoperands_end()); + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = 
MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3); - MIB.addReg(X86::EAX); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4); - MIB.addReg(X86::EDX); + unsigned DstLoReg, DstHiReg; + unsigned SrcLoReg, SrcHiReg; + unsigned MemOpndSlot; - // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); + unsigned CurOp = 0; - bInstr->eraseFromParent(); // The pseudo instruction is gone now. - return nextMBB; -} + DstLoReg = MI->getOperand(CurOp++).getReg(); + DstHiReg = MI->getOperand(CurOp++).getReg(); + MemOpndSlot = CurOp; + CurOp += X86::AddrNumOperands; + SrcLoReg = MI->getOperand(CurOp++).getReg(); + SrcHiReg = MI->getOperand(CurOp++).getReg(); -// private utility function -MachineBasicBlock * -X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, - MachineBasicBlock *MBB, - unsigned cmovOpc) const { - // For the atomic min/max operator, we generate - // thisMBB: - // newMBB: - // ld t1 = [min/max.addr] - // mov t2 = [min/max.val] - // cmp t1, t2 - // cmov[cond] t2 = t1 - // mov EAX = t1 - // lcs dest = [bitinstr.addr], t2 [EAX is implicit] - // bz newMBB - // fallthrough -->nextMBB + const TargetRegisterClass *RC = &X86::GR32RegClass; + + unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; + unsigned LOADOpc = X86::MOV32rm; + + // For the atomic load-arith operator, we generate // - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - MachineFunction::iterator MBBIter = MBB; - ++MBBIter; + // thisMBB: + // EAX = LOAD [MI.addr + 0] + // EDX = LOAD [MI.addr + 4] + // mainMBB: + // EBX = OP MI.vallo, EAX + // ECX = OP MI.valhi, EDX + // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] + // JNE mainMBB + // sinkMBB: - /// First build the CFG - MachineFunction *F = MBB->getParent(); MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(MBBIter, newMBB); - F->insert(MBBIter, nextMBB); - - // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
- nextMBB->splice(nextMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(mInstr)), - thisMBB->end()); - nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - // Update thisMBB to fall through to newMBB - thisMBB->addSuccessor(newMBB); - - // newMBB jumps to newMBB and fall through to nextMBB - newMBB->addSuccessor(nextMBB); - newMBB->addSuccessor(newMBB); - - DebugLoc dl = mInstr->getDebugLoc(); - // Insert instructions into newMBB based on incoming instruction - assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 && - "unexpected number of operands"); - MachineOperand& destOper = mInstr->getOperand(0); - MachineOperand* argOpers[2 + X86::AddrNumOperands]; - int numArgs = mInstr->getNumOperands() - 1; - for (int i=0; i < numArgs; ++i) - argOpers[i] = &mInstr->getOperand(i+1); - - // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] - int valArgIndx = lastAddrIndx + 1; - - unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); - MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - - // We only support register and immediate values - assert((argOpers[valArgIndx]->isReg() || - argOpers[valArgIndx]->isImm()) && - "invalid operand"); - - unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); - if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); - else - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); - (*MIB).addOperand(*argOpers[valArgIndx]); + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); - MIB.addReg(t1); + MachineInstrBuilder MIB; - MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr)); - MIB.addReg(t1); - MIB.addReg(t2); + // Transfer the remainder of BB and its successor edges to sinkMBB. 
+ sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // thisMBB: + // Lo + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + // Hi + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) + else + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + } + MIB.setMemRefs(MMOBegin, MMOEnd); - // Generate movc - unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); - MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3); - MIB.addReg(t2); - MIB.addReg(t1); + thisMBB->addSuccessor(mainMBB); - // Cmp and exchange if none has modified the memory location - MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32)); - for (int i=0; i <= lastAddrIndx; ++i) - (*MIB).addOperand(*argOpers[i]); - MIB.addReg(t3); - assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).setMemRefs(mInstr->memoperands_begin(), - mInstr->memoperands_end()); + // mainMBB: + MachineBasicBlock *origMainMBB = mainMBB; + mainMBB->addLiveIn(X86::EAX); + mainMBB->addLiveIn(X86::EDX); - MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); - MIB.addReg(X86::EAX); + // Copy EDX:EAX as they are used more than once. + unsigned LoReg = MRI.createVirtualRegister(RC); + unsigned HiReg = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX); - // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); + unsigned t1L = MRI.createVirtualRegister(RC); + unsigned t1H = MRI.createVirtualRegister(RC); - mInstr->eraseFromParent(); // The pseudo instruction is gone now. 
- return nextMBB; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic-load-op6432 opcode!"); + case X86::ATOMAND6432: + case X86::ATOMOR6432: + case X86::ATOMXOR6432: + case X86::ATOMADD6432: + case X86::ATOMSUB6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg); + break; + } + case X86::ATOMNAND6432: { + unsigned HiOpc, NOTOpc; + unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc); + unsigned t2L = MRI.createVirtualRegister(RC); + unsigned t2H = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); + break; + } + case X86::ATOMSWAP6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg); + break; + } + } + + // Copy EDX:EAX back from HiReg:LoReg + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg); + // Copy ECX:EBX from t1H:t1L + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H); + + MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); + + mainMBB->addSuccessor(origMainMBB); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + sinkMBB->addLiveIn(X86::EAX); + sinkMBB->addLiveIn(X86::EDX); + + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(TargetOpcode::COPY), DstLoReg) + .addReg(X86::EAX); + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(TargetOpcode::COPY), DstHiReg) + .addReg(X86::EDX); + + MI->eraseFromParent(); + return sinkMBB; } // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 @@ -13176,130 +13289,42 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitMonitor(MI, BB); // Atomic Lowering. 
- case X86::ATOMMIN32: - case X86::ATOMMAX32: - case X86::ATOMUMIN32: - case X86::ATOMUMAX32: - case X86::ATOMMIN16: - case X86::ATOMMAX16: - case X86::ATOMUMIN16: - case X86::ATOMUMAX16: - case X86::ATOMMIN64: - case X86::ATOMMAX64: - case X86::ATOMUMIN64: - case X86::ATOMUMAX64: { - unsigned Opc; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break; - case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break; - case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break; - case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break; - case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break; - case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break; - case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break; - case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break; - case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break; - case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break; - case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break; - case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break; - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. - } - return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc); - } - - case X86::ATOMAND32: - case X86::ATOMOR32: - case X86::ATOMXOR32: - case X86::ATOMNAND32: { - bool Invert = false; - unsigned RegOpc, ImmOpc; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::ATOMAND32: - RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break; - case X86::ATOMOR32: - RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break; - case X86::ATOMXOR32: - RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break; - case X86::ATOMNAND32: - RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break; - } - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, - X86::MOV32rm, X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass, Invert); - } - + case X86::ATOMAND8: case X86::ATOMAND16: + case X86::ATOMAND32: + case X86::ATOMAND64: + // Fall through + case X86::ATOMOR8: case X86::ATOMOR16: + case X86::ATOMOR32: + case X86::ATOMOR64: + // Fall through case X86::ATOMXOR16: - case X86::ATOMNAND16: { - bool Invert = false; - unsigned RegOpc, ImmOpc; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::ATOMAND16: - RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break; - case X86::ATOMOR16: - RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break; - case X86::ATOMXOR16: - RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break; - case X86::ATOMNAND16: - RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break; - } - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, - X86::MOV16rm, X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass, Invert); - } - - case X86::ATOMAND8: - case X86::ATOMOR8: case X86::ATOMXOR8: - case X86::ATOMNAND8: { - bool Invert = false; - unsigned RegOpc, ImmOpc; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::ATOMAND8: - RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break; - case X86::ATOMOR8: - RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break; - case X86::ATOMXOR8: - RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break; - case X86::ATOMNAND8: - RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break; - } - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, - X86::MOV8rm, X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass, Invert); - } - - // This group is for 64-bit host. 
- case X86::ATOMAND64: - case X86::ATOMOR64: + case X86::ATOMXOR32: case X86::ATOMXOR64: - case X86::ATOMNAND64: { - bool Invert = false; - unsigned RegOpc, ImmOpc; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::ATOMAND64: - RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break; - case X86::ATOMOR64: - RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break; - case X86::ATOMXOR64: - RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break; - case X86::ATOMNAND64: - RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break; - } - return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, - X86::MOV64rm, X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass, Invert); - } + // Fall through + case X86::ATOMNAND8: + case X86::ATOMNAND16: + case X86::ATOMNAND32: + case X86::ATOMNAND64: + // Fall through + case X86::ATOMMAX16: + case X86::ATOMMAX32: + case X86::ATOMMAX64: + // Fall through + case X86::ATOMMIN16: + case X86::ATOMMIN32: + case X86::ATOMMIN64: + // Fall through + case X86::ATOMUMAX16: + case X86::ATOMUMAX32: + case X86::ATOMUMAX64: + // Fall through + case X86::ATOMUMIN16: + case X86::ATOMUMIN32: + case X86::ATOMUMIN64: + return EmitAtomicLoadArith(MI, BB); // This group does 64-bit operations on a 32-bit host. case X86::ATOMAND6432: @@ -13308,44 +13333,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMNAND6432: case X86::ATOMADD6432: case X86::ATOMSUB6432: - case X86::ATOMSWAP6432: { - bool Invert = false; - unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH; - switch (MI->getOpcode()) { - default: llvm_unreachable("illegal opcode!"); - case X86::ATOMAND6432: - RegOpcL = RegOpcH = X86::AND32rr; - ImmOpcL = ImmOpcH = X86::AND32ri; - break; - case X86::ATOMOR6432: - RegOpcL = RegOpcH = X86::OR32rr; - ImmOpcL = ImmOpcH = X86::OR32ri; - break; - case X86::ATOMXOR6432: - RegOpcL = RegOpcH = X86::XOR32rr; - ImmOpcL = ImmOpcH = X86::XOR32ri; - break; - case X86::ATOMNAND6432: - RegOpcL = RegOpcH = X86::AND32rr; - ImmOpcL = ImmOpcH = X86::AND32ri; - Invert = true; - break; - case X86::ATOMADD6432: - RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr; - ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri; - break; - case X86::ATOMSUB6432: - RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr; - ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri; - break; - case X86::ATOMSWAP6432: - RegOpcL = RegOpcH = X86::MOV32rr; - ImmOpcL = ImmOpcH = X86::MOV32ri; - break; - } - return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH, - ImmOpcL, ImmOpcH, Invert); - } + case X86::ATOMSWAP6432: + return EmitAtomicLoadArith6432(MI, BB); case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9c73777022..653654f28a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -861,36 +861,17 @@ namespace llvm { MachineBasicBlock *BB) const; MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const; - /// Utility function to emit atomic bitwise operations (and, or, xor). - /// It takes the bitwise instruction to expand, the associated machine basic - /// block, and the associated X86 opcodes for reg/reg and reg/imm. 
- MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( - MachineInstr *BInstr, - MachineBasicBlock *BB, - unsigned regOpc, - unsigned immOpc, - unsigned loadOpc, - unsigned cxchgOpc, - unsigned notOpc, - unsigned EAXreg, - const TargetRegisterClass *RC, - bool Invert = false) const; - - MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( - MachineInstr *BInstr, - MachineBasicBlock *BB, - unsigned regOpcL, - unsigned regOpcH, - unsigned immOpcL, - unsigned immOpcH, - bool Invert = false) const; - - /// Utility function to emit atomic min and max. It takes the min/max - /// instruction to expand, the associated basic block, and the associated - /// cmov opcode for moving the min or max value. - MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, - MachineBasicBlock *BB, - unsigned cmovOpc) const; + /// Utility function to emit atomic-load-arith operations (and, or, xor, + /// nand, max, min, umax, umin). It takes the corresponding instruction to + /// expand, the associated machine basic block, and the associated X86 + /// opcodes for reg/reg. + MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + /// Utility function to emit atomic-load-arith operations (and, or, xor, + /// nand, add, sub, swap) for 64-bit operands on 32-bit target. + MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Utility function to emit the low-level va_arg code for X86-64. MachineBasicBlock *EmitVAARG64WithCustomInserter( diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 75ceeb9b18..9131d30bd0 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -483,8 +483,7 @@ def CMOV_RFP80 : I<0, Pseudo, //===----------------------------------------------------------------------===// // Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { +let usesCustomInserter = 1 in { def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), "#ATOMAND8 PSEUDO!", @@ -578,11 +577,7 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; } -let Constraints = "$val1 = $dst1, $val2 = $dst2", - Defs = [EFLAGS, EAX, EBX, ECX, EDX], - Uses = [EAX, EBX, ECX, EDX], - mayLoad = 1, mayStore = 1, - usesCustomInserter = 1 in { +let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in { def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), (ins i64mem:$ptr, GR32:$val1, GR32:$val2), "#ATOMAND6432 PSEUDO!", []>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 4a9be39fa5..820ac06dc1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2266,7 +2266,7 @@ static X86::CondCode getCondFromSETOpc(unsigned Opc) { } /// getCondFromCmovOpc - return condition code of a CMov opcode. 
-static X86::CondCode getCondFromCMovOpc(unsigned Opc) { +X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) { switch (Opc) { default: return X86::COND_INVALID; case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: @@ -3314,7 +3314,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, if (OldCC != X86::COND_INVALID) OpcIsSET = true; else - OldCC = getCondFromCMovOpc(Instr.getOpcode()); + OldCC = X86::getCondFromCMovOpc(Instr.getOpcode()); } if (OldCC == X86::COND_INVALID) return false; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index b6f69af037..260f054d69 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -61,6 +61,9 @@ namespace X86 { // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); + // Turn CMov opcode into condition code. + CondCode getCondFromCMovOpc(unsigned Opc); + /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(X86::CondCode CC); diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll index 8b55bd79aa..e969b13302 100644 --- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll +++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll @@ -7,17 +7,16 @@ define void @t(i64* nocapture %p) nounwind ssp { entry: ; CHECK: t: -; CHECK: movl $1 -; CHECK: movl (%ebp), %eax -; CHECK: movl 4(%ebp), %edx +; CHECK: movl ([[REG:%[a-z]+]]), %eax +; CHECK: movl 4([[REG]]), %edx ; CHECK: LBB0_1: -; CHECK-NOT: movl $1 -; CHECK-NOT: movl $0 +; CHECK: movl $1 ; CHECK: addl +; CHECK: movl $0 ; CHECK: adcl ; CHECK: lock -; CHECK: cmpxchg8b -; CHECK: jne +; CHECK-NEXT: cmpxchg8b ([[REG]]) +; CHECK-NEXT: jne %0 = atomicrmw add i64* %p, i64 1 seq_cst ret void } diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll new file mode 100644 index 0000000000..e276d47e34 --- /dev/null +++ b/test/CodeGen/X86/atomic16.ll @@ -0,0 +1,250 @@ +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32 + +@sc16 = external global i16 + +define void @atomic_fetch_add16() nounwind { +; X64: atomic_fetch_add16 +; X32: atomic_fetch_add16 +entry: +; 32-bit + %t1 = atomicrmw add i16* @sc16, i16 1 acquire +; X64: lock +; X64: incw +; X32: lock +; X32: incw + %t2 = atomicrmw add i16* @sc16, i16 3 acquire +; X64: lock +; X64: addw $3 +; X32: lock +; X32: addw $3 + %t3 = atomicrmw add i16* @sc16, i16 5 acquire +; X64: lock +; X64: xaddw +; X32: lock +; X32: xaddw + %t4 = atomicrmw add i16* @sc16, i16 %t3 acquire +; X64: lock +; X64: addw +; X32: lock +; X32: addw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_sub16() nounwind { +; X64: atomic_fetch_sub16 +; X32: atomic_fetch_sub16 + %t1 = atomicrmw sub i16* @sc16, i16 1 acquire +; X64: lock +; X64: decw +; X32: lock +; X32: decw + %t2 = atomicrmw sub i16* @sc16, i16 3 acquire +; X64: lock +; X64: subw $3 +; X32: lock +; X32: subw $3 + %t3 = atomicrmw sub i16* @sc16, i16 5 acquire +; X64: lock +; X64: xaddw +; X32: lock +; X32: xaddw + %t4 = atomicrmw sub i16* @sc16, i16 %t3 acquire +; X64: lock +; X64: subw +; X32: lock +; X32: subw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_and16() nounwind { +; X64: atomic_fetch_and16 +; X32: atomic_fetch_and16 + %t1 = atomicrmw and i16* @sc16, i16 3 acquire +; X64: lock +; X64: andw $3 +; X32: lock +; X32: 
andw $3 + %t2 = atomicrmw and i16* @sc16, i16 5 acquire +; X64: andw +; X64: lock +; X64: cmpxchgw +; X32: andw +; X32: lock +; X32: cmpxchgw + %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire +; X64: lock +; X64: andw +; X32: lock +; X32: andw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_or16() nounwind { +; X64: atomic_fetch_or16 +; X32: atomic_fetch_or16 + %t1 = atomicrmw or i16* @sc16, i16 3 acquire +; X64: lock +; X64: orw $3 +; X32: lock +; X32: orw $3 + %t2 = atomicrmw or i16* @sc16, i16 5 acquire +; X64: orw +; X64: lock +; X64: cmpxchgw +; X32: orw +; X32: lock +; X32: cmpxchgw + %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire +; X64: lock +; X64: orw +; X32: lock +; X32: orw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_xor16() nounwind { +; X64: atomic_fetch_xor16 +; X32: atomic_fetch_xor16 + %t1 = atomicrmw xor i16* @sc16, i16 3 acquire +; X64: lock +; X64: xorw $3 +; X32: lock +; X32: xorw $3 + %t2 = atomicrmw xor i16* @sc16, i16 5 acquire +; X64: xorw +; X64: lock +; X64: cmpxchgw +; X32: xorw +; X32: lock +; X32: cmpxchgw + %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire +; X64: lock +; X64: xorw +; X32: lock +; X32: xorw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_nand16(i16 %x) nounwind { +; X64: atomic_fetch_nand16 +; X32: atomic_fetch_nand16 + %t1 = atomicrmw nand i16* @sc16, i16 %x acquire +; X64: andw +; X64: notw +; X64: lock +; X64: cmpxchgw +; X32: andw +; X32: notw +; X32: lock +; X32: cmpxchgw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_max16(i16 %x) nounwind { + %t1 = atomicrmw max i16* @sc16, i16 %x acquire +; X64: cmpw +; X64: cmov +; X64: lock +; X64: cmpxchgw + +; X32: cmpw +; X32: cmov +; X32: lock +; X32: cmpxchgw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_min16(i16 %x) nounwind { + %t1 = atomicrmw min i16* @sc16, i16 %x acquire +; X64: cmpw +; X64: cmov +; X64: lock +; X64: cmpxchgw + +; X32: cmpw +; X32: cmov +; X32: lock +; X32: cmpxchgw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_umax16(i16 %x) nounwind { + %t1 = atomicrmw umax i16* @sc16, i16 %x acquire +; X64: cmpw +; X64: cmov +; X64: lock +; X64: cmpxchgw + +; X32: cmpw +; X32: cmov +; X32: lock +; X32: cmpxchgw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_umin16(i16 %x) nounwind { + %t1 = atomicrmw umin i16* @sc16, i16 %x acquire +; X64: cmpw +; X64: cmov +; X64: lock +; X64: cmpxchgw +; X32: cmpw +; X32: cmov +; X32: lock +; X32: cmpxchgw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_cmpxchg16() nounwind { + %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire +; X64: lock +; X64: cmpxchgw +; X32: lock +; X32: cmpxchgw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_store16(i16 %x) nounwind { + store atomic i16 %x, i16* @sc16 release, align 4 +; X64-NOT: lock +; X64: movw +; X32-NOT: lock +; X32: movw + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_swap16(i16 %x) nounwind { + %t1 = atomicrmw xchg i16* @sc16, i16 %x acquire +; X64-NOT: lock +; X64: xchgw +; X32-NOT: lock +; X32: xchgw + ret void +; X64: ret +; X32: ret +} diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll new file mode 100644 index 0000000000..dc927d8cb6 --- /dev/null +++ b/test/CodeGen/X86/atomic32.ll @@ -0,0 +1,250 @@ +; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64 +; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32 + +@sc32 = external global 
i32 + +define void @atomic_fetch_add32() nounwind { +; X64: atomic_fetch_add32 +; X32: atomic_fetch_add32 +entry: +; 32-bit + %t1 = atomicrmw add i32* @sc32, i32 1 acquire +; X64: lock +; X64: incl +; X32: lock +; X32: incl + %t2 = atomicrmw add i32* @sc32, i32 3 acquire +; X64: lock +; X64: addl $3 +; X32: lock +; X32: addl $3 + %t3 = atomicrmw add i32* @sc32, i32 5 acquire +; X64: lock +; X64: xaddl +; X32: lock +; X32: xaddl + %t4 = atomicrmw add i32* @sc32, i32 %t3 acquire +; X64: lock +; X64: addl +; X32: lock +; X32: addl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_sub32() nounwind { +; X64: atomic_fetch_sub32 +; X32: atomic_fetch_sub32 + %t1 = atomicrmw sub i32* @sc32, i32 1 acquire +; X64: lock +; X64: decl +; X32: lock +; X32: decl + %t2 = atomicrmw sub i32* @sc32, i32 3 acquire +; X64: lock +; X64: subl $3 +; X32: lock +; X32: subl $3 + %t3 = atomicrmw sub i32* @sc32, i32 5 acquire +; X64: lock +; X64: xaddl +; X32: lock +; X32: xaddl + %t4 = atomicrmw sub i32* @sc32, i32 %t3 acquire +; X64: lock +; X64: subl +; X32: lock +; X32: subl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_and32() nounwind { +; X64: atomic_fetch_and32 +; X32: atomic_fetch_and32 + %t1 = atomicrmw and i32* @sc32, i32 3 acquire +; X64: lock +; X64: andl $3 +; X32: lock +; X32: andl $3 + %t2 = atomicrmw and i32* @sc32, i32 5 acquire +; X64: andl +; X64: lock +; X64: cmpxchgl +; X32: andl +; X32: lock +; X32: cmpxchgl + %t3 = atomicrmw and i32* @sc32, i32 %t2 acquire +; X64: lock +; X64: andl +; X32: lock +; X32: andl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_or32() nounwind { +; X64: atomic_fetch_or32 +; X32: atomic_fetch_or32 + %t1 = atomicrmw or i32* @sc32, i32 3 acquire +; X64: lock +; X64: orl $3 +; X32: lock +; X32: orl $3 + %t2 = atomicrmw or i32* @sc32, i32 5 acquire +; X64: orl +; X64: lock +; X64: cmpxchgl +; X32: orl +; X32: lock +; X32: cmpxchgl + %t3 = atomicrmw or i32* @sc32, i32 %t2 acquire +; X64: lock +; X64: orl +; X32: lock +; X32: orl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_xor32() nounwind { +; X64: atomic_fetch_xor32 +; X32: atomic_fetch_xor32 + %t1 = atomicrmw xor i32* @sc32, i32 3 acquire +; X64: lock +; X64: xorl $3 +; X32: lock +; X32: xorl $3 + %t2 = atomicrmw xor i32* @sc32, i32 5 acquire +; X64: xorl +; X64: lock +; X64: cmpxchgl +; X32: xorl +; X32: lock +; X32: cmpxchgl + %t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire +; X64: lock +; X64: xorl +; X32: lock +; X32: xorl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_nand32(i32 %x) nounwind { +; X64: atomic_fetch_nand32 +; X32: atomic_fetch_nand32 + %t1 = atomicrmw nand i32* @sc32, i32 %x acquire +; X64: andl +; X64: notl +; X64: lock +; X64: cmpxchgl +; X32: andl +; X32: notl +; X32: lock +; X32: cmpxchgl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_max32(i32 %x) nounwind { + %t1 = atomicrmw max i32* @sc32, i32 %x acquire +; X64: cmpl +; X64: cmov +; X64: lock +; X64: cmpxchgl + +; X32: cmpl +; X32: cmov +; X32: lock +; X32: cmpxchgl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_min32(i32 %x) nounwind { + %t1 = atomicrmw min i32* @sc32, i32 %x acquire +; X64: cmpl +; X64: cmov +; X64: lock +; X64: cmpxchgl + +; X32: cmpl +; X32: cmov +; X32: lock +; X32: cmpxchgl + ret void +; X64: ret +; X32: ret +} + +define void @atomic_fetch_umax32(i32 %x) nounwind { + %t1 = atomicrmw umax i32* @sc32, i32 %x acquire +; X64: cmpl +; X64: cmov +; X64: lock +; X64: cmpxchgl + +; X32: cmpl 
+; X32: cmov
+; X32: lock
+; X32: cmpxchgl
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin32(i32 %x) nounwind {
+  %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
+; X64: cmpl
+; X64: cmov
+; X64: lock
+; X64: cmpxchgl
+; X32: cmpl
+; X32: cmov
+; X32: lock
+; X32: cmpxchgl
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg32() nounwind {
+  %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire
+; X64: lock
+; X64: cmpxchgl
+; X32: lock
+; X32: cmpxchgl
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store32(i32 %x) nounwind {
+  store atomic i32 %x, i32* @sc32 release, align 4
+; X64-NOT: lock
+; X64: movl
+; X32-NOT: lock
+; X32: movl
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap32(i32 %x) nounwind {
+  %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
+; X64-NOT: lock
+; X64: xchgl
+; X32-NOT: lock
+; X32: xchgl
+  ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
new file mode 100644
index 0000000000..45785cc8fe
--- /dev/null
+++ b/test/CodeGen/X86/atomic64.ll
@@ -0,0 +1,216 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+
+@sc64 = external global i64
+
+define void @atomic_fetch_add64() nounwind {
+; X64: atomic_fetch_add64
+entry:
+  %t1 = atomicrmw add i64* @sc64, i64 1 acquire
+; X64: lock
+; X64: incq
+  %t2 = atomicrmw add i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: addq $3
+  %t3 = atomicrmw add i64* @sc64, i64 5 acquire
+; X64: lock
+; X64: xaddq
+  %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
+; X64: lock
+; X64: addq
+  ret void
+; X64: ret
+}
+
+define void @atomic_fetch_sub64() nounwind {
+; X64: atomic_fetch_sub64
+  %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
+; X64: lock
+; X64: decq
+  %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: subq $3
+  %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
+; X64: lock
+; X64: xaddq
+  %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
+; X64: lock
+; X64: subq
+  ret void
+; X64: ret
+}
+
+define void @atomic_fetch_and64() nounwind {
+; X64: atomic_fetch_and64
+  %t1 = atomicrmw and i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: andq $3
+  %t2 = atomicrmw and i64* @sc64, i64 5 acquire
+; X64: andq
+; X64: lock
+; X64: cmpxchgq
+  %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
+; X64: lock
+; X64: andq
+  ret void
+; X64: ret
+}
+
+define void @atomic_fetch_or64() nounwind {
+; X64: atomic_fetch_or64
+  %t1 = atomicrmw or i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: orq $3
+  %t2 = atomicrmw or i64* @sc64, i64 5 acquire
+; X64: orq
+; X64: lock
+; X64: cmpxchgq
+  %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
+; X64: lock
+; X64: orq
+  ret void
+; X64: ret
+}
+
+define void @atomic_fetch_xor64() nounwind {
+; X64: atomic_fetch_xor64
+  %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: xorq $3
+  %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
+; X64: xorq
+; X64: lock
+; X64: cmpxchgq
+  %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
+; X64: lock
+; X64: xorq
+  ret void
+; X64: ret
+}
+
+define void @atomic_fetch_nand64(i64 %x) nounwind {
+; X64: atomic_fetch_nand64
+; X32: atomic_fetch_nand64
+  %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
+; X64: andq
+; X64: notq
+; X64: lock
+; X64: cmpxchgq
+; X32: andl
+; X32: andl
+; X32: notl
+; X32: notl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_max64(i64 %x) nounwind {
+  %t1 = atomicrmw max i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_min64(i64 %x) nounwind {
+  %t1 = atomicrmw min i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umax64(i64 %x) nounwind {
+  %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin64(i64 %x) nounwind {
+  %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg64() nounwind {
+  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+; X64: lock
+; X64: cmpxchgq
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store64(i64 %x) nounwind {
+  store atomic i64 %x, i64* @sc64 release, align 8
+; X64-NOT: lock
+; X64: movq
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap64(i64 %x) nounwind {
+  %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
+; X64-NOT: lock
+; X64: xchgq
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
new file mode 100644
index 0000000000..556c36ebfd
--- /dev/null
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -0,0 +1,209 @@
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; XFAIL: *
+
+@sc64 = external global i64
+
+define void @atomic_fetch_add64() nounwind {
+; X32: atomic_fetch_add64
+entry:
+  %t1 = atomicrmw add i64* @sc64, i64 1 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+  %t2 = atomicrmw add i64* @sc64, i64 3 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+  %t3 = atomicrmw add i64* @sc64, i64 5 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+  %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_sub64() nounwind {
+; X32: atomic_fetch_sub64
+  %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+  %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+  %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+  %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_and64() nounwind {
+; X32: atomic_fetch_and64
+  %t1 = atomicrmw and i64* @sc64, i64 3 acquire
+; X32: andl
+; X32: andl
+; X32: lock
+; X32: cmpxchg8b
+  %t2 = atomicrmw and i64* @sc64, i64 5 acquire
+; X32: andl
+; X32: andl
+; X32: lock
+; X32: cmpxchg8b
+  %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
+; X32: andl
+; X32: andl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_or64() nounwind {
+; X32: atomic_fetch_or64
+  %t1 = atomicrmw or i64* @sc64, i64 3 acquire
+; X32: orl
+; X32: orl
+; X32: lock
+; X32: cmpxchg8b
+  %t2 = atomicrmw or i64* @sc64, i64 5 acquire
+; X32: orl
+; X32: orl
+; X32: lock
+; X32: cmpxchg8b
+  %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
+; X32: orl
+; X32: orl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_xor64() nounwind {
+; X32: atomic_fetch_xor64
+  %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
+; X32: xorl
+; X32: xorl
+; X32: lock
+; X32: cmpxchg8b
+  %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
+; X32: xorl
+; X32: xorl
+; X32: lock
+; X32: cmpxchg8b
+  %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
+; X32: xorl
+; X32: xorl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_nand64(i64 %x) nounwind {
+; X32: atomic_fetch_nand64
+  %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
+; X32: andl
+; X32: andl
+; X32: notl
+; X32: notl
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_max64(i64 %x) nounwind {
+  %t1 = atomicrmw max i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_min64(i64 %x) nounwind {
+  %t1 = atomicrmw min i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_umax64(i64 %x) nounwind {
+  %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_umin64(i64 %x) nounwind {
+  %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg64() nounwind {
+  %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_store64(i64 %x) nounwind {
+  store atomic i64 %x, i64* @sc64 release, align 8
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
+
+define void @atomic_fetch_swap64(i64 %x) nounwind {
+  %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
+; X32: lock
+; X32: cmpxchg8b
+  ret void
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic8.ll b/test/CodeGen/X86/atomic8.ll
new file mode 100644
index 0000000000..035a28dbff
--- /dev/null
+++ b/test/CodeGen/X86/atomic8.ll
@@ -0,0 +1,251 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; XFAIL: *
+
+@sc8 = external global i8
+
+define void @atomic_fetch_add8() nounwind {
+; X64: atomic_fetch_add8
+; X32: atomic_fetch_add8
+entry:
+; 32-bit
+  %t1 = atomicrmw add i8* @sc8, i8 1 acquire
+; X64: lock
+; X64: incb
+; X32: lock
+; X32: incb
+  %t2 = atomicrmw add i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: addb $3
+; X32: lock
+; X32: addb $3
+  %t3 = atomicrmw add i8* @sc8, i8 5 acquire
+; X64: lock
+; X64: xaddb
+; X32: lock
+; X32: xaddb
+  %t4 = atomicrmw add i8* @sc8, i8 %t3 acquire
+; X64: lock
+; X64: addb
+; X32: lock
+; X32: addb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_sub8() nounwind {
+; X64: atomic_fetch_sub8
+; X32: atomic_fetch_sub8
+  %t1 = atomicrmw sub i8* @sc8, i8 1 acquire
+; X64: lock
+; X64: decb
+; X32: lock
+; X32: decb
+  %t2 = atomicrmw sub i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: subb $3
+; X32: lock
+; X32: subb $3
+  %t3 = atomicrmw sub i8* @sc8, i8 5 acquire
+; X64: lock
+; X64: xaddb
+; X32: lock
+; X32: xaddb
+  %t4 = atomicrmw sub i8* @sc8, i8 %t3 acquire
+; X64: lock
+; X64: subb
+; X32: lock
+; X32: subb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_and8() nounwind {
+; X64: atomic_fetch_and8
+; X32: atomic_fetch_and8
+  %t1 = atomicrmw and i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: andb $3
+; X32: lock
+; X32: andb $3
+  %t2 = atomicrmw and i8* @sc8, i8 5 acquire
+; X64: andb
+; X64: lock
+; X64: cmpxchgb
+; X32: andb
+; X32: lock
+; X32: cmpxchgb
+  %t3 = atomicrmw and i8* @sc8, i8 %t2 acquire
+; X64: lock
+; X64: andb
+; X32: lock
+; X32: andb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_or8() nounwind {
+; X64: atomic_fetch_or8
+; X32: atomic_fetch_or8
+  %t1 = atomicrmw or i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: orb $3
+; X32: lock
+; X32: orb $3
+  %t2 = atomicrmw or i8* @sc8, i8 5 acquire
+; X64: orb
+; X64: lock
+; X64: cmpxchgb
+; X32: orb
+; X32: lock
+; X32: cmpxchgb
+  %t3 = atomicrmw or i8* @sc8, i8 %t2 acquire
+; X64: lock
+; X64: orb
+; X32: lock
+; X32: orb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_xor8() nounwind {
+; X64: atomic_fetch_xor8
+; X32: atomic_fetch_xor8
+  %t1 = atomicrmw xor i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: xorb $3
+; X32: lock
+; X32: xorb $3
+  %t2 = atomicrmw xor i8* @sc8, i8 5 acquire
+; X64: xorb
+; X64: lock
+; X64: cmpxchgb
+; X32: xorb
+; X32: lock
+; X32: cmpxchgb
+  %t3 = atomicrmw xor i8* @sc8, i8 %t2 acquire
+; X64: lock
+; X64: xorb
+; X32: lock
+; X32: xorb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_nand8(i8 %x) nounwind {
+; X64: atomic_fetch_nand8
+; X32: atomic_fetch_nand8
+  %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
+; X64: andb
+; X64: notb
+; X64: lock
+; X64: cmpxchgb
+; X32: andb
+; X32: notb
+; X32: lock
+; X32: cmpxchgb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_max8(i8 %x) nounwind {
+  %t1 = atomicrmw max i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_min8(i8 %x) nounwind {
+  %t1 = atomicrmw min i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umax8(i8 %x) nounwind {
+  %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin8(i8 %x) nounwind {
+  %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg8() nounwind {
+  %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire
+; X64: lock
+; X64: cmpxchgb
+; X32: lock
+; X32: cmpxchgb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store8(i8 %x) nounwind {
+  store atomic i8 %x, i8* @sc8 release, align 4
+; X64-NOT: lock
+; X64: movb
+; X32-NOT: lock
+; X32: movb
+  ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap8(i8 %x) nounwind {
+  %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
+; X64-NOT: lock
+; X64: xchgb
+; X32-NOT: lock
+; X32: xchgb
+  ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 152bece424..c5fa07d07d 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
@@ -107,13 +107,12 @@ entry:
   ; CHECK: cmpxchgl
   %17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
   store i32 %17, i32* %old
+  ; CHECK: movl [[R17atomic:.*]], %eax
   ; CHECK: movl $1401, %[[R17mask:[a-z]*]]
-  ; CHECK: movl [[R17atomic:.*]], %eax
-  ; CHECK: movl %eax, %[[R17newval:[a-z]*]]
-  ; CHECK: andl %[[R17mask]], %[[R17newval]]
-  ; CHECK: notl %[[R17newval]]
+  ; CHECK: andl %eax, %[[R17mask]]
+  ; CHECK: notl %[[R17mask]]
   ; CHECK: lock
-  ; CHECK: cmpxchgl %[[R17newval]], [[R17atomic]]
+  ; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
   ; CHECK: jne
   ; CHECK: movl %eax,
   %18 = atomicrmw nand i32* %val2, i32 1401 monotonic
diff --git a/test/CodeGen/X86/pr13458.ll b/test/CodeGen/X86/pr13458.ll
new file mode 100644
index 0000000000..55548b3c3b
--- /dev/null
+++ b/test/CodeGen/X86/pr13458.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.4.2"
+
+%v8_uniform_Stats.0.2.4.10 = type { i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, [7 x i32], [7 x i64] }
+
+@globalStats = external global %v8_uniform_Stats.0.2.4.10
+
+define void @MergeStats() nounwind {
+allocas:
+  %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
+  ret void
+}
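
All of the cmpxchg-loop CHECK patterns above assert the same expansion shape for the new pseudo atomic instructions (X86::ATOMAND*, ATOMOR*, ATOMNAND*, ATOMMAX*, and friends): load the old value, apply the plain non-atomic opcode to a scratch copy, publish the result with lock cmpxchg, and branch back on failure. As a rough model only -- the helper name fetch_nand_i32 and the GCC/Clang __atomic builtins are illustrative assumptions, not code from this commit -- the nand case corresponds to:

    #include <stdint.h>

    /* Model of the cmpxchg spin loop the tests check for: compute
       ~(old & mask) in a scratch register, then try to publish it with
       a compare-and-swap, retrying if another thread raced us. */
    static int32_t fetch_nand_i32(int32_t *addr, int32_t mask) {
      int32_t old = __atomic_load_n(addr, __ATOMIC_ACQUIRE);   /* movl */
      for (;;) {
        int32_t desired = ~(old & mask);                 /* andl; notl */
        if (__atomic_compare_exchange_n(addr, &old, desired, 0,
                                        __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE))
          return old;                     /* lock cmpxchgl succeeded */
        /* on failure the builtin refreshes old from memory; this is
           the CHECK: jne back-edge asserted in atomic_op.ll */
      }
    }

On x86, a failed cmpxchg leaves the current memory value in %eax, so the loop re-enters with the refreshed old value at no extra cost, which is why the reworked atomic_op.ll checks expect only one movl from the atomic location before the loop.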