-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp          | 991
-rw-r--r--  lib/Target/X86/X86ISelLowering.h            |  41
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td          |   9
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp             |   4
-rw-r--r--  lib/Target/X86/X86InstrInfo.h               |   3
-rw-r--r--  test/CodeGen/X86/2010-01-08-Atomic64Bug.ll  |  13
-rw-r--r--  test/CodeGen/X86/atomic16.ll                | 250
-rw-r--r--  test/CodeGen/X86/atomic32.ll                | 250
-rw-r--r--  test/CodeGen/X86/atomic64.ll                | 216
-rw-r--r--  test/CodeGen/X86/atomic6432.ll              | 209
-rw-r--r--  test/CodeGen/X86/atomic8.ll                 | 251
-rw-r--r--  test/CodeGen/X86/atomic_op.ll               |  11
-rw-r--r--  test/CodeGen/X86/pr13458.ll                 |  14
13 files changed, 1709 insertions(+), 553 deletions(-)
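
The patch below replaces the three per-width custom inserters (EmitAtomicBitwiseWithCustomInserter, EmitAtomicBit6432WithCustomInserter, EmitAtomicMinMaxWithCustomInserter) with two generic emitters, EmitAtomicLoadArith and EmitAtomicLoadArith6432. As a minimal sketch of the kind of IR that now takes these paths, written in the same 2012-era typed-pointer syntax as the new tests (the global and function names here are illustrative only, not part of the patch):

@g32 = external global i32

define i32 @example(i32 %v) nounwind {
  ; and/or/xor/nand on i8-i64 and signed/unsigned min/max on i16-i64 are
  ; selected to ATOM* pseudos and expanded by EmitAtomicLoadArith; 64-bit
  ; operations on a 32-bit target go through EmitAtomicLoadArith6432.
  %a = atomicrmw nand i32* @g32, i32 %v acquire
  %b = atomicrmw max i32* @g32, i32 %v acquire
  ret i32 %b
}
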
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index baa83c6511..6130603bce 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11911,385 +11911,498 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
//===----------------------------------------------------------------------===//
// private utility function
+
+// Get CMPXCHG opcode for the specified data type.
+static unsigned getCmpXChgOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::LCMPXCHG8;
+ case MVT::i16: return X86::LCMPXCHG16;
+ case MVT::i32: return X86::LCMPXCHG32;
+ case MVT::i64: return X86::LCMPXCHG64;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
+
+// Get LOAD opcode for the specified data type.
+static unsigned getLoadOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::MOV8rm;
+ case MVT::i16: return X86::MOV16rm;
+ case MVT::i32: return X86::MOV32rm;
+ case MVT::i64: return X86::MOV64rm;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
+
+// Get the opcode of the non-atomic instruction corresponding to the specified
+// atomic instruction.
+static unsigned getNonAtomicOpcode(unsigned Opc) {
+ switch (Opc) {
+ case X86::ATOMAND8: return X86::AND8rr;
+ case X86::ATOMAND16: return X86::AND16rr;
+ case X86::ATOMAND32: return X86::AND32rr;
+ case X86::ATOMAND64: return X86::AND64rr;
+ case X86::ATOMOR8: return X86::OR8rr;
+ case X86::ATOMOR16: return X86::OR16rr;
+ case X86::ATOMOR32: return X86::OR32rr;
+ case X86::ATOMOR64: return X86::OR64rr;
+ case X86::ATOMXOR8: return X86::XOR8rr;
+ case X86::ATOMXOR16: return X86::XOR16rr;
+ case X86::ATOMXOR32: return X86::XOR32rr;
+ case X86::ATOMXOR64: return X86::XOR64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get the opcode of the non-atomic instruction corresponding to the specified
+// atomic instruction, along with the extra opcode it needs (NOT for NAND,
+// CMP for the MIN/MAX variants).
+static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
+ case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
+ case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
+ case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
+ case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
+ case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
+ case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
+ case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
+ case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
+ case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
+ case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
+ case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get the non-atomic opcodes (low and high halves) corresponding to the
+// specified atomic instruction for a 64-bit operand on a 32-bit target.
+static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
+ switch (Opc) {
+ case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
+ case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
+ case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
+ case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
+ case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
+ case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get the non-atomic opcodes (low and high halves) corresponding to the
+// specified atomic instruction for a 64-bit operand on a 32-bit target,
+// along with the extra opcode it needs (NOT for NAND).
+static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
+ unsigned &HiOpc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND6432:
+ ExtraOpc = X86::NOT32r;
+ HiOpc = X86::AND32rr;
+ return X86::AND32rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get the pseudo CMOV opcode for the specified data type.
+static unsigned getPseudoCMOVOpc(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i16: return X86::CMOV_GR16;
+ case MVT::i32: return X86::CMOV_GR32;
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown CMOV opcode!");
+}
+
+// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
+// Each one is translated into a compare-exchange spin loop, from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// EAX = LOAD MI.addr
+// loop:
+// t1 = OP MI.val, EAX
+// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+// JNE loop
+// sink:
+// dst = EAX
+// ...
MachineBasicBlock *
-X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
- MachineBasicBlock *MBB,
- unsigned regOpc,
- unsigned immOpc,
- unsigned LoadOpc,
- unsigned CXchgOpc,
- unsigned notOpc,
- unsigned EAXreg,
- const TargetRegisterClass *RC,
- bool Invert) const {
- // For the atomic bitwise operator, we generate
- // thisMBB:
- // newMBB:
- // ld t1 = [bitinstr.addr]
- // op t2 = t1, [bitinstr.val]
- // not t3 = t2 (if Invert)
- // mov EAX = t1
- // lcs dest = [bitinstr.addr], t3 [EAX is implicit]
- // bz newMBB
- // fallthrough -->nextMBB
+X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op to have one memoperand");
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg, SrcReg;
+ unsigned MemOpndSlot;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcReg = MI->getOperand(CurOp++).getReg();
+
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ EVT VT = *RC->vt_begin();
+ unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+
+ unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
+ unsigned LOADOpc = getLoadOpcode(VT);
+
+ // For the atomic load-arith operator, we generate
+ //
+ // thisMBB:
+ // EAX = LOAD [MI.addr]
+ // mainMBB:
+ // t1 = OP MI.val, EAX
+ // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+ // JNE mainMBB
+ // sinkMBB:
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(bInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to itself and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- // Insert instructions into newMBB based on incoming instruction
- assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
- "unexpected number of operands");
- DebugLoc dl = bInstr->getDebugLoc();
- MachineOperand& destOper = bInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- int numArgs = bInstr->getNumOperands() - 1;
- for (int i=0; i < numArgs; ++i)
- argOpers[i] = &bInstr->getOperand(i+1);
-
- // x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
- int valArgIndx = lastAddrIndx + 1;
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
-
- unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
- MIB.addReg(t1);
- (*MIB).addOperand(*argOpers[valArgIndx]);
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
- unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
- if (Invert) {
- MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
+ // thisMBB:
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+ mainMBB->addLiveIn(AccPhyReg);
+
+ // Copy AccPhyReg as it is used more than once.
+ unsigned AccReg = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
+ .addReg(AccPhyReg);
+
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+ case X86::ATOMAND8:
+ case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ case X86::ATOMOR8:
+ case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ case X86::ATOMXOR8:
+ case X86::ATOMXOR16:
+ case X86::ATOMXOR32:
+ case X86::ATOMXOR64: {
+ unsigned ARITHOpc = getNonAtomicOpcode(Opc);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
+ .addReg(AccReg);
+ break;
+ }
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64: {
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned NOTOpc;
+ unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
+ .addReg(AccReg);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+ break;
+ }
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64: {
+ unsigned CMPOpc;
+ unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
+
+ BuildMI(mainMBB, DL, TII->get(CMPOpc))
+ .addReg(SrcReg)
+ .addReg(AccReg);
+
+ if (Subtarget->hasCMov()) {
+ // Native support
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+ .addReg(SrcReg)
+ .addReg(AccReg);
+ } else {
+ // Use pseudo select and lower them.
+ assert((VT == MVT::i16 || VT == MVT::i32) &&
+ "Invalid atomic-load-op transformation!");
+ unsigned SelOpc = getPseudoCMOVOpc(VT);
+ X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
+ assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
+ .addReg(SrcReg).addReg(AccReg)
+ .addImm(CC);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
}
- else
- t3 = t2;
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
+ // Copy AccPhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
+ .addReg(AccReg);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
MIB.addReg(t1);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
- MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t3);
- assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(bInstr->memoperands_begin(),
- bInstr->memoperands_end());
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
- MIB.addReg(EAXreg);
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ // sinkMBB:
+ sinkMBB->addLiveIn(AccPhyReg);
- bInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(AccPhyReg);
+
+ MI->eraseFromParent();
+ return sinkMBB;
}
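
For an i32 nand, the loop built above corresponds roughly to the following IR-level expansion. This is a conceptual sketch only, with illustrative names; the inserter itself operates on the ATOM* pseudos and the physical accumulator register, not on IR:

define i32 @fetch_nand32(i32* %p, i32 %v) nounwind {
entry:
  %init = load i32* %p                          ; thisMBB: EAX = LOAD [MI.addr]
  br label %loop
loop:                                           ; mainMBB
  %old = phi i32 [ %init, %entry ], [ %cur, %loop ]
  %and = and i32 %v, %old                       ; t2 = AND SrcReg, AccReg
  %new = xor i32 %and, -1                       ; t1 = NOT t2
  %cur = cmpxchg i32* %p, i32 %old, i32 %new acquire  ; LCMPXCHG32; JNE loop
  %ok = icmp eq i32 %cur, %old
  br i1 %ok, label %done, label %loop
done:                                           ; sinkMBB: dst = EAX
  ret i32 %old
}

On a successful exchange the accumulator still holds the compared (old) value, which is why the sink block simply copies it into the destination register.
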
-// private utility function: 64 bit atomics on 32 bit host.
+// EmitAtomicLoadArith6432 - emit the code sequence for 64-bit pseudo atomic
+// instructions on a 32-bit target. Each one is translated into a
+// compare-exchange spin loop, from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// EAX = LOAD [MI.addr + 0]
+// EDX = LOAD [MI.addr + 4]
+// loop:
+// EBX = OP MI.val.lo, EAX
+// ECX = OP MI.val.hi, EDX
+// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// JNE loop
+// sink:
+// dst = EDX:EAX
+// ...
MachineBasicBlock *
-X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
- MachineBasicBlock *MBB,
- unsigned regOpcL,
- unsigned regOpcH,
- unsigned immOpcL,
- unsigned immOpcH,
- bool Invert) const {
- // For the atomic bitwise operator, we generate
- // thisMBB (instructions are in pairs, except cmpxchg8b)
- // ld t1,t2 = [bitinstr.addr]
- // newMBB:
- // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
- // op t5, t6 <- out1, out2, [bitinstr.val]
- // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
- // neg t7, t8 < t5, t6 (if Invert)
- // mov ECX, EBX <- t5, t6
- // mov EAX, EDX <- t1, t2
- // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
- // mov t3, t4 <- EAX, EDX
- // bz newMBB
- // result in out1, out2
- // fallthrough -->nextMBB
-
- const TargetRegisterClass *RC = &X86::GR32RegClass;
- const unsigned LoadOpc = X86::MOV32rm;
- const unsigned NotOpc = X86::NOT32r;
+X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ DebugLoc DL = MI->getDebugLoc();
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(bInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to itself and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- DebugLoc dl = bInstr->getDebugLoc();
- // Insert instructions into newMBB based on incoming instruction
- // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
- assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
- "unexpected number of operands");
- MachineOperand& dest1Oper = bInstr->getOperand(0);
- MachineOperand& dest2Oper = bInstr->getOperand(1);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
- argOpers[i] = &bInstr->getOperand(i+2);
-
- // We use some of the operands multiple times, so conservatively just
- // clear any kill flags that might be present.
- if (argOpers[i]->isReg() && argOpers[i]->isUse())
- argOpers[i]->setIsKill(false);
- }
-
- // x86 address has 5 operands: base, index, scale, displacement, and segment.
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
- // add 4 to displacement.
- for (int i=0; i <= lastAddrIndx-2; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MachineOperand newOp3 = *(argOpers[3]);
- if (newOp3.isImm())
- newOp3.setImm(newOp3.getImm()+4);
- else
- newOp3.setOffset(newOp3.getOffset()+4);
- (*MIB).addOperand(newOp3);
- (*MIB).addOperand(*argOpers[lastAddrIndx]);
-
- // t3/4 are defined later, at the bottom of the loop
- unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
- unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
- BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
- BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
- .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
-
- // The subsequent operations should be using the destination registers of
- // the PHI instructions.
- t1 = dest1Oper.getReg();
- t2 = dest2Oper.getReg();
-
- int valArgIndx = lastAddrIndx + 1;
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
- unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
- unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
- if (regOpcL != X86::MOV32rr)
- MIB.addReg(t1);
- (*MIB).addOperand(*argOpers[valArgIndx]);
- assert(argOpers[valArgIndx + 1]->isReg() ==
- argOpers[valArgIndx]->isReg());
- assert(argOpers[valArgIndx + 1]->isImm() ==
- argOpers[valArgIndx]->isImm());
- if (argOpers[valArgIndx + 1]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
- if (regOpcH != X86::MOV32rr)
- MIB.addReg(t2);
- (*MIB).addOperand(*argOpers[valArgIndx + 1]);
-
- unsigned t7, t8;
- if (Invert) {
- t7 = F->getRegInfo().createVirtualRegister(RC);
- t8 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
- } else {
- t7 = t5;
- t8 = t6;
- }
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
- MIB.addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
- MIB.addReg(t2);
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
- MIB.addReg(t7);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
- MIB.addReg(t8);
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ "Unexpected number of operands");
- MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op32 to have one memoperand");
- assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(bInstr->memoperands_begin(),
- bInstr->memoperands_end());
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
- MIB.addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
- MIB.addReg(X86::EDX);
+ unsigned DstLoReg, DstHiReg;
+ unsigned SrcLoReg, SrcHiReg;
+ unsigned MemOpndSlot;
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ unsigned CurOp = 0;
- bInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
-}
+ DstLoReg = MI->getOperand(CurOp++).getReg();
+ DstHiReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcLoReg = MI->getOperand(CurOp++).getReg();
+ SrcHiReg = MI->getOperand(CurOp++).getReg();
-// private utility function
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
- MachineBasicBlock *MBB,
- unsigned cmovOpc) const {
- // For the atomic min/max operator, we generate
- // thisMBB:
- // newMBB:
- // ld t1 = [min/max.addr]
- // mov t2 = [min/max.val]
- // cmp t1, t2
- // cmov[cond] t2 = t1
- // mov EAX = t1
- // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
- // bz newMBB
- // fallthrough -->nextMBB
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
+
+ unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
+ unsigned LOADOpc = X86::MOV32rm;
+
+ // For the atomic load-arith operator, we generate
//
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ // thisMBB:
+ // EAX = LOAD [MI.addr + 0]
+ // EDX = LOAD [MI.addr + 4]
+ // mainMBB:
+ // EBX = OP MI.vallo, EAX
+ // ECX = OP MI.valhi, EDX
+ // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+ // JNE mainMBB
+ // sinkMBB:
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(mInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to newMBB and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- DebugLoc dl = mInstr->getDebugLoc();
- // Insert instructions into newMBB based on incoming instruction
- assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
- "unexpected number of operands");
- MachineOperand& destOper = mInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- int numArgs = mInstr->getNumOperands() - 1;
- for (int i=0; i < numArgs; ++i)
- argOpers[i] = &mInstr->getOperand(i+1);
-
- // x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
- int valArgIndx = lastAddrIndx + 1;
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
-
- // We only support register and immediate values
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
-
- unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
- else
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
- (*MIB).addOperand(*argOpers[valArgIndx]);
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
- MIB.addReg(t1);
+ MachineInstrBuilder MIB;
- MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
- MIB.addReg(t1);
- MIB.addReg(t2);
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ // Lo
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Hi
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
+ else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
- // Generate movc
- unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
- MIB.addReg(t2);
- MIB.addReg(t1);
+ thisMBB->addSuccessor(mainMBB);
- // Cmp and exchange if none has modified the memory location
- MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t3);
- assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(mInstr->memoperands_begin(),
- mInstr->memoperands_end());
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+ mainMBB->addLiveIn(X86::EAX);
+ mainMBB->addLiveIn(X86::EDX);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
- MIB.addReg(X86::EAX);
+ // Copy EDX:EAX as they are used more than once.
+ unsigned LoReg = MRI.createVirtualRegister(RC);
+ unsigned HiReg = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
- mInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
+ case X86::ATOMAND6432:
+ case X86::ATOMOR6432:
+ case X86::ATOMXOR6432:
+ case X86::ATOMADD6432:
+ case X86::ATOMSUB6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg);
+ break;
+ }
+ case X86::ATOMNAND6432: {
+ unsigned HiOpc, NOTOpc;
+ unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+ break;
+ }
+ case X86::ATOMSWAP6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+ break;
+ }
+ }
+
+ // Copy EDX:EAX back from HiReg:LoReg
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+ // Copy ECX:EBX from t1H:t1L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
+
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ sinkMBB->addLiveIn(X86::EAX);
+ sinkMBB->addLiveIn(X86::EDX);
+
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstLoReg)
+ .addReg(X86::EAX);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstHiReg)
+ .addReg(X86::EDX);
+
+ MI->eraseFromParent();
+ return sinkMBB;
}
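
At the IR level, the operations routed through this path look like the sketch below (illustrative names; the register and opcode comments refer to the getNonAtomic6432Opcode mapping above):

@g64 = external global i64

define i64 @fetch_add64(i64 %v) nounwind {
  ; On a 32-bit target this becomes ATOMADD6432: the two 32-bit halves are
  ; combined with ADD32rr/ADC32rr, copied into EBX/ECX, and the exchange is
  ; retried with LOCK CMPXCHG8B until EDX:EAX still matches the value in memory.
  %old = atomicrmw add i64* @g64, i64 %v acquire
  ret i64 %old
}
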
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
@@ -13176,130 +13289,42 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitMonitor(MI, BB);
// Atomic Lowering.
- case X86::ATOMMIN32:
- case X86::ATOMMAX32:
- case X86::ATOMUMIN32:
- case X86::ATOMUMAX32:
- case X86::ATOMMIN16:
- case X86::ATOMMAX16:
- case X86::ATOMUMIN16:
- case X86::ATOMUMAX16:
- case X86::ATOMMIN64:
- case X86::ATOMMAX64:
- case X86::ATOMUMIN64:
- case X86::ATOMUMAX64: {
- unsigned Opc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
- case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
- case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
- case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
- case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
- case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
- case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
- case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
- case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
- case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
- case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
- case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
- // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
- }
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
- }
-
- case X86::ATOMAND32:
- case X86::ATOMOR32:
- case X86::ATOMXOR32:
- case X86::ATOMNAND32: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND32:
- RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
- case X86::ATOMOR32:
- RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
- case X86::ATOMXOR32:
- RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
- case X86::ATOMNAND32:
- RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV32rm, X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass, Invert);
- }
-
+ case X86::ATOMAND8:
case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ // Fall through
+ case X86::ATOMOR8:
case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ // Fall through
case X86::ATOMXOR16:
- case X86::ATOMNAND16: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND16:
- RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
- case X86::ATOMOR16:
- RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
- case X86::ATOMXOR16:
- RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
- case X86::ATOMNAND16:
- RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV16rm, X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass, Invert);
- }
-
- case X86::ATOMAND8:
- case X86::ATOMOR8:
case X86::ATOMXOR8:
- case X86::ATOMNAND8: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND8:
- RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
- case X86::ATOMOR8:
- RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
- case X86::ATOMXOR8:
- RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
- case X86::ATOMNAND8:
- RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV8rm, X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass, Invert);
- }
-
- // This group is for 64-bit host.
- case X86::ATOMAND64:
- case X86::ATOMOR64:
+ case X86::ATOMXOR32:
case X86::ATOMXOR64:
- case X86::ATOMNAND64: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND64:
- RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
- case X86::ATOMOR64:
- RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
- case X86::ATOMXOR64:
- RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
- case X86::ATOMNAND64:
- RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV64rm, X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass, Invert);
- }
+ // Fall through
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64:
+ // Fall through
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ // Fall through
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ // Fall through
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ // Fall through
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64:
+ return EmitAtomicLoadArith(MI, BB);
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
@@ -13308,44 +13333,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMNAND6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432:
- case X86::ATOMSWAP6432: {
- bool Invert = false;
- unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND6432:
- RegOpcL = RegOpcH = X86::AND32rr;
- ImmOpcL = ImmOpcH = X86::AND32ri;
- break;
- case X86::ATOMOR6432:
- RegOpcL = RegOpcH = X86::OR32rr;
- ImmOpcL = ImmOpcH = X86::OR32ri;
- break;
- case X86::ATOMXOR6432:
- RegOpcL = RegOpcH = X86::XOR32rr;
- ImmOpcL = ImmOpcH = X86::XOR32ri;
- break;
- case X86::ATOMNAND6432:
- RegOpcL = RegOpcH = X86::AND32rr;
- ImmOpcL = ImmOpcH = X86::AND32ri;
- Invert = true;
- break;
- case X86::ATOMADD6432:
- RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
- ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
- break;
- case X86::ATOMSUB6432:
- RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
- ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
- break;
- case X86::ATOMSWAP6432:
- RegOpcL = RegOpcH = X86::MOV32rr;
- ImmOpcL = ImmOpcH = X86::MOV32ri;
- break;
- }
- return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
- ImmOpcL, ImmOpcH, Invert);
- }
+ case X86::ATOMSWAP6432:
+ return EmitAtomicLoadArith6432(MI, BB);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9c73777022..653654f28a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -861,36 +861,17 @@ namespace llvm {
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;
- /// Utility function to emit atomic bitwise operations (and, or, xor).
- /// It takes the bitwise instruction to expand, the associated machine basic
- /// block, and the associated X86 opcodes for reg/reg and reg/imm.
- MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
- MachineInstr *BInstr,
- MachineBasicBlock *BB,
- unsigned regOpc,
- unsigned immOpc,
- unsigned loadOpc,
- unsigned cxchgOpc,
- unsigned notOpc,
- unsigned EAXreg,
- const TargetRegisterClass *RC,
- bool Invert = false) const;
-
- MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
- MachineInstr *BInstr,
- MachineBasicBlock *BB,
- unsigned regOpcL,
- unsigned regOpcH,
- unsigned immOpcL,
- unsigned immOpcH,
- bool Invert = false) const;
-
- /// Utility function to emit atomic min and max. It takes the min/max
- /// instruction to expand, the associated basic block, and the associated
- /// cmov opcode for moving the min or max value.
- MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
- MachineBasicBlock *BB,
- unsigned cmovOpc) const;
+ /// Utility function to emit atomic-load-arith operations (and, or, xor,
+ /// nand, max, min, umax, umin). It takes the pseudo instruction to expand
+ /// and the associated machine basic block.
+ MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// Utility function to emit atomic-load-arith operations (and, or, xor,
+ /// nand, add, sub, swap) for 64-bit operands on a 32-bit target.
+ MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
// Utility function to emit the low-level va_arg code for X86-64.
MachineBasicBlock *EmitVAARG64WithCustomInserter(
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 75ceeb9b18..9131d30bd0 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -483,8 +483,7 @@ def CMOV_RFP80 : I<0, Pseudo,
//===----------------------------------------------------------------------===//
// Atomic exchange, and, or, xor
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomInserter = 1 in {
+let usesCustomInserter = 1 in {
def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
"#ATOMAND8 PSEUDO!",
@@ -578,11 +577,7 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
[(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
}
-let Constraints = "$val1 = $dst1, $val2 = $dst2",
- Defs = [EFLAGS, EAX, EBX, ECX, EDX],
- Uses = [EAX, EBX, ECX, EDX],
- mayLoad = 1, mayStore = 1,
- usesCustomInserter = 1 in {
+let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in {
def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
(ins i64mem:$ptr, GR32:$val1, GR32:$val2),
"#ATOMAND6432 PSEUDO!", []>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4a9be39fa5..820ac06dc1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2266,7 +2266,7 @@ static X86::CondCode getCondFromSETOpc(unsigned Opc) {
}
/// getCondFromCmovOpc - return condition code of a CMov opcode.
-static X86::CondCode getCondFromCMovOpc(unsigned Opc) {
+X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
switch (Opc) {
default: return X86::COND_INVALID;
case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
@@ -3314,7 +3314,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
if (OldCC != X86::COND_INVALID)
OpcIsSET = true;
else
- OldCC = getCondFromCMovOpc(Instr.getOpcode());
+ OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
}
if (OldCC == X86::COND_INVALID) return false;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index b6f69af037..260f054d69 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -61,6 +61,9 @@ namespace X86 {
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
+ // Turn CMov opcode into condition code.
+ CondCode getCondFromCMovOpc(unsigned Opc);
+
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
index 8b55bd79aa..e969b13302 100644
--- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -7,17 +7,16 @@
define void @t(i64* nocapture %p) nounwind ssp {
entry:
; CHECK: t:
-; CHECK: movl $1
-; CHECK: movl (%ebp), %eax
-; CHECK: movl 4(%ebp), %edx
+; CHECK: movl ([[REG:%[a-z]+]]), %eax
+; CHECK: movl 4([[REG]]), %edx
; CHECK: LBB0_1:
-; CHECK-NOT: movl $1
-; CHECK-NOT: movl $0
+; CHECK: movl $1
; CHECK: addl
+; CHECK: movl $0
; CHECK: adcl
; CHECK: lock
-; CHECK: cmpxchg8b
-; CHECK: jne
+; CHECK-NEXT: cmpxchg8b ([[REG]])
+; CHECK-NEXT: jne
%0 = atomicrmw add i64* %p, i64 1 seq_cst
ret void
}
diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll
new file mode 100644
index 0000000000..e276d47e34
--- /dev/null
+++ b/test/CodeGen/X86/atomic16.ll
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+
+@sc16 = external global i16
+
+define void @atomic_fetch_add16() nounwind {
+; X64: atomic_fetch_add16
+; X32: atomic_fetch_add16
+entry:
+; 32-bit
+ %t1 = atomicrmw add i16* @sc16, i16 1 acquire
+; X64: lock
+; X64: incw
+; X32: lock
+; X32: incw
+ %t2 = atomicrmw add i16* @sc16, i16 3 acquire
+; X64: lock
+; X64: addw $3
+; X32: lock
+; X32: addw $3
+ %t3 = atomicrmw add i16* @sc16, i16 5 acquire
+; X64: lock
+; X64: xaddw
+; X32: lock
+; X32: xaddw
+ %t4 = atomicrmw add i16* @sc16, i16 %t3 acquire
+; X64: lock
+; X64: addw
+; X32: lock
+; X32: addw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_sub16() nounwind {
+; X64: atomic_fetch_sub16
+; X32: atomic_fetch_sub16
+ %t1 = atomicrmw sub i16* @sc16, i16 1 acquire
+; X64: lock
+; X64: decw
+; X32: lock
+; X32: decw
+ %t2 = atomicrmw sub i16* @sc16, i16 3 acquire
+; X64: lock
+; X64: subw $3
+; X32: lock
+; X32: subw $3
+ %t3 = atomicrmw sub i16* @sc16, i16 5 acquire
+; X64: lock
+; X64: xaddw
+; X32: lock
+; X32: xaddw
+ %t4 = atomicrmw sub i16* @sc16, i16 %t3 acquire
+; X64: lock
+; X64: subw
+; X32: lock
+; X32: subw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_and16() nounwind {
+; X64: atomic_fetch_and16
+; X32: atomic_fetch_and16
+ %t1 = atomicrmw and i16* @sc16, i16 3 acquire
+; X64: lock
+; X64: andw $3
+; X32: lock
+; X32: andw $3
+ %t2 = atomicrmw and i16* @sc16, i16 5 acquire
+; X64: andw
+; X64: lock
+; X64: cmpxchgw
+; X32: andw
+; X32: lock
+; X32: cmpxchgw
+ %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
+; X64: lock
+; X64: andw
+; X32: lock
+; X32: andw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_or16() nounwind {
+; X64: atomic_fetch_or16
+; X32: atomic_fetch_or16
+ %t1 = atomicrmw or i16* @sc16, i16 3 acquire
+; X64: lock
+; X64: orw $3
+; X32: lock
+; X32: orw $3
+ %t2 = atomicrmw or i16* @sc16, i16 5 acquire
+; X64: orw
+; X64: lock
+; X64: cmpxchgw
+; X32: orw
+; X32: lock
+; X32: cmpxchgw
+ %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
+; X64: lock
+; X64: orw
+; X32: lock
+; X32: orw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_xor16() nounwind {
+; X64: atomic_fetch_xor16
+; X32: atomic_fetch_xor16
+ %t1 = atomicrmw xor i16* @sc16, i16 3 acquire
+; X64: lock
+; X64: xorw $3
+; X32: lock
+; X32: xorw $3
+ %t2 = atomicrmw xor i16* @sc16, i16 5 acquire
+; X64: xorw
+; X64: lock
+; X64: cmpxchgw
+; X32: xorw
+; X32: lock
+; X32: cmpxchgw
+ %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
+; X64: lock
+; X64: xorw
+; X32: lock
+; X32: xorw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_nand16(i16 %x) nounwind {
+; X64: atomic_fetch_nand16
+; X32: atomic_fetch_nand16
+ %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
+; X64: andw
+; X64: notw
+; X64: lock
+; X64: cmpxchgw
+; X32: andw
+; X32: notw
+; X32: lock
+; X32: cmpxchgw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_max16(i16 %x) nounwind {
+ %t1 = atomicrmw max i16* @sc16, i16 %x acquire
+; X64: cmpw
+; X64: cmov
+; X64: lock
+; X64: cmpxchgw
+
+; X32: cmpw
+; X32: cmov
+; X32: lock
+; X32: cmpxchgw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_min16(i16 %x) nounwind {
+ %t1 = atomicrmw min i16* @sc16, i16 %x acquire
+; X64: cmpw
+; X64: cmov
+; X64: lock
+; X64: cmpxchgw
+
+; X32: cmpw
+; X32: cmov
+; X32: lock
+; X32: cmpxchgw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umax16(i16 %x) nounwind {
+ %t1 = atomicrmw umax i16* @sc16, i16 %x acquire
+; X64: cmpw
+; X64: cmov
+; X64: lock
+; X64: cmpxchgw
+
+; X32: cmpw
+; X32: cmov
+; X32: lock
+; X32: cmpxchgw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin16(i16 %x) nounwind {
+ %t1 = atomicrmw umin i16* @sc16, i16 %x acquire
+; X64: cmpw
+; X64: cmov
+; X64: lock
+; X64: cmpxchgw
+; X32: cmpw
+; X32: cmov
+; X32: lock
+; X32: cmpxchgw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg16() nounwind {
+ %t1 = cmpxchg i16* @sc16, i16 0, i16 1 acquire
+; X64: lock
+; X64: cmpxchgw
+; X32: lock
+; X32: cmpxchgw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store16(i16 %x) nounwind {
+ store atomic i16 %x, i16* @sc16 release, align 4
+; X64-NOT: lock
+; X64: movw
+; X32-NOT: lock
+; X32: movw
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap16(i16 %x) nounwind {
+ %t1 = atomicrmw xchg i16* @sc16, i16 %x acquire
+; X64-NOT: lock
+; X64: xchgw
+; X32-NOT: lock
+; X32: xchgw
+ ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll
new file mode 100644
index 0000000000..dc927d8cb6
--- /dev/null
+++ b/test/CodeGen/X86/atomic32.ll
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+
+@sc32 = external global i32
+
+define void @atomic_fetch_add32() nounwind {
+; X64: atomic_fetch_add32
+; X32: atomic_fetch_add32
+entry:
+; 32-bit
+ %t1 = atomicrmw add i32* @sc32, i32 1 acquire
+; X64: lock
+; X64: incl
+; X32: lock
+; X32: incl
+ %t2 = atomicrmw add i32* @sc32, i32 3 acquire
+; X64: lock
+; X64: addl $3
+; X32: lock
+; X32: addl $3
+ %t3 = atomicrmw add i32* @sc32, i32 5 acquire
+; X64: lock
+; X64: xaddl
+; X32: lock
+; X32: xaddl
+ %t4 = atomicrmw add i32* @sc32, i32 %t3 acquire
+; X64: lock
+; X64: addl
+; X32: lock
+; X32: addl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_sub32() nounwind {
+; X64: atomic_fetch_sub32
+; X32: atomic_fetch_sub32
+ %t1 = atomicrmw sub i32* @sc32, i32 1 acquire
+; X64: lock
+; X64: decl
+; X32: lock
+; X32: decl
+ %t2 = atomicrmw sub i32* @sc32, i32 3 acquire
+; X64: lock
+; X64: subl $3
+; X32: lock
+; X32: subl $3
+ %t3 = atomicrmw sub i32* @sc32, i32 5 acquire
+; X64: lock
+; X64: xaddl
+; X32: lock
+; X32: xaddl
+ %t4 = atomicrmw sub i32* @sc32, i32 %t3 acquire
+; X64: lock
+; X64: subl
+; X32: lock
+; X32: subl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_and32() nounwind {
+; X64: atomic_fetch_and32
+; X32: atomic_fetch_and32
+ %t1 = atomicrmw and i32* @sc32, i32 3 acquire
+; X64: lock
+; X64: andl $3
+; X32: lock
+; X32: andl $3
+ %t2 = atomicrmw and i32* @sc32, i32 5 acquire
+; X64: andl
+; X64: lock
+; X64: cmpxchgl
+; X32: andl
+; X32: lock
+; X32: cmpxchgl
+ %t3 = atomicrmw and i32* @sc32, i32 %t2 acquire
+; X64: lock
+; X64: andl
+; X32: lock
+; X32: andl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_or32() nounwind {
+; X64: atomic_fetch_or32
+; X32: atomic_fetch_or32
+ %t1 = atomicrmw or i32* @sc32, i32 3 acquire
+; X64: lock
+; X64: orl $3
+; X32: lock
+; X32: orl $3
+ %t2 = atomicrmw or i32* @sc32, i32 5 acquire
+; X64: orl
+; X64: lock
+; X64: cmpxchgl
+; X32: orl
+; X32: lock
+; X32: cmpxchgl
+ %t3 = atomicrmw or i32* @sc32, i32 %t2 acquire
+; X64: lock
+; X64: orl
+; X32: lock
+; X32: orl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_xor32() nounwind {
+; X64: atomic_fetch_xor32
+; X32: atomic_fetch_xor32
+ %t1 = atomicrmw xor i32* @sc32, i32 3 acquire
+; X64: lock
+; X64: xorl $3
+; X32: lock
+; X32: xorl $3
+ %t2 = atomicrmw xor i32* @sc32, i32 5 acquire
+; X64: xorl
+; X64: lock
+; X64: cmpxchgl
+; X32: xorl
+; X32: lock
+; X32: cmpxchgl
+ %t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire
+; X64: lock
+; X64: xorl
+; X32: lock
+; X32: xorl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_nand32(i32 %x) nounwind {
+; X64: atomic_fetch_nand32
+; X32: atomic_fetch_nand32
+ %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
+; X64: andl
+; X64: notl
+; X64: lock
+; X64: cmpxchgl
+; X32: andl
+; X32: notl
+; X32: lock
+; X32: cmpxchgl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_max32(i32 %x) nounwind {
+ %t1 = atomicrmw max i32* @sc32, i32 %x acquire
+; X64: cmpl
+; X64: cmov
+; X64: lock
+; X64: cmpxchgl
+
+; X32: cmpl
+; X32: cmov
+; X32: lock
+; X32: cmpxchgl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_min32(i32 %x) nounwind {
+ %t1 = atomicrmw min i32* @sc32, i32 %x acquire
+; X64: cmpl
+; X64: cmov
+; X64: lock
+; X64: cmpxchgl
+
+; X32: cmpl
+; X32: cmov
+; X32: lock
+; X32: cmpxchgl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umax32(i32 %x) nounwind {
+ %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
+; X64: cmpl
+; X64: cmov
+; X64: lock
+; X64: cmpxchgl
+
+; X32: cmpl
+; X32: cmov
+; X32: lock
+; X32: cmpxchgl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin32(i32 %x) nounwind {
+ %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
+; X64: cmpl
+; X64: cmov
+; X64: lock
+; X64: cmpxchgl
+; X32: cmpl
+; X32: cmov
+; X32: lock
+; X32: cmpxchgl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg32() nounwind {
+ %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire
+; X64: lock
+; X64: cmpxchgl
+; X32: lock
+; X32: cmpxchgl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store32(i32 %x) nounwind {
+ store atomic i32 %x, i32* @sc32 release, align 4
+; X64-NOT: lock
+; X64: movl
+; X32-NOT: lock
+; X32: movl
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap32(i32 %x) nounwind {
+ %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
+; X64-NOT: lock
+; X64: xchgl
+; X32-NOT: lock
+; X32: xchgl
+ ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
new file mode 100644
index 0000000000..45785cc8fe
--- /dev/null
+++ b/test/CodeGen/X86/atomic64.ll
@@ -0,0 +1,216 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+
+@sc64 = external global i64
+
+define void @atomic_fetch_add64() nounwind {
+; X64: atomic_fetch_add64
+entry:
+ %t1 = atomicrmw add i64* @sc64, i64 1 acquire
+; X64: lock
+; X64: incq
+ %t2 = atomicrmw add i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: addq $3
+ %t3 = atomicrmw add i64* @sc64, i64 5 acquire
+; X64: lock
+; X64: xaddq
+ %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
+; X64: lock
+; X64: addq
+ ret void
+; X64: ret
+}
+
+define void @atomic_fetch_sub64() nounwind {
+; X64: atomic_fetch_sub64
+ %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
+; X64: lock
+; X64: decq
+ %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: subq $3
+ %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
+; X64: lock
+; X64: xaddq
+ %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
+; X64: lock
+; X64: subq
+ ret void
+; X64: ret
+}
+
+define void @atomic_fetch_and64() nounwind {
+; X64: atomic_fetch_and64
+ %t1 = atomicrmw and i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: andq $3
+ %t2 = atomicrmw and i64* @sc64, i64 5 acquire
+; X64: andq
+; X64: lock
+; X64: cmpxchgq
+ %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
+; X64: lock
+; X64: andq
+ ret void
+; X64: ret
+}
+
+define void @atomic_fetch_or64() nounwind {
+; X64: atomic_fetch_or64
+ %t1 = atomicrmw or i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: orq $3
+ %t2 = atomicrmw or i64* @sc64, i64 5 acquire
+; X64: orq
+; X64: lock
+; X64: cmpxchgq
+ %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
+; X64: lock
+; X64: orq
+ ret void
+; X64: ret
+}
+
+define void @atomic_fetch_xor64() nounwind {
+; X64: atomic_fetch_xor64
+ %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
+; X64: lock
+; X64: xorq $3
+ %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
+; X64: xorq
+; X64: lock
+; X64: cmpxchgq
+ %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
+; X64: lock
+; X64: xorq
+ ret void
+; X64: ret
+}
+
+define void @atomic_fetch_nand64(i64 %x) nounwind {
+; X64: atomic_fetch_nand64
+; X32: atomic_fetch_nand64
+ %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
+; X64: andq
+; X64: notq
+; X64: lock
+; X64: cmpxchgq
+; X32: andl
+; X32: andl
+; X32: notl
+; X32: notl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_max64(i64 %x) nounwind {
+ %t1 = atomicrmw max i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_min64(i64 %x) nounwind {
+ %t1 = atomicrmw min i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umax64(i64 %x) nounwind {
+ %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin64(i64 %x) nounwind {
+ %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
+; X64: cmpq
+; X64: cmov
+; X64: lock
+; X64: cmpxchgq
+
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg64() nounwind {
+ %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+; X64: lock
+; X64: cmpxchgq
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store64(i64 %x) nounwind {
+ store atomic i64 %x, i64* @sc64 release, align 8
+; X64-NOT: lock
+; X64: movq
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap64(i64 %x) nounwind {
+ %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
+; X64-NOT: lock
+; X64: xchgq
+; X32: lock
+; X32: xchg8b
+ ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
new file mode 100644
index 0000000000..556c36ebfd
--- /dev/null
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -0,0 +1,209 @@
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; XFAIL: *
+
+@sc64 = external global i64
+
+define void @atomic_fetch_add64() nounwind {
+; X32: atomic_fetch_add64
+entry:
+ %t1 = atomicrmw add i64* @sc64, i64 1 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+ %t2 = atomicrmw add i64* @sc64, i64 3 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+ %t3 = atomicrmw add i64* @sc64, i64 5 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+ %t4 = atomicrmw add i64* @sc64, i64 %t3 acquire
+; X32: addl
+; X32: adcl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_sub64() nounwind {
+; X32: atomic_fetch_sub64
+ %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+ %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+ %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+ %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
+; X32: subl
+; X32: sbbl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_and64() nounwind {
+; X32: atomic_fetch_and64
+ %t1 = atomicrmw and i64* @sc64, i64 3 acquire
+; X32: andl
+; X32: andl
+; X32: lock
+; X32: cmpxchg8b
+ %t2 = atomicrmw and i64* @sc64, i64 5 acquire
+; X32: andl
+; X32: andl
+; X32: lock
+; X32: cmpxchg8b
+ %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
+; X32: andl
+; X32: andl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_or64() nounwind {
+; X32: atomic_fetch_or64
+ %t1 = atomicrmw or i64* @sc64, i64 3 acquire
+; X32: orl
+; X32: orl
+; X32: lock
+; X32: cmpxchg8b
+ %t2 = atomicrmw or i64* @sc64, i64 5 acquire
+; X32: orl
+; X32: orl
+; X32: lock
+; X32: cmpxchg8b
+ %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
+; X32: orl
+; X32: orl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_xor64() nounwind {
+; X32: atomic_fetch_xor64
+ %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
+; X32: xorl
+; X32: xorl
+; X32: lock
+; X32: cmpxchg8b
+ %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
+; X32: xorl
+; X32: xorl
+; X32: lock
+; X32: cmpxchg8b
+ %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
+; X32: xorl
+; X32: xorl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_nand64(i64 %x) nounwind {
+; X32: atomic_fetch_nand64
+ %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
+; X32: andl
+; X32: andl
+; X32: notl
+; X32: notl
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_max64(i64 %x) nounwind {
+ %t1 = atomicrmw max i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_min64(i64 %x) nounwind {
+ %t1 = atomicrmw min i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_umax64(i64 %x) nounwind {
+ %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_umin64(i64 %x) nounwind {
+ %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
+; X32: cmpl
+; X32: cmpl
+; X32: cmov
+; X32: cmov
+; X32: cmov
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg64() nounwind {
+ %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_store64(i64 %x) nounwind {
+ store atomic i64 %x, i64* @sc64 release, align 8
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
+
+define void @atomic_fetch_swap64(i64 %x) nounwind {
+ %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
+; X32: lock
+; X32: cmpxchg8b
+ ret void
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic8.ll b/test/CodeGen/X86/atomic8.ll
new file mode 100644
index 0000000000..035a28dbff
--- /dev/null
+++ b/test/CodeGen/X86/atomic8.ll
@@ -0,0 +1,251 @@
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 | FileCheck %s --check-prefix X64
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 | FileCheck %s --check-prefix X32
+; XFAIL: *
+
+@sc8 = external global i8
+
+define void @atomic_fetch_add8() nounwind {
+; X64: atomic_fetch_add8
+; X32: atomic_fetch_add8
+entry:
+; 8-bit
+ %t1 = atomicrmw add i8* @sc8, i8 1 acquire
+; X64: lock
+; X64: incb
+; X32: lock
+; X32: incb
+ %t2 = atomicrmw add i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: addb $3
+; X32: lock
+; X32: addb $3
+ %t3 = atomicrmw add i8* @sc8, i8 5 acquire
+; X64: lock
+; X64: xaddb
+; X32: lock
+; X32: xaddb
+ %t4 = atomicrmw add i8* @sc8, i8 %t3 acquire
+; X64: lock
+; X64: addb
+; X32: lock
+; X32: addb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_sub8() nounwind {
+; X64: atomic_fetch_sub8
+; X32: atomic_fetch_sub8
+ %t1 = atomicrmw sub i8* @sc8, i8 1 acquire
+; X64: lock
+; X64: decb
+; X32: lock
+; X32: decb
+ %t2 = atomicrmw sub i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: subb $3
+; X32: lock
+; X32: subb $3
+ %t3 = atomicrmw sub i8* @sc8, i8 5 acquire
+; X64: lock
+; X64: xaddb
+; X32: lock
+; X32: xaddb
+ %t4 = atomicrmw sub i8* @sc8, i8 %t3 acquire
+; X64: lock
+; X64: subb
+; X32: lock
+; X32: subb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_and8() nounwind {
+; X64: atomic_fetch_and8
+; X32: atomic_fetch_and8
+ %t1 = atomicrmw and i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: andb $3
+; X32: lock
+; X32: andb $3
+ %t2 = atomicrmw and i8* @sc8, i8 5 acquire
+; X64: andb
+; X64: lock
+; X64: cmpxchgb
+; X32: andb
+; X32: lock
+; X32: cmpxchgb
+ %t3 = atomicrmw and i8* @sc8, i8 %t2 acquire
+; X64: lock
+; X64: andb
+; X32: lock
+; X32: andb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_or8() nounwind {
+; X64: atomic_fetch_or8
+; X32: atomic_fetch_or8
+ %t1 = atomicrmw or i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: orb $3
+; X32: lock
+; X32: orb $3
+ %t2 = atomicrmw or i8* @sc8, i8 5 acquire
+; X64: orb
+; X64: lock
+; X64: cmpxchgb
+; X32: orb
+; X32: lock
+; X32: cmpxchgb
+ %t3 = atomicrmw or i8* @sc8, i8 %t2 acquire
+; X64: lock
+; X64: orb
+; X32: lock
+; X32: orb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_xor8() nounwind {
+; X64: atomic_fetch_xor8
+; X32: atomic_fetch_xor8
+ %t1 = atomicrmw xor i8* @sc8, i8 3 acquire
+; X64: lock
+; X64: xorb $3
+; X32: lock
+; X32: xorb $3
+ %t2 = atomicrmw xor i8* @sc8, i8 5 acquire
+; X64: xorb
+; X64: lock
+; X64: cmpxchgb
+; X32: xorb
+; X32: lock
+; X32: cmpxchgb
+ %t3 = atomicrmw xor i8* @sc8, i8 %t2 acquire
+; X64: lock
+; X64: xorb
+; X32: lock
+; X32: xorb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_nand8(i8 %x) nounwind {
+; X64: atomic_fetch_nand8
+; X32: atomic_fetch_nand8
+ %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
+; X64: andb
+; X64: notb
+; X64: lock
+; X64: cmpxchgb
+; X32: andb
+; X32: notb
+; X32: lock
+; X32: cmpxchgb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_max8(i8 %x) nounwind {
+ %t1 = atomicrmw max i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_min8(i8 %x) nounwind {
+ %t1 = atomicrmw min i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umax8(i8 %x) nounwind {
+ %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_umin8(i8 %x) nounwind {
+ %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
+; X64: cmpb
+; X64: cmov
+; X64: lock
+; X64: cmpxchgb
+; X32: cmpb
+; X32: cmov
+; X32: lock
+; X32: cmpxchgb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_cmpxchg8() nounwind {
+ %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire
+; X64: lock
+; X64: cmpxchgb
+; X32: lock
+; X32: cmpxchgb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_store8(i8 %x) nounwind {
+ store atomic i8 %x, i8* @sc8 release, align 4
+; X64-NOT: lock
+; X64: movb
+; X32-NOT: lock
+; X32: movb
+ ret void
+; X64: ret
+; X32: ret
+}
+
+define void @atomic_fetch_swap8(i8 %x) nounwind {
+ %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
+; X64-NOT: lock
+; X64: xchgb
+; X32-NOT: lock
+; X32: xchgb
+ ret void
+; X64: ret
+; X32: ret
+}
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 152bece424..c5fa07d07d 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -107,13 +107,12 @@ entry:
; CHECK: cmpxchgl
%17 = cmpxchg i32* %val2, i32 1976, i32 1 monotonic
store i32 %17, i32* %old
+ ; CHECK: movl [[R17atomic:.*]], %eax
; CHECK: movl $1401, %[[R17mask:[a-z]*]]
- ; CHECK: movl [[R17atomic:.*]], %eax
- ; CHECK: movl %eax, %[[R17newval:[a-z]*]]
- ; CHECK: andl %[[R17mask]], %[[R17newval]]
- ; CHECK: notl %[[R17newval]]
+ ; CHECK: andl %eax, %[[R17mask]]
+ ; CHECK: notl %[[R17mask]]
; CHECK: lock
- ; CHECK: cmpxchgl %[[R17newval]], [[R17atomic]]
+ ; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
; CHECK: jne
; CHECK: movl %eax,
%18 = atomicrmw nand i32* %val2, i32 1401 monotonic
diff --git a/test/CodeGen/X86/pr13458.ll b/test/CodeGen/X86/pr13458.ll
new file mode 100644
index 0000000000..55548b3c3b
--- /dev/null
+++ b/test/CodeGen/X86/pr13458.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.4.2"
+
+%v8_uniform_Stats.0.2.4.10 = type { i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i64, [7 x i32], [7 x i64] }
+
+@globalStats = external global %v8_uniform_Stats.0.2.4.10
+
+define void @MergeStats() nounwind {
+allocas:
+ %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
+ ret void
+}