summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2009-07-30 08:33:02 +0000
committerEvan Cheng <evan.cheng@apple.com>2009-07-30 08:33:02 +0000
commit37b7387da90ffd42d28ad0f08fca00b684294b2c (patch)
tree04a5091e8f343355eabbad04f432011bc9211a83
parent7f93dc8345fb33652973e35cae4c3b58addf4f87 (diff)
downloadllvm-37b7387da90ffd42d28ad0f08fca00b684294b2c.tar.gz
llvm-37b7387da90ffd42d28ad0f08fca00b684294b2c.tar.bz2
llvm-37b7387da90ffd42d28ad0f08fca00b684294b2c.tar.xz
Optimize some common usage patterns of atomic built-ins __sync_add_and_fetch() and __sync_sub_and_fetch.
When the return value is not used (i.e. only care about the value in the memory), x86 does not have to use add to implement these. Instead, it can use add, sub, inc, dec instructions with the "lock" prefix. This is currently implemented using a bit of instruction selection trick. The issue is the target independent pattern produces one output and a chain and we want to map it into one that just output a chain. The current trick is to select it into a merge_values with the first definition being an implicit_def. The proper solution is to add new ISD opcodes for the no-output variant. DAG combiner can then transform the node before it gets to target node selection. Problem #2 is we are adding a whole bunch of x86 atomic instructions when in fact these instructions are identical to the non-lock versions. We need a way to add target specific information to target nodes and have this information carried over to machine instructions. Asm printer (or JIT) can use this information to add the "lock" prefix. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77582 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp1
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp155
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp31
-rw-r--r--lib/Target/X86/X86ISelLowering.h4
-rw-r--r--lib/Target/X86/X86Instr64bit.td32
-rw-r--r--lib/Target/X86/X86InstrInfo.td72
-rw-r--r--test/CodeGen/X86/2008-08-19-SubAndFetch.ll5
-rw-r--r--test/CodeGen/X86/atomic_add.ll217
8 files changed, 502 insertions, 15 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index e352f88afc..1d845a5b5e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -558,6 +558,7 @@ void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::EntryToken:
llvm_unreachable("EntryToken should have been excluded from the schedule!");
break;
+ case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1e8429221a..5fb496ba0b 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -176,6 +176,7 @@ namespace {
private:
SDNode *Select(SDValue N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomicLoadAdd(SDNode *Node, MVT NVT);
bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
@@ -1431,6 +1432,153 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
array_lengthof(Ops));
}
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, MVT NVT) {
+ if (Node->hasAnyUseOfValue(0))
+ return 0;
+
+ // Optimize common patterns for __sync_add_and_fetch and
+ // __sync_sub_and_fetch where the result is not used. This allows us
+ // to use "lock" version of add, sub, inc, dec instructions.
+ // FIXME: Do not use special instructions but instead add the "lock"
+ // prefix to the target node somehow. The extra information will then be
+ // transferred to machine instruction and it denotes the prefix.
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ptr = Node->getOperand(1);
+ SDValue Val = Node->getOperand(2);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return 0;
+
+ bool isInc = false, isDec = false, isSub = false, isCN = false;
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+ if (CN) {
+ isCN = true;
+ int64_t CNVal = CN->getSExtValue();
+ if (CNVal == 1)
+ isInc = true;
+ else if (CNVal == -1)
+ isDec = true;
+ else if (CNVal >= 0)
+ Val = CurDAG->getTargetConstant(CNVal, NVT);
+ else {
+ isSub = true;
+ Val = CurDAG->getTargetConstant(-CNVal, NVT);
+ }
+ } else if (Val.hasOneUse() &&
+ Val.getOpcode() == ISD::SUB &&
+ X86::isZeroNode(Val.getOperand(0))) {
+ isSub = true;
+ Val = Val.getOperand(1);
+ }
+
+ unsigned Opc = 0;
+ switch (NVT.getSimpleVT()) {
+ default: return 0;
+ case MVT::i8:
+ if (isInc)
+ Opc = X86::LOCK_INC8m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC8m;
+ else if (isSub) {
+ if (isCN)
+ Opc = X86::LOCK_SUB8mi;
+ else
+ Opc = X86::LOCK_SUB8mr;
+ } else {
+ if (isCN)
+ Opc = X86::LOCK_ADD8mi;
+ else
+ Opc = X86::LOCK_ADD8mr;
+ }
+ break;
+ case MVT::i16:
+ if (isInc)
+ Opc = X86::LOCK_INC16m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC16m;
+ else if (isSub) {
+ if (isCN) {
+ if (Predicate_i16immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB16mi8;
+ else
+ Opc = X86::LOCK_SUB16mi;
+ } else
+ Opc = X86::LOCK_SUB16mr;
+ } else {
+ if (isCN) {
+ if (Predicate_i16immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD16mi8;
+ else
+ Opc = X86::LOCK_ADD16mi;
+ } else
+ Opc = X86::LOCK_ADD16mr;
+ }
+ break;
+ case MVT::i32:
+ if (isInc)
+ Opc = X86::LOCK_INC32m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC32m;
+ else if (isSub) {
+ if (isCN) {
+ if (Predicate_i32immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB32mi8;
+ else
+ Opc = X86::LOCK_SUB32mi;
+ } else
+ Opc = X86::LOCK_SUB32mr;
+ } else {
+ if (isCN) {
+ if (Predicate_i32immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD32mi8;
+ else
+ Opc = X86::LOCK_ADD32mi;
+ } else
+ Opc = X86::LOCK_ADD32mr;
+ }
+ break;
+ case MVT::i64:
+ if (isInc)
+ Opc = X86::LOCK_INC64m;
+ else if (isDec)
+ Opc = X86::LOCK_DEC64m;
+ else if (isSub) {
+ Opc = X86::LOCK_SUB64mr;
+ if (isCN) {
+ if (Predicate_i64immSExt8(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi8;
+ else if (Predicate_i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_SUB64mi32;
+ }
+ } else {
+ Opc = X86::LOCK_ADD64mr;
+ if (isCN) {
+ if (Predicate_i64immSExt8(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi8;
+ else if (Predicate_i64immSExt32(Val.getNode()))
+ Opc = X86::LOCK_ADD64mi32;
+ }
+ }
+ break;
+ }
+
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue Undef = SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
+ dl, NVT), 0);
+ SDValue MemOp = CurDAG->getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+ if (isInc || isDec) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, MemOp, Chain };
+ SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, MemOp, Chain };
+ SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8), 0);
+ SDValue RetVals[] = { Undef, Ret };
+ return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ }
+}
+
SDNode *X86DAGToDAGISel::Select(SDValue N) {
SDNode *Node = N.getNode();
MVT NVT = Node->getValueType(0);
@@ -1475,6 +1623,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
case X86ISD::ATOMSWAP64_DAG:
return SelectAtomic64(Node, X86::ATOMSWAP6432);
+ case ISD::ATOMIC_LOAD_ADD: {
+ SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ if (RetVal)
+ return RetVal;
+ break;
+ }
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c8eed721eb..c4ed89e384 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2746,6 +2746,15 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
return Mask;
}
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
@@ -2852,15 +2861,6 @@ static bool isSplatVector(SDNode *N) {
return true;
}
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
- return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
- (isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -2874,13 +2874,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
unsigned Opc = V2.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
return false;
} else if (Idx >= 0) {
unsigned Opc = V1.getOpcode();
if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
continue;
- if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+ if (Opc != ISD::BUILD_VECTOR ||
+ !X86::isZeroNode(V1.getOperand(Idx)))
return false;
}
}
@@ -3048,7 +3050,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
continue;
}
SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
- if (Elt.getNode() && isZeroNode(Elt))
+ if (Elt.getNode() && X86::isZeroNode(Elt))
++NumZeros;
else
break;
@@ -3221,7 +3223,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
- if (isZeroNode(Elt))
+ if (X86::isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
@@ -3298,7 +3300,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
- isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ X86::isZeroNode(Op.getOperand(0)) &&
+ !X86::isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index c3da894f0e..579b42fab2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -336,6 +336,10 @@ namespace llvm {
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
/// instructions.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+ /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+ /// constant +0.0.
+ bool isZeroNode(SDValue Elt);
}
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 1dd7e07964..427f6db51c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1380,11 +1380,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xadd\t$val, $ptr",
[(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
TB, LOCK;
+
def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
"xchg\t$val, $ptr",
[(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
}
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+ (ins i64mem:$dst, i64i32imm :$src2),
+ "lock\n\t"
+ "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+ (ins i64mem:$dst, i64i8imm :$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+ (ins i64mem:$dst, i64i32imm:$src2),
+ "lock\n\t"
+ "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "inc{q}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "lock\n\t"
+ "dec{q}\t$dst", []>, LOCK;
+
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c547c76ec1..49da970ca2 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3255,6 +3255,78 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
TB, LOCK;
}
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "lock\n\t"
+ "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "inc{l}\t$dst", []>, LOCK;
+
+def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "lock\n\t"
+ "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "lock\n\t"
+ "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "lock\n\t"
+ "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "lock\n\t"
+ "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "lock\n\t"
+ "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "lock\n\t"
+ "dec{l}\t$dst", []>, LOCK;
+
// Atomic exchange, and, or, xor
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
diff --git a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
index 00bcdf82e8..72efa16866 100644
--- a/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
+++ b/test/CodeGen/X86/2008-08-19-SubAndFetch.ll
@@ -1,9 +1,12 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep xadd
+; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
@var = external global i64 ; <i64*> [#uses=1]
define i32 @main() nounwind {
entry:
+; CHECK: main:
+; CHECK: lock
+; CHECK: decq
tail call i64 @llvm.atomic.load.sub.i64.p0i64( i64* @var, i64 1 ) ; <i64>:0 [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
new file mode 100644
index 0000000000..c0092108f1
--- /dev/null
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -0,0 +1,217 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
+
+; rdar://7103704
+
+define void @sub1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @inc4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @add8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add8:
+; CHECK: addq $2
+ %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 2) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @add4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add4:
+; CHECK: addq
+ %0 = sext i32 %v to i64 ; <i64> [#uses=1]
+ %1 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 %0) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @inc3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc3:
+; CHECK: incb
+ %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 1) ; <i8> [#uses=0]
+ ret void
+}
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @add7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add7:
+; CHECK: addb $2
+ %0 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 2) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @add3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add3:
+; CHECK: addb
+ %0 = trunc i32 %v to i8 ; <i8> [#uses=1]
+ %1 = tail call i8 @llvm.atomic.load.add.i8.p0i8(i8* %p, i8 %0) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @inc2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc2:
+; CHECK: incw
+ %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 1) ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @llvm.atomic.load.add.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @add6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add6:
+; CHECK: addw $2
+ %0 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 2) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @add2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add2:
+; CHECK: addw
+ %0 = trunc i32 %v to i16 ; <i16> [#uses=1]
+ %1 = tail call i16 @llvm.atomic.load.add.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @inc1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: inc1:
+; CHECK: incl
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 1) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @add5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: add5:
+; CHECK: addl $2
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 2) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @add1(i32* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: add1:
+; CHECK: addl
+ %0 = tail call i32 @llvm.atomic.load.add.i32.p0i32(i32* %p, i32 %v) ; <i32> [#uses=0]
+ ret void
+}
+
+define void @dec4(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec4:
+; CHECK: decq
+ %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 1) ; <i64> [#uses=0]
+ ret void
+}
+
+declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* nocapture, i64) nounwind
+
+define void @sub8(i64* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub8:
+; CHECK: subq $2
+ %0 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 2) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @sub4(i64* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub4:
+; CHECK: subq
+ %0 = sext i32 %v to i64 ; <i64> [#uses=1]
+ %1 = tail call i64 @llvm.atomic.load.sub.i64.p0i64(i64* %p, i64 %0) ; <i64> [#uses=0]
+ ret void
+}
+
+define void @dec3(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec3:
+; CHECK: decb
+ %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 1) ; <i8> [#uses=0]
+ ret void
+}
+
+declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @sub7(i8* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub7:
+; CHECK: subb $2
+ %0 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 2) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @sub3(i8* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub3:
+; CHECK: subb
+ %0 = trunc i32 %v to i8 ; <i8> [#uses=1]
+ %1 = tail call i8 @llvm.atomic.load.sub.i8.p0i8(i8* %p, i8 %0) ; <i8> [#uses=0]
+ ret void
+}
+
+define void @dec2(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec2:
+; CHECK: decw
+ %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 1) ; <i16> [#uses=0]
+ ret void
+}
+
+declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* nocapture, i16) nounwind
+
+define void @sub6(i16* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub6:
+; CHECK: subw $2
+ %0 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 2) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
+entry:
+; CHECK: sub2:
+; CHECK: subw
+ %0 = trunc i32 %v to i16 ; <i16> [#uses=1]
+ %1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
+ ret void
+}
+
+define void @dec1(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: dec1:
+; CHECK: decl
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 1) ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+
+define void @sub5(i32* nocapture %p) nounwind ssp {
+entry:
+; CHECK: sub5:
+; CHECK: subl $2
+ %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 2) ; <i32> [#uses=0]
+ ret void
+}