summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp14
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp44
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h1
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td124
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td4
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-add-05.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-add-06.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-and-05.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-and-06.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-or-05.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-or-06.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-sub-05.ll69
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-sub-06.ll69
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-xor-05.ll64
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-xor-06.ll64
15 files changed, 768 insertions, 69 deletions
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 567a6b7451..d95361eed1 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1093,20 +1093,6 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
}
break;
- case ISD::ATOMIC_LOAD_SUB:
- // Try to convert subtractions of constants to additions.
- if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Node->getOperand(2))) {
- uint64_t Value = -Op2->getZExtValue();
- EVT VT = Node->getValueType(0);
- if (VT == MVT::i32 || isInt<32>(Value)) {
- SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1),
- CurDAG->getConstant(int32_t(Value), VT) };
- Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD,
- Node->getVTList(), Ops, array_lengthof(Ops));
- }
- }
- break;
-
case SystemZISD::SELECT_CCMASK: {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index e916771efa..25972007df 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -160,6 +160,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+ // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
+ // available, or if the operand is constant.
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+
// No special instructions for these.
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
@@ -2266,6 +2270,44 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
return DAG.getMergeValues(RetOps, 2, DL);
}
+// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
+// two into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
+// operations into additions.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = Node->getMemoryVT();
+ if (MemVT == MVT::i32 || MemVT == MVT::i64) {
+ // A full-width operation.
+ assert(Op.getValueType() == MemVT && "Mismatched VTs");
+ SDValue Src2 = Node->getVal();
+ SDValue NegSrc2;
+ SDLoc DL(Src2);
+
+ if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
+ // Use an addition if the operand is constant and either LAA(G) is
+ // available or the negative value is in the range of A(G)FHI.
+ int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
+ if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1())
+ NegSrc2 = DAG.getConstant(Value, MemVT);
+ } else if (TM.getSubtargetImpl()->hasInterlockedAccess1())
+ // Use LAA(G) if available.
+ NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
+ Src2);
+
+ if (NegSrc2.getNode())
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
+ Node->getChain(), Node->getBasePtr(), NegSrc2,
+ Node->getMemOperand(), Node->getOrdering(),
+ Node->getSynchScope());
+
+ // Use the node as-is.
+ return Op;
+ }
+
+ return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+}
+
// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two
// into a fullword ATOMIC_CMP_SWAPW operation.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
@@ -2394,7 +2436,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
- return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+ return lowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
case ISD::ATOMIC_LOAD_OR:
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 01f09c3737..2caa0bcb6d 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -283,6 +283,7 @@ private:
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
+ SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 91c3d3c3d0..e1af0932c2 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1204,70 +1204,86 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
- def LAA : LoadAndOpRSY<"laa", 0xEBF8, null_frag, GR32>;
- def LAAG : LoadAndOpRSY<"laag", 0xEBE8, null_frag, GR64>;
+ def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>;
+ def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>;
def LAAL : LoadAndOpRSY<"laal", 0xEBFA, null_frag, GR32>;
def LAALG : LoadAndOpRSY<"laalg", 0xEBEA, null_frag, GR64>;
- def LAN : LoadAndOpRSY<"lan", 0xEBF4, null_frag, GR32>;
- def LANG : LoadAndOpRSY<"lang", 0xEBE4, null_frag, GR64>;
- def LAO : LoadAndOpRSY<"lao", 0xEBF6, null_frag, GR32>;
- def LAOG : LoadAndOpRSY<"laog", 0xEBE6, null_frag, GR64>;
- def LAX : LoadAndOpRSY<"lax", 0xEBF7, null_frag, GR32>;
- def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, null_frag, GR64>;
+ def LAN : LoadAndOpRSY<"lan", 0xEBF4, atomic_load_and_32, GR32>;
+ def LANG : LoadAndOpRSY<"lang", 0xEBE4, atomic_load_and_64, GR64>;
+ def LAO : LoadAndOpRSY<"lao", 0xEBF6, atomic_load_or_32, GR32>;
+ def LAOG : LoadAndOpRSY<"laog", 0xEBE6, atomic_load_or_64, GR64>;
+ def LAX : LoadAndOpRSY<"lax", 0xEBF7, atomic_load_xor_32, GR32>;
+ def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, atomic_load_xor_64, GR64>;
}
-def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
-def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>;
-def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>;
-
-def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>;
-def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>;
-def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>;
-def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>;
-def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>;
-def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>;
-def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>;
-def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>;
-
-def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>;
-def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>;
-def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
-
-def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
-def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
-def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
-def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
-def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
-def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
-def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
-def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
-def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
-def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
-def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
-def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
-def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
+def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
+def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>;
+def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>;
+
+def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>;
+def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>;
+ def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>;
+ def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>;
+ def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>;
+ def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>;
+ def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>;
+}
+
+def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>;
+def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>;
+def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
+
+def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
+def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
+ def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32,
+ imm32ll16c>;
+ def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32,
+ imm32lh16c>;
+ def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+ def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
+ def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64ll16c>;
+ def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64lh16c>;
+ def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64hl16c>;
+ def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64hh16c>;
+ def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64lf32c>;
+ def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64,
+ imm64hf32c>;
+}
def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
-def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
-def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
-def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
-def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
-def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
-def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
-def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
-def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
-def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
-def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
-def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
+ def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+ def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+ def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+ def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
+ def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+ def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+ def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+ def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+ def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+ def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+}
def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
-def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
-def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
-def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
-def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
-def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+let Predicates = [FeatureNoInterlockedAccess1] in {
+ def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
+ def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+ def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
+ def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+ def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+}
def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index b3df317bc8..e6b58f17b0 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -16,6 +16,9 @@ class SystemZFeature<string extname, string intname, string desc>
AssemblerPredicate<"Feature"##intname, extname>,
SubtargetFeature<extname, "Has"##intname, "true", desc>;
+class SystemZMissingFeature<string intname>
+ : Predicate<"!Subtarget.has"##intname##"()">;
+
def FeatureDistinctOps : SystemZFeature<
"distinct-ops", "DistinctOps",
"Assume that the distinct-operands facility is installed"
@@ -45,6 +48,7 @@ def FeatureInterlockedAccess1 : SystemZFeature<
"interlocked-access1", "InterlockedAccess1",
"Assume that interlocked-access facility 1 is installed"
>;
+def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-05.ll b/test/CodeGen/SystemZ/atomicrmw-add-05.ll
new file mode 100644
index 0000000000..956c0d9642
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-add-05.ll
@@ -0,0 +1,64 @@
+; Test 32-bit atomic additions, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check addition of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: laa %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw add i32 *%src, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check addition of 1, which needs a temporary.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[TMP:%r[0-5]]], 1
+; CHECK: laa %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw add i32 *%src, i32 1 seq_cst
+ ret i32 %res
+}
+
+; Check the high end of the LAA range.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: laa %r2, %r4, 524284(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131071
+ %res = atomicrmw add i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: laa %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131072
+ %res = atomicrmw add i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the low end of the LAA range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: laa %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131072
+ %res = atomicrmw add i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524292
+; CHECK: laa %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131073
+ %res = atomicrmw add i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-06.ll b/test/CodeGen/SystemZ/atomicrmw-add-06.ll
new file mode 100644
index 0000000000..f508858d15
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-add-06.ll
@@ -0,0 +1,64 @@
+; Test 64-bit atomic additions, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check addition of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: laag %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw add i64 *%src, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check addition of 1, which needs a temporary.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lghi [[TMP:%r[0-5]]], 1
+; CHECK: laag %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw add i64 *%src, i64 1 seq_cst
+ ret i64 %res
+}
+
+; Check the high end of the LAAG range.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: laag %r2, %r4, 524280(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65535
+ %res = atomicrmw add i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: laag %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65536
+ %res = atomicrmw add i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the low end of the LAAG range.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: laag %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65536
+ %res = atomicrmw add i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524296
+; CHECK: laag %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65537
+ %res = atomicrmw add i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-05.ll b/test/CodeGen/SystemZ/atomicrmw-and-05.ll
new file mode 100644
index 0000000000..f0b999c604
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-and-05.ll
@@ -0,0 +1,64 @@
+; Test 32-bit atomic ANDs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check AND of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lan %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw and i32 *%src, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check AND of 1, which needs a temporary.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[TMP:%r[0-5]]], 1
+; CHECK: lan %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw and i32 *%src, i32 1 seq_cst
+ ret i32 %res
+}
+
+; Check the high end of the LAN range.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lan %r2, %r4, 524284(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131071
+ %res = atomicrmw and i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: lan %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131072
+ %res = atomicrmw and i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the low end of the LAN range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lan %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131072
+ %res = atomicrmw and i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524292
+; CHECK: lan %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131073
+ %res = atomicrmw and i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-06.ll b/test/CodeGen/SystemZ/atomicrmw-and-06.ll
new file mode 100644
index 0000000000..e5b71945d5
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-and-06.ll
@@ -0,0 +1,64 @@
+; Test 64-bit atomic ANDs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check AND of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lang %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw and i64 *%src, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check AND of -2, which needs a temporary.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lghi [[TMP:%r[0-5]]], -2
+; CHECK: lang %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw and i64 *%src, i64 -2 seq_cst
+ ret i64 %res
+}
+
+; Check the high end of the LANG range.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lang %r2, %r4, 524280(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65535
+ %res = atomicrmw and i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: lang %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65536
+ %res = atomicrmw and i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the low end of the LANG range.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lang %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65536
+ %res = atomicrmw and i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524296
+; CHECK: lang %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65537
+ %res = atomicrmw and i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-05.ll b/test/CodeGen/SystemZ/atomicrmw-or-05.ll
new file mode 100644
index 0000000000..b38654ca6f
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-or-05.ll
@@ -0,0 +1,64 @@
+; Test 32-bit atomic ORs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check OR of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lao %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw or i32 *%src, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check OR of 1, which needs a temporary.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[TMP:%r[0-5]]], 1
+; CHECK: lao %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw or i32 *%src, i32 1 seq_cst
+ ret i32 %res
+}
+
+; Check the high end of the LAO range.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lao %r2, %r4, 524284(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131071
+ %res = atomicrmw or i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: lao %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131072
+ %res = atomicrmw or i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the low end of the LAO range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lao %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131072
+ %res = atomicrmw or i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524292
+; CHECK: lao %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131073
+ %res = atomicrmw or i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-06.ll b/test/CodeGen/SystemZ/atomicrmw-or-06.ll
new file mode 100644
index 0000000000..30874abfe4
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-or-06.ll
@@ -0,0 +1,64 @@
+; Test 64-bit atomic ORs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check OR of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: laog %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw or i64 *%src, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check OR of 1, which needs a temporary.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lghi [[TMP:%r[0-5]]], 1
+; CHECK: laog %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw or i64 *%src, i64 1 seq_cst
+ ret i64 %res
+}
+
+; Check the high end of the LAOG range.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: laog %r2, %r4, 524280(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65535
+ %res = atomicrmw or i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: laog %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65536
+ %res = atomicrmw or i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the low end of the LAOG range.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: laog %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65536
+ %res = atomicrmw or i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524296
+; CHECK: laog %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65537
+ %res = atomicrmw or i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-05.ll b/test/CodeGen/SystemZ/atomicrmw-sub-05.ll
new file mode 100644
index 0000000000..7668f0e2a7
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-05.ll
@@ -0,0 +1,69 @@
+; Test 32-bit atomic subtractions, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check addition of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lcr [[NEG:%r[0-5]]], %r4
+; CHECK: laa %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw sub i32 *%src, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check addition of 1, which needs a temporary.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[TMP:%r[0-5]]], -1
+; CHECK: laa %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw sub i32 *%src, i32 1 seq_cst
+ ret i32 %res
+}
+
+; Check the high end of the LAA range.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lcr [[NEG:%r[0-5]]], %r4
+; CHECK: laa %r2, [[NEG]], 524284(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131071
+ %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: lcr [[NEG:%r[0-5]]], %r4
+; CHECK-DAG: agfi %r3, 524288
+; CHECK: laa %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131072
+ %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the low end of the LAA range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lcr [[NEG:%r[0-5]]], %r4
+; CHECK: laa %r2, [[NEG]], -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131072
+ %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: lcr [[NEG:%r[0-5]]], %r4
+; CHECK-DAG: agfi %r3, -524292
+; CHECK: laa %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131073
+ %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-06.ll b/test/CodeGen/SystemZ/atomicrmw-sub-06.ll
new file mode 100644
index 0000000000..5d11bdf96c
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-06.ll
@@ -0,0 +1,69 @@
+; Test 64-bit atomic subtractions, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check addition of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lcgr [[NEG:%r[0-5]]], %r4
+; CHECK: laag %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw sub i64 *%src, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check addition of 1, which needs a temporary.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lghi [[TMP:%r[0-5]]], -1
+; CHECK: laag %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw sub i64 *%src, i64 1 seq_cst
+ ret i64 %res
+}
+
+; Check the high end of the LAAG range.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lcgr [[NEG:%r[0-5]]], %r4
+; CHECK: laag %r2, [[NEG]], 524280(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65535
+ %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: lcgr [[NEG:%r[0-5]]], %r4
+; CHECK-DAG: agfi %r3, 524288
+; CHECK: laag %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65536
+ %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the low end of the LAAG range.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lcgr [[NEG:%r[0-5]]], %r4
+; CHECK: laag %r2, [[NEG]], -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65536
+ %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: lcgr [[NEG:%r[0-5]]], %r4
+; CHECK-DAG: agfi %r3, -524296
+; CHECK: laag %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65537
+ %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-05.ll b/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
new file mode 100644
index 0000000000..e9e7d30b35
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
@@ -0,0 +1,64 @@
+; Test 32-bit atomic ORs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check OR of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lax %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw xor i32 *%src, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check OR of 1, which needs a temporary.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[TMP:%r[0-5]]], 1
+; CHECK: lax %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw xor i32 *%src, i32 1 seq_cst
+ ret i32 %res
+}
+
+; Check the high end of the LAX range.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lax %r2, %r4, 524284(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131071
+ %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: lax %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 131072
+ %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the low end of the LAX range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lax %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131072
+ %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524292
+; CHECK: lax %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%src, i32 -131073
+ %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-06.ll b/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
new file mode 100644
index 0000000000..0870c6476f
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
@@ -0,0 +1,64 @@
+; Test 64-bit atomic XORs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check XOR of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: laxg %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw xor i64 *%src, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check XOR of 1, which needs a temporary.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lghi [[TMP:%r[0-5]]], 1
+; CHECK: laxg %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+ %res = atomicrmw xor i64 *%src, i64 1 seq_cst
+ ret i64 %res
+}
+
+; Check the high end of the LAXG range.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: laxg %r2, %r4, 524280(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65535
+ %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: laxg %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 65536
+ %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the low end of the LAXG range.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: laxg %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65536
+ %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524296
+; CHECK: laxg %r2, %r4, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%src, i64 -65537
+ %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
+ ret i64 %res
+}