summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/SystemZ/README.txt4
-rw-r--r--lib/Target/SystemZ/SystemZ.h4
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp26
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h8
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td20
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td7
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td6
-rw-r--r--test/CodeGen/SystemZ/prefetch-01.ll87
-rw-r--r--test/MC/Disassembler/SystemZ/insns-pcrel.txt32
-rw-r--r--test/MC/Disassembler/SystemZ/insns.txt30
-rw-r--r--test/MC/SystemZ/insn-bad.s34
-rw-r--r--test/MC/SystemZ/insn-good.s59
12 files changed, 312 insertions, 5 deletions
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 4107685e30..d5361fb658 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -35,10 +35,6 @@ performance measurements.
--
-We don't support prefetching yet.
-
---
-
There is no scheduling support.
--
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index bb6ceac83b..ea6c7d17fa 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -57,6 +57,10 @@ namespace llvm {
const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
+ // Mask assignments for PFD.
+ const unsigned PFD_READ = 1;
+ const unsigned PFD_WRITE = 2;
+
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 65b2ad2164..20afab782a 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
+ // Handle prefetches with PFD or PFDRL.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -1806,6 +1809,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SystemZ::R15D, Op.getOperand(1));
}
+SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (!IsData)
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
+ MemIntrinsicSDNode *Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDValue Ops[] = {
+ Op.getOperand(0),
+ DAG.getConstant(Code, MVT::i32),
+ Op.getOperand(1)
+ };
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+ Node->getVTList(), Ops, array_lengthof(Ops),
+ Node->getMemoryVT(), Node->getMemOperand());
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1869,6 +1892,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKSAVE(Op, DAG);
case ISD::STACKRESTORE:
return lowerSTACKRESTORE(Op, DAG);
+ case ISD::PREFETCH:
+ return lowerPREFETCH(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -1909,6 +1934,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(PREFETCH);
}
return NULL;
#undef OPCODE
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 604453d2dd..f6a2ce041e 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -132,7 +132,12 @@ namespace SystemZISD {
// operand into the high bits
// Operand 4: the negative of operand 2, for rotating the other way
// Operand 5: the width of the field in bits (8 or 16)
- ATOMIC_CMP_SWAPW
+ ATOMIC_CMP_SWAPW,
+
+ // Prefetch from the second operand using the 4-bit control code in
+ // the first operand. The code is 1 for a load prefetch and 2 for
+ // a store prefetch.
+ PREFETCH
};
}
@@ -225,6 +230,7 @@ private:
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index f53833210e..a7e18ec681 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -540,6 +540,10 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One output operand and five input operands. The first two operands
// are registers and the other three are immediates.
//
+// Prefetch:
+// One 4-bit immediate operand and one address operand. The immediate
+// operand is 1 for a load prefetch and 2 for a store prefetch.
+//
// The format determines which input operands are tied to output operands,
// and also determines the shape of any address operand.
//
@@ -1304,6 +1308,22 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let DisableEncoding = "$R1src";
}
+class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
+ : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2",
+ [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
+
+class PrefetchRILPC<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
+ mnemonic##"\t$R1, $I2",
+ [(operator uimm8zx4:$R1, pcrel32:$I2)]> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
// A floating-point load-and test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index d857a572fb..8e1f5ac3c9 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1035,6 +1035,13 @@ let mayLoad = 1, Defs = [CC], Uses = [R0W] in
defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
//===----------------------------------------------------------------------===//
+// Prefetch
+//===----------------------------------------------------------------------===//
+
+def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+
+//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 5745e29cd8..e2c43d6e58 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -64,6 +64,9 @@ def SDT_ZString : SDTypeProfile<1, 3,
SDTCisPtrTy<2>,
SDTCisVT<3, i32>]>;
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_ZPrefetch : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -130,6 +133,9 @@ def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
[SDNPInGlue]>;
+def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Pattern fragments
diff --git a/test/CodeGen/SystemZ/prefetch-01.ll b/test/CodeGen/SystemZ/prefetch-01.ll
new file mode 100644
index 0000000000..bb7fea99ca
--- /dev/null
+++ b/test/CodeGen/SystemZ/prefetch-01.ll
@@ -0,0 +1,87 @@
+; Test data prefetching.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+@g = global [4096 x i8] zeroinitializer
+
+; Check that instruction read prefetches are ignored.
+define void @f1(i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+ call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0)
+ ret void
+}
+
+; Check that instruction write prefetches are ignored.
+define void @f2(i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK: br %r14
+ call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0)
+ ret void
+}
+
+; Check data read prefetches.
+define void @f3(i8 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: pfd 1, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1)
+ ret void
+}
+
+; Check data write prefetches.
+define void @f4(i8 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: pfd 2, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+ ret void
+}
+
+; Check an address at the negative end of the range.
+define void @f5(i8 *%base, i64 %index) {
+; CHECK-LABEL: f5:
+; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+ %add = add i64 %index, -524288
+ %ptr = getelementptr i8 *%base, i64 %add
+ call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+ ret void
+}
+
+; Check an address at the positive end of the range.
+define void @f6(i8 *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+ %add = add i64 %index, 524287
+ %ptr = getelementptr i8 *%base, i64 %add
+ call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+ ret void
+}
+
+; Check that the next address up still compiles.
+define void @f7(i8 *%base, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: 524288
+; CHECK: pfd 2,
+; CHECK: br %r14
+ %add = add i64 %index, 524288
+ %ptr = getelementptr i8 *%base, i64 %add
+ call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+ ret void
+}
+
+; Check pc-relative prefetches.
+define void @f8() {
+; CHECK-LABEL: f8:
+; CHECK: pfdrl 2, g
+; CHECK: br %r14
+ %ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0
+ call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
+ ret void
+}
diff --git a/test/MC/Disassembler/SystemZ/insns-pcrel.txt b/test/MC/Disassembler/SystemZ/insns-pcrel.txt
index c565b6e47b..c250f19979 100644
--- a/test/MC/Disassembler/SystemZ/insns-pcrel.txt
+++ b/test/MC/Disassembler/SystemZ/insns-pcrel.txt
@@ -1330,3 +1330,35 @@
# 0x0000077c:
# CHECK: brctg %r15, 0x1077a
0xa7 0xf7 0x7f 0xff
+
+# 0x00000780:
+# CHECK: pfdrl 0, 0x780
+0xc6 0x02 0x00 0x00 0x00 0x00
+
+# 0x00000786:
+# CHECK: pfdrl 15, 0x786
+0xc6 0xf2 0x00 0x00 0x00 0x00
+
+# 0x0000078c:
+# CHECK: pfdrl 0, 0x78a
+0xc6 0x02 0xff 0xff 0xff 0xff
+
+# 0x00000792:
+# CHECK: pfdrl 15, 0x790
+0xc6 0xf2 0xff 0xff 0xff 0xff
+
+# 0x00000798:
+# CHECK: pfdrl 0, 0xffffffff00000798
+0xc6 0x02 0x80 0x00 0x00 0x00
+
+# 0x0000079e:
+# CHECK: pfdrl 15, 0xffffffff0000079e
+0xc6 0xf2 0x80 0x00 0x00 0x00
+
+# 0x000007a4:
+# CHECK: pfdrl 0, 0x1000007a2
+0xc6 0x02 0x7f 0xff 0xff 0xff
+
+# 0x000007aa:
+# CHECK: pfdrl 15, 0x1000007a8
+0xc6 0xf2 0x7f 0xff 0xff 0xff
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
index 3f4f6c3dd1..360785e94b 100644
--- a/test/MC/Disassembler/SystemZ/insns.txt
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -5329,6 +5329,36 @@
# CHECK: oy %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x56
+# CHECK: pfd 0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x36
+
+# CHECK: pfd 0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x36
+
+# CHECK: pfd 0, 0
+0xe3 0x00 0x00 0x00 0x00 0x36
+
+# CHECK: pfd 0, 1
+0xe3 0x00 0x00 0x01 0x00 0x36
+
+# CHECK: pfd 0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x36
+
+# CHECK: pfd 0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x36
+
+# CHECK: pfd 0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x36
+
+# CHECK: pfd 0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x36
+
+# CHECK: pfd 0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x36
+
+# CHECK: pfd 15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x36
+
# CHECK: risbg %r0, %r0, 0, 0, 0
0xec 0x00 0x00 0x00 0x00 0x55
diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s
index aa3f4c9d83..1c478caa03 100644
--- a/test/MC/SystemZ/insn-bad.s
+++ b/test/MC/SystemZ/insn-bad.s
@@ -2276,6 +2276,40 @@
oy %r0, 524288
#CHECK: error: invalid operand
+#CHECK: pfd -1, 0
+#CHECK: error: invalid operand
+#CHECK: pfd 16, 0
+#CHECK: error: invalid operand
+#CHECK: pfd 1, -524289
+#CHECK: error: invalid operand
+#CHECK: pfd 1, 524288
+
+ pfd -1, 0
+ pfd 16, 0
+ pfd 1, -524289
+ pfd 1, 524288
+
+#CHECK: error: invalid operand
+#CHECK: pfdrl -1, 0
+#CHECK: error: invalid operand
+#CHECK: pfdrl 16, 0
+#CHECK: error: offset out of range
+#CHECK: pfdrl 1, -0x1000000002
+#CHECK: error: offset out of range
+#CHECK: pfdrl 1, -1
+#CHECK: error: offset out of range
+#CHECK: pfdrl 1, 1
+#CHECK: error: offset out of range
+#CHECK: pfdrl 1, 0x100000000
+
+ pfdrl -1, 0
+ pfdrl 16, 0
+ pfdrl 1, -0x1000000002
+ pfdrl 1, -1
+ pfdrl 1, 1
+ pfdrl 1, 0x100000000
+
+#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,-1
#CHECK: error: invalid operand
#CHECK: risbg %r0,%r0,0,0,64
diff --git a/test/MC/SystemZ/insn-good.s b/test/MC/SystemZ/insn-good.s
index 2c52d3ae60..9930d8ce6d 100644
--- a/test/MC/SystemZ/insn-good.s
+++ b/test/MC/SystemZ/insn-good.s
@@ -6106,6 +6106,65 @@
oy %r0, 524287(%r15,%r1)
oy %r15, 0
+#CHECK: pfd 0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36]
+#CHECK: pfd 0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36]
+#CHECK: pfd 0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36]
+#CHECK: pfd 0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x36]
+#CHECK: pfd 0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x36]
+#CHECK: pfd 0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x36]
+#CHECK: pfd 0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x36]
+#CHECK: pfd 0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x36]
+#CHECK: pfd 0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x36]
+#CHECK: pfd 15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x36]
+
+ pfd 0, -524288
+ pfd 0, -1
+ pfd 0, 0
+ pfd 0, 1
+ pfd 0, 524287
+ pfd 0, 0(%r1)
+ pfd 0, 0(%r15)
+ pfd 0, 524287(%r1,%r15)
+ pfd 0, 524287(%r15,%r1)
+ pfd 15, 0
+
+#CHECK: pfdrl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL
+ pfdrl 0, -0x100000000
+#CHECK: pfdrl 0, .[[LAB:L.*]]-2 # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL
+ pfdrl 0, -2
+#CHECK: pfdrl 0, .[[LAB:L.*]] # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL
+ pfdrl 0, 0
+#CHECK: pfdrl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x02,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL
+ pfdrl 0, 0xfffffffe
+
+#CHECK: pfdrl 0, foo # encoding: [0xc6,0x02,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: pfdrl 15, foo # encoding: [0xc6,0xf2,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+ pfdrl 0, foo
+ pfdrl 15, foo
+
+#CHECK: pfdrl 3, bar+100 # encoding: [0xc6,0x32,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: pfdrl 4, bar+100 # encoding: [0xc6,0x42,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+ pfdrl 3, bar+100
+ pfdrl 4, bar+100
+
+#CHECK: pfdrl 7, frob@PLT # encoding: [0xc6,0x72,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: pfdrl 8, frob@PLT # encoding: [0xc6,0x82,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+ pfdrl 7, frob@PLT
+ pfdrl 8, frob@PLT
+
#CHECK: risbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
#CHECK: risbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
#CHECK: risbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x55]