summaryrefslogtreecommitdiff
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2013-07-01 12:58:52 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2013-07-01 12:58:52 +0000
commit9bc8feeb4fd15883949900194c93fd1704c404b4 (patch)
treeac26a33d687daf0c98ed011d3c7aa15a38f686a5 /lib/Target/NVPTX
parent30b13ebd0b3d29d5f2d3dcbccee31d3a55917277 (diff)
downloadllvm-9bc8feeb4fd15883949900194c93fd1704c404b4.tar.gz
llvm-9bc8feeb4fd15883949900194c93fd1704c404b4.tar.bz2
llvm-9bc8feeb4fd15883949900194c93fd1704c404b4.tar.xz
[NVPTX] Add isel patterns for [reg+offset] form of ldg/ldu.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185329 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp476
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td66
2 files changed, 430 insertions, 112 deletions
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4457ec349c..03a3aa4f4d 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -787,194 +787,476 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
unsigned Opcode;
SDLoc DL(N);
SDNode *LD;
-
MemSDNode *Mem = cast<MemSDNode>(N);
+ SDValue Base, Offset, Addr;
- EVT RetVT = Mem->getMemoryVT().getVectorElementType();
+ EVT EltVT = Mem->getMemoryVT().getVectorElementType();
- // Select opcode
- if (Subtarget.is64Bit()) {
+ if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
return NULL;
case NVPTXISD::LDGV2:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
break;
case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
break;
case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
break;
case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
break;
case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
break;
case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
break;
}
break;
- case NVPTXISD::LDGV4:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ case NVPTXISD::LDUV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
break;
case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
break;
case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
break;
case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
break;
}
break;
- case NVPTXISD::LDUV2:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ case NVPTXISD::LDGV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
break;
case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
break;
case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
break;
case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
break;
}
break;
case NVPTXISD::LDUV4:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
default:
return NULL;
case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
break;
case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
break;
case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
break;
case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
break;
}
break;
}
- } else {
- switch (N->getOpcode()) {
- default:
- return NULL;
- case NVPTXISD::LDGV2:
- switch (RetVT.getSimpleVT().SimpleTy) {
+
+ SDValue Ops[] = { Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
+ ArrayRef<SDValue>(Ops, 2));
+ } else if (Subtarget.is64Bit()
+ ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
default:
return NULL;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+ case NVPTXISD::LDGV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
+ break;
+ }
break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+ case NVPTXISD::LDUV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
+ break;
+ }
break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+ case NVPTXISD::LDGV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
+ break;
+ }
break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+ case NVPTXISD::LDUV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
+ break;
+ }
break;
}
- break;
- case NVPTXISD::LDGV4:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ } else {
+ switch (N->getOpcode()) {
default:
return NULL;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+ case NVPTXISD::LDGV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
+ break;
+ }
break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+ case NVPTXISD::LDUV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
+ break;
+ }
break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+ case NVPTXISD::LDGV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
+ break;
+ }
break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+ case NVPTXISD::LDUV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
+ break;
+ }
break;
}
- break;
- case NVPTXISD::LDUV2:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ }
+
+ SDValue Ops[] = { Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
+ ArrayRef<SDValue>(Ops, 3));
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
default:
return NULL;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+ case NVPTXISD::LDGV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
+ break;
+ }
break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+ case NVPTXISD::LDUV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
+ break;
+ }
break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+ case NVPTXISD::LDGV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
+ break;
+ }
break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+ case NVPTXISD::LDUV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
+ break;
+ }
break;
}
- break;
- case NVPTXISD::LDUV4:
- switch (RetVT.getSimpleVT().SimpleTy) {
+ } else {
+ switch (N->getOpcode()) {
default:
return NULL;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+ case NVPTXISD::LDGV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
+ break;
+ }
break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+ case NVPTXISD::LDUV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
+ break;
+ }
break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+ case NVPTXISD::LDGV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
+ break;
+ }
break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+ case NVPTXISD::LDUV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
+ break;
+ }
break;
}
- break;
}
- }
- SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
+ ArrayRef<SDValue>(Ops, 2));
+ }
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 93cdfef1fe..14049b1cf8 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1342,20 +1342,38 @@ int_nvvm_ldu_global_p>;
// Elementized vector ldu
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
- def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins Int32Regs:$src),
+ def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins MEMri:$src),
!strconcat("ldu.global.", TyStr), []>;
- def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
- (ins Int64Regs:$src),
+ def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins MEMri64:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins imemAny:$src),
!strconcat("ldu.global.", TyStr), []>;
}
-multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
- def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins Int32Regs:$src),
+multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins MEMri:$src),
!strconcat("ldu.global.", TyStr), []>;
- def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
- regclass:$dst4), (ins Int64Regs:$src),
+ def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins MEMri64:$src),
+ !strconcat("ldu.global.", TyStr), []>;
+ def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins imemAny:$src),
!strconcat("ldu.global.", TyStr), []>;
}
@@ -1452,20 +1470,38 @@ defm INT_PTX_LDG_GLOBAL_p64
// Elementized vector ldg
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
- def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
- def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr), []>;
+ def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins imemAny:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
- def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
- regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+ def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr), []>;
- def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
- regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+ def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
+ regclass:$dst4), (ins imemAny:$src),
!strconcat("ld.global.nc.", TyStr), []>;
}