summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2013-02-12 14:18:49 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2013-02-12 14:18:49 +0000
commit7eacad03efda36e09ebd96e95d7891cadaaa9087 (patch)
treec66658286eca956701f8334550a8edefe236b468 /lib/Target
parentc8a196ae8fad3cba7a777e2e7916fd36ebf70fe6 (diff)
downloadllvm-7eacad03efda36e09ebd96e95d7891cadaaa9087.tar.gz
llvm-7eacad03efda36e09ebd96e95d7891cadaaa9087.tar.bz2
llvm-7eacad03efda36e09ebd96e95d7891cadaaa9087.tar.xz
[NVPTX] Disable vector registers
Vectors were being manually scalarized by the backend. Instead, let the target-independent code do all of the work. The manual scalarization was from a time before good target-independent support for scalarization in LLVM. However, this forces us to specially-handle vector loads and stores, which we can turn into PTX instructions that produce/consume multiple operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174968 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/NVPTX.h1
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp46
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp809
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h7
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp458
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h21
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp40
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td96
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td145
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp156
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h4
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td44
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h1
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp1
-rw-r--r--lib/Target/NVPTX/VectorElementize.cpp1239
-rw-r--r--lib/Target/NVPTX/gen-register-defs.py202
17 files changed, 1274 insertions, 1997 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7cb16b4dd8..47baef6696 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,7 +22,6 @@ set(NVPTXCodeGen_sources
NVPTXAllocaHoisting.cpp
NVPTXAsmPrinter.cpp
NVPTXUtilities.cpp
- VectorElementize.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 097b50aa4e..b46ea881c4 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -53,7 +53,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
-FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3f99d1d0e4..0115e1f5d3 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -503,21 +503,7 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
O << getNVPTXRegClassStr(RC) << mapped_vr;
return;
}
- // Vector virtual register
- if (getNVPTXVectorSize(RC) == 4)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
- << "}";
- else if (getNVPTXVectorSize(RC) == 2)
- O << "{"
- << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
- << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
- << "}";
- else
- llvm_unreachable("Unsupported vector size");
+ report_fatal_error("Bad register!");
}
void
@@ -2024,29 +2010,9 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::StoreParamI64: case NVPTX::StoreParamI8:
case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8:
case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16:
- case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
- case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
- case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
- case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16:
- case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8:
- case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64:
- case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32:
- case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8:
- case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16:
- case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8:
case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64:
case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32:
case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8:
- case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64:
- case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32:
- case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8:
- case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16:
- case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8:
- case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64:
- case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32:
- case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8:
- case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16:
- case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8:
case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64:
case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32:
case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64:
@@ -2057,16 +2023,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64:
case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32:
case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8:
- case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64:
- case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32:
- case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8:
- case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16:
- case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8:
- case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64:
- case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32:
- case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8:
- case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16:
- case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8:
case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE:
return true;
}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 36ab7f5c15..481f13afd1 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -105,6 +105,21 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
ResNode = SelectStore(N);
break;
+ case NVPTXISD::LoadV2:
+ case NVPTXISD::LoadV4:
+ ResNode = SelectLoadVector(N);
+ break;
+ case NVPTXISD::LDGV2:
+ case NVPTXISD::LDGV4:
+ case NVPTXISD::LDUV2:
+ case NVPTXISD::LDUV4:
+ ResNode = SelectLDGLDUVector(N);
+ break;
+ case NVPTXISD::StoreV2:
+ case NVPTXISD::StoreV4:
+ ResNode = SelectStoreVector(N);
+ break;
+ default: break;
}
if (ResNode)
return ResNode;
@@ -214,16 +229,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_avar; break;
case MVT::f32: Opcode = NVPTX::LD_f32_avar; break;
case MVT::f64: Opcode = NVPTX::LD_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -244,16 +249,6 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
case MVT::i64: Opcode = NVPTX::LD_i64_asi; break;
case MVT::f32: Opcode = NVPTX::LD_f32_asi; break;
case MVT::f64: Opcode = NVPTX::LD_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { getI32Imm(isVolatile),
@@ -267,24 +262,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N1.getNode(), N1, Base, Offset):
SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_ari; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -296,24 +293,26 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
MVT::Other, Ops, 8);
}
else {
- switch (TargetVT) {
- case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (TargetVT) {
+ case MVT::i8: Opcode = NVPTX::LD_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::LD_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::LD_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::LD_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::LD_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::LD_f64_areg; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { getI32Imm(isVolatile),
getI32Imm(codeAddrSpace),
@@ -334,6 +333,370 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return NVPTXLD;
}
+SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT LoadedVT = MemSD->getMemoryVT();
+
+
+ if (!LoadedVT.isSimple())
+ return NULL;
+
+ // Address Space Setting
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Vector Setting
+ MVT SimpleVT = LoadedVT.getSimpleVT();
+
+ // Type Setting: fromType + fromTypeWidth
+ //
+ // Sign : ISD::SEXTLOAD
+ // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
+ // type is integer
+ // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT ScalarVT = SimpleVT.getScalarType();
+ unsigned FromTypeWidth = ScalarVT.getSizeInBits();
+ unsigned int FromType;
+ // The last operand holds the original LoadSDNode::getExtensionType() value
+ unsigned ExtensionType =
+ cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+ if (ExtensionType == ISD::SEXTLOAD)
+ FromType = NVPTX::PTXLdStInstCode::Signed;
+ else if (ScalarVT.isFloatingPoint())
+ FromType = NVPTX::PTXLdStInstCode::Float;
+ else
+ FromType = NVPTX::PTXLdStInstCode::Unsigned;
+
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break;
+ case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break;
+ default: return NULL;
+ }
+
+ EVT EltVT = N->getValueType(0);
+
+ if (SelectDirectAddr(Op1, Addr)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Addr, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
+ SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break;
+ }
+ break;
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Base, Offset, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
+ SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Base, Offset, Chain };
+
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LoadV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break;
+ case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break;
+ case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break;
+ }
+ break;
+ case NVPTXISD::LoadV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break;
+ case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break;
+ case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break;
+ case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { getI32Imm(IsVolatile),
+ getI32Imm(CodeAddrSpace),
+ getI32Imm(VecType),
+ getI32Imm(FromType),
+ getI32Imm(FromTypeWidth),
+ Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ }
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *LD;
+
+ EVT RetVT = N->getValueType(0);
+
+ // Select opcode
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::LDGV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDGV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDUV2:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
+ case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
+ case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+ }
+ break;
+ case NVPTXISD::LDUV4:
+ switch (RetVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
+ case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
+ case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
+ case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+ }
+ break;
+ }
+ }
+
+ SDValue Ops[] = { Op1, Chain };
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return LD;
+}
+
+
SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -400,16 +763,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_avar; break;
case MVT::f32: Opcode = NVPTX::ST_f32_avar; break;
case MVT::f64: Opcode = NVPTX::ST_f64_avar; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -431,16 +784,6 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
case MVT::i64: Opcode = NVPTX::ST_i64_asi; break;
case MVT::f32: Opcode = NVPTX::ST_f32_asi; break;
case MVT::f64: Opcode = NVPTX::ST_f64_asi; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break;
default: return NULL;
}
SDValue Ops[] = { N1,
@@ -455,24 +798,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
} else if (Subtarget.is64Bit()?
SelectADDRri64(N2.getNode(), N2, Base, Offset):
SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_ari; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_ari; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_ari; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_ari; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_ari; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_ari; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -484,24 +829,26 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
NVPTXST = CurDAG->getMachineNode(Opcode, dl,
MVT::Other, Ops, 9);
} else {
- switch (SourceVT) {
- case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
- case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
- case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
- case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
- case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
- case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
- case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break;
- case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
- case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
- case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
- case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
- case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
- case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break;
- case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
- case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
- case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
- default: return NULL;
+ if (Subtarget.is64Bit()) {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break;
+ default: return NULL;
+ }
+ } else {
+ switch (SourceVT) {
+ case MVT::i8: Opcode = NVPTX::ST_i8_areg; break;
+ case MVT::i16: Opcode = NVPTX::ST_i16_areg; break;
+ case MVT::i32: Opcode = NVPTX::ST_i32_areg; break;
+ case MVT::i64: Opcode = NVPTX::ST_i64_areg; break;
+ case MVT::f32: Opcode = NVPTX::ST_f32_areg; break;
+ case MVT::f64: Opcode = NVPTX::ST_f64_areg; break;
+ default: return NULL;
+ }
}
SDValue Ops[] = { N1,
getI32Imm(isVolatile),
@@ -523,6 +870,244 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return NVPTXST;
}
+SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Addr, Offset, Base;
+ unsigned Opcode;
+ DebugLoc DL = N->getDebugLoc();
+ SDNode *ST;
+ EVT EltVT = Op1.getValueType();
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+ EVT StoreVT = MemSD->getMemoryVT();
+
+ // Address Space Setting
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+
+ if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
+ report_fatal_error("Cannot store to pointer that points to constant "
+ "memory space");
+ }
+
+ // Volatile Setting
+ // - .volatile is only availalble for .global and .shared
+ bool IsVolatile = MemSD->isVolatile();
+ if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+ CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+ IsVolatile = false;
+
+ // Type Setting: toType + toTypeWidth
+ // - for integer type, always use 'u'
+ assert(StoreVT.isSimple() && "Store value is not simple");
+ MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
+ unsigned ToTypeWidth = ScalarVT.getSizeInBits();
+ unsigned ToType;
+ if (ScalarVT.isFloatingPoint())
+ ToType = NVPTX::PTXLdStInstCode::Float;
+ else
+ ToType = NVPTX::PTXLdStInstCode::Unsigned;
+
+
+ SmallVector<SDValue, 12> StOps;
+ SDValue N2;
+ unsigned VecType;
+
+ switch (N->getOpcode()) {
+ case NVPTXISD::StoreV2:
+ VecType = NVPTX::PTXLdStInstCode::V2;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ N2 = N->getOperand(3);
+ break;
+ case NVPTXISD::StoreV4:
+ VecType = NVPTX::PTXLdStInstCode::V4;
+ StOps.push_back(N->getOperand(1));
+ StOps.push_back(N->getOperand(2));
+ StOps.push_back(N->getOperand(3));
+ StOps.push_back(N->getOperand(4));
+ N2 = N->getOperand(5);
+ break;
+ default: return NULL;
+ }
+
+ StOps.push_back(getI32Imm(IsVolatile));
+ StOps.push_back(getI32Imm(CodeAddrSpace));
+ StOps.push_back(getI32Imm(VecType));
+ StOps.push_back(getI32Imm(ToType));
+ StOps.push_back(getI32Imm(ToTypeWidth));
+
+ if (SelectDirectAddr(N2, Addr)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break;
+ }
+ break;
+ }
+ StOps.push_back(Addr);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRsi64(N2.getNode(), N2, Base, Offset):
+ SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break;
+ }
+ break;
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else if (Subtarget.is64Bit()?
+ SelectADDRri64(N2.getNode(), N2, Base, Offset):
+ SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(Base);
+ StOps.push_back(Offset);
+ } else {
+ if (Subtarget.is64Bit()) {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break;
+ }
+ break;
+ }
+ } else {
+ switch (N->getOpcode()) {
+ default: return NULL;
+ case NVPTXISD::StoreV2:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break;
+ case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break;
+ case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break;
+ }
+ break;
+ case NVPTXISD::StoreV4:
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default: return NULL;
+ case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break;
+ case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break;
+ case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break;
+ case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break;
+ }
+ break;
+ }
+ }
+ StOps.push_back(N2);
+ }
+
+ StOps.push_back(Chain);
+
+ ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return ST;
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 14f2091a3f..4ec924117a 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -72,8 +72,11 @@ private:
#include "NVPTXGenDAGISel.inc"
SDNode *Select(SDNode *N);
- SDNode* SelectLoad(SDNode *N);
- SDNode* SelectStore(SDNode *N);
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectLoadVector(SDNode *N);
+ SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectStoreVector(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2699cea8f6..9ba2a1d984 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -45,15 +45,27 @@ using namespace llvm;
static unsigned int uniqueCallSite = 0;
static cl::opt<bool>
-RetainVectorOperands("nvptx-codegen-vectors",
- cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
- cl::init(true));
-
-static cl::opt<bool>
sched4reg("nvptx-sched4reg",
cl::desc("NVPTX Specific: schedule for register pressue"),
cl::init(false));
+static bool IsPTXVectorType(MVT VT) {
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::v2i8:
+ case MVT::v4i8:
+ case MVT::v2i16:
+ case MVT::v4i16:
+ case MVT::v2i32:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return true;
+ }
+}
+
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()),
@@ -87,41 +99,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
- if (RetainVectorOperands) {
- addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
- addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
- addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
- addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
- addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
- addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
- addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
- addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
- addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
- addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom);
-
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom);
- }
-
// Operations not directly supported by NVPTX.
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
@@ -191,42 +168,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- // By default, CONCAT_VECTORS is implemented via store/load
- // through stack. It is slow and uses local memory. We need
- // to custom-lowering them.
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom);
-
- // Expand vector int to float and float to int conversions
- // - For SINT_TO_FP and UINT_TO_FP, the src type
- // (Node->getOperand(0).getValueType())
- // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT,
- // the dest type (Node->getValueType(0)) is used.
- //
- // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector
- // case, and
- // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case.
- //
- // That is why v4i32 or v2i32 are used here.
- //
- // The expansion for vectors happens in VectorLegalizer::LegalizeOp()
- // (LegalizeVectorOps.cpp).
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ // Register custom handling for vector loads/stores
+ for (int i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ if (IsPTXVectorType(VT)) {
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+ }
+ }
// Now deduce the information based on the above mentioned
// actions
@@ -268,6 +219,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
+ case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
+ case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
+ case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
+ case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
+ case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
+ case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
+ case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
}
}
@@ -868,12 +827,19 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::i1)
+ return LowerLOADi1(Op, DAG);
+ else
+ return SDValue();
+}
+
// v = ld i1* addr
// =>
// v1 = ld i8* addr
// v = trunc v1 to i1
SDValue NVPTXTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
DebugLoc dl = Node->getDebugLoc();
@@ -893,12 +859,109 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ EVT ValVT = Op.getOperand(1).getValueType();
+ if (ValVT == MVT::i1)
+ return LowerSTOREi1(Op, DAG);
+ else if (ValVT.isVector())
+ return LowerSTOREVector(Op, DAG);
+ else
+ return SDValue();
+}
+
+SDValue
+NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue Val = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT ValVT = Val.getValueType();
+
+ if (ValVT.isVector()) {
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 stores of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ if (!ValVT.isSimple())
+ return SDValue();
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ unsigned Opcode = 0;
+ EVT EltVT = ValVT.getVectorElementType();
+ unsigned NumElts = ValVT.getVectorNumElements();
+
+ // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // stored type to i16 and propogate the "real" type as the memory type.
+ bool NeedExt = false;
+ if (EltVT.getSizeInBits() < 16)
+ NeedExt = true;
+
+ switch (NumElts) {
+ default: return SDValue();
+ case 2:
+ Opcode = NVPTXISD::StoreV2;
+ break;
+ case 4: {
+ Opcode = NVPTXISD::StoreV4;
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> Ops;
+
+ // First is the chain
+ Ops.push_back(N->getOperand(0));
+
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i));
+ if (NeedExt)
+ // ANY_EXTEND is correct here since the store will only look at the
+ // lower-order bits anyway.
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
+
+ // Then any remaining arguments
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ MemSDNode *MemSD = cast<MemSDNode>(N);
+
+ SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
+ DAG.getVTList(MVT::Other), &Ops[0],
+ Ops.size(), MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
+
+
+ //return DCI.CombineTo(N, NewSt, true);
+ return NewSt;
+ }
+
+ return SDValue();
+}
+
// st i1 v, addr
// =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -1348,3 +1411,242 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
+
+/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
+static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue>& Results) {
+ EVT ResVT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ assert(ResVT.isVector() && "Vector load must have vector type");
+
+ // We only handle "native" vector sizes for now, e.g. <4 x double> is not
+ // legal. We can (and should) split that into 2 loads of <2 x double> here
+ // but I'm leaving that as a TODO for now.
+ assert(ResVT.isSimple() && "Can only handle simple types");
+ switch (ResVT.getSimpleVT().SimpleTy) {
+ default: return;
+ case MVT::v2i8:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v2i64:
+ case MVT::v2f32:
+ case MVT::v2f64:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ // This is a "native" vector type
+ break;
+ }
+
+ EVT EltVT = ResVT.getVectorElementType();
+ unsigned NumElts = ResVT.getVectorNumElements();
+
+ // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propogate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default: return;
+ case 2:
+ Opcode = NVPTXISD::LoadV2;
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+
+ // The select routine does not have access to the LoadSDNode instance, so
+ // pass along the extension information
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), LD->getMemoryVT(),
+ LD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+}
+
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Intrin = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Get the intrinsic ID
+ unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
+ switch(IntrinNo) {
+ default: return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p: {
+ EVT ResVT = N->getValueType(0);
+
+ if (ResVT.isVector()) {
+ // Vector LDG/LDU
+
+ unsigned NumElts = ResVT.getVectorNumElements();
+ EVT EltVT = ResVT.getVectorElementType();
+
+ // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+ // Therefore, we must ensure the type is legal. For i1 and i8, we set the
+ // loaded type to i16 and propogate the "real" type as the memory type.
+ bool NeedTrunc = false;
+ if (EltVT.getSizeInBits() < 16) {
+ EltVT = MVT::i16;
+ NeedTrunc = true;
+ }
+
+ unsigned Opcode = 0;
+ SDVTList LdResVTs;
+
+ switch (NumElts) {
+ default: return;
+ case 2:
+ switch(IntrinNo) {
+ default: return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV2;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV2;
+ break;
+ }
+ LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
+ break;
+ case 4: {
+ switch(IntrinNo) {
+ default: return;
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p:
+ Opcode = NVPTXISD::LDGV4;
+ break;
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_p:
+ Opcode = NVPTXISD::LDUV4;
+ break;
+ }
+ EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
+ LdResVTs = DAG.getVTList(ListVTs, 5);
+ break;
+ }
+ }
+
+ SmallVector<SDValue, 8> OtherOps;
+
+ // Copy regular operands
+
+ OtherOps.push_back(Chain); // Chain
+ // Skip operand 1 (intrinsic ID)
+ // Others
+ for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
+ OtherOps.push_back(N->getOperand(i));
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
+ OtherOps.size(), MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
+
+ SmallVector<SDValue, 4> ScalarRes;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
+
+ SDValue LoadChain = NewLD.getValue(NumElts);
+
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+
+ Results.push_back(BuildVec);
+ Results.push_back(LoadChain);
+ } else {
+ // i8 LDG/LDU
+ assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
+ "Custom handling of non-i8 ldu/ldg?");
+
+ // Just copy all operands as-is
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
+ // Force output to i16
+ SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
+
+ MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
+
+ // We make sure the memory type is i8, which will be used during isel
+ // to select the proper instruction.
+ SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
+ LdResVTs, &Ops[0],
+ Ops.size(), MVT::i8,
+ MemSD->getMemOperand());
+
+ Results.push_back(NewLD.getValue(0));
+ Results.push_back(NewLD.getValue(1));
+ }
+ }
+ }
+}
+
+void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default: report_fatal_error("Unhandled custom legalization");
+ case ISD::LOAD:
+ ReplaceLoadVector(N, DAG, Results);
+ return;
+ case ISD::INTRINSIC_W_CHAIN:
+ ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
+ return;
+ }
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 0a1833a7c9..95e7b55c48 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -58,7 +58,16 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
- Dummy
+ Dummy,
+
+ LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LoadV4,
+ LDGV2, // LDG.v2
+ LDGV4, // LDG.v4
+ LDUV2, // LDU.v2
+ LDUV4, // LDU.v4
+ StoreV2,
+ StoreV4
};
}
@@ -143,8 +152,16 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 6fe654cb49..9e73d80c28 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -65,46 +65,6 @@ void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
NVPTX::Float64RegsRegClass.contains(SrcReg))
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4F32RegsRegClass.contains(DestReg) &&
- NVPTX::V4F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I32RegsRegClass.contains(DestReg) &&
- NVPTX::V4I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F32RegsRegClass.contains(DestReg) &&
- NVPTX::V2F32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I32RegsRegClass.contains(DestReg) &&
- NVPTX::V2I32RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I8RegsRegClass.contains(DestReg) &&
- NVPTX::V4I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I8RegsRegClass.contains(DestReg) &&
- NVPTX::V2I8RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V4I16RegsRegClass.contains(DestReg) &&
- NVPTX::V4I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I16RegsRegClass.contains(DestReg) &&
- NVPTX::V2I16RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2I64RegsRegClass.contains(DestReg) &&
- NVPTX::V2I64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (NVPTX::V2F64RegsRegClass.contains(DestReg) &&
- NVPTX::V2F64RegsRegClass.contains(SrcReg))
- BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
else {
llvm_unreachable("Don't know how to copy a register");
}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 8a410b8729..f43abe283b 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
@@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr];"), []>;
+ def _areg_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr];"), []>;
def _ari : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t$dst, [$addr+$offset];"), []>;
+ def _ari_64 : NVPTXInst<(outs regclass:$dst),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
+ " \t$dst, [$addr+$offset];"), []>;
def _asi : NVPTXInst<(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
-let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in {
-defm LD_v2i8 : LD<V2I8Regs>;
-defm LD_v4i8 : LD<V4I8Regs>;
-defm LD_v2i16 : LD<V2I16Regs>;
-defm LD_v4i16 : LD<V4I16Regs>;
-defm LD_v2i32 : LD<V2I32Regs>;
-defm LD_v4i32 : LD<V4I32Regs>;
-defm LD_v2f32 : LD<V2F32Regs>;
-defm LD_v4f32 : LD<V4F32Regs>;
-defm LD_v2i64 : LD<V2I64Regs>;
-defm LD_v2f64 : LD<V2F64Regs>;
-}
-
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
@@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> {
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr], $src;"), []>;
+ def _areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr], $src;"), []>;
def _ari : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
" \t[$addr+$offset], $src;"), []>;
+ def _ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
+ LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
+ "\t[$addr+$offset], $src;"), []>;
def _asi : NVPTXInst<(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
@@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
-let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in {
-defm ST_v2i8 : ST<V2I8Regs>;
-defm ST_v4i8 : ST<V4I8Regs>;
-defm ST_v2i16 : ST<V2I16Regs>;
-defm ST_v4i16 : ST<V4I16Regs>;
-defm ST_v2i32 : ST<V2I32Regs>;
-defm ST_v4i32 : ST<V4I32Regs>;
-defm ST_v2f32 : ST<V2F32Regs>;
-defm ST_v4f32 : ST<V4F32Regs>;
-defm ST_v2i64 : ST<V2I64Regs>;
-defm ST_v2f64 : ST<V2F64Regs>;
-}
-
// The following is used only in and after vector elementizations.
// Vector elementization happens at the machine instruction level, so the
// following instruction
@@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),
@@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4),
+ (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
+ []>;
def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
+ def _v2_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
def _v2_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
+ def _v2_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
+ i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
def _v2_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
@@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
i32imm:$fromWidth, Int32Regs:$addr),
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
+ def _v4_areg_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
def _v4_ari : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
+ def _v4_ari_64 : NVPTXInst<(outs),
+ (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
+ LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
+ i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
+ !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
+ "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
+ []>;
def _v4_asi : NVPTXInst<(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
@@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
-include "NVPTXVector.td"
-
include "NVPTXIntrinsics.td"
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 028a94bfd1..49e2568dfa 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
-// Vector ldu
-multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp,
- NVPTXInst eleInst, NVPTXInst eleInst64> {
- def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>,
- Requires<[hasLDU]>;
- def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>,
- Requires<[hasLDU]>;
+
+//-----------------------------------
+// Support for ldg on sm_35 or later
+//-----------------------------------
+
+def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ return M->getMemoryVT() == MVT::i8;
+}]>;
+
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDG]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ld.global.nc.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+}
+
+defm INT_PTX_LDG_GLOBAL_i8
+ : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+defm INT_PTX_LDG_GLOBAL_i16
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_i64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+defm INT_PTX_LDG_GLOBAL_f32
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_f64
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+defm INT_PTX_LDG_GLOBAL_p32
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+defm INT_PTX_LDG_GLOBAL_p64
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+
+// vector
+
+// Elementized vector ldg
+multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
+ (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
-let VecInstType=isVecLD.Value in {
-defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];",
- V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32,
- INT_PTX_LDU_G_v2i8_ELE_64>;
-defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];",
- V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32,
- INT_PTX_LDU_G_v4i8_ELE_64>;
-defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];",
- V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32,
- INT_PTX_LDU_G_v2i16_ELE_64>;
-defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];",
- V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32,
- INT_PTX_LDU_G_v4i16_ELE_64>;
-defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];",
- V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32,
- INT_PTX_LDU_G_v2i32_ELE_64>;
-defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];",
- V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32,
- INT_PTX_LDU_G_v4i32_ELE_64>;
-defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];",
- V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32,
- INT_PTX_LDU_G_v2f32_ELE_64>;
-defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];",
- V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32,
- INT_PTX_LDU_G_v4f32_ELE_64>;
-defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];",
- V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32,
- INT_PTX_LDU_G_v2i64_ELE_64>;
-defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];",
- V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32,
- INT_PTX_LDU_G_v2f64_ELE_64>;
+multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
+ def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
+ def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
+ regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src),
+ !strconcat("ld.global.nc.", TyStr), []>;
}
+// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
+defm INT_PTX_LDG_G_v2i8_ELE
+ : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i16_ELE
+ : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v2i32_ELE
+ : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f32_ELE
+ : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
+defm INT_PTX_LDG_G_v2i64_ELE
+ : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
+defm INT_PTX_LDG_G_v2f64_ELE
+ : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
+defm INT_PTX_LDG_G_v4i8_ELE
+ : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i16_ELE
+ : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
+defm INT_PTX_LDG_G_v4i32_ELE
+ : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f32_ELE
+ : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index e0c9161f31..8e105b5028 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -54,36 +54,6 @@ std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
}
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return ".v2.f32";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return ".v4.f32";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return ".v2.s32";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return ".v4.s32";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return ".v2.f64";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return ".v2.s64";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return ".v4.s16";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return ".v2.s16";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return ".v4.s16";
- }
else {
return "INTERNAL";
}
@@ -115,137 +85,11 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
else if (RC == &NVPTX::SpecialRegsRegClass) {
return "!Special!";
}
- else if (RC == &NVPTX::V2F32RegsRegClass) {
- return "%v2f";
- }
- else if (RC == &NVPTX::V4F32RegsRegClass) {
- return "%v4f";
- }
- else if (RC == &NVPTX::V2I32RegsRegClass) {
- return "%v2r";
- }
- else if (RC == &NVPTX::V4I32RegsRegClass) {
- return "%v4r";
- }
- else if (RC == &NVPTX::V2F64RegsRegClass) {
- return "%v2fd";
- }
- else if (RC == &NVPTX::V2I64RegsRegClass) {
- return "%v2rd";
- }
- else if (RC == &NVPTX::V2I16RegsRegClass) {
- return "%v2s";
- }
- else if (RC == &NVPTX::V4I16RegsRegClass) {
- return "%v4rs";
- }
- else if (RC == &NVPTX::V2I8RegsRegClass) {
- return "%v2rc";
- }
- else if (RC == &NVPTX::V4I8RegsRegClass) {
- return "%v4rc";
- }
else {
return "INTERNAL";
}
return "";
}
-
-bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return true;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return true;
- return false;
-}
-
-std::string getNVPTXElemClassName(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return (&NVPTX::Float64RegsRegClass);
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return (&NVPTX::Int64RegsRegClass);
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return (&NVPTX::Float32RegsRegClass);
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return (&NVPTX::Int16RegsRegClass);
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return (&NVPTX::Int32RegsRegClass);
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return (&NVPTX::Int8RegsRegClass);
- llvm_unreachable("Not a vector register class");
-}
-
-int getNVPTXVectorSize(TargetRegisterClass const *RC) {
- if (RC->getID() == NVPTX::V2F32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2F64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I16RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I32RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I64RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V2I8RegsRegClassID)
- return 2;
- if (RC->getID() == NVPTX::V4F32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I16RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I32RegsRegClassID)
- return 4;
- if (RC->getID() == NVPTX::V4I8RegsRegClassID)
- return 4;
- llvm_unreachable("Not a vector register class");
-}
}
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index a3e1252b49..56e6289b1b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -81,10 +81,6 @@ public:
std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
-bool isNVPTXVectorRegClass (const TargetRegisterClass *RC);
-std::string getNVPTXElemClassName (const TargetRegisterClass *RC);
-int getNVPTXVectorSize (const TargetRegisterClass *RC);
-const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC);
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index ba158258b9..8d100d6316 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -37,9 +37,6 @@ foreach i = 0-395 in {
def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
- // Vectors
- foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in
- def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>;
// Arguments
def ia#i : NVPTXReg<"%ia"#i>;
@@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-def V2F32Regs
- : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Float32Regs, 2, ".v2.f32">;
-def V4F32Regs
- : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Float32Regs, 4, ".v4.f32">;
-def V2I32Regs
- : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
- Int32Regs, 2, ".v2.u32">;
-def V4I32Regs
- : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
- Int32Regs, 4, ".v4.u32">;
-def V2F64Regs
- : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Float64Regs, 2, ".v2.f64">;
-def V2I64Regs
- : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
- Int64Regs, 2, ".v2.u64">;
-def V2I16Regs
- : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
- Int16Regs, 2, ".v2.u16">;
-def V4I16Regs
- : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
- Int16Regs, 4, ".v4.u16">;
-def V2I8Regs
- : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
- Int8Regs, 2, ".v2.u8">;
-def V4I8Regs
- : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
- Int8Regs, 4, ".v4.u8">;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index e6cb7c2def..beea77e38d 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -57,6 +57,7 @@ public:
bool hasF32FTZ() const { return SmVersion >= 20; }
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
+ bool hasLDG() const { return SmVersion >= 32; }
bool hasLDU() const { return SmVersion >= 20; }
bool hasGenericLdSt() const { return SmVersion >= 20; }
inline bool hasHWROT32() const { return false; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index b4e049ea3e..cd765fa8cb 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -123,7 +123,6 @@ bool NVPTXPassConfig::addInstSelector() {
addPass(createSplitBBatBarPass());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
- addPass(createVectorElementizePass(getNVPTXTargetMachine()));
return false;
}
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
deleted file mode 100644
index f1b285dd78..0000000000
--- a/lib/Target/NVPTX/VectorElementize.cpp
+++ /dev/null
@@ -1,1239 +0,0 @@
-//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts operations on vector types to operations on their
-// element types.
-//
-// For generic binary and unary vector instructions, the conversion is simple.
-// Suppose we have
-// av = bv Vop cv
-// where av, bv, and cv are vector virtual registers, and Vop is a vector op.
-// This gets converted to the following :
-// a1 = b1 Sop c1
-// a2 = b2 Sop c2
-//
-// VectorToScalarMap maintains the vector vreg to scalar vreg mapping.
-// For the above example, the map will look as follows:
-// av => [a1, a2]
-// bv => [b1, b2]
-//
-// In addition, initVectorInfo creates the following opcode->opcode map.
-// Vop => Sop
-// OtherVop => OtherSop
-// ...
-//
-// For vector specific instructions like vecbuild, vecshuffle etc, the
-// conversion is different. Look at comments near the functions with
-// prefix createVec<...>.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass {
- virtual bool runOnMachineFunction(MachineFunction &F);
-
- NVPTXTargetMachine &TM;
- MachineRegisterInfo *MRI;
- const NVPTXRegisterInfo *RegInfo;
- const NVPTXInstrInfo *InstrInfo;
-
- llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *>
- RegClassMap;
- llvm::DenseMap<unsigned, bool> SimpleMoveMap;
-
- llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap;
-
- bool isVectorInstr(MachineInstr *);
-
- SmallVector<unsigned, 4> getScalarRegisters(unsigned);
- unsigned getScalarVersion(unsigned);
- unsigned getScalarVersion(MachineInstr *);
-
- bool isVectorRegister(unsigned);
- const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC);
- unsigned numCopiesNeeded(MachineInstr *);
-
- void createLoadCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
- void createStoreCopy(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createVecDest(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- void createCopies(MachineFunction&, MachineInstr *,
- std::vector<MachineInstr *>&);
-
- unsigned copyProp(MachineFunction&);
- unsigned removeDeadMoves(MachineFunction&);
-
- void elementize(MachineFunction&);
-
- bool isSimpleMove(MachineInstr *);
-
- void createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
- void createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies);
-
-public:
-
- static char ID; // Pass identification, replacement for typeid
- VectorElementize(NVPTXTargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) {}
-
- virtual const char *getPassName() const {
- return "Convert LLVM vector types to their element types";
- }
-};
-
-char VectorElementize::ID = 1;
-}
-
-static cl::opt<bool>
-RemoveRedundantMoves("nvptx-remove-redundant-moves",
- cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"),
- cl::init(true));
-
-#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \
- >> NVPTX::VecInstTypeShift)
-#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP)
-#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad)
-#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore)
-#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild)
-#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle)
-#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract)
-#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert)
-#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest)
-
-bool VectorElementize::isSimpleMove(MachineInstr *mi) {
- if (mi->isCopy())
- return true;
- unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask)
- >> NVPTX::SimpleMoveShift;
- return (TSFlags == 1);
-}
-
-bool VectorElementize::isVectorInstr(MachineInstr *mi) {
- if ((mi->getOpcode() == NVPTX::PHI) ||
- (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) {
- MachineOperand dest = mi->getOperand(0);
- return isVectorRegister(dest.getReg());
- }
- return ISVECINST(mi);
-}
-
-unsigned VectorElementize::getScalarVersion(MachineInstr *mi) {
- return getScalarVersion(mi->getOpcode());
-}
-
-///=============================================================================
-///Instr is assumed to be a vector instruction. For most vector instructions,
-///the size of the destination vector register gives the number of scalar copies
-///needed. For VecStore, size of getOperand(1) gives the number of scalar copies
-///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the
-///number of scalar copies needed.
-///=============================================================================
-unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) {
- unsigned numDefs=0;
- unsigned def;
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
-
- if (!oper.isReg()) continue;
- if (!oper.isDef()) continue;
- def = i;
- numDefs++;
- }
- assert((numDefs <= 1) && "Only 0 or 1 defs supported");
-
- if (numDefs == 1) {
- unsigned regnum = Instr->getOperand(def).getReg();
- if (ISVECEXTRACT(Instr))
- regnum = Instr->getOperand(1).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- else if (numDefs == 0) {
- assert(ISVECSTORE(Instr)
- && "Only 0 def instruction supported is vector store");
-
- unsigned regnum = Instr->getOperand(0).getReg();
- return getNVPTXVectorSize(MRI->getRegClass(regnum));
- }
- return 1;
-}
-
-const TargetRegisterClass *VectorElementize::
-getScalarRegClass(const TargetRegisterClass *RC) {
- assert(isNVPTXVectorRegClass(RC) &&
- "Not a vector register class");
- return getNVPTXElemClass(RC);
-}
-
-bool VectorElementize::isVectorRegister(unsigned reg) {
- const TargetRegisterClass *RC=MRI->getRegClass(reg);
- return isNVPTXVectorRegClass(RC);
-}
-
-///=============================================================================
-///For every vector register 'v' that is not already in the VectorToScalarMap,
-///create n scalar registers of the corresponding element type, where n
-///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap.
-///=============================================================================
-SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) {
- assert(isVectorRegister(regnum) && "Expecting a vector register here");
- // Create the scalar registers and put them in the map, if not already there.
- if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) {
- const TargetRegisterClass *vecClass = MRI->getRegClass(regnum);
- const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass);
-
- SmallVector<unsigned, 4> temp;
-
- for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i)
- temp.push_back(MRI->createVirtualRegister(scalarClass));
-
- VectorToScalarMap[regnum] = temp;
- }
- return VectorToScalarMap[regnum];
-}
-
-///=============================================================================
-///For a vector load of the form
-///va <= ldv2 [addr]
-///the following multi output instruction is created :
-///[v1, v2] <= LD [addr]
-///Look at NVPTXVector.td for the definitions of multi output loads.
-///=============================================================================
-void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstrBuilder copy(F, copies[0]);
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, that should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy.addReg(scalarRegs[i], RegState::Define);
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy.addOperand(otherOperands[i]);
-
-}
-
-///=============================================================================
-///For a vector store of the form
-///stv2 va, [addr]
-///the following multi input instruction is created :
-///ST v1, v2, [addr]
-///Look at NVPTXVector.td for the definitions of multi input stores.
-///=============================================================================
-void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstrBuilder copy(F, copies[0]);
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- MachineOperand src = copy->getOperand(0);
- unsigned regnum = src.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy.addReg(scalarRegs[i]);
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy.addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///va <= shufflev2 vb, vc, <i1>, <i2>
-///gets converted to 2 moves into a1 and a2. The source of the moves depend on
-///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For
-///shufflev4 the set is {0,..7}. For example, if i1=3, i2=0, the move
-///instructions will be
-///a1 <= c2
-///a2 <= b1
-///=============================================================================
-void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned src1regnum = Instr->getOperand(1).getReg();
- unsigned src2regnum = Instr->getOperand(2).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum);
- SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstrBuilder copy =
- BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
- MachineOperand which=Instr->getOperand(3+i);
- assert(which.isImm() && "Shuffle operand not a constant");
-
- int src=which.getImm();
- int elem=src%numcopies;
-
- if (which.getImm() < numcopies)
- copy.addReg(src1[elem]);
- else
- copy.addReg(src2[elem]);
- copies.push_back(copy);
- }
-}
-
-///=============================================================================
-///a <= extractv2 va, <i1>
-///gets turned into a simple move to the scalar register a. The source depends
-///on i1.
-///=============================================================================
-void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which = Instr->getOperand(2);
- assert(which.isImm() && "Extract operand not a constant");
-
- DebugLoc DL = Instr->getDebugLoc();
- copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- Instr->getOperand(0).getReg())
- .addReg(src[which.getImm()]));
-}
-
-///=============================================================================
-///va <= vecinsertv2 vb, c, <i1>
-///This instruction copies all elements of vb to va, except the 'i1'th element.
-///The scalar value c becomes the 'i1'th element of va.
-///This gets translated to 2 (4 for vecinsertv4) moves.
-///=============================================================================
-void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
- unsigned srcregnum = Instr->getOperand(1).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
- SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum);
-
- MachineOperand which=Instr->getOperand(3);
- assert(which.isImm() && "Insert operand not a constant");
- unsigned int elem=which.getImm();
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++) {
- MachineInstrBuilder copy =
- BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]);
-
- if (i != elem)
- copy.addReg(src[i]);
- else
- copy.addOperand(Instr->getOperand(2));
-
- copies.push_back(copy);
- }
-
-}
-
-///=============================================================================
-///va <= buildv2 b1, b2
-///gets translated to
-///a1 <= b1
-///a2 <= b2
-///=============================================================================
-void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- unsigned numcopies=numCopiesNeeded(Instr);
-
- unsigned destregnum = Instr->getOperand(0).getReg();
-
- SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum);
-
- DebugLoc DL = Instr->getDebugLoc();
-
- for (unsigned i=0; i<numcopies; i++)
- copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)),
- dest[i])
- .addOperand(Instr->getOperand(1+i)));
-}
-
-///=============================================================================
-///For a tex inst of the form
-///va <= op [scalar operands]
-///the following multi output instruction is created :
-///[v1, v2] <= op' [scalar operands]
-///=============================================================================
-void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- copies.push_back(F.CloneMachineInstr(Instr));
-
- MachineInstrBuilder copy(F, copies[0]);
- copy->setDesc(InstrInfo->get(getScalarVersion(copy)));
-
- // Remove the dest, that should be a vector operand.
- MachineOperand dest = copy->getOperand(0);
- unsigned regnum = dest.getReg();
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy->RemoveOperand(0);
-
- std::vector<MachineOperand> otherOperands;
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- otherOperands.push_back(copy->getOperand(i));
-
- for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i)
- copy->RemoveOperand(0);
-
- for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i)
- copy.addReg(scalarRegs[i], RegState::Define);
-
- for (unsigned i=0, e=otherOperands.size(); i!=e; ++i)
- copy.addOperand(otherOperands[i]);
-}
-
-///=============================================================================
-///Look at the vector instruction type and dispatch to the createVec<...>
-///function that creates the scalar copies.
-///=============================================================================
-void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr,
- std::vector<MachineInstr *>& copies) {
- if (ISVECLOAD(Instr)) {
- createLoadCopy(F, Instr, copies);
- return;
- }
- if (ISVECSTORE(Instr)) {
- createStoreCopy(F, Instr, copies);
- return;
- }
- if (ISVECSHUFFLE(Instr)) {
- createVecShuffle(F, Instr, copies);
- return;
- }
- if (ISVECEXTRACT(Instr)) {
- createVecExtract(F, Instr, copies);
- return;
- }
- if (ISVECINSERT(Instr)) {
- createVecInsert(F, Instr, copies);
- return;
- }
- if (ISVECDEST(Instr)) {
- createVecDest(F, Instr, copies);
- return;
- }
- if (ISVECBUILD(Instr)) {
- createVecBuild(F, Instr, copies);
- return;
- }
-
- unsigned numcopies=numCopiesNeeded(Instr);
-
- for (unsigned i=0; i<numcopies; ++i)
- copies.push_back(F.CloneMachineInstr(Instr));
-
- for (unsigned i=0; i<numcopies; ++i) {
- MachineInstrBuilder copy(F, copies[i]);
-
- std::vector<MachineOperand> allOperands;
- std::vector<bool> isDef;
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) {
- MachineOperand oper = copy->getOperand(j);
- allOperands.push_back(oper);
- if (oper.isReg())
- isDef.push_back(oper.isDef());
- else
- isDef.push_back(false);
- }
-
- for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j)
- copy->RemoveOperand(0);
-
- copy->setDesc(InstrInfo->get(getScalarVersion(Instr)));
-
- for (unsigned j=0, e=allOperands.size(); j!=e; ++j) {
- MachineOperand oper=allOperands[j];
- if (oper.isReg()) {
- unsigned regnum = oper.getReg();
- if (isVectorRegister(regnum)) {
-
- SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum);
- copy.addReg(scalarRegs[i], getDefRegState(isDef[j]));
- }
- else
- copy.addOperand(oper);
- }
- else
- copy.addOperand(oper);
- }
- }
-}
-
-///=============================================================================
-///Scan through all basic blocks, looking for vector instructions.
-///For each vector instruction I, insert the scalar copies before I, and
-///add I into toRemove vector. Finally remove all instructions in toRemove.
-///=============================================================================
-void VectorElementize::elementize(MachineFunction &F) {
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend();
- BI!=BE; ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- std::vector<MachineInstr *> copies;
- std::vector<MachineInstr *> toRemove;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end();
- II!=IE; ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isVectorInstr(Instr))
- continue;
-
- copies.clear();
- createCopies(F, Instr, copies);
- for (unsigned i=0, e=copies.size(); i!=e; ++i)
- BB->insert(II, copies[i]);
-
- assert((copies.size() > 0) && "Problem in createCopies");
- toRemove.push_back(Instr);
- }
- for (unsigned i=0, e=toRemove.size(); i!=e; ++i)
- F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i]));
- }
-}
-
-///=============================================================================
-///a <= b
-///...
-///...
-///x <= op(a, ...)
-///gets converted to
-///
-///x <= op(b, ...)
-///The original move is still present. This works on SSA form machine code.
-///Note that a <= b should be a simple vreg-to-vreg move instruction.
-///TBD : I didn't find a function that can do replaceOperand, so I remove
-///all operands and add all of them again, replacing the one while adding.
-///=============================================================================
-unsigned VectorElementize::copyProp(MachineFunction &F) {
- unsigned numReplacements = 0;
-
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- // Don't do copy propagation on PHI as it will cause unnecessary
- // live range overlap.
- if ((Instr->getOpcode() == TargetOpcode::PHI) ||
- (Instr->getOpcode() == TargetOpcode::DBG_VALUE))
- continue;
-
- bool needsReplacement = false;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- if (!oper.isReg()) continue;
- if (oper.isDef()) continue;
- if (!RegInfo->isVirtualRegister(oper.getReg())) continue;
-
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
-
- if (!defInstr) continue;
-
- if (!isSimpleMove(defInstr)) continue;
-
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!defSrc.isReg()) continue;
- if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue;
-
- needsReplacement = true;
-
- }
- if (!needsReplacement) continue;
-
- numReplacements++;
-
- std::vector<MachineOperand> operands;
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) {
- MachineOperand oper = Instr->getOperand(i);
- bool flag = false;
- do {
- if (!(oper.isReg()))
- break;
- if (oper.isDef())
- break;
- if (!(RegInfo->isVirtualRegister(oper.getReg())))
- break;
- MachineInstr *defInstr = MRI->getVRegDef(oper.getReg());
- if (!(isSimpleMove(defInstr)))
- break;
- MachineOperand defSrc = defInstr->getOperand(1);
- if (!(defSrc.isReg()))
- break;
- if (!(RegInfo->isVirtualRegister(defSrc.getReg())))
- break;
- operands.push_back(defSrc);
- flag = true;
- } while (0);
- if (flag == false)
- operands.push_back(oper);
- }
-
- for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i)
- Instr->RemoveOperand(0);
- for (unsigned i=0, e=operands.size(); i!=e; ++i)
- Instr->addOperand(F, operands[i]);
-
- }
- }
- return numReplacements;
-}
-
-///=============================================================================
-///Look for simple vreg-to-vreg instructions whose use_empty() is true, add
-///them to deadMoves vector. Then remove all instructions in deadMoves.
-///=============================================================================
-unsigned VectorElementize::removeDeadMoves(MachineFunction &F) {
- std::vector<MachineInstr *> deadMoves;
- for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE;
- ++BI) {
- MachineBasicBlock *BB = &*BI;
-
- for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE;
- ++II) {
- MachineInstr *Instr = &*II;
-
- if (!isSimpleMove(Instr)) continue;
-
- MachineOperand dest = Instr->getOperand(0);
- assert(dest.isReg() && "dest of move not a register");
- assert(RegInfo->isVirtualRegister(dest.getReg()) &&
- "dest of move not a virtual register");
-
- if (MRI->use_empty(dest.getReg())) {
- deadMoves.push_back(Instr);
- }
- }
- }
-
- for (unsigned i=0, e=deadMoves.size(); i!=e; ++i)
- F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i]));
-
- return deadMoves.size();
-}
-
-///=============================================================================
-///Main function for this pass.
-///=============================================================================
-bool VectorElementize::runOnMachineFunction(MachineFunction &F) {
- MRI = &F.getRegInfo();
-
- RegInfo = TM.getRegisterInfo();
- InstrInfo = TM.getInstrInfo();
-
- VectorToScalarMap.clear();
-
- elementize(F);
-
- if (RemoveRedundantMoves)
- while (1) {
- if (copyProp(F) == 0) break;
- removeDeadMoves(F);
- }
-
- return true;
-}
-
-FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) {
- return new VectorElementize(tm);
-}
-
-unsigned VectorElementize::getScalarVersion(unsigned opcode) {
- if (opcode == NVPTX::PHI)
- return opcode;
- if (opcode == NVPTX::IMPLICIT_DEF)
- return opcode;
- switch(opcode) {
- default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td");
- case TargetOpcode::COPY: return TargetOpcode::COPY;
- case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr;
- case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr;
- case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr;
- case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr;
- case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr;
- case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr;
- case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr;
- case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr;
- case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr;
- case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr;
- case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32;
- case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32;
- case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32;
- case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV2: return NVPTX::FMAD32rrr;
- case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr;
- case NVPTX::F32MADV4: return NVPTX::FMAD32rrr;
- case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV2: return NVPTX::FMA32rrr;
- case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr;
- case NVPTX::F32FMAV4: return NVPTX::FMA32rrr;
- case NVPTX::F64FMAV2: return NVPTX::FMA64rrr;
- case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64;
- case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32;
- case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64;
- case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32;
- case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64;
- case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32;
- case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64;
- case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32;
- case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64;
- case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32;
- case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64;
- case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32;
- case NVPTX::FVecNEV2F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64;
- case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32;
- case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64;
- case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32;
- case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64;
- case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32;
- case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64;
- case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32;
- case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64;
- case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32;
- case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64;
- case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32;
- case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64;
- case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32;
- case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64;
- case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32;
- case NVPTX::I16MADV2: return NVPTX::MAD16rrr;
- case NVPTX::I16MADV4: return NVPTX::MAD16rrr;
- case NVPTX::I32MADV2: return NVPTX::MAD32rrr;
- case NVPTX::I32MADV4: return NVPTX::MAD32rrr;
- case NVPTX::I64MADV2: return NVPTX::MAD64rrr;
- case NVPTX::I8MADV2: return NVPTX::MAD8rrr;
- case NVPTX::I8MADV4: return NVPTX::MAD8rrr;
- case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr;
- case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr;
- case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr;
- case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr;
- case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr;
- case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr;
- case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr;
- case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr;
- case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr;
- case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr;
- case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr;
- case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr;
- case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr;
- case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr;
- case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr;
- case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr;
- case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V2F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V2F64Div: return NVPTX::FDIV64rr;
- case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr;
- case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V2I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr;
- case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V2f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V2f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V2f64Extract: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Insert: return NVPTX::FMOV64rr;
- case NVPTX::V2f64Mov: return NVPTX::FMOV64rr;
- case NVPTX::V2i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V2i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V2i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V2i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V2i64Extract: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Insert: return NVPTX::IMOV64rr;
- case NVPTX::V2i64Mov: return NVPTX::IMOV64rr;
- case NVPTX::V2i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V2i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz;
- case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec;
- case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz;
- case NVPTX::V4F32Div: return NVPTX::FDIV32rr;
- case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr;
- case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr;
- case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr;
- case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr;
- case NVPTX::V4f32Extract: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Insert: return NVPTX::FMOV32rr;
- case NVPTX::V4f32Mov: return NVPTX::FMOV32rr;
- case NVPTX::V4i16Extract: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Insert: return NVPTX::IMOV16rr;
- case NVPTX::V4i16Mov: return NVPTX::IMOV16rr;
- case NVPTX::V4i32Extract: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Insert: return NVPTX::IMOV32rr;
- case NVPTX::V4i32Mov: return NVPTX::IMOV32rr;
- case NVPTX::V4i8Extract: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Insert: return NVPTX::IMOV8rr;
- case NVPTX::V4i8Mov: return NVPTX::IMOV8rr;
- case NVPTX::VAddV2I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV2I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV2I64: return NVPTX::ADDi64rr;
- case NVPTX::VAddV2I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddV4I16: return NVPTX::ADDi16rr;
- case NVPTX::VAddV4I32: return NVPTX::ADDi32rr;
- case NVPTX::VAddV4I8: return NVPTX::ADDi8rr;
- case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr;
- case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr;
- case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz;
- case NVPTX::VAndV2I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV2I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV2I64: return NVPTX::ANDb64rr;
- case NVPTX::VAndV2I8: return NVPTX::ANDb8rr;
- case NVPTX::VAndV4I16: return NVPTX::ANDb16rr;
- case NVPTX::VAndV4I32: return NVPTX::ANDb32rr;
- case NVPTX::VAndV4I8: return NVPTX::ANDb8rr;
- case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr;
- case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr;
- case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz;
- case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr;
- case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr;
- case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHSV4I16: return NVPTX::MULTHSi16rr;
- case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr;
- case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr;
- case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr;
- case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr;
- case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr;
- case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr;
- case NVPTX::VMultV2I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV2I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV2I64: return NVPTX::MULTi64rr;
- case NVPTX::VMultV2I8: return NVPTX::MULTi8rr;
- case NVPTX::VMultV4I16: return NVPTX::MULTi16rr;
- case NVPTX::VMultV4I32: return NVPTX::MULTi32rr;
- case NVPTX::VMultV4I8: return NVPTX::MULTi8rr;
- case NVPTX::VNegV2I16: return NVPTX::INEG16;
- case NVPTX::VNegV2I32: return NVPTX::INEG32;
- case NVPTX::VNegV2I64: return NVPTX::INEG64;
- case NVPTX::VNegV2I8: return NVPTX::INEG8;
- case NVPTX::VNegV4I16: return NVPTX::INEG16;
- case NVPTX::VNegV4I32: return NVPTX::INEG32;
- case NVPTX::VNegV4I8: return NVPTX::INEG8;
- case NVPTX::VNegv2f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNegv2f64: return NVPTX::FNEGf64;
- case NVPTX::VNegv4f32: return NVPTX::FNEGf32;
- case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz;
- case NVPTX::VNotV2I16: return NVPTX::NOT16;
- case NVPTX::VNotV2I32: return NVPTX::NOT32;
- case NVPTX::VNotV2I64: return NVPTX::NOT64;
- case NVPTX::VNotV2I8: return NVPTX::NOT8;
- case NVPTX::VNotV4I16: return NVPTX::NOT16;
- case NVPTX::VNotV4I32: return NVPTX::NOT32;
- case NVPTX::VNotV4I8: return NVPTX::NOT8;
- case NVPTX::VOrV2I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV2I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV2I64: return NVPTX::ORb64rr;
- case NVPTX::VOrV2I8: return NVPTX::ORb8rr;
- case NVPTX::VOrV4I16: return NVPTX::ORb16rr;
- case NVPTX::VOrV4I32: return NVPTX::ORb32rr;
- case NVPTX::VOrV4I8: return NVPTX::ORb8rr;
- case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr;
- case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr;
- case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr;
- case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr;
- case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr;
- case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr;
- case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr;
- case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr;
- case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr;
- case NVPTX::VSubV2I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV2I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV2I64: return NVPTX::SUBi64rr;
- case NVPTX::VSubV2I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubV4I16: return NVPTX::SUBi16rr;
- case NVPTX::VSubV4I32: return NVPTX::SUBi32rr;
- case NVPTX::VSubV4I8: return NVPTX::SUBi8rr;
- case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr;
- case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr;
- case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz;
- case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr;
- case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr;
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr;
- case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr;
- case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr;
- case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr;
- case NVPTX::VURemV2I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV2I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV2I64: return NVPTX::UREMi64rr;
- case NVPTX::VURemV2I8: return NVPTX::UREMi8rr;
- case NVPTX::VURemV4I16: return NVPTX::UREMi16rr;
- case NVPTX::VURemV4I32: return NVPTX::UREMi32rr;
- case NVPTX::VURemV4I8: return NVPTX::UREMi8rr;
- case NVPTX::VXorV2I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV2I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV2I64: return NVPTX::XORb64rr;
- case NVPTX::VXorV2I8: return NVPTX::XORb8rr;
- case NVPTX::VXorV4I16: return NVPTX::XORb16rr;
- case NVPTX::VXorV4I32: return NVPTX::XORb32rr;
- case NVPTX::VXorV4I8: return NVPTX::XORb8rr;
- case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64;
- case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16;
- case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32;
- case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8;
- case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64;
- case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16;
- case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32;
- case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8;
- case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64;
- case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16;
- case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32;
- case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8;
- case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64;
- case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16;
- case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32;
- case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8;
- case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64;
- case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16;
- case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32;
- case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8;
- case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64;
- case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16;
- case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32;
- case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8;
- case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr;
- case NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr;
- case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr;
- case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr;
- case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr;
- case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr;
- case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr;
- case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64;
- case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16;
- case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32;
- case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8;
- case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64;
- case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16;
- case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32;
- case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8;
- case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64;
- case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16;
- case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32;
- case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8;
- case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64;
- case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16;
- case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32;
- case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8;
- case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64;
- case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16;
- case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32;
- case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8;
- case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64;
- case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16;
- case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32;
- case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8;
- case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
-
- case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
- case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
- case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
- case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
- case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
- case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
- case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
- case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
- case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
- case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
- case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
- case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
- case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
- case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
- case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
- case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
- case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
- case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
- case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
- case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
- case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
- case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
- case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
- case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
- case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
- case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
- case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
- case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
- case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
- case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
- case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
- case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
- case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
- case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
- case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
- case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
-
- case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
- case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
- case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
- case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
- case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
- case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
- case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
- case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
-
- case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
- case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
- case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
- case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
- case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
- case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
- case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
- case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
-
- case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
- case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
- case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
- case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
- case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
- case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
- case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
- case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
-
- case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
- case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
- case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
- case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
- case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
- case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
- case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
- case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
-
- case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
- case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
- case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
- case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
- case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
- case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
- case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
- case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
-
- case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
- case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
- case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
- case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
- case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
- case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
- case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
- case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
-
- case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
- case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
- case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
- case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
- case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
- case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
- case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
- case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
-
- case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
- case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
- case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
- case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
- case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
- case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
- case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
- case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
-
- case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
- case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
- case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
- case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
- case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
- case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
- case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
- case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
-
- case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
- case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
- case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
- case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
- case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
- case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
- case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
- case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
- }
- return 0;
-}
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py
deleted file mode 100644
index ed06668231..0000000000
--- a/lib/Target/NVPTX/gen-register-defs.py
+++ /dev/null
@@ -1,202 +0,0 @@
-#!/usr/bin/env python
-
-num_regs = 396
-
-outFile = open('NVPTXRegisterInfo.td', 'w')
-
-outFile.write('''
-//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class NVPTXReg<string n> : Register<n> {
- let Namespace = "NVPTX";
-}
-
-class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
- : RegisterClass <"NVPTX", regTypes, alignment, regList>;
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
-
-// Special Registers used as the stack
-def VRDepot : NVPTXReg<"%Depot">;
-''')
-
-# Predicates
-outFile.write('''
-//===--- Predicate --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
-
-# Int8
-outFile.write('''
-//===--- 8-bit ------------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
-
-# Int16
-outFile.write('''
-//===--- 16-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
-
-# Int32
-outFile.write('''
-//===--- 32-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
-
-# Int64
-outFile.write('''
-//===--- 64-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
-
-# F32
-outFile.write('''
-//===--- 32-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
-
-# F64
-outFile.write('''
-//===--- 64-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
-
-# Vector registers
-outFile.write('''
-//===--- Vector -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
-
-for i in range(0, num_regs):
- outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
-
-# Argument registers
-outFile.write('''
-//===--- Arguments --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
-
-outFile.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('''
-// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-''')
-
-outFile.write('''
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-''')
-
-
-outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
-outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
-
-outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
-outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
-
-outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
-outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
-
-outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
-outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
-
-outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
-outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
-
-outFile.close()
-
-
-outFile = open('NVPTXNumRegisters.h', 'w')
-outFile.write('''
-//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NVPTX_NUM_REGISTERS_H
-#define NVPTX_NUM_REGISTERS_H
-
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = %d;
-
-}
-
-#endif
-''' % num_regs)
-
-outFile.close()