summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/IR/IntrinsicsNVVM.td2
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp413
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h5
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp995
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h30
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp3
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td609
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td36
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp6
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td2
-rw-r--r--test/CodeGen/NVPTX/compare-int.ll40
-rw-r--r--test/CodeGen/NVPTX/ld-addrspace.ll12
-rw-r--r--test/CodeGen/NVPTX/ld-generic.ll4
-rw-r--r--test/CodeGen/NVPTX/pr13291-i1-store.ll16
-rw-r--r--test/CodeGen/NVPTX/st-addrspace.ll12
-rw-r--r--test/CodeGen/NVPTX/st-generic.ll4
17 files changed, 1287 insertions, 905 deletions
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index c248517def..a372c22e43 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -861,8 +861,6 @@ def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
// Move intrinsics, used in nvvm internally
-def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
- "llvm.nvvm.move.i8">;
def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
"llvm.nvvm.move.i16">;
def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 84b088400c..9188262ca9 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -2016,7 +2016,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
case NVPTX::CallArgI32:
case NVPTX::CallArgI32imm:
case NVPTX::CallArgI64:
- case NVPTX::CallArgI8:
case NVPTX::CallArgParam:
case NVPTX::CallVoidInst:
case NVPTX::CallVoidInstReg:
@@ -2050,7 +2049,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
case NVPTX::LastCallArgI32:
case NVPTX::LastCallArgI32imm:
case NVPTX::LastCallArgI64:
- case NVPTX::LastCallArgI8:
case NVPTX::LastCallArgParam:
case NVPTX::LoadParamMemF32:
case NVPTX::LoadParamMemF64:
@@ -2063,7 +2061,6 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
case NVPTX::LoadParamRegI16:
case NVPTX::LoadParamRegI32:
case NVPTX::LoadParamRegI64:
- case NVPTX::LoadParamRegI8:
case NVPTX::PrototypeInst:
case NVPTX::DBG_VALUE:
return true;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ac6dbb9eb6..7a0a59f1ce 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -116,6 +116,23 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreV4:
ResNode = SelectStoreVector(N);
break;
+ case NVPTXISD::LoadParam:
+ case NVPTXISD::LoadParamV2:
+ case NVPTXISD::LoadParamV4:
+ ResNode = SelectLoadParam(N);
+ break;
+ case NVPTXISD::StoreRetval:
+ case NVPTXISD::StoreRetvalV2:
+ case NVPTXISD::StoreRetvalV4:
+ ResNode = SelectStoreRetval(N);
+ break;
+ case NVPTXISD::StoreParam:
+ case NVPTXISD::StoreParamV2:
+ case NVPTXISD::StoreParamV4:
+ case NVPTXISD::StoreParamS32:
+ case NVPTXISD::StoreParamU32:
+ ResNode = SelectStoreParam(N);
+ break;
default:
break;
}
@@ -771,7 +788,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
SDLoc DL(N);
SDNode *LD;
- EVT RetVT = N->getValueType(0);
+ MemSDNode *Mem = cast<MemSDNode>(N);
+
+ EVT RetVT = Mem->getMemoryVT().getVectorElementType();
// Select opcode
if (Subtarget.is64Bit()) {
@@ -1571,6 +1590,398 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
return ST;
}
+SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Offset = Node->getOperand(2);
+ SDValue Flag = Node->getOperand(3);
+ SDLoc DL(Node);
+ MemSDNode *Mem = cast<MemSDNode>(Node);
+
+ unsigned VecSize;
+ switch (Node->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::LoadParam:
+ VecSize = 1;
+ break;
+ case NVPTXISD::LoadParamV2:
+ VecSize = 2;
+ break;
+ case NVPTXISD::LoadParamV4:
+ VecSize = 4;
+ break;
+ }
+
+ EVT EltVT = Node->getValueType(0);
+ EVT MemVT = Mem->getMemoryVT();
+
+ unsigned Opc = 0;
+
+ switch (VecSize) {
+ default:
+ return NULL;
+ case 1:
+ switch (MemVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opc = NVPTX::LoadParamMemI8;
+ break;
+ case MVT::i8:
+ Opc = NVPTX::LoadParamMemI8;
+ break;
+ case MVT::i16:
+ Opc = NVPTX::LoadParamMemI16;
+ break;
+ case MVT::i32:
+ Opc = NVPTX::LoadParamMemI32;
+ break;
+ case MVT::i64:
+ Opc = NVPTX::LoadParamMemI64;
+ break;
+ case MVT::f32:
+ Opc = NVPTX::LoadParamMemF32;
+ break;
+ case MVT::f64:
+ Opc = NVPTX::LoadParamMemF64;
+ break;
+ }
+ break;
+ case 2:
+ switch (MemVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opc = NVPTX::LoadParamMemV2I8;
+ break;
+ case MVT::i8:
+ Opc = NVPTX::LoadParamMemV2I8;
+ break;
+ case MVT::i16:
+ Opc = NVPTX::LoadParamMemV2I16;
+ break;
+ case MVT::i32:
+ Opc = NVPTX::LoadParamMemV2I32;
+ break;
+ case MVT::i64:
+ Opc = NVPTX::LoadParamMemV2I64;
+ break;
+ case MVT::f32:
+ Opc = NVPTX::LoadParamMemV2F32;
+ break;
+ case MVT::f64:
+ Opc = NVPTX::LoadParamMemV2F64;
+ break;
+ }
+ break;
+ case 4:
+ switch (MemVT.getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opc = NVPTX::LoadParamMemV4I8;
+ break;
+ case MVT::i8:
+ Opc = NVPTX::LoadParamMemV4I8;
+ break;
+ case MVT::i16:
+ Opc = NVPTX::LoadParamMemV4I16;
+ break;
+ case MVT::i32:
+ Opc = NVPTX::LoadParamMemV4I32;
+ break;
+ case MVT::f32:
+ Opc = NVPTX::LoadParamMemV4F32;
+ break;
+ }
+ break;
+ }
+
+ SDVTList VTs;
+ if (VecSize == 1) {
+ VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
+ } else if (VecSize == 2) {
+ VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
+ } else {
+ EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
+ VTs = CurDAG->getVTList(&EVTs[0], 5);
+ }
+
+ unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(Chain);
+ Ops.push_back(Flag);
+
+ SDNode *Ret =
+ CurDAG->getMachineNode(Opc, DL, Node->getVTList(), Ops);
+ return Ret;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Offset = N->getOperand(1);
+ unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
+ MemSDNode *Mem = cast<MemSDNode>(N);
+
+ // How many elements do we have?
+ unsigned NumElts = 1;
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreRetval:
+ NumElts = 1;
+ break;
+ case NVPTXISD::StoreRetvalV2:
+ NumElts = 2;
+ break;
+ case NVPTXISD::StoreRetvalV4:
+ NumElts = 4;
+ break;
+ }
+
+ // Build vector of operands
+ SmallVector<SDValue, 6> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(N->getOperand(i + 2));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(Chain);
+
+ // Determine target opcode
+ // If we have an i1, use an 8-bit store. The lowering code in
+ // NVPTXISelLowering will have already emitted an upcast.
+ unsigned Opcode = 0;
+ switch (NumElts) {
+ default:
+ return NULL;
+ case 1:
+ switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opcode = NVPTX::StoreRetvalI8;
+ break;
+ case MVT::i8:
+ Opcode = NVPTX::StoreRetvalI8;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::StoreRetvalI16;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::StoreRetvalI32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::StoreRetvalI64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::StoreRetvalF32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::StoreRetvalF64;
+ break;
+ }
+ break;
+ case 2:
+ switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opcode = NVPTX::StoreRetvalV2I8;
+ break;
+ case MVT::i8:
+ Opcode = NVPTX::StoreRetvalV2I8;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::StoreRetvalV2I16;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::StoreRetvalV2I32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::StoreRetvalV2I64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::StoreRetvalV2F32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::StoreRetvalV2F64;
+ break;
+ }
+ break;
+ case 4:
+ switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opcode = NVPTX::StoreRetvalV4I8;
+ break;
+ case MVT::i8:
+ Opcode = NVPTX::StoreRetvalV4I8;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::StoreRetvalV4I16;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::StoreRetvalV4I32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::StoreRetvalV4F32;
+ break;
+ }
+ break;
+ }
+
+ SDNode *Ret =
+ CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return Ret;
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Param = N->getOperand(1);
+ unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
+ SDValue Offset = N->getOperand(2);
+ unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
+ MemSDNode *Mem = cast<MemSDNode>(N);
+ SDValue Flag = N->getOperand(N->getNumOperands() - 1);
+
+ // How many elements do we have?
+ unsigned NumElts = 1;
+ switch (N->getOpcode()) {
+ default:
+ return NULL;
+ case NVPTXISD::StoreParamU32:
+ case NVPTXISD::StoreParamS32:
+ case NVPTXISD::StoreParam:
+ NumElts = 1;
+ break;
+ case NVPTXISD::StoreParamV2:
+ NumElts = 2;
+ break;
+ case NVPTXISD::StoreParamV4:
+ NumElts = 4;
+ break;
+ }
+
+ // Build vector of operands
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(N->getOperand(i + 3));
+ Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(Chain);
+ Ops.push_back(Flag);
+
+ // Determine target opcode
+ // If we have an i1, use an 8-bit store. The lowering code in
+ // NVPTXISelLowering will have already emitted an upcast.
+ unsigned Opcode = 0;
+ switch (N->getOpcode()) {
+ default:
+ switch (NumElts) {
+ default:
+ return NULL;
+ case 1:
+ switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opcode = NVPTX::StoreParamI8;
+ break;
+ case MVT::i8:
+ Opcode = NVPTX::StoreParamI8;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::StoreParamI16;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::StoreParamI32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::StoreParamI64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::StoreParamF32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::StoreParamF64;
+ break;
+ }
+ break;
+ case 2:
+ switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opcode = NVPTX::StoreParamV2I8;
+ break;
+ case MVT::i8:
+ Opcode = NVPTX::StoreParamV2I8;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::StoreParamV2I16;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::StoreParamV2I32;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::StoreParamV2I64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::StoreParamV2F32;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::StoreParamV2F64;
+ break;
+ }
+ break;
+ case 4:
+ switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
+ default:
+ return NULL;
+ case MVT::i1:
+ Opcode = NVPTX::StoreParamV4I8;
+ break;
+ case MVT::i8:
+ Opcode = NVPTX::StoreParamV4I8;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::StoreParamV4I16;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::StoreParamV4I32;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::StoreParamV4F32;
+ break;
+ }
+ break;
+ }
+ break;
+ case NVPTXISD::StoreParamU32:
+ Opcode = NVPTX::StoreParamU32I16;
+ break;
+ case NVPTXISD::StoreParamS32:
+ Opcode = NVPTX::StoreParamS32I16;
+ break;
+ }
+
+ SDNode *Ret =
+ CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
+
+ return Ret;
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ed16d4450b..428e7b2288 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -80,7 +80,10 @@ private:
SDNode *SelectLDGLDUVector(SDNode *N);
SDNode *SelectStore(SDNode *N);
SDNode *SelectStoreVector(SDNode *N);
-
+ SDNode *SelectLoadParam(SDNode *N);
+ SDNode *SelectStoreRetval(SDNode *N);
+ SDNode *SelectStoreParam(SDNode *N);
+
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9679b05ab7..0396a6421a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -51,6 +51,8 @@ static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default:
return false;
+ case MVT::v2i1:
+ case MVT::v4i1:
case MVT::v2i8:
case MVT::v4i8:
case MVT::v2i16:
@@ -65,6 +67,37 @@ static bool IsPTXVectorType(MVT VT) {
}
}
+/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
+/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
+/// into their primitive components.
+/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
+/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
+/// LowerCall, and LowerReturn.
+static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets = 0,
+ uint64_t StartingOffset = 0) {
+ SmallVector<EVT, 16> TempVTs;
+ SmallVector<uint64_t, 16> TempOffsets;
+
+ ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
+ for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
+ EVT VT = TempVTs[i];
+ uint64_t Off = TempOffsets[i];
+ if (VT.isVector())
+ for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
+ ValueVTs.push_back(VT.getVectorElementType());
+ if (Offsets)
+ Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize());
+ }
+ else {
+ ValueVTs.push_back(VT);
+ if (Offsets)
+ Offsets->push_back(Off);
+ }
+ }
+}
+
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
@@ -90,7 +123,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setSchedulingPreference(Sched::Source);
addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
- addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
@@ -181,6 +213,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
}
}
+ // Custom handling for i8 intrinsics
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties();
@@ -293,6 +328,7 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}
+/*
std::string NVPTXTargetLowering::getPrototype(
Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
@@ -442,6 +478,152 @@ std::string NVPTXTargetLowering::getPrototype(
}
O << ");";
return O.str();
+}*/
+
+std::string
+NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ unsigned retAlignment,
+ const ImmutableCallSite *CS) const {
+
+ bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ assert(isABI && "Non-ABI compilation is not supported");
+ if (!isABI)
+ return "";
+
+ std::stringstream O;
+ O << "prototype_" << uniqueCallSite << " : .callprototype ";
+
+ if (retTy->getTypeID() == Type::VoidTyID) {
+ O << "()";
+ } else {
+ O << "(";
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
+ unsigned size = 0;
+ if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ size = ITy->getBitWidth();
+ if (size < 32)
+ size = 32;
+ } else {
+ assert(retTy->isFloatingPointTy() &&
+ "Floating point type expected here");
+ size = retTy->getPrimitiveSizeInBits();
+ }
+
+ O << ".param .b" << size << " _";
+ } else if (isa<PointerType>(retTy)) {
+ O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ } else {
+ if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) {
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, retTy, vtparts);
+ unsigned totalsz = 0;
+ for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[i];
+ if (vtparts[i].isVector()) {
+ elems = vtparts[i].getVectorNumElements();
+ elemtype = vtparts[i].getVectorElementType();
+ }
+ // TODO: no need to loop
+ for (unsigned j = 0, je = elems; j != je; ++j) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ totalsz += sz / 8;
+ }
+ }
+ O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ } else {
+ assert(false && "Unknown return type");
+ }
+ }
+ O << ") ";
+ }
+ O << "_ (";
+
+ bool first = true;
+ MVT thePointerTy = getPointerTy();
+
+ unsigned OIdx = 0;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
+ Type *Ty = Args[i].Ty;
+ if (!first) {
+ O << ", ";
+ }
+ first = false;
+
+ if (Outs[OIdx].Flags.isByVal() == false) {
+ if (Ty->isAggregateType() || Ty->isVectorTy()) {
+ unsigned align = 0;
+ const CallInst *CallI = cast<CallInst>(CS->getInstruction());
+ const DataLayout *TD = getDataLayout();
+ // +1 because index 0 is reserved for return type alignment
+ if (!llvm::getAlign(*CallI, i + 1, align))
+ align = TD->getABITypeAlignment(Ty);
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ O << ".param .align " << align << " .b8 ";
+ O << "_";
+ O << "[" << sz << "]";
+ // update the index for Outs
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, Ty, vtparts);
+ if (unsigned len = vtparts.size())
+ OIdx += len - 1;
+ continue;
+ }
+ assert(getValueType(Ty) == Outs[OIdx].VT &&
+ "type mismatch between callee prototype and arguments");
+ // scalar type
+ unsigned sz = 0;
+ if (isa<IntegerType>(Ty)) {
+ sz = cast<IntegerType>(Ty)->getBitWidth();
+ if (sz < 32)
+ sz = 32;
+ } else if (isa<PointerType>(Ty))
+ sz = thePointerTy.getSizeInBits();
+ else
+ sz = Ty->getPrimitiveSizeInBits();
+ O << ".param .b" << sz << " ";
+ O << "_";
+ continue;
+ }
+ const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ assert(PTy && "Param with byval attribute should be a pointer type");
+ Type *ETy = PTy->getElementType();
+
+ unsigned align = Outs[OIdx].Flags.getByValAlign();
+ unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
+ O << ".param .align " << align << " .b8 ";
+ O << "_";
+ O << "[" << sz << "]";
+ }
+ O << ");";
+ return O.str();
+}
+
+unsigned
+NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
+ const ImmutableCallSite *CS,
+ Type *Ty,
+ unsigned Idx) const {
+ const DataLayout *TD = getDataLayout();
+ unsigned align = 0;
+ GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
+
+ if (Func) { // direct call
+ assert(CS->getCalledFunction() &&
+ "direct call cannot find callee");
+ if (!llvm::getAlign(*(CS->getCalledFunction()), Idx, align))
+ align = TD->getABITypeAlignment(Ty);
+ }
+ else { // indirect call
+ const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
+ if (!llvm::getAlign(*CallI, Idx, align))
+ align = TD->getABITypeAlignment(Ty);
+ }
+
+ return align;
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -459,54 +641,257 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ImmutableCallSite *CS = CLI.CS;
bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ assert(isABI && "Non-ABI compilation is not supported");
+ if (!isABI)
+ return Chain;
+ const DataLayout *TD = getDataLayout();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ const TargetLowering *TLI = nvTM->getTargetLowering();
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, true),
- dl);
+ Chain =
+ DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+ dl);
SDValue InFlag = Chain.getValue(1);
- assert((Outs.size() == Args.size()) &&
- "Unexpected number of arguments to function call");
unsigned paramCount = 0;
+ // Args.size() and Outs.size() need not match.
+ // Outs.size() will be larger
+ // * if there is an aggregate argument with multiple fields (each field
+ // showing up separately in Outs)
+ // * if there is a vector argument with more than typical vector-length
+ // elements (generally if more than 4) where each vector element is
+ // individually present in Outs.
+ // So a different index should be used for indexing into Outs/OutVals.
+ // See similar issue in LowerFormalArguments.
+ unsigned OIdx = 0;
// Declare the .params or .reg need to pass values
// to the function
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
+ EVT VT = Outs[OIdx].VT;
+ Type *Ty = Args[i].Ty;
- if (Outs[i].Flags.isByVal() == false) {
+ if (Outs[OIdx].Flags.isByVal() == false) {
+ if (Ty->isAggregateType()) {
+ // aggregate
+ SmallVector<EVT, 16> vtparts;
+ ComputeValueVTs(*this, Ty, vtparts);
+
+ unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
+ // declare .param .align <align> .b8 .param<n>[<size>];
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ unsigned curOffset = 0;
+ for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
+ unsigned elems = 1;
+ EVT elemtype = vtparts[j];
+ if (vtparts[j].isVector()) {
+ elems = vtparts[j].getVectorNumElements();
+ elemtype = vtparts[j].getVectorElementType();
+ }
+ for (unsigned k = 0, ke = elems; k != ke; ++k) {
+ unsigned sz = elemtype.getSizeInBits();
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue StVal = OutVals[OIdx];
+ if (elemtype.getSizeInBits() < 16) {
+ StVal = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, StVal);
+ }
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32),
+ StVal, InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
+ CopyParamVTs, &CopyParamOps[0], 5,
+ elemtype, MachinePointerInfo());
+ InFlag = Chain.getValue(1);
+ curOffset += sz / 8;
+ ++OIdx;
+ }
+ }
+ if (vtparts.size() > 0)
+ --OIdx;
+ ++paramCount;
+ continue;
+ }
+ if (Ty->isVectorTy()) {
+ EVT ObjectVT = getValueType(Ty);
+ unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
+ // declare .param .align <align> .b8 .param<n>[<size>];
+ unsigned sz = TD->getTypeAllocSize(Ty);
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(sz, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ EVT EltVT = ObjectVT.getVectorElementType();
+ EVT MemVT = EltVT;
+ bool NeedExtend = false;
+ if (EltVT.getSizeInBits() < 16) {
+ NeedExtend = true;
+ EltVT = MVT::i16;
+ }
+
+ // V1 store
+ if (NumElts == 1) {
+ SDValue Elt = OutVals[OIdx++];
+ if (NeedExtend)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
+
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), Elt,
+ InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
+ CopyParamVTs, &CopyParamOps[0], 5,
+ MemVT, MachinePointerInfo());
+ InFlag = Chain.getValue(1);
+ } else if (NumElts == 2) {
+ SDValue Elt0 = OutVals[OIdx++];
+ SDValue Elt1 = OutVals[OIdx++];
+ if (NeedExtend) {
+ Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
+ Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
+ }
+
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(0, MVT::i32), Elt0, Elt1,
+ InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
+ CopyParamVTs, &CopyParamOps[0], 6,
+ MemVT, MachinePointerInfo());
+ InFlag = Chain.getValue(1);
+ } else {
+ unsigned curOffset = 0;
+ // V4 stores
+ // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
+ // the
+ // vector will be expanded to a power of 2 elements, so we know we can
+ // always round up to the next multiple of 4 when creating the vector
+ // stores.
+ // e.g. 4 elem => 1 st.v4
+ // 6 elem => 2 st.v4
+ // 8 elem => 2 st.v4
+ // 11 elem => 3 st.v4
+ unsigned VecSize = 4;
+ if (EltVT.getSizeInBits() == 64)
+ VecSize = 2;
+
+ // This is potentially only part of a vector, so assume all elements
+ // are packed together.
+ unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
+
+ for (unsigned i = 0; i < NumElts; i += VecSize) {
+ // Get values
+ SDValue StoreVal;
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
+ Ops.push_back(DAG.getConstant(curOffset, MVT::i32));
+
+ unsigned Opc = NVPTXISD::StoreParamV2;
+
+ StoreVal = OutVals[OIdx++];
+ if (NeedExtend)
+ StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
+ Ops.push_back(StoreVal);
+
+ if (i + 1 < NumElts) {
+ StoreVal = OutVals[OIdx++];
+ if (NeedExtend)
+ StoreVal =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
+ } else {
+ StoreVal = DAG.getUNDEF(EltVT);
+ }
+ Ops.push_back(StoreVal);
+
+ if (VecSize == 4) {
+ Opc = NVPTXISD::StoreParamV4;
+ if (i + 2 < NumElts) {
+ StoreVal = OutVals[OIdx++];
+ if (NeedExtend)
+ StoreVal =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
+ } else {
+ StoreVal = DAG.getUNDEF(EltVT);
+ }
+ Ops.push_back(StoreVal);
+
+ if (i + 3 < NumElts) {
+ StoreVal = OutVals[OIdx++];
+ if (NeedExtend)
+ StoreVal =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
+ } else {
+ StoreVal = DAG.getUNDEF(EltVT);
+ }
+ Ops.push_back(StoreVal);
+ }
+
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0],
+ Ops.size(), MemVT,
+ MachinePointerInfo());
+ InFlag = Chain.getValue(1);
+ curOffset += PerStoreOffset;
+ }
+ }
+ ++paramCount;
+ --OIdx;
+ continue;
+ }
// Plain scalar
// for ABI, declare .param .b<size> .param<n>;
- // for nonABI, declare .reg .b<size> .param<n>;
- unsigned isReg = 1;
- if (isABI)
- isReg = 0;
unsigned sz = VT.getSizeInBits();
- if (VT.isInteger() && (sz < 32))
- sz = 32;
+ bool needExtend = false;
+ if (VT.isInteger()) {
+ if (sz < 16)
+ needExtend = true;
+ if (sz < 32)
+ sz = 32;
+ }
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(isReg, MVT::i32), InFlag };
+ DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps, 5);
InFlag = Chain.getValue(1);
+ SDValue OutV = OutVals[OIdx];
+ if (needExtend) {
+ // zext/sext i1 to i16
+ unsigned opc = ISD::ZERO_EXTEND;
+ if (Outs[OIdx].Flags.isSExt())
+ opc = ISD::SIGN_EXTEND;
+ OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
+ }
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), OutVals[i],
- InFlag };
+ DAG.getConstant(0, MVT::i32), OutV, InFlag };
unsigned opcode = NVPTXISD::StoreParam;
- if (isReg)
- opcode = NVPTXISD::MoveToParam;
- else {
- if (Outs[i].Flags.isZExt())
- opcode = NVPTXISD::StoreParamU32;
- else if (Outs[i].Flags.isSExt())
- opcode = NVPTXISD::StoreParamS32;
- }
- Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
+ if (Outs[OIdx].Flags.isZExt())
+ opcode = NVPTXISD::StoreParamU32;
+ else if (Outs[OIdx].Flags.isSExt())
+ opcode = NVPTXISD::StoreParamS32;
+ Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5,
+ VT, MachinePointerInfo());
InFlag = Chain.getValue(1);
++paramCount;
@@ -518,55 +903,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(PTy && "Type of a byval parameter should be pointer");
ComputeValueVTs(*this, PTy->getElementType(), vtparts);
- if (isABI) {
- // declare .param .align 16 .b8 .param<n>[<size>];
- unsigned sz = Outs[i].Flags.getByValSize();
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
- // don't need to
- // worry about natural alignment or not. See TargetLowering::LowerCallTo()
- SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
- DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
- InFlag
- };
- Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
- InFlag = Chain.getValue(1);
- unsigned curOffset = 0;
- for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- unsigned elems = 1;
- EVT elemtype = vtparts[j];
- if (vtparts[j].isVector()) {
- elems = vtparts[j].getVectorNumElements();
- elemtype = vtparts[j].getVectorElementType();
- }
- for (unsigned k = 0, ke = elems; k != ke; ++k) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
- DAG.getConstant(curOffset, getPointerTy()));
- SDValue theVal =
- DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false, 0);
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32),
- theVal, InFlag };
- Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, 5);
- InFlag = Chain.getValue(1);
- curOffset += sz / 8;
- }
- }
- ++paramCount;
- continue;
- }
- // Non-abi, struct or vector
- // Declare a bunch or .reg .b<size> .param<n>
+ // declare .param .align <align> .b8 .param<n>[<size>];
+ unsigned sz = Outs[OIdx].Flags.getByValSize();
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ // The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
+ // so we don't need to worry about natural alignment or not.
+ // See TargetLowering::LowerCallTo().
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
+ DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+ InFlag
+ };
+ Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
+ DeclareParamOps, 5);
+ InFlag = Chain.getValue(1);
unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
unsigned elems = 1;
@@ -577,107 +927,66 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
for (unsigned k = 0, ke = elems; k != ke; ++k) {
unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 32))
- sz = 32;
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(1, MVT::i32), InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
- InFlag = Chain.getValue(1);
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
DAG.getConstant(curOffset, getPointerTy()));
- SDValue theVal =
- DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
- false, false, false, 0);
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false,
+ 0);
+ if (elemtype.getSizeInBits() < 16) {
+ theVal = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, theVal);
+ }
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), theVal,
+ DAG.getConstant(curOffset, MVT::i32), theVal,
InFlag };
- Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
- CopyParamOps, 5);
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
+ CopyParamOps, 5, elemtype,
+ MachinePointerInfo());
+
InFlag = Chain.getValue(1);
- ++paramCount;
+ curOffset += sz / 8;
}
}
+ ++paramCount;
}
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
unsigned retAlignment = 0;
// Handle Result
- unsigned retCount = 0;
if (Ins.size() > 0) {
SmallVector<EVT, 16> resvtparts;
ComputeValueVTs(*this, retTy, resvtparts);
- // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
- // individual .reg .b<size> func_retval<0..> for non ABI
- unsigned resultsz = 0;
- for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = resvtparts[i];
- if (resvtparts[i].isVector()) {
- elems = resvtparts[i].getVectorNumElements();
- elemtype = resvtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (isABI == false) {
- if (elemtype.isInteger() && (sz < 32))
- sz = 32;
- } else {
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- }
- if (isABI == false) {
- SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(retCount, MVT::i32),
- InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
- DeclareRetOps, 5);
- InFlag = Chain.getValue(1);
- ++retCount;
- }
- resultsz += sz;
- }
- }
- if (isABI) {
- if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
- retTy->isPointerTy()) {
- // Scalar needs to be at least 32bit wide
- if (resultsz < 32)
- resultsz = 32;
- SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(resultsz, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
- DeclareRetOps, 5);
- InFlag = Chain.getValue(1);
- } else {
- if (Func) { // direct call
- if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
- retAlignment = getDataLayout()->getABITypeAlignment(retTy);
- } else { // indirect call
- const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
- if (!llvm::getAlign(*CallI, 0, retAlignment))
- retAlignment = getDataLayout()->getABITypeAlignment(retTy);
- }
- SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain,
- DAG.getConstant(retAlignment, MVT::i32),
- DAG.getConstant(resultsz / 8, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
- DeclareRetOps, 5);
- InFlag = Chain.getValue(1);
- }
+ // Declare
+ // .param .align 16 .b8 retval0[<size-in-bytes>], or
+ // .param .b<size-in-bits> retval0
+ unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
+ if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
+ retTy->isPointerTy()) {
+ // Scalar needs to be at least 32bit wide
+ if (resultsz < 32)
+ resultsz = 32;
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(resultsz, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
+ } else {
+ retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
+ SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue DeclareRetOps[] = { Chain,
+ DAG.getConstant(retAlignment, MVT::i32),
+ DAG.getConstant(resultsz / 8, MVT::i32),
+ DAG.getConstant(0, MVT::i32), InFlag };
+ Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
+ DeclareRetOps, 5);
+ InFlag = Chain.getValue(1);
}
}
@@ -690,7 +999,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for an
// INLINEASM SDNode.
SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
+ std::string proto_string =
+ getPrototype(retTy, Args, Outs, retAlignment, CS);
const char *asmstr = nvTM->getManagedStrPool()
->getManagedString(proto_string.c_str())->c_str();
SDValue InlineAsmOps[] = {
@@ -703,9 +1013,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrintCallOps[] = {
- Chain,
- DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
- InFlag
+ Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
};
Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
dl, PrintCallVTs, PrintCallOps, 3);
@@ -753,59 +1061,172 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
- if (isABI) {
- unsigned resoffset = 0;
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- unsigned sz = Ins[i].VT.getSizeInBits();
- if (Ins[i].VT.isInteger() && (sz < 8))
- sz = 8;
- EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
- SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(resoffset, MVT::i32), InFlag };
- SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
- LoadRetOps, array_lengthof(LoadRetOps));
+ unsigned resoffset = 0;
+ if (retTy && retTy->isVectorTy()) {
+ EVT ObjectVT = getValueType(retTy);
+ unsigned NumElts = ObjectVT.getVectorNumElements();
+ EVT EltVT = ObjectVT.getVectorElementType();
+ assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
+ "Vector was not scalarized");
+ unsigned sz = EltVT.getSizeInBits();
+ bool needTruncate = sz < 16 ? true : false;
+
+ if (NumElts == 1) {
+ // Just a simple load
+ std::vector<EVT> LoadRetVTs;
+ if (needTruncate) {
+ // If loading i1 result, generate
+ // load i16
+ // trunc i16 to i1
+ LoadRetVTs.push_back(MVT::i16);
+ } else
+ LoadRetVTs.push_back(EltVT);
+ LoadRetVTs.push_back(MVT::Other);
+ LoadRetVTs.push_back(MVT::Glue);
+ std::vector<SDValue> LoadRetOps;
+ LoadRetOps.push_back(Chain);
+ LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
+ LoadRetOps.push_back(InFlag);
+ SDValue retval = DAG.getMemIntrinsicNode(
+ NVPTXISD::LoadParam, dl,
+ DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
+ LoadRetOps.size(), EltVT, MachinePointerInfo());
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
- InVals.push_back(retval);
- resoffset += sz / 8;
+ SDValue Ret0 = retval;
+ if (needTruncate)
+ Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
+ InVals.push_back(Ret0);
+ } else if (NumElts == 2) {
+ // LoadV2
+ std::vector<EVT> LoadRetVTs;
+ if (needTruncate) {
+ // If loading i1 result, generate
+ // load i16
+ // trunc i16 to i1
+ LoadRetVTs.push_back(MVT::i16);
+ LoadRetVTs.push_back(MVT::i16);
+ } else {
+ LoadRetVTs.push_back(EltVT);
+ LoadRetVTs.push_back(EltVT);
+ }
+ LoadRetVTs.push_back(MVT::Other);
+ LoadRetVTs.push_back(MVT::Glue);
+ std::vector<SDValue> LoadRetOps;
+ LoadRetOps.push_back(Chain);
+ LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
+ LoadRetOps.push_back(InFlag);
+ SDValue retval = DAG.getMemIntrinsicNode(
+ NVPTXISD::LoadParamV2, dl,
+ DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
+ LoadRetOps.size(), EltVT, MachinePointerInfo());
+ Chain = retval.getValue(2);
+ InFlag = retval.getValue(3);
+ SDValue Ret0 = retval.getValue(0);
+ SDValue Ret1 = retval.getValue(1);
+ if (needTruncate) {
+ Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
+ InVals.push_back(Ret0);
+ Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
+ InVals.push_back(Ret1);
+ } else {
+ InVals.push_back(Ret0);
+ InVals.push_back(Ret1);
+ }
+ } else {
+ // Split into N LoadV4
+ unsigned Ofst = 0;
+ unsigned VecSize = 4;
+ unsigned Opc = NVPTXISD::LoadParamV4;
+ if (EltVT.getSizeInBits() == 64) {
+ VecSize = 2;
+ Opc = NVPTXISD::LoadParamV2;
+ }
+ EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
+ for (unsigned i = 0; i < NumElts; i += VecSize) {
+ SmallVector<EVT, 8> LoadRetVTs;
+ if (needTruncate) {
+ // If loading i1 result, generate
+ // load i16
+ // trunc i16 to i1
+ for (unsigned j = 0; j < VecSize; ++j)
+ LoadRetVTs.push_back(MVT::i16);
+ } else {
+ for (unsigned j = 0; j < VecSize; ++j)
+ LoadRetVTs.push_back(EltVT);
+ }
+ LoadRetVTs.push_back(MVT::Other);
+ LoadRetVTs.push_back(MVT::Glue);
+ SmallVector<SDValue, 4> LoadRetOps;
+ LoadRetOps.push_back(Chain);
+ LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
+ LoadRetOps.push_back(InFlag);
+ SDValue retval = DAG.getMemIntrinsicNode(
+ Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()),
+ &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo());
+ if (VecSize == 2) {
+ Chain = retval.getValue(2);
+ InFlag = retval.getValue(3);
+ } else {
+ Chain = retval.getValue(4);
+ InFlag = retval.getValue(5);
+ }
+
+ for (unsigned j = 0; j < VecSize; ++j) {
+ if (i + j >= NumElts)
+ break;
+ SDValue Elt = retval.getValue(j);
+ if (needTruncate)
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
+ InVals.push_back(Elt);
+ }
+ Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ }
}
} else {
- SmallVector<EVT, 16> resvtparts;
- ComputeValueVTs(*this, retTy, resvtparts);
-
- assert(Ins.size() == resvtparts.size() &&
- "Unexpected number of return values in non-ABI case");
- unsigned paramNum = 0;
+ SmallVector<EVT, 16> VTs;
+ ComputePTXValueVTs(*this, retTy, VTs);
+ assert(VTs.size() == Ins.size() && "Bad value decomposition");
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- assert(EVT(Ins[i].VT) == resvtparts[i] &&
- "Unexpected EVT type in non-ABI case");
- unsigned numelems = 1;
- EVT elemtype = Ins[i].VT;
- if (Ins[i].VT.isVector()) {
- numelems = Ins[i].VT.getVectorNumElements();
- elemtype = Ins[i].VT.getVectorElementType();
- }
- std::vector<SDValue> tempRetVals;
- for (unsigned j = 0; j < numelems; ++j) {
- EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
- SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
- DAG.getConstant(paramNum, MVT::i32),
- InFlag };
- SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
- MoveRetOps, array_lengthof(MoveRetOps));
- Chain = retval.getValue(1);
- InFlag = retval.getValue(2);
- tempRetVals.push_back(retval);
- ++paramNum;
- }
- if (Ins[i].VT.isVector())
- InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
- &tempRetVals[0], tempRetVals.size()));
- else
- InVals.push_back(tempRetVals[0]);
+ unsigned sz = VTs[i].getSizeInBits();
+ bool needTruncate = sz < 8 ? true : false;
+ if (VTs[i].isInteger() && (sz < 8))
+ sz = 8;
+
+ SmallVector<EVT, 4> LoadRetVTs;
+ if (sz < 16) {
+ // If loading i1/i8 result, generate
+ // load i8 (-> i16)
+ // trunc i16 to i1/i8
+ LoadRetVTs.push_back(MVT::i16);
+ } else
+ LoadRetVTs.push_back(Ins[i].VT);
+ LoadRetVTs.push_back(MVT::Other);
+ LoadRetVTs.push_back(MVT::Glue);
+
+ SmallVector<SDValue, 4> LoadRetOps;
+ LoadRetOps.push_back(Chain);
+ LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
+ LoadRetOps.push_back(InFlag);
+ SDValue retval = DAG.getMemIntrinsicNode(
+ NVPTXISD::LoadParam, dl,
+ DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
+ LoadRetOps.size(), VTs[i], MachinePointerInfo());
+ Chain = retval.getValue(1);
+ InFlag = retval.getValue(2);
+ SDValue Ret0 = retval.getValue(0);
+ if (needTruncate)
+ Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
+ InVals.push_back(Ret0);
+ resoffset += sz / 8;
}
}
}
+
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
DAG.getIntPtrConstant(uniqueCallSite + 1, true),
InFlag, dl);
@@ -874,8 +1295,8 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// v = ld i1* addr
// =>
-// v1 = ld i8* addr
-// v = trunc v1 to i1
+// v1 = ld i8* addr (-> i16)
+// v = trunc i16 to i1
SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(Node);
@@ -884,7 +1305,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
SDValue newLD =
- DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+ DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
@@ -942,9 +1363,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// Since StoreV2 is a target node, we cannot rely on DAG type legalization.
// Therefore, we must ensure the type is legal. For i1 and i8, we set the
// stored type to i16 and propogate the "real" type as the memory type.
- bool NeedExt = false;
+ bool NeedSExt = false;
if (EltVT.getSizeInBits() < 16)
- NeedExt = true;
+ NeedSExt = true;
switch (NumElts) {
default:
@@ -967,10 +1388,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
for (unsigned i = 0; i < NumElts; ++i) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
DAG.getIntPtrConstant(i));
- if (NeedExt)
- // ANY_EXTEND is correct here since the store will only look at the
- // lower-order bits anyway.
- ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ if (NeedSExt)
+ ExtVal = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i16, ExtVal);
Ops.push_back(ExtVal);
}
@@ -994,8 +1413,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// st i1 v, addr
// =>
-// v1 = zxt v to i8
-// st i8, addr
+// v1 = zxt v to i16
+// st.u8 i16, addr
SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
@@ -1007,9 +1426,10 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
- Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
- SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3);
+ SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), MVT::i8, isNonTemporal,
+ isVolatile, Alignment);
return Result;
}
@@ -1116,7 +1536,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, Ty, vtparts);
+ ComputePTXValueVTs(*this, Ty, vtparts);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
@@ -1152,7 +1572,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
- ComputeValueVTs(*this, Ty, vtparts, &offsets, 0);
+ // NOTE: Here, we lose the ability to issue vector loads for vectors
+ // that are a part of a struct. This should be investigated in the
+ // future.
+ ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
bool aggregateIsPacked = false;
if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
@@ -1172,9 +1595,15 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
aggregateIsPacked ? 1
: TD->getABITypeAlignment(
partVT.getTypeForEVT(F->getContext()));
- SDValue p = DAG.getLoad(partVT, dl, Root, srcAddr,
- MachinePointerInfo(srcValue), false, false,
- true, partAlign);
+ SDValue p;
+ if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits())
+ p = DAG.getExtLoad(ISD::SEXTLOAD, dl, Ins[InsIdx].VT, Root, srcAddr,
+ MachinePointerInfo(srcValue), partVT, false,
+ false, partAlign);
+ else
+ p = DAG.getLoad(partVT, dl, Root, srcAddr,
+ MachinePointerInfo(srcValue), false, false, false,
+ partAlign);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
InVals.push_back(p);
@@ -1208,6 +1637,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
+ if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
+ P = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, P);
InVals.push_back(P);
Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
++InsIdx;
@@ -1230,6 +1661,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
DAG.getIntPtrConstant(0));
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
DAG.getIntPtrConstant(1));
+
+ if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
+ Elt0 = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt0);
+ Elt1 = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt1);
+ }
+
InVals.push_back(Elt0);
InVals.push_back(Elt1);
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
@@ -1269,6 +1706,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
break;
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
DAG.getIntPtrConstant(j));
+ if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
+ Elt = DAG.getNode(ISD::SIGN_EXTEND, dl, Ins[InsIdx].VT, Elt);
InVals.push_back(Elt);
}
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
@@ -1282,16 +1721,19 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
}
// A plain scalar.
EVT ObjectVT = getValueType(Ty);
- assert(ObjectVT == Ins[InsIdx].VT &&
- "Ins type did not match function type");
// If ABI, load from the param symbol
SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
Value *srcValue = Constant::getNullValue(PointerType::get(
ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue p = DAG.getLoad(
- ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
- true,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ SDValue p;
+ if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits())
+ p = DAG.getExtLoad(ISD::SEXTLOAD, dl, Ins[InsIdx].VT, Root, Arg,
+ MachinePointerInfo(srcValue), ObjectVT, false, false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ else
+ p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
+ MachinePointerInfo(srcValue), false, false, false,
+ TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
InVals.push_back(p);
@@ -1360,26 +1802,38 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
unsigned NumElts = VTy->getNumElements();
assert(NumElts == Outs.size() && "Bad scalarization of return value");
+ // const_cast can be removed in later LLVM versions
+ EVT EltVT = getValueType(const_cast<Type *>(RetTy)).getVectorElementType();
+ bool NeedExtend = false;
+ if (EltVT.getSizeInBits() < 16)
+ NeedExtend = true;
+
// V1 store
if (NumElts == 1) {
SDValue StoreVal = OutVals[0];
// We only have one element, so just directly store it
- if (StoreVal.getValueType().getSizeInBits() < 8)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal);
- Chain = DAG.getNode(NVPTXISD::StoreRetval, dl, MVT::Other, Chain,
- DAG.getConstant(0, MVT::i32), StoreVal);
+ if (NeedExtend)
+ StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
+ SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
+ DAG.getVTList(MVT::Other), &Ops[0], 3,
+ EltVT, MachinePointerInfo());
+
} else if (NumElts == 2) {
// V2 store
SDValue StoreVal0 = OutVals[0];
SDValue StoreVal1 = OutVals[1];
- if (StoreVal0.getValueType().getSizeInBits() < 8) {
- StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal0);
- StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal1);
+ if (NeedExtend) {
+ StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
+ StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
}
- Chain = DAG.getNode(NVPTXISD::StoreRetvalV2, dl, MVT::Other, Chain,
- DAG.getConstant(0, MVT::i32), StoreVal0, StoreVal1);
+ SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
+ StoreVal1 };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
+ DAG.getVTList(MVT::Other), &Ops[0], 4,
+ EltVT, MachinePointerInfo());
} else {
// V4 stores
// We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
@@ -1402,10 +1856,6 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
unsigned PerStoreOffset =
TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
- bool Extend = false;
- if (OutVals[0].getValueType().getSizeInBits() < 8)
- Extend = true;
-
for (unsigned i = 0; i < NumElts; i += VecSize) {
// Get values
SDValue StoreVal;
@@ -1413,17 +1863,17 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Ops.push_back(Chain);
Ops.push_back(DAG.getConstant(Offset, MVT::i32));
unsigned Opc = NVPTXISD::StoreRetvalV2;
- EVT ExtendedVT = (Extend) ? MVT::i8 : OutVals[0].getValueType();
+ EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
StoreVal = OutVals[i];
- if (Extend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal);
+ if (NeedExtend)
+ StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
Ops.push_back(StoreVal);
if (i + 1 < NumElts) {
StoreVal = OutVals[i + 1];
- if (Extend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal);
+ if (NeedExtend)
+ StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
} else {
StoreVal = DAG.getUNDEF(ExtendedVT);
}
@@ -1433,8 +1883,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Opc = NVPTXISD::StoreRetvalV4;
if (i + 2 < NumElts) {
StoreVal = OutVals[i + 2];
- if (Extend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal);
+ if (NeedExtend)
+ StoreVal =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
} else {
StoreVal = DAG.getUNDEF(ExtendedVT);
}
@@ -1442,19 +1893,29 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (i + 3 < NumElts) {
StoreVal = OutVals[i + 3];
- if (Extend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, StoreVal);
+ if (NeedExtend)
+ StoreVal =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
} else {
StoreVal = DAG.getUNDEF(ExtendedVT);
}
Ops.push_back(StoreVal);
}
- Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
+ // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
+ Chain =
+ DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0],
+ Ops.size(), EltVT, MachinePointerInfo());
Offset += PerStoreOffset;
}
}
} else {
+ SmallVector<EVT, 16> ValVTs;
+ // const_cast is necessary since we are still using an LLVM version from
+ // before the type system re-write.
+ ComputePTXValueVTs(*this, const_cast<Type *>(RetTy), ValVTs);
+ assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
+
unsigned sizesofar = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
@@ -1471,13 +1932,15 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
EVT theStoreType = tmpval.getValueType();
if (theStoreType.getSizeInBits() < 8)
tmpval = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, tmpval);
- Chain = DAG.getNode(NVPTXISD::StoreRetval, dl, MVT::Other, Chain,
- DAG.getConstant(sizesofar, MVT::i32), tmpval);
+ SDValue Ops[] = { Chain, DAG.getConstant(sizesofar, MVT::i32), tmpval };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
+ DAG.getVTList(MVT::Other), &Ops[0], 3,
+ ValVTs[i], MachinePointerInfo());
if (theValType.isVector())
sizesofar +=
- theValType.getVectorElementType().getStoreSizeInBits() / 8;
+ ValVTs[i].getVectorElementType().getStoreSizeInBits() / 8;
else
- sizesofar += theValType.getStoreSizeInBits() / 8;
+ sizesofar += ValVTs[i].getStoreSizeInBits() / 8;
}
}
}
@@ -1485,6 +1948,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}
+
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
@@ -1548,9 +2012,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
- Info.memVT = MVT::i32;
+ Info.memVT = getValueType(I.getType());
else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getValueType(I.getType());
else
Info.memVT = MVT::f32;
Info.ptrVal = I.getArgOperand(0);
@@ -1635,7 +2099,7 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'c':
- return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
+ return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
case 'h':
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
case 'r':
@@ -1775,7 +2239,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
unsigned NumElts = ResVT.getVectorNumElements();
EVT EltVT = ResVT.getVectorElementType();
- // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
+ // Since LDU/LDG are target nodes, we cannot rely on DAG type
+ // legalization.
// Therefore, we must ensure the type is legal. For i1 and i8, we set the
// loaded type to i16 and propogate the "real" type as the memory type.
bool NeedTrunc = false;
@@ -1834,7 +2299,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
OtherOps.push_back(Chain); // Chain
// Skip operand 1 (intrinsic ID)
- // Others
+ // Others
for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
OtherOps.push_back(N->getOperand(i));
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index b0dad0f0d7..43c63ae739 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -35,14 +35,6 @@ enum NodeType {
DeclareRetParam,
DeclareRet,
DeclareScalarRet,
- LoadParam,
- LoadParamV2,
- LoadParamV4,
- StoreParam,
- StoreParamV2,
- StoreParamV4,
- StoreParamS32, // to sext and store a <32bit value, not used currently
- StoreParamU32, // to zext and store a <32bit value, not used currently
MoveToParam,
PrintCall,
PrintCallUni,
@@ -57,9 +49,6 @@ enum NodeType {
MoveParam,
MoveRetval,
MoveToRetval,
- StoreRetval,
- StoreRetvalV2,
- StoreRetvalV4,
PseudoUseParam,
RETURN,
CallSeqBegin,
@@ -73,7 +62,18 @@ enum NodeType {
LDUV2, // LDU.v2
LDUV4, // LDU.v4
StoreV2,
- StoreV4
+ StoreV4,
+ LoadParam,
+ LoadParamV2,
+ LoadParamV4,
+ StoreParam,
+ StoreParamV2,
+ StoreParamV4,
+ StoreParamS32, // to sext and store a <32bit value, not used currently
+ StoreParamU32, // to zext and store a <32bit value, not used currently
+ StoreRetval,
+ StoreRetvalV2,
+ StoreRetvalV4
};
}
@@ -126,7 +126,8 @@ public:
std::string getPrototype(Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
- unsigned retAlignment) const;
+ unsigned retAlignment,
+ const ImmutableCallSite *CS) const;
virtual SDValue
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -164,6 +165,9 @@ private:
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
+
+ unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
+ Type *Ty, unsigned Idx) const;
};
} // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 80af163a49..b406aa9288 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -51,9 +51,6 @@ void NVPTXInstrInfo::copyPhysReg(
else if (DestRC == &NVPTX::Int16RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int8RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
else if (DestRC == &NVPTX::Int64RegsRegClass)
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index c980237408..965af511e1 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -82,101 +82,6 @@ def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
def true : Predicate<"1">;
-//===----------------------------------------------------------------------===//
-// Special Handling for 8-bit Operands and Operations
-//
-// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
-// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
-// 8-bit registers.
-//
-// PTX ld, st and cvt instructions permit source and destination data operands
-// to be wider than the instruction-type size, so that narrow values may be
-// loaded, stored, and converted using regular-width registers.
-//
-// So in PTX generation, we
-// - always use 16-bit registers in place in 8-bit registers.
-// (8-bit variables should stay as 8-bit as they represent memory layout.)
-// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
-// before operation
-// . div
-// . rem
-// . neg (sign)
-// . set, setp
-// . shr
-//
-// We are patching the operations by inserting the cvt instructions in the
-// asm strings of the affected instructions.
-//
-// Since vector operations, except for ld/st, are eventually elementized. We
-// do not need to special-hand the vector 8-bit operations.
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Generate string block like
-// {
-// .reg .s16 %temp1;
-// .reg .s16 %temp2;
-// cvt.s16.s8 %temp1, %a;
-// cvt.s16.s8 %temp2, %b;
-// opc.s16 %dst, %temp1, %temp2;
-// }
-// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
-class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
- string s = !strconcat("{{\n\t",
- !strconcat(".reg .", !strconcat(TypeStr,
- !strconcat(" \t%temp1;\n\t",
- !strconcat(".reg .", !strconcat(TypeStr,
- !strconcat(" \t%temp2;\n\t",
- !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
- !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
- !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
-}
-
-// Generate string block like
-// {
-// .reg .s16 %temp1;
-// .reg .s16 %temp2;
-// cvt.s16.s8 %temp1, %a;
-// mov.b16 %temp2, %b;
-// cvt.s16.s8 %temp2, %temp2;
-// opc.s16 %dst, %temp1, %temp2;
-// }
-// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
-class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
- string s = !strconcat("{{\n\t",
- !strconcat(".reg .", !strconcat(TypeStr,
- !strconcat(" \t%temp1;\n\t",
- !strconcat(".reg .",
- !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
- !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
- !strconcat("mov.b16 \t%temp2, $b;\n\t",
- !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
- !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
-}
-
-// Generate string block like
-// {
-// .reg .s16 %temp1;
-// .reg .s16 %temp2;
-// mov.b16 %temp1, %b;
-// cvt.s16.s8 %temp1, %temp1;
-// cvt.s16.s8 %temp2, %a;
-// opc.s16 %dst, %temp1, %temp2;
-// }
-// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
-class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
- string s = !strconcat("{{\n\t",
- !strconcat(".reg .", !strconcat(TypeStr,
- !strconcat(" \t%temp1;\n\t",
- !strconcat(".reg .", !strconcat(TypeStr,
- !strconcat(" \t%temp2;\n\t",
- !strconcat("mov.b16 \t%temp1, $a;\n\t",
- !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
- !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
- !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
-}
-
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
@@ -204,66 +109,6 @@ multiclass I3<string OpcStr, SDNode OpNode> {
def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
- def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
- def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
-}
-
-multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
- def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- Int64Regs:$b))]>;
- def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
- def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- Int16Regs:$b))]>;
- def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
- def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
- def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
-}
-
-multiclass I3_noi8<string OpcStr, SDNode OpNode> {
- def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
- Int64Regs:$b))]>;
- def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
- Int32Regs:$b))]>;
- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
- def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
- Int16Regs:$b))]>;
- def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
@@ -522,81 +367,17 @@ def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
defm MULT : I3<"mul.lo.s", mul>;
-defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
-defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
-def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- !strconcat("{{ \n\t",
- !strconcat(".reg \t.s16 temp1; \n\t",
- !strconcat(".reg \t.s16 temp2; \n\t",
- !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
- !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
- !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
- !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
- !strconcat("}}", "")))))))),
- [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
-def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- !strconcat("{{ \n\t",
- !strconcat(".reg \t.s16 temp1; \n\t",
- !strconcat(".reg \t.s16 temp2; \n\t",
- !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
- !strconcat("mov.b16 \ttemp2, $b; \n\t",
- !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
- !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
- !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
- !strconcat("}}", ""))))))))),
- [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
-def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- !strconcat("{{ \n\t",
- !strconcat(".reg \t.u16 temp1; \n\t",
- !strconcat(".reg \t.u16 temp2; \n\t",
- !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
- !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
- !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
- !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
- !strconcat("}}", "")))))))),
- [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
-def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- !strconcat("{{ \n\t",
- !strconcat(".reg \t.u16 temp1; \n\t",
- !strconcat(".reg \t.u16 temp2; \n\t",
- !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
- !strconcat("mov.b16 \ttemp2, $b; \n\t",
- !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
- !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
- !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
- !strconcat("}}", ""))))))))),
- [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
-
-
-defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
-defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
-
-defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
+defm MULTHS : I3<"mul.hi.s", mulhs>;
+defm MULTHU : I3<"mul.hi.u", mulhu>;
+
+defm SDIV : I3<"div.s", sdiv>;
+defm UDIV : I3<"div.u", udiv>;
+
+defm SREM : I3<"rem.s", srem>;
// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
-defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
+defm UREM : I3<"rem.u", urem>;
// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
-def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
- (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
- Int8Regs:$c))]>;
-def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
- (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
- imm:$c))]>;
-def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
- (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
- Int8Regs:$c))]>;
-def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
- (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
- "mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
- imm:$c))]>;
-
def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
@@ -661,10 +442,6 @@ def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
(mul Int64Regs:$a, imm:$b), imm:$c))]>;
-def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
- !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
- "neg.s16 \t$dst, $dst;"),
- [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"neg.s16 \t$dst, $src;",
[(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
@@ -974,12 +751,6 @@ multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
!strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
[(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
- def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
- def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a,
@@ -1010,9 +781,6 @@ defm XOR : LOG_FORMAT<"xor", xor>;
def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
"not.pred \t$dst, $src;",
[(set Int1Regs:$dst, (not Int1Regs:$src))]>;
-def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
- "not.b16 \t$dst, $src;",
- [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"not.b16 \t$dst, $src;",
[(set Int16Regs:$dst, (not Int16Regs:$src))]>;
@@ -1056,14 +824,6 @@ multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a,
(i32 imm:$b)))]>;
- def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
- Int32Regs:$b))]>;
- def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
- (i32 imm:$b)))]>;
}
defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
@@ -1102,16 +862,6 @@ multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a,
(i32 imm:$b)))]>;
- def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
- !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
- !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
- Int32Regs:$b))]>;
- def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
- !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
- !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
- [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
- (i32 imm:$b)))]>;
}
defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
@@ -1257,8 +1007,6 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
let IsSimpleMove=1 in {
def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
"mov.pred \t$dst, $sss;", []>;
-def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
- "mov.u16 \t$dst, $sss;", []>;
def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
"mov.u16 \t$dst, $sss;", []>;
def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
@@ -1274,9 +1022,6 @@ def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
"mov.pred \t$dst, $src;",
[(set Int1Regs:$dst, imm:$src)]>;
-def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
- "mov.u16 \t$dst, $src;",
- [(set Int8Regs:$dst, imm:$src)]>;
def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
"mov.u16 \t$dst, $src;",
[(set Int16Regs:$dst, imm:$src)]>;
@@ -1331,47 +1076,8 @@ class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
string s = !strconcat(t11, ", -1, 0, p;\n\t}}");
}
-// Generate string block like
-// {
-// .reg .pred p;
-// .reg .s16 %temp1;
-// .reg .s16 %temp2;
-// cvt.s16.s8 %temp1, %a;
-// cvt s16.s8 %temp1, %b;
-// setp.gt.s16 p, %temp1, %temp2;
-// selp.s16 %dst, -1, 0, p;
-// }
-// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
-class Set_Stri8<string OpcStr, string d, string a, string b, string type,
- string cvt> {
- string t1 = "{{\n\t.reg .pred p;\n\t";
- string t2 = !strconcat(t1, ".reg .");
- string t3 = !strconcat(t2, type);
- string t4 = !strconcat(t3, " %temp1;\n\t");
- string t5 = !strconcat(t4, ".reg .");
- string t6 = !strconcat(t5, type);
- string t7 = !strconcat(t6, " %temp2;\n\t");
- string t8 = !strconcat(t7, cvt);
- string t9 = !strconcat(t8, " \t%temp1, ");
- string t10 = !strconcat(t9, a);
- string t11 = !strconcat(t10, ";\n\t");
- string t12 = !strconcat(t11, cvt);
- string t13 = !strconcat(t12, " \t%temp2, ");
- string t14 = !strconcat(t13, b);
- string t15 = !strconcat(t14, ";\n\t");
- string t16 = !strconcat(t15, OpcStr);
- string t17 = !strconcat(t16, "16");
- string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
- string t19 = !strconcat(t18, "selp.s16 \t");
- string t20 = !strconcat(t19, d);
- string s = !strconcat(t20, ", -1, 0, p;\n\t}}");
-}
-
multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
string TypeStr, string CVTStr> {
- def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
- []>;
def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
Int16Regs:$b),
Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
@@ -1385,15 +1091,6 @@ multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
[]>;
- def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
- [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
- def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
- [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
- def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
- Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
- [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
@@ -1422,15 +1119,6 @@ multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
[(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
- def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
- Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
- [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
- def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
- Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
- [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
- def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
- Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
- [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
Int16Regs:$b),
!strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
@@ -1639,22 +1327,6 @@ defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
- (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
- "selp.b16 \t$dst, $a, $b, $p;",
- [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
-def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
- (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
- "selp.b16 \t$dst, $a, $b, $p;",
- [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
-def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
- (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
- "selp.b16 \t$dst, $a, $b, $p;",
- [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
-def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
- (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
- "selp.b16 \t$dst, $a, $b, $p;",
- [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
@@ -1838,7 +1510,7 @@ class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat(!strconcat("ld.param", opstr),
"\t$dst, [retval0+$b];"),
- [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+ []>;
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
@@ -1846,8 +1518,6 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
"\t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
-// FIXME: A bug in tablegen currently prevents us from using multi-output
-// patterns here, so we have to custom select these in C++.
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
!strconcat(!strconcat("ld.param.v2", opstr),
@@ -1864,24 +1534,21 @@ class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
!strconcat(!strconcat("st.param", opstr),
"\t[param$a+$b], $val;"),
- [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
+ []>;
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
i32imm:$a, i32imm:$b),
!strconcat(!strconcat("st.param.v2", opstr),
"\t[param$a+$b], {{$val, $val2}};"),
- [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), regclass:$val,
- regclass:$val2)]>;
+ []>;
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val1, regclass:$val2,
regclass:$val3, i32imm:$a, i32imm:$b),
!strconcat(!strconcat("st.param.v4", opstr),
"\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
- [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), regclass:$val,
- regclass:$val2, regclass:$val3,
- regclass:$val4)]>;
+ []>;
class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
@@ -1893,13 +1560,13 @@ class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
!strconcat(!strconcat("st.param", opstr),
"\t[func_retval0+$a], $val;"),
- [(StoreRetval (i32 imm:$a), regclass:$val)]>;
+ []>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
!strconcat(!strconcat("st.param.v2", opstr),
"\t[func_retval0+$a], {{$val, $val2}};"),
- [(StoreRetvalV2 (i32 imm:$a), regclass:$val, regclass:$val2)]>;
+ []>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
@@ -1907,8 +1574,7 @@ class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
regclass:$val4, i32imm:$a),
!strconcat(!strconcat("st.param.v4", opstr),
"\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
- [(StoreRetvalV4 (i32 imm:$a), regclass:$val, regclass:$val2,
- regclass:$val3, regclass:$val4)]>;
+ []>;
class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
@@ -1983,29 +1649,19 @@ def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
-def LoadParamMemI8 : LoadParamMemInst<Int8Regs, ".b8">;
-def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
-def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
-def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
-def LoadParamMemV2I8 : LoadParamV2MemInst<Int8Regs, ".b8">;
-def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
-def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
-def LoadParamMemV4I8 : LoadParamV4MemInst<Int8Regs, ".b8">;
-
-//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
-// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
-// "cvt.u16.u32\t$dst, temp_param_reg;"),
-// [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
-//def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
-// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
-// "cvt.u16.u32\t$dst, temp_param_reg;"),
-// [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
-
+def LoadParamMemI8 : LoadParamMemInst<Int16Regs, ".b8">;
+def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
+def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
+def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
+def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
+def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
+def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
+def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
-def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
-def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
-def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
+def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
+def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
+def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
@@ -2013,10 +1669,6 @@ def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
"cvt.u16.u32\t$dst, retval$b;",
[(set Int16Regs:$dst,
(LoadParam (i32 0), (i32 imm:$b)))]>;
-def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
- "cvt.u16.u32\t$dst, retval$b;",
- [(set Int8Regs:$dst,
- (LoadParam (i32 0), (i32 imm:$b)))]>;
def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
@@ -2024,31 +1676,12 @@ def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
-def StoreParamI16 : NVPTXInst<(outs),
- (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
- "st.param.b16\t[param$a+$b], $val;",
- [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
-
-def StoreParamI8 : NVPTXInst<(outs),
- (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
- "st.param.b8\t[param$a+$b], $val;",
- [(StoreParam
- (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
-
-def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
-def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
-
-def StoreParamV2I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
- i32imm:$a, i32imm:$b),
- "st.param.v2.b16\t[param$a+$b], {{$val, $val2}};",
- [(StoreParamV2 (i32 imm:$a), (i32 imm:$b),
- Int16Regs:$val, Int16Regs:$val2)]>;
-
-def StoreParamV2I8 : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2,
- i32imm:$a, i32imm:$b),
- "st.param.v2.b8\t[param$a+$b], {{$val, $val2}};",
- [(StoreParamV2 (i32 imm:$a), (i32 imm:$b),
- Int8Regs:$val, Int8Regs:$val2)]>;
+def StoreParamI16 : StoreParamInst<Int16Regs, ".b16">;
+def StoreParamI8 : StoreParamInst<Int16Regs, ".b8">;
+def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
+def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
+def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
+def StoreParamV2I8 : StoreParamV2Inst<Int16Regs, ".b8">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
@@ -2056,47 +1689,41 @@ def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
Int32Regs:$val3, Int32Regs:$val4,
i32imm:$a, i32imm:$b),
"st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
- Int32Regs:$val, Int32Regs:$val2,
- Int32Regs:$val3, Int32Regs:$val4)]>;
+ []>;
def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
Int16Regs:$val3, Int16Regs:$val4,
i32imm:$a, i32imm:$b),
"st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
- Int16Regs:$val, Int16Regs:$val2,
- Int16Regs:$val3, Int16Regs:$val4)]>;
+ []>;
-def StoreParamV4I8 : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2,
- Int8Regs:$val3, Int8Regs:$val4,
+def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
+ Int16Regs:$val3, Int16Regs:$val4,
i32imm:$a, i32imm:$b),
"st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
- Int8Regs:$val, Int8Regs:$val2,
- Int8Regs:$val3, Int8Regs:$val4)]>;
+ []>;
def StoreParamS32I16 : NVPTXInst<(outs),
(ins Int16Regs:$val, i32imm:$a, i32imm:$b),
!strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
"st.param.b32\t[param$a+$b], temp_param_reg;"),
- [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+ []>;
def StoreParamU32I16 : NVPTXInst<(outs),
(ins Int16Regs:$val, i32imm:$a, i32imm:$b),
!strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
"st.param.b32\t[param$a+$b], temp_param_reg;"),
- [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+ []>;
def StoreParamU32I8 : NVPTXInst<(outs),
- (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
!strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
"st.param.b32\t[param$a+$b], temp_param_reg;"),
- [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+ []>;
def StoreParamS32I8 : NVPTXInst<(outs),
- (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+ (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
!strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
"st.param.b32\t[param$a+$b], temp_param_reg;"),
- [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+ []>;
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
@@ -2109,9 +1736,7 @@ def StoreParamV4F32 : NVPTXInst<(outs),
Float32Regs:$val3, Float32Regs:$val4,
i32imm:$a, i32imm:$b),
"st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
- [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
- Float32Regs:$val, Float32Regs:$val2,
- Float32Regs:$val3, Float32Regs:$val4)]>;
+ []>;
def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">;
def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">;
@@ -2122,36 +1747,18 @@ def MoveToParamI16 : NVPTXInst<(outs),
!strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
"mov.b32\tparam$a, temp_param_reg;"),
[(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
-def MoveToParamI8 : NVPTXInst<(outs),
- (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
- !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
- "mov.b32\tparam$a, temp_param_reg;"),
- [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
-def StoreRetvalI8 : StoreRetvalInst<Int8Regs, ".b8">;
+def StoreRetvalI8 : StoreRetvalInst<Int16Regs, ".b8">;
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
-def StoreRetvalV2I8 : StoreRetvalV2Inst<Int8Regs, ".b8">;
+def StoreRetvalV2I8 : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
-def StoreRetvalV4I8 : StoreRetvalV4Inst<Int8Regs, ".b8">;
-
-//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
-// !strconcat("\{\n\t",
-// !strconcat(".reg .b32 temp_retval_reg;\n\t",
-// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
-// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
-// [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
-//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
-// !strconcat("\{\n\t",
-// !strconcat(".reg .b32 temp_retval_reg;\n\t",
-// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
-// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
-// [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
+def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
@@ -2162,7 +1769,7 @@ def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">;
def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">;
def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">;
-def MoveRetvalI8 : MoveRetvalInst<Int8Regs, ".b8">;
+def MoveRetvalI8 : MoveRetvalInst<Int16Regs, ".b8">;
def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;
@@ -2173,9 +1780,6 @@ def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
"cvt.u32.u16\tfunc_retval$num, $val;",
[(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
-def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
- "cvt.u32.u16\tfunc_retval$num, $val;",
- [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2193,7 +1797,6 @@ class LastCallArgInst<NVPTXRegClass regclass> :
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
-def CallArgI8 : CallArgInst<Int8Regs>;
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;
@@ -2201,7 +1804,6 @@ def CallArgF32 : CallArgInst<Float32Regs>;
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
-def LastCallArgI8 : LastCallArgInst<Int8Regs>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
@@ -2261,9 +1863,6 @@ def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"cvt.u16.u32\t$dst, $src;",
[(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
-def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
- "cvt.u16.u32\t$dst, $src;",
- [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
@@ -2275,7 +1874,6 @@ class PseudoUseParamInst<NVPTXRegClass regclass> :
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
-def PseudoUseParamI8 : PseudoUseParamInst<Int8Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
@@ -2317,7 +1915,7 @@ multiclass LD<NVPTXRegClass regclass> {
}
let mayLoad=1, neverHasSideEffects=1 in {
-defm LD_i8 : LD<Int8Regs>;
+defm LD_i8 : LD<Int16Regs>;
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
defm LD_i64 : LD<Int64Regs>;
@@ -2359,7 +1957,7 @@ multiclass ST<NVPTXRegClass regclass> {
}
let mayStore=1, neverHasSideEffects=1 in {
-defm ST_i8 : ST<Int8Regs>;
+defm ST_i8 : ST<Int16Regs>;
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
defm ST_i64 : ST<Int64Regs>;
@@ -2443,7 +2041,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
[]>;
}
let mayLoad=1, neverHasSideEffects=1 in {
-defm LDV_i8 : LD_VEC<Int8Regs>;
+defm LDV_i8 : LD_VEC<Int16Regs>;
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
defm LDV_i64 : LD_VEC<Int64Regs>;
@@ -2526,7 +2124,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
[]>;
}
let mayStore=1, neverHasSideEffects=1 in {
-defm STV_i8 : ST_VEC<Int8Regs>;
+defm STV_i8 : ST_VEC<Int16Regs>;
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
@@ -2539,10 +2137,6 @@ defm STV_f64 : ST_VEC<Float64Regs>;
multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
// FIXME: need to add f16 support
-// def CVTf16i8 :
-// NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
-// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
-// [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
// def CVTf16i16 :
// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
@@ -2560,10 +2154,6 @@ multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
"selp.f32 \t$d, 1.0, 0.0, $a;",
[(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
- def CVTf32i8 :
- NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
- !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
- [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
def CVTf32i16 :
NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
!strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
@@ -2581,10 +2171,6 @@ multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
"selp.f64 \t$d, 1.0, 0.0, $a;",
[(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
- def CVTf64i8 :
- NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
- !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
- [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
def CVTf64i16 :
NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
!strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
@@ -2604,24 +2190,6 @@ defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
// FIXME: need to add f16 support
-// def CVTi8f16:
-// NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
-// !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
-// [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
- def CVTi8f32_ftz:
- NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
- !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
- [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
- def CVTi8f32:
- NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
- !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
- [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
- def CVTi8f64:
- NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
- !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
- [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
-
-// FIXME: need to add f16 support
// def CVTi16f16:
// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
@@ -2680,10 +2248,6 @@ defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
- def ext1to8:
- NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
- "selp.u16 \t$d, 1, 0, $a;",
- [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
def ext1to16:
NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
"selp.u16 \t$d, 1, 0, $a;",
@@ -2699,10 +2263,6 @@ multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
}
multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
- def ext1to8:
- NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
- "selp.s16 \t$d, -1, 0, $a;",
- [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
def ext1to16:
NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
"selp.s16 \t$d, -1, 0, $a;",
@@ -2718,23 +2278,6 @@ multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
}
multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
- // All Int8Regs are emiited as 16bit registers in ptx.
- // And there is no selp.u8 in ptx.
- def ext8to16:
- NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
- !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
- !strconcat(OpStr, "8 \t$d, $a;")))),
- [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
- def ext8to32:
- NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
- !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
- !strconcat(OpStr, "8 \t$d, $a;")))),
- [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
- def ext8to64:
- NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
- !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
- !strconcat(OpStr, "8 \t$d, $a;")))),
- [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
def ext16to32:
NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
!strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
@@ -2778,18 +2321,9 @@ def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
"cvt.u16.u64 \t$d, $a;",
[(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
-def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
- "cvt.u8.u64 \t$d, $a;",
- [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
"cvt.u16.u32 \t$d, $a;",
[(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
-def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
- "cvt.u8.u32 \t$d, $a;",
- [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
-def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
- "cvt.u8.u16 \t$d, $a;",
- [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
TRUNC_to1_asm<".b64">.s,
[(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
@@ -2799,13 +2333,8 @@ def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
TRUNC_to1_asm<".b16">.s,
[(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
-def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
- TRUNC_to1_asm<".b16">.s,
- [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
// Select instructions
-def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
- (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
(SELECTi16rr Int16Regs:$a, Int16Regs:$b,
(TRUNC_32to1 Int32Regs:$pred))>;
@@ -2834,28 +2363,11 @@ def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
// pack a set of smaller int registers to a larger int register
-def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
- (ins Int8Regs:$s1, Int8Regs:$s2,
- Int8Regs:$s3, Int8Regs:$s4),
- !strconcat("{{\n\t.reg .b8\t%t<4>;",
- !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
- !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
- !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
- !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
- "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
- []>;
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
Int16Regs:$s3, Int16Regs:$s4),
"mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
[]>;
-def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
- (ins Int8Regs:$s1, Int8Regs:$s2),
- !strconcat("{{\n\t.reg .b8\t%t<2>;",
- !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
- !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
- "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
- []>;
def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2),
"mov.b32\t$d, {{$s1, $s2}};",
@@ -2870,28 +2382,11 @@ def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
[]>;
// unpack a larger int register to a set of smaller int registers
-def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
- Int8Regs:$d3, Int8Regs:$d4),
- (ins Int32Regs:$s),
- !strconcat("{{\n\t.reg .b8\t%t<4>;",
- !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
- !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
- !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
- !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
- "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
- []>;
def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
Int16Regs:$d3, Int16Regs:$d4),
(ins Int64Regs:$s),
"mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
[]>;
-def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
- (ins Int16Regs:$s),
- !strconcat("{{\n\t.reg .b8\t%t<2>;",
- !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
- !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
- "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
- []>;
def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
(ins Int32Regs:$s),
"mov.b32\t{{$d1, $d2}}, $s;",
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 24037cafef..caa7775a4c 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1270,6 +1270,11 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
// Support for ldu on sm_20 or later
//-----------------------------------
+def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ return M->getMemoryVT() == MVT::i8;
+}]>;
+
// Scalar
// @TODO: Revisit this, Changed imemAny to imem
multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
@@ -1291,8 +1296,27 @@ multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
[(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
}
-defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int8Regs,
-int_nvvm_ldu_global_i>;
+multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
+ def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
+ def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
+ Requires<[hasLDU]>;
+ def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
+ def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
+ !strconcat("ldu.global.", TyStr),
+ [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
+}
+
+defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
+ ldu_i8>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
int_nvvm_ldu_global_i>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
@@ -1330,7 +1354,7 @@ multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
}
defm INT_PTX_LDU_G_v2i8_ELE
- : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int8Regs>;
+ : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
: VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
@@ -1342,7 +1366,7 @@ defm INT_PTX_LDU_G_v2i64_ELE
defm INT_PTX_LDU_G_v2f64_ELE
: VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
- : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>;
+ : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
: VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Int16Regs>;
@@ -1542,10 +1566,6 @@ def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
// nvvm.move intrinsicc
-def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s),
- "mov.b16 \t$r, $s;",
- [(set Int8Regs:$r,
- (int_nvvm_move_i8 Int8Regs:$s))]>;
def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
"mov.b16 \t$r, $s;",
[(set Int16Regs:$r,
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index b749b05315..4d3a1d9b40 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -38,10 +38,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
return ".s32";
} else if (RC == &NVPTX::Int16RegsRegClass) {
return ".s16";
- }
- // Int8Regs become 16-bit registers in PTX
- else if (RC == &NVPTX::Int8RegsRegClass) {
- return ".s16";
} else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
} else if (RC == &NVPTX::SpecialRegsRegClass) {
@@ -64,8 +60,6 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
return "%r";
} else if (RC == &NVPTX::Int16RegsRegClass) {
return "%rs";
- } else if (RC == &NVPTX::Int8RegsRegClass) {
- return "%rc";
} else if (RC == &NVPTX::Int1RegsRegClass) {
return "%p";
} else if (RC == &NVPTX::SpecialRegsRegClass) {
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 8d100d6316..bc705b8d5f 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -31,7 +31,6 @@ def VRDepot : NVPTXReg<"%Depot">;
foreach i = 0-395 in {
def P#i : NVPTXReg<"%p"#i>; // Predicate
- def RC#i : NVPTXReg<"%rc"#i>; // 8-bit
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
@@ -49,7 +48,6 @@ foreach i = 0-395 in {
// Register classes
//===----------------------------------------------------------------------===//
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>;
-def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>;
def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>;
def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>;
diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll
index 16af0a336d..e929f24ddb 100644
--- a/test/CodeGen/NVPTX/compare-int.ll
+++ b/test/CodeGen/NVPTX/compare-int.ll
@@ -288,8 +288,8 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -298,8 +298,8 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -308,8 +308,8 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) {
define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ugt i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -318,8 +318,8 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
define i8 @icmp_uge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp uge i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -328,8 +328,8 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) {
define i8 @icmp_ult_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ult i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -338,8 +338,8 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) {
define i8 @icmp_ule_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ule i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -348,8 +348,8 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) {
define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sgt i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -358,8 +358,8 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
define i8 @icmp_sge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sge i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -368,8 +368,8 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) {
define i8 @icmp_slt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp slt i8 %a, %b
%ret = zext i1 %cmp to i8
@@ -378,8 +378,8 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) {
define i8 @icmp_sle_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %temp{{[0-9]+}}, %temp{{[0-9]+}}
-; CHECK: selp.u16 %rc{{[0-9]+}}, 1, 0, %p[[P0]]
+; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: selp.u16 %rs{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp sle i8 %a, %b
%ret = zext i1 %cmp to i8
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index 3265868d3c..204ae7b1fb 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -4,27 +4,27 @@
;; i8
define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
-; PTX32: ld.global.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
-; PTX64: ld.global.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.global.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(1)* %ptr
ret i8 %a
}
define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
-; PTX32: ld.shared.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.shared.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
-; PTX64: ld.shared.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.shared.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(3)* %ptr
ret i8 %a
}
define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
-; PTX32: ld.local.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.local.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
-; PTX64: ld.local.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.local.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(5)* %ptr
ret i8 %a
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
index 81a5216f96..f811a37191 100644
--- a/test/CodeGen/NVPTX/ld-generic.ll
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -4,9 +4,9 @@
;; i8
define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
-; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
; PTX32: ret
-; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
; PTX64: ret
%a = load i8 addrspace(0)* %ptr
ret i8 %a
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
index 779f7798d8..a5526f8ad7 100644
--- a/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -2,21 +2,21 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
define ptx_kernel void @t1(i1* %a) {
-; PTX32: mov.u16 %rc{{[0-9]+}}, 0;
-; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
-; PTX64: mov.u16 %rc{{[0-9]+}}, 0;
-; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
+; PTX32: mov.u16 %rs{{[0-9]+}}, 0;
+; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
+; PTX64: mov.u16 %rs{{[0-9]+}}, 0;
+; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}};
store i1 false, i1* %a
ret void
}
define ptx_kernel void @t2(i1* %a, i8* %b) {
-; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
-; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
+; PTX32: and.b16 temp, %rs{{[0-9]+}}, 1;
; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
-; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
-; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
+; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
+; PTX64: and.b16 temp, %rs{{[0-9]+}}, 1;
; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
%t1 = load i1* %a
diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll
index 0b26d802df..68c09fe065 100644
--- a/test/CodeGen/NVPTX/st-addrspace.ll
+++ b/test/CodeGen/NVPTX/st-addrspace.ll
@@ -5,27 +5,27 @@
;; i8
define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
-; PTX32: st.global.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
-; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(1)* %ptr
ret void
}
define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
-; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
-; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(3)* %ptr
ret void
}
define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
-; PTX32: st.local.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.local.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
-; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(5)* %ptr
ret void
diff --git a/test/CodeGen/NVPTX/st-generic.ll b/test/CodeGen/NVPTX/st-generic.ll
index 59a1fe0211..b9c616fbd1 100644
--- a/test/CodeGen/NVPTX/st-generic.ll
+++ b/test/CodeGen/NVPTX/st-generic.ll
@@ -5,9 +5,9 @@
;; i8
define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
-; PTX32: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX32: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
; PTX32: ret
-; PTX64: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}}
+; PTX64: st.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
; PTX64: ret
store i8 %a, i8 addrspace(0)* %ptr
ret void