1 file changed, 52 insertions, 557 deletions
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index c980237408..965af511e1 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -82,101 +82,6 @@ def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
 
 def true : Predicate<"1">;
 
-//===----------------------------------------------------------------------===//
-// Special Handling for 8-bit Operands and Operations
-//
-// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
-// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
-// 8-bit registers.
-//
-// PTX ld, st and cvt instructions permit source and destination data operands
-// to be wider than the instruction-type size, so that narrow values may be
-// loaded, stored, and converted using regular-width registers.
-//
-// So in PTX generation, we
-// - always use 16-bit registers in place in 8-bit registers.
-//   (8-bit variables should stay as 8-bit as they represent memory layout.)
-// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
-//   before operation
-//   . div
-//   . rem
-//   . neg (sign)
-//   . set, setp
-//   . shr
-//
-// We are patching the operations by inserting the cvt instructions in the
-// asm strings of the affected instructions.
-//
-// Since vector operations, except for ld/st, are eventually elementized. We
-// do not need to special-hand the vector 8-bit operations.
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Generate string block like
-// {
-//   .reg .s16 %temp1;
-//   .reg .s16 %temp2;
-//   cvt.s16.s8 %temp1, %a;
-//   cvt.s16.s8 %temp2, %b;
-//   opc.s16    %dst, %temp1, %temp2;
-// }
-// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
-class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
-  string s = !strconcat("{{\n\t",
-             !strconcat(".reg .", !strconcat(TypeStr,
-             !strconcat(" \t%temp1;\n\t",
-             !strconcat(".reg .", !strconcat(TypeStr,
-             !strconcat(" \t%temp2;\n\t",
-             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
-             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
-             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
-}
-
-// Generate string block like
-// {
-//   .reg .s16 %temp1;
-//   .reg .s16 %temp2;
-//   cvt.s16.s8 %temp1, %a;
-//   mov.b16    %temp2, %b;
-//   cvt.s16.s8 %temp2, %temp2;
-//   opc.s16    %dst, %temp1, %temp2;
-// }
-// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
-class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
-  string s = !strconcat("{{\n\t",
-             !strconcat(".reg .", !strconcat(TypeStr,
-             !strconcat(" \t%temp1;\n\t",
-             !strconcat(".reg .",
-             !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
-             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
-             !strconcat("mov.b16 \t%temp2, $b;\n\t",
-             !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
-             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
-}
-
-// Generate string block like
-// {
-//   .reg .s16 %temp1;
-//   .reg .s16 %temp2;
-//   mov.b16    %temp1, %b;
-//   cvt.s16.s8 %temp1, %temp1;
-//   cvt.s16.s8 %temp2, %a;
-//   opc.s16    %dst, %temp1, %temp2;
-// }
-// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
-class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
-  string s = !strconcat("{{\n\t",
-             !strconcat(".reg .", !strconcat(TypeStr,
-             !strconcat(" \t%temp1;\n\t",
-             !strconcat(".reg .", !strconcat(TypeStr,
-             !strconcat(" \t%temp2;\n\t",
-             !strconcat("mov.b16 \t%temp1, $a;\n\t",
-             !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
-             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
-             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
-}
-
 
 //===----------------------------------------------------------------------===//
 // Some Common Instruction Class Templates
@@ -204,66 +109,6 @@ multiclass I3<string OpcStr, SDNode OpNode> {
   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
-  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
-  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
-}
-
-multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
-  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
-                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
-                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
-                       Int64Regs:$b))]>;
-  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
-                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
-                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
-  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
-                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
-                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
-                       Int32Regs:$b))]>;
-  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
-                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
-                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
-  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
-                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
-                       Int16Regs:$b))]>;
-  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
-                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
-  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
-                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
-  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
-                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
-}
-
-multiclass I3_noi8<string OpcStr, SDNode OpNode> {
-  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
-                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
-                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
-                       Int64Regs:$b))]>;
-  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
-                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
-                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
-  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
-                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
-                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
-                       Int32Regs:$b))]>;
-  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
-                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
-                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
-  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
-                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
-                       Int16Regs:$b))]>;
-  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
-                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 }
 
 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
@@ -522,81 +367,17 @@ def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
 
 defm MULT : I3<"mul.lo.s", mul>;
 
-defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
-defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
-def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-            !strconcat("{{ \n\t",
-            !strconcat(".reg \t.s16 temp1; \n\t",
-            !strconcat(".reg \t.s16 temp2; \n\t",
-            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
-            !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
-            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
-            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
-            !strconcat("}}", "")))))))),
-      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
-def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-            !strconcat("{{ \n\t",
-            !strconcat(".reg \t.s16 temp1; \n\t",
-            !strconcat(".reg \t.s16 temp2; \n\t",
-            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
-            !strconcat("mov.b16 \ttemp2, $b; \n\t",
-            !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
-            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
-            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
-            !strconcat("}}", ""))))))))),
-      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
-def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-            !strconcat("{{ \n\t",
-            !strconcat(".reg \t.u16 temp1; \n\t",
-            !strconcat(".reg \t.u16 temp2; \n\t",
-            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
-            !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
-            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
-            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
-            !strconcat("}}", "")))))))),
-      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
-def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-            !strconcat("{{ \n\t",
-            !strconcat(".reg \t.u16 temp1; \n\t",
-            !strconcat(".reg \t.u16 temp2; \n\t",
-            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
-            !strconcat("mov.b16 \ttemp2, $b; \n\t",
-            !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
-            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
-            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
-            !strconcat("}}", ""))))))))),
-      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
-
-
-defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
-defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
-
-defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
+defm MULTHS : I3<"mul.hi.s", mulhs>;
+defm MULTHU : I3<"mul.hi.u", mulhu>;
+
+defm SDIV : I3<"div.s", sdiv>;
+defm UDIV : I3<"div.u", udiv>;
+
+defm SREM : I3<"rem.s", srem>;
 // The ri version will not be selected as DAGCombiner::visitSREM will lower it.
-defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
+defm UREM : I3<"rem.u", urem>;
 // The ri version will not be selected as DAGCombiner::visitUREM will lower it.
 
-def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
-                      (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
-                      "mad.lo.s16 \t$dst, $a, $b, $c;",
-                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
-                        Int8Regs:$c))]>;
-def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
-                      (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
-                      "mad.lo.s16 \t$dst, $a, $b, $c;",
-                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
-                        imm:$c))]>;
-def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
-                      (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
-                      "mad.lo.s16 \t$dst, $a, $b, $c;",
-                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
-                        Int8Regs:$c))]>;
-def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
-                      (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
-                      "mad.lo.s16 \t$dst, $a, $b, $c;",
-                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
-                        imm:$c))]>;
-
 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
                       (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
                       "mad.lo.s16 \t$dst, $a, $b, $c;",
@@ -661,10 +442,6 @@ def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
                         (mul Int64Regs:$a, imm:$b), imm:$c))]>;
 
 
-def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
-                     !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
-                                 "neg.s16 \t$dst, $dst;"),
-         [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                      "neg.s16 \t$dst, $src;",
          [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
@@ -974,12 +751,6 @@ multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
   def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
-  def b8rr:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
-                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
-  def b8ri:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
-                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
   def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
@@ -1010,9 +781,6 @@ defm XOR : LOG_FORMAT<"xor", xor>;
 def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
                       "not.pred \t$dst, $src;",
                       [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
-def NOT8:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
-                      "not.b16 \t$dst, $src;",
-                      [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
 def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                       "not.b16 \t$dst, $src;",
                       [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
@@ -1056,14 +824,6 @@ multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                         (i32 imm:$b)))]>;
-   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
-                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
-                        Int32Regs:$b))]>;
-   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
-                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
-                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
-                        (i32 imm:$b)))]>;
 }
 
 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
@@ -1102,16 +862,6 @@ multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                         (i32 imm:$b)))]>;
-   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
-                      !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
-                      !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
-                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
-                        Int32Regs:$b))]>;
-   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
-                      !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
-                      !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
-                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
-                        (i32 imm:$b)))]>;
 }
 
 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
@@ -1257,8 +1007,6 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
 let IsSimpleMove=1 in {
 def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                    "mov.pred \t$dst, $sss;", []>;
-def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
-                    "mov.u16 \t$dst, $sss;", []>;
 def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                     "mov.u16 \t$dst, $sss;", []>;
 def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
@@ -1274,9 +1022,6 @@ def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
 def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
                     "mov.pred \t$dst, $src;",
           [(set Int1Regs:$dst, imm:$src)]>;
-def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
-                    "mov.u16 \t$dst, $src;",
-          [(set Int8Regs:$dst, imm:$src)]>;
 def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
                     "mov.u16 \t$dst, $src;",
           [(set Int16Regs:$dst, imm:$src)]>;
@@ -1331,47 +1076,8 @@ class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
   string s   = !strconcat(t11, ", -1, 0, p;\n\t}}");
 }
 
-// Generate string block like
-// {
-//   .reg .pred p;
-//   .reg .s16 %temp1;
-//   .reg .s16 %temp2;
-//   cvt.s16.s8 %temp1, %a;
-//   cvt s16.s8 %temp1, %b;
-//   setp.gt.s16 p, %temp1, %temp2;
-//   selp.s16 %dst, -1, 0, p;
-// }
-// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
-class Set_Stri8<string OpcStr, string d, string a, string b, string type,
-  string cvt> {
-  string t1  = "{{\n\t.reg .pred p;\n\t";
-  string t2  = !strconcat(t1, ".reg .");
-  string t3  = !strconcat(t2, type);
-  string t4  = !strconcat(t3, " %temp1;\n\t");
-  string t5  = !strconcat(t4, ".reg .");
-  string t6  = !strconcat(t5, type);
-  string t7  = !strconcat(t6, " %temp2;\n\t");
-  string t8  = !strconcat(t7, cvt);
-  string t9  = !strconcat(t8, " \t%temp1, ");
-  string t10 = !strconcat(t9, a);
-  string t11 = !strconcat(t10, ";\n\t");
-  string t12 = !strconcat(t11, cvt);
-  string t13 = !strconcat(t12, " \t%temp2, ");
-  string t14 = !strconcat(t13, b);
-  string t15 = !strconcat(t14, ";\n\t");
-  string t16 = !strconcat(t15, OpcStr);
-  string t17 = !strconcat(t16, "16");
-  string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
-  string t19 = !strconcat(t18, "selp.s16 \t");
-  string t20 = !strconcat(t19, d);
-  string s   = !strconcat(t20, ", -1, 0, p;\n\t}}");
-}
-
 multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
   string TypeStr, string CVTStr> {
-  def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-                     Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
-               []>;
   def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
       Int16Regs:$b),
                      Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
@@ -1385,15 +1091,6 @@ multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
                      Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
                []>;
 
-  def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
-               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
-  def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
-               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
-  def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
-                     Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
-               [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
   def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
@@ -1422,15 +1119,6 @@ multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
 
-  def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
-                     Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
-               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
-  def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
-                     Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
-               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
-  def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
-                     Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
-               [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
   def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
       Int16Regs:$b),
                  !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
@@ -1639,22 +1327,6 @@ defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
 def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
                      (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
                              (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
-  (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
-                      "selp.b16 \t$dst, $a, $b, $p;",
-      [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
-def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
-  (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
-                      "selp.b16 \t$dst, $a, $b, $p;",
-      [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
-def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
-  (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
-                      "selp.b16 \t$dst, $a, $b, $p;",
-      [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
-def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
-  (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
-                      "selp.b16 \t$dst, $a, $b, $p;",
-      [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
 
 def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
   (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
@@ -1838,7 +1510,7 @@ class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
                 !strconcat(!strconcat("ld.param", opstr),
                 "\t$dst, [retval0+$b];"),
-                [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
+                []>;
 
 class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
@@ -1846,8 +1518,6 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
                 "\t$dst, retval$b;"),
                 [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
 
-// FIXME: A bug in tablegen currently prevents us from using multi-output
-// patterns here, so we have to custom select these in C++.
 class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
                 !strconcat(!strconcat("ld.param.v2", opstr),
@@ -1864,24 +1534,21 @@ class StoreParamInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
                 !strconcat(!strconcat("st.param", opstr),
                 "\t[param$a+$b], $val;"),
-                [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
+                []>;
 
 class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
                              i32imm:$a, i32imm:$b),
                 !strconcat(!strconcat("st.param.v2", opstr),
                 "\t[param$a+$b], {{$val, $val2}};"),
-                [(StoreParamV2 (i32 imm:$a), (i32 imm:$b), regclass:$val,
-                               regclass:$val2)]>;
+                []>;
 
 class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
-      NVPTXInst<(outs), (ins regclass:$val, regclass:$val1, regclass:$val2,
-                             regclass:$val3, i32imm:$a, i32imm:$b),
+      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
+                             regclass:$val4, i32imm:$a, i32imm:$b),
                 !strconcat(!strconcat("st.param.v4", opstr),
                 "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
-                [(StoreParamV4 (i32 imm:$a), (i32 imm:$b), regclass:$val,
-                               regclass:$val2, regclass:$val3,
-                               regclass:$val4)]>;
+                []>;
 
 class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
@@ -1893,13 +1560,13 @@ class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
                 !strconcat(!strconcat("st.param", opstr),
                 "\t[func_retval0+$a], $val;"),
-                [(StoreRetval (i32 imm:$a), regclass:$val)]>;
+                []>;
 
 class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
                 !strconcat(!strconcat("st.param.v2", opstr),
                 "\t[func_retval0+$a], {{$val, $val2}};"),
-                [(StoreRetvalV2 (i32 imm:$a), regclass:$val, regclass:$val2)]>;
+                []>;
 
 class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs),
@@ -1907,8 +1574,7 @@ class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
                      regclass:$val4, i32imm:$a),
                 !strconcat(!strconcat("st.param.v4", opstr),
                 "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
-                [(StoreRetvalV4 (i32 imm:$a), regclass:$val, regclass:$val2,
-                                              regclass:$val3, regclass:$val4)]>;
+                []>;
 
 class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
       NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
@@ -1983,29 +1649,19 @@ def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
 def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
 def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
 def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
-def LoadParamMemI8     : LoadParamMemInst<Int8Regs, ".b8">;
-def LoadParamMemV2I64    : LoadParamV2MemInst<Int64Regs, ".b64">;
-def LoadParamMemV2I32    : LoadParamV2MemInst<Int32Regs, ".b32">;
-def LoadParamMemV2I16    : LoadParamV2MemInst<Int16Regs, ".b16">;
-def LoadParamMemV2I8     : LoadParamV2MemInst<Int8Regs, ".b8">;
-def LoadParamMemV4I32    : LoadParamV4MemInst<Int32Regs, ".b32">;
-def LoadParamMemV4I16    : LoadParamV4MemInst<Int16Regs, ".b16">;
-def LoadParamMemV4I8     : LoadParamV4MemInst<Int8Regs, ".b8">;
-
-//def LoadParamMemI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
-//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
-//                "cvt.u16.u32\t$dst, temp_param_reg;"),
-//                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
-//def LoadParamMemI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
-//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
-//                "cvt.u16.u32\t$dst, temp_param_reg;"),
-//                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
-
+def LoadParamMemI8     : LoadParamMemInst<Int16Regs, ".b8">;
+def LoadParamMemV2I64  : LoadParamV2MemInst<Int64Regs, ".b64">;
+def LoadParamMemV2I32  : LoadParamV2MemInst<Int32Regs, ".b32">;
+def LoadParamMemV2I16  : LoadParamV2MemInst<Int16Regs, ".b16">;
+def LoadParamMemV2I8   : LoadParamV2MemInst<Int16Regs, ".b8">;
+def LoadParamMemV4I32  : LoadParamV4MemInst<Int32Regs, ".b32">;
+def LoadParamMemV4I16  : LoadParamV4MemInst<Int16Regs, ".b16">;
+def LoadParamMemV4I8   : LoadParamV4MemInst<Int16Regs, ".b8">;
 def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
 def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
-def LoadParamMemV2F32    : LoadParamV2MemInst<Float32Regs, ".f32">;
-def LoadParamMemV2F64    : LoadParamV2MemInst<Float64Regs, ".f64">;
-def LoadParamMemV4F32    : LoadParamV4MemInst<Float32Regs, ".f32">;
+def LoadParamMemV2F32  : LoadParamV2MemInst<Float32Regs, ".f32">;
+def LoadParamMemV2F64  : LoadParamV2MemInst<Float64Regs, ".f64">;
+def LoadParamMemV4F32  : LoadParamV4MemInst<Float32Regs, ".f32">;
 
 def LoadParamRegI64    : LoadParamRegInst<Int64Regs, ".b64">;
 def LoadParamRegI32    : LoadParamRegInst<Int32Regs, ".b32">;
@@ -2013,10 +1669,6 @@ def LoadParamRegI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
                          "cvt.u16.u32\t$dst, retval$b;",
                          [(set Int16Regs:$dst,
                            (LoadParam (i32 0), (i32 imm:$b)))]>;
-def LoadParamRegI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
-                         "cvt.u16.u32\t$dst, retval$b;",
-                         [(set Int8Regs:$dst,
-                           (LoadParam (i32 0), (i32 imm:$b)))]>;
 
 def LoadParamRegF32    : LoadParamRegInst<Float32Regs, ".f32">;
 def LoadParamRegF64    : LoadParamRegInst<Float64Regs, ".f64">;
@@ -2024,31 +1676,12 @@ def LoadParamRegF64    : LoadParamRegInst<Float64Regs, ".f64">;
 def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
 def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
 
-def StoreParamI16    : NVPTXInst<(outs),
-  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
-                       "st.param.b16\t[param$a+$b], $val;",
-           [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
-
-def StoreParamI8     : NVPTXInst<(outs),
-  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
-                       "st.param.b8\t[param$a+$b], $val;",
-                       [(StoreParam
-                         (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
-
-def StoreParamV2I64    : StoreParamV2Inst<Int64Regs, ".b64">;
-def StoreParamV2I32    : StoreParamV2Inst<Int32Regs, ".b32">;
-
-def StoreParamV2I16    : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
-                                                i32imm:$a, i32imm:$b),
-                       "st.param.v2.b16\t[param$a+$b], {{$val, $val2}};",
-                       [(StoreParamV2 (i32 imm:$a), (i32 imm:$b),
-                                      Int16Regs:$val, Int16Regs:$val2)]>;
-
-def StoreParamV2I8     : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2,
-                                                i32imm:$a, i32imm:$b),
-                       "st.param.v2.b8\t[param$a+$b], {{$val, $val2}};",
-                       [(StoreParamV2 (i32 imm:$a), (i32 imm:$b),
-                                       Int8Regs:$val, Int8Regs:$val2)]>;
+def StoreParamI16    : StoreParamInst<Int16Regs, ".b16">;
+def StoreParamI8     : StoreParamInst<Int16Regs, ".b8">;
+def StoreParamV2I64  : StoreParamV2Inst<Int64Regs, ".b64">;
+def StoreParamV2I32  : StoreParamV2Inst<Int32Regs, ".b32">;
+def StoreParamV2I16  : StoreParamV2Inst<Int16Regs, ".b16">;
+def StoreParamV2I8   : StoreParamV2Inst<Int16Regs, ".b8">;
 
 // FIXME: StoreParamV4Inst crashes llvm-tblgen :(
 //def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
@@ -2056,47 +1689,41 @@ def StoreParamV4I32    : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
                                                Int32Regs:$val3, Int32Regs:$val4,
                                                 i32imm:$a, i32imm:$b),
                    "st.param.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
-                         [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
-                          Int32Regs:$val, Int32Regs:$val2,
-                          Int32Regs:$val3, Int32Regs:$val4)]>;
+                         []>;
 
 def StoreParamV4I16    : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
                                                Int16Regs:$val3, Int16Regs:$val4,
                                                 i32imm:$a, i32imm:$b),
                 "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
-                         [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
-                          Int16Regs:$val, Int16Regs:$val2,
-                          Int16Regs:$val3, Int16Regs:$val4)]>;
+                         []>;
 
-def StoreParamV4I8     : NVPTXInst<(outs), (ins Int8Regs:$val, Int8Regs:$val2,
-                                                Int8Regs:$val3, Int8Regs:$val4,
+def StoreParamV4I8     : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
+                                                Int16Regs:$val3, Int16Regs:$val4,
                                                 i32imm:$a, i32imm:$b),
                  "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
-                         [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
-                          Int8Regs:$val, Int8Regs:$val2,
-                          Int8Regs:$val3, Int8Regs:$val4)]>;
+                         []>;
 
 def StoreParamS32I16 : NVPTXInst<(outs),
   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
                  !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
-                 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+                 []>;
 def StoreParamU32I16 : NVPTXInst<(outs),
   (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
                  !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
-                 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
+                 []>;
 
 def StoreParamU32I8   : NVPTXInst<(outs),
-  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
                  !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
-                 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+                 []>;
 def StoreParamS32I8   : NVPTXInst<(outs),
-  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
+  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
                  !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
                             "st.param.b32\t[param$a+$b], temp_param_reg;"),
-                 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
+                 []>;
 
 def StoreParamF32    : StoreParamInst<Float32Regs, ".f32">;
 def StoreParamF64    : StoreParamInst<Float64Regs, ".f64">;
@@ -2109,9 +1736,7 @@ def StoreParamV4F32    : NVPTXInst<(outs),
                                         Float32Regs:$val3, Float32Regs:$val4,
                                         i32imm:$a, i32imm:$b),
                 "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
-                        [(StoreParamV4 (i32 imm:$a), (i32 imm:$b),
-                         Float32Regs:$val, Float32Regs:$val2,
-                         Float32Regs:$val3, Float32Regs:$val4)]>;
+                        []>;
 
 def MoveToParamI64   : MoveToParamInst<Int64Regs, ".b64">;
 def MoveToParamI32   : MoveToParamInst<Int32Regs, ".b32">;
@@ -2122,36 +1747,18 @@ def MoveToParamI16   : NVPTXInst<(outs),
                    !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                               "mov.b32\tparam$a, temp_param_reg;"),
                    [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
-def MoveToParamI8    : NVPTXInst<(outs),
-  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
-                   !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
-                              "mov.b32\tparam$a, temp_param_reg;"),
-                   [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
 
 def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
 def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
 def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
-def StoreRetvalI8     : StoreRetvalInst<Int8Regs, ".b8">;
+def StoreRetvalI8     : StoreRetvalInst<Int16Regs, ".b8">;
 def StoreRetvalV2I64  : StoreRetvalV2Inst<Int64Regs, ".b64">;
 def StoreRetvalV2I32  : StoreRetvalV2Inst<Int32Regs, ".b32">;
 def StoreRetvalV2I16  : StoreRetvalV2Inst<Int16Regs, ".b16">;
-def StoreRetvalV2I8   : StoreRetvalV2Inst<Int8Regs, ".b8">;
+def StoreRetvalV2I8   : StoreRetvalV2Inst<Int16Regs, ".b8">;
 def StoreRetvalV4I32  : StoreRetvalV4Inst<Int32Regs, ".b32">;
 def StoreRetvalV4I16  : StoreRetvalV4Inst<Int16Regs, ".b16">;
-def StoreRetvalV4I8   : StoreRetvalV4Inst<Int8Regs, ".b8">;
-
-//def StoreRetvalI16    : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
-//     !strconcat("\{\n\t",
-//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
-//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
-//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
-//     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
-//def StoreRetvalI8     : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
-//     !strconcat("\{\n\t",
-//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
-//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
-//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
-//     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
+def StoreRetvalV4I8   : StoreRetvalV4Inst<Int16Regs, ".b8">;
 
 def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
 def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
@@ -2162,7 +1769,7 @@ def StoreRetvalV4F32  : StoreRetvalV4Inst<Float32Regs, ".f32">;
 def MoveRetvalI64    : MoveRetvalInst<Int64Regs, ".b64">;
 def MoveRetvalI32    : MoveRetvalInst<Int32Regs, ".b32">;
 def MoveRetvalI16    : MoveRetvalInst<Int16Regs, ".b16">;
-def MoveRetvalI8     : MoveRetvalInst<Int8Regs, ".b8">;
+def MoveRetvalI8     : MoveRetvalInst<Int16Regs, ".b8">;
 def MoveRetvalF64    : MoveRetvalInst<Float64Regs, ".f64">;
 def MoveRetvalF32    : MoveRetvalInst<Float32Regs, ".f32">;
 
@@ -2173,9 +1780,6 @@ def MoveToRetvalF32    : MoveToRetvalInst<Float32Regs, ".f32">;
 def MoveToRetvalI16    : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
                          "cvt.u32.u16\tfunc_retval$num, $val;",
                          [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
-def MoveToRetvalI8     : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
-                         "cvt.u32.u16\tfunc_retval$num, $val;",
-                         [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
 
 def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
 def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2193,7 +1797,6 @@ class LastCallArgInst<NVPTXRegClass regclass> :
 def CallArgI64     : CallArgInst<Int64Regs>;
 def CallArgI32     : CallArgInst<Int32Regs>;
 def CallArgI16     : CallArgInst<Int16Regs>;
-def CallArgI8      : CallArgInst<Int8Regs>;
 
 def CallArgF64     : CallArgInst<Float64Regs>;
 def CallArgF32     : CallArgInst<Float32Regs>;
@@ -2201,7 +1804,6 @@ def CallArgF32     : CallArgInst<Float32Regs>;
 def LastCallArgI64 : LastCallArgInst<Int64Regs>;
 def LastCallArgI32 : LastCallArgInst<Int32Regs>;
 def LastCallArgI16 : LastCallArgInst<Int16Regs>;
-def LastCallArgI8  : LastCallArgInst<Int8Regs>;
 
 def LastCallArgF64 : LastCallArgInst<Float64Regs>;
 def LastCallArgF32 : LastCallArgInst<Float32Regs>;
@@ -2261,9 +1863,6 @@ def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
 def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                    "cvt.u16.u32\t$dst, $src;",
                    [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
-def MoveParamI8  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
-                   "cvt.u16.u32\t$dst, $src;",
-                   [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
 def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
 def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
 
@@ -2275,7 +1874,6 @@ class PseudoUseParamInst<NVPTXRegClass regclass> :
 def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
 def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
 def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
-def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
 def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
 def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
 
@@ -2317,7 +1915,7 @@ multiclass LD<NVPTXRegClass regclass> {
 }
 
 let mayLoad=1, neverHasSideEffects=1 in {
-defm LD_i8  : LD<Int8Regs>;
+defm LD_i8  : LD<Int16Regs>;
 defm LD_i16 : LD<Int16Regs>;
 defm LD_i32 : LD<Int32Regs>;
 defm LD_i64 : LD<Int64Regs>;
@@ -2359,7 +1957,7 @@ multiclass ST<NVPTXRegClass regclass> {
 }
 
 let mayStore=1, neverHasSideEffects=1 in {
-defm ST_i8  : ST<Int8Regs>;
+defm ST_i8  : ST<Int16Regs>;
 defm ST_i16 : ST<Int16Regs>;
 defm ST_i32 : ST<Int32Regs>;
 defm ST_i64 : ST<Int64Regs>;
@@ -2443,7 +2041,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
                 []>;
 }
 let mayLoad=1, neverHasSideEffects=1 in {
-defm LDV_i8  : LD_VEC<Int8Regs>;
+defm LDV_i8  : LD_VEC<Int16Regs>;
 defm LDV_i16 : LD_VEC<Int16Regs>;
 defm LDV_i32 : LD_VEC<Int32Regs>;
 defm LDV_i64 : LD_VEC<Int64Regs>;
@@ -2526,7 +2124,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
     []>;
 }
 let mayStore=1, neverHasSideEffects=1 in {
-defm STV_i8  : ST_VEC<Int8Regs>;
+defm STV_i8  : ST_VEC<Int16Regs>;
 defm STV_i16 : ST_VEC<Int16Regs>;
 defm STV_i32 : ST_VEC<Int32Regs>;
 defm STV_i64 : ST_VEC<Int64Regs>;
@@ -2539,10 +2137,6 @@ defm STV_f64 : ST_VEC<Float64Regs>;
 
 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
 // FIXME: need to add f16 support
-//  def CVTf16i8 :
-//    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
-//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
-//        [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
 //  def CVTf16i16 :
 //    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
@@ -2560,10 +2154,6 @@ multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
     NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
               "selp.f32 \t$d, 1.0, 0.0, $a;",
         [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
-  def CVTf32i8 :
-    NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
-              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
-        [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
   def CVTf32i16 :
     NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
@@ -2581,10 +2171,6 @@ multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
     NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
               "selp.f64 \t$d, 1.0, 0.0, $a;",
         [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
-  def CVTf64i8 :
-    NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
-              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
-        [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
   def CVTf64i16 :
     NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
@@ -2604,24 +2190,6 @@ defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
 
 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
 // FIXME: need to add f16 support
-//  def CVTi8f16:
-//    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
-//              !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
-//        [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
-  def CVTi8f32_ftz:
-    NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
-              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
-        [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
-  def CVTi8f32:
-    NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
-              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
-        [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
-  def CVTi8f64:
-    NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
-              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
-        [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
-
-// FIXME: need to add f16 support
 //  def CVTi16f16:
 //    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
@@ -2680,10 +2248,6 @@ defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
 
 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
-  def ext1to8:
-       NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
-           "selp.u16 \t$d, 1, 0, $a;",
-     [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
   def ext1to16:
        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
            "selp.u16 \t$d, 1, 0, $a;",
@@ -2699,10 +2263,6 @@ multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
 }
 
 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
-  def ext1to8:
-       NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
-           "selp.s16 \t$d, -1, 0, $a;",
-     [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
   def ext1to16:
        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
            "selp.s16 \t$d, -1, 0, $a;",
@@ -2718,23 +2278,6 @@ multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
 }
 
 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
-  // All Int8Regs are emiited as 16bit registers in ptx.
-  // And there is no selp.u8 in ptx.
-  def ext8to16:
-       NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
-           !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
-             !strconcat(OpStr, "8 \t$d, $a;")))),
-     [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
-  def ext8to32:
-       NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
-           !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
-             !strconcat(OpStr, "8 \t$d, $a;")))),
-     [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
-  def ext8to64:
-       NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
-           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
-             !strconcat(OpStr, "8 \t$d, $a;")))),
-     [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
   def ext16to32:
        NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
@@ -2778,18 +2321,9 @@ def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
              "cvt.u16.u64 \t$d, $a;",
        [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
-def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
-             "cvt.u8.u64 \t$d, $a;",
-       [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
              "cvt.u16.u32 \t$d, $a;",
        [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
-def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
-             "cvt.u8.u32 \t$d, $a;",
-       [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
-def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
-             "cvt.u8.u16 \t$d, $a;",
-       [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              TRUNC_to1_asm<".b64">.s,
              [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
@@ -2799,13 +2333,8 @@ def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
              TRUNC_to1_asm<".b16">.s,
              [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
-def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
-             TRUNC_to1_asm<".b16">.s,
-             [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
 
 // Select instructions
-def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
-          (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
           (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
             (TRUNC_32to1 Int32Regs:$pred))>;
@@ -2834,28 +2363,11 @@ def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
 
 // pack a set of smaller int registers to a larger int register
-def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
-                          (ins Int8Regs:$s1, Int8Regs:$s2,
-                               Int8Regs:$s3, Int8Regs:$s4),
-                          !strconcat("{{\n\t.reg .b8\t%t<4>;",
-                          !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
-                          !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
-                          !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
-                          !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
-                           "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
-                          []>;
 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
                           (ins Int16Regs:$s1, Int16Regs:$s2,
                                Int16Regs:$s3, Int16Regs:$s4),
                           "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
                           []>;
-def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
-                          (ins Int8Regs:$s1, Int8Regs:$s2),
-                          !strconcat("{{\n\t.reg .b8\t%t<2>;",
-                          !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
-                          !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
-                                     "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
-                          []>;
 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
                           (ins Int16Regs:$s1, Int16Regs:$s2),
                           "mov.b32\t$d, {{$s1, $s2}};",
@@ -2870,28 +2382,11 @@ def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
                           []>;
 
 // unpack a larger int register to a set of smaller int registers
-def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
-                                Int8Regs:$d3, Int8Regs:$d4),
-                          (ins Int32Regs:$s),
-                          !strconcat("{{\n\t.reg .b8\t%t<4>;",
-                          !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
-                          !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
-                          !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
-                          !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
-                                     "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
-                          []>;
 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
                                  Int16Regs:$d3, Int16Regs:$d4),
                            (ins Int64Regs:$s),
                            "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
                           []>;
-def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
-                          (ins Int16Regs:$s),
-                          !strconcat("{{\n\t.reg .b8\t%t<2>;",
-                          !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
-                          !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
-                                     "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
-                          []>;
 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
                            (ins Int32Regs:$s),
                            "mov.b32\t{{$d1, $d2}}, $s;",