[mips][msa] Added support for matching shf from normal IR (i.e. not intrinsics)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191302 91177308-0d34-0410-b5e6-96231b3b80d8
author: Daniel Sanders <daniel.sanders@imgtec.com> 2013-09-24 14:20:00 +0000
committer: Daniel Sanders <daniel.sanders@imgtec.com> 2013-09-24 14:20:00 +0000
commit: 93d995719e2459a6e9ccdb2c93a8ede8fa88c899 (patch)
tree: 913c7a829ffc158956b34cc39441fbc6da703451
parent: 7e0df9aa2966d0462e34511524a4958e226b74ee (diff)
download: llvm-93d995719e2459a6e9ccdb2c93a8ede8fa88c899.tar.gz
llvm-93d995719e2459a6e9ccdb2c93a8ede8fa88c899.tar.bz2
llvm-93d995719e2459a6e9ccdb2c93a8ede8fa88c899.tar.xz
7 files changed, 149 insertions, 11 deletions
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 26321c9924..2612e3ba1c 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -211,6 +211,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
   O << MipsFCCToString((Mips::CondCode)MO.getImm());
 }
 
+void MipsInstPrinter::
+printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
+  llvm_unreachable("TODO");
+}
+
 bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
                                  unsigned OpNo, raw_ostream &OS) {
   OS << "\t" << Str << "\t";
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 1253ab0c48..11590ad824 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -96,6 +96,7 @@ private:
   void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
   void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
   void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+  void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
 
   bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
                   raw_ostream &OS);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 04f90833d8..90d2cc6f29 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -225,6 +225,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT";
   case MipsISD::VNOR:              return "MipsISD::VNOR";
   case MipsISD::VSHF:              return "MipsISD::VSHF";
+  case MipsISD::SHF:               return "MipsISD::SHF";
   default:                         return NULL;
   }
 }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 0cb67b8d44..9e9e995918 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -174,6 +174,7 @@ namespace llvm {
 
       // Vector Shuffle with mask as an operand
       VSHF,  // Generic shuffle
+      SHF,   // 4-element set shuffle.
 
       // Combined (XOR (OR $a, $b), -1)
       VNOR,
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 4909743e34..5592ac5690 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -23,6 +23,8 @@ def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
 def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
                                     SDTCisInt<1>, SDTCisVec<1>,
                                     SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+                                   SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>;
 
 def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
 def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
@@ -39,6 +41,7 @@ def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp,
 def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
 def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>;
+def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>;
 
 def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
 def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
@@ -1074,6 +1077,16 @@ class MSA_I8_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   InstrItinClass Itinerary = itin;
 }
 
+class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterClass RCWD,
+                           RegisterClass RCWS = RCWD,
+                           InstrItinClass itin = NoItinerary> {
+  dag OutOperandList = (outs RCWD:$wd);
+  dag InOperandList = (ins RCWS:$ws, uimm8:$u8);
+  string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+  list<dag> Pattern = [(set RCWD:$wd, (MipsSHF immZExt8:$u8, RCWS:$ws))];
+  InstrItinClass Itinerary = itin;
+}
+
 class MSA_I10_LDI_DESC_BASE<string instr_asm, RegisterClass RCWD,
                             InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs RCWD:$wd);
@@ -2066,9 +2079,9 @@ class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>;
 class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>;
 class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>;
 
-class SHF_B_DESC : MSA_I8_X_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>;
-class SHF_H_DESC : MSA_I8_X_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>;
-class SHF_W_DESC : MSA_I8_X_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>;
+class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128B>;
+class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128H>;
+class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128W>;
 
 class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>;
 class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 929e91eb60..b79e5321b6 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1428,6 +1428,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_pcnt_w:
   case Intrinsic::mips_pcnt_d:
     return lowerMSAUnaryIntr(Op, DAG, ISD::CTPOP);
+  case Intrinsic::mips_shf_b:
+  case Intrinsic::mips_shf_h:
+  case Intrinsic::mips_shf_w:
+    return DAG.getNode(MipsISD::SHF, SDLoc(Op), Op->getValueType(0),
+                       Op->getOperand(2), Op->getOperand(1));
   case Intrinsic::mips_sll_b:
   case Intrinsic::mips_sll_h:
   case Intrinsic::mips_sll_w:
@@ -1735,6 +1740,72 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
   return SDValue();
 }
 
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+// When undef's appear they are treated as if they were whatever value is
+// necessary in order to fit the above form.
+//
+// For example:
+//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+//                                 i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+//   (SHF_H $w0, $w1, 27)
+// where the 27 comes from:
+//   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  int SHFIndices[4] = { -1, -1, -1, -1 };
+
+  if (Indices.size() < 4)
+    return SDValue();
+
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Indices.size(); j += 4) {
+      int Idx = Indices[j];
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
+                     DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
+}
+
 // Lower VECTOR_SHUFFLE into VSHF.
 //
 // This mostly consists of converting the shuffle indices in Indices into a
@@ -1802,6 +1873,9 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
   for (int i = 0; i < ResTyNumElts; ++i)
     Indices.push_back(Node->getMaskElt(i));
 
+  SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
   return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
 }
 
diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll
index 35a5cf8658..9854234c71 100644
--- a/test/CodeGen/Mips/msa/shuffle.ll
+++ b/test/CodeGen/Mips/msa/shuffle.ll
@@ -156,14 +156,16 @@ define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
   ; CHECK: .size vshf_v8i16_4
 }
 
+; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
+; instruction when using a single vector.
+
 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: vshf_v4i32_0:
 
   %1 = load <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
-  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
   store <4 x i32> %2, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
 
@@ -177,8 +179,7 @@ define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
   %1 = load <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
   store <4 x i32> %2, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
 
@@ -193,8 +194,7 @@ define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
   %2 = load <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
-  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo
-  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
   store <4 x i32> %3, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
 
@@ -225,8 +225,7 @@ define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
   %1 = load <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
-  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
-  ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
   store <4 x i32> %2, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
 
@@ -311,3 +310,47 @@ define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   ret void
   ; CHECK: .size vshf_v2i64_4
 }
+
+define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: shf_v16i8_0:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
+  ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
+  store <16 x i8> %2, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size shf_v16i8_0
+}
+
+define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: shf_v8i16_0:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
+  store <8 x i16> %2, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size shf_v8i16_0
+}
+
+define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: shf_v4i32_0:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
+  store <4 x i32> %2, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size shf_v4i32_0
+}
+
+; shf.d does not exist
author	Daniel Sanders <daniel.sanders@imgtec.com>	2013-09-24 14:20:00 +0000
committer	Daniel Sanders <daniel.sanders@imgtec.com>	2013-09-24 14:20:00 +0000
commit	93d995719e2459a6e9ccdb2c93a8ede8fa88c899 (patch)
tree	913c7a829ffc158956b34cc39441fbc6da703451
parent	7e0df9aa2966d0462e34511524a4958e226b74ee (diff)
download	llvm-93d995719e2459a6e9ccdb2c93a8ede8fa88c899.tar.gz llvm-93d995719e2459a6e9ccdb2c93a8ede8fa88c899.tar.bz2 llvm-93d995719e2459a6e9ccdb2c93a8ede8fa88c899.tar.xz