author    Kalle Raiskila <kalle.raiskila@nokia.com>  2010-08-02 08:54:39 +0000
committer Kalle Raiskila <kalle.raiskila@nokia.com>  2010-08-02 08:54:39 +0000
commit    82fe467ca59094c19c500038f0a282b70daa1d00
tree      49986da3231c4f97a1848013763f5a9a7eb410f2
parent    ee56c42168f6c4271593f6018c4409b6a5910302
Add preliminary v2i32 support for the SPU backend. As there are no such
registers in SPU, this support boils down to "emulating" them by duplicating
instructions on the general-purpose registers. This adds the most basic
operations on v2i32: passing parameters, addition, subtraction, multiplication
and a few others.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110035 91177308-0d34-0410-b5e6-96231b3b80d8
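As a quick illustration (not part of the commit), the kind of source this
lowering targets could look as follows in C++ with the GCC/Clang vector_size
extension; the typedef and function name are hypothetical. The two i32 lanes
are carried in a 128-bit SPU vector register, and the operations below map
onto the same instructions used for v4i32.

    // Hypothetical example of code the CellSPU backend can now lower:
    // a 2 x i32 vector handled in the 128-bit vector registers.
    typedef int v2i32 __attribute__((vector_size(8)));   // two 32-bit lanes

    v2i32 madd(v2i32 a, v2i32 b, v2i32 c) {
        return a * b + c;   // mul via MPYH/MPYU partial products, add via A
    }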
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td   |  2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 19
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td     | 24
-rw-r--r--  lib/Target/CellSPU/SPUMathInstr.td     | 10
4 files changed, 49 insertions, 6 deletions
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index ec2f663908..047eeb4f1b 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -37,7 +37,7 @@ def RetCC_SPU : CallingConv<[
//===----------------------------------------------------------------------===//
def CCC_SPU : CallingConv<[
CCIfType<[i8, i16, i32, i64, i128, f32, f64,
- v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v2i32],
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
R12, R13, R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27, R28, R29,
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ece19b9b89..bcde5794af 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1067,6 +1067,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2i32:
ArgRegClass = &SPU::VECREGRegClass;
break;
}
@@ -1622,8 +1623,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
+ return SDValue();
}
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
@@ -1768,6 +1768,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
maskVT = MVT::v8i16;
+ } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32) {
+ V2EltIdx0 = 2;
+ maskVT = MVT::v4i32;
} else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
maskVT = MVT::v4i32;
@@ -1847,6 +1850,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
+  // For half vectors, pad the mask with zeros for the second half.
+  // This is needed because the mask is assumed to cover a full vector
+  // elsewhere in the SPU backend.
+  if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32) {
+    for (unsigned i = 0; i < 2; ++i)
+      for (unsigned j = 0; j < BytesPerElement; ++j)
+        ResultMask.push_back(DAG.getConstant(0, MVT::i8));
+  }
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
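A standalone sketch of the mask layout the padding above produces may help
(illustrative only; the helper name and types are not from the commit): a
v2i32 shuffle fills just the first 8 bytes of the 16-byte SHUFB-style
selection mask, and the remaining bytes are zeroed so the rest of the backend
can keep treating the mask as a full vector.

    #include <array>
    #include <cstdint>

    // Hypothetical helper: build a 16-byte byte-selection mask for a v2i32
    // shuffle. elt0/elt1 pick source elements (0-1 from the first operand,
    // 2-3 from the second); the upper half of the mask stays zero-padded.
    std::array<uint8_t, 16> buildV2I32Mask(unsigned elt0, unsigned elt1) {
        std::array<uint8_t, 16> mask{};        // zero-initialized: the padding
        const unsigned bytesPerElement = 4;    // i32 elements
        const unsigned elts[2] = {elt0, elt1};
        for (unsigned i = 0; i < 2; ++i)
            for (unsigned j = 0; j < bytesPerElement; ++j)
                mask[i * bytesPerElement + j] =
                    static_cast<uint8_t>(elts[i] * bytesPerElement + j);
        return mask;                           // bytes 8..15 remain zero
    }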
@@ -1877,6 +1889,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ case MVT::v2i32: n_copies = 2; VT = MVT::i32; break;
}
SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
@@ -1997,7 +2010,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
"vector type!");
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index a7fb14c26a..bc9668a809 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -607,7 +607,7 @@ class ARegInst<RegisterClass rclass>:
multiclass AddInstruction {
def v4i32: AVecInst<v4i32>;
def v16i8: AVecInst<v16i8>;
-
+ def v2i32: AVecInst<v2i32>;
def r32: ARegInst<R32C>;
}
@@ -672,6 +672,12 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+def SF2vec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v2i32 VECREG:$rT), (sub (v2i32 VECREG:$rB), (v2i32 VECREG:$rA)))]>;
+
+
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
@@ -829,6 +835,10 @@ def MPYUv4i32:
MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern */]>;
+def MPYUv2i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
def MPYUr16:
MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
[(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
@@ -908,6 +918,10 @@ def MPYHv4i32:
MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern */]>;
+def MPYHv2i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
def MPYHr32:
MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
[/* no pattern */]>;
@@ -1561,6 +1575,9 @@ def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
(ORv4i32_i32 R32C:$rA)>;
+def : Pat<(v2i32 (SPUprefslot2vec R32C:$rA)),
+ (ORv4i32_i32 R32C:$rA)>;
+
def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
(ORv2i64_i64 R64C:$rA)>;
@@ -1582,6 +1599,9 @@ def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
(ORi32_v4i32 VECREG:$rA)>;
+def : Pat<(SPUvec2prefslot (v2i32 VECREG:$rA)),
+ (ORi32_v4i32 VECREG:$rA)>;
+
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
(ORi64_v2i64 VECREG:$rA)>;
@@ -2123,6 +2143,8 @@ multiclass ShuffleBytes
def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
def v4i32 : SHUFBVecInst<v4i32, v16i8>;
def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i32 : SHUFBVecInst<v2i32, v16i8>;
+ def v2i32_m32 : SHUFBVecInst<v2i32, v4i32>;
def v2i64 : SHUFBVecInst<v2i64, v16i8>;
def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
index ed7129e332..7205593b04 100644
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -39,7 +39,7 @@ def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
(FSMBIv8i16 0xcccc))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v4i32, i32 multiply instruction sequence:
+// v4i32, v2i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def MPYv4i32:
@@ -49,6 +49,14 @@ def MPYv4i32:
(v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
(v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
+def MPYv2i32:
+ Pat<(mul (v2i32 VECREG:$rA), (v2i32 VECREG:$rB)),
+ (Av2i32
+ (v2i32 (Av2i32 (v2i32 (MPYHv2i32 VECREG:$rA, VECREG:$rB)),
+ (v2i32 (MPYHv2i32 VECREG:$rB, VECREG:$rA)))),
+ (v2i32 (MPYUv2i32 VECREG:$rA, VECREG:$rB)))>;
+
+
def MPYi32:
Pat<(mul R32C:$rA, R32C:$rB),
(Ar32
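For reference, a scalar sketch of the multiply decomposition that the
MPYv4i32 and MPYv2i32 patterns encode (my reading of the SPU mpyh/mpyu
semantics; the helper names are illustrative): the SPU only has 16x16-bit
multipliers, so each 32-bit lane product is assembled from three partial
products.

    #include <cstdint>

    // mpyu: unsigned 16x16 multiply of the low halfwords (cf. SPU "mpyu").
    static uint32_t mpyu(uint32_t a, uint32_t b) {
        return (a & 0xffffu) * (b & 0xffffu);
    }

    // mpyh: high halfword of a times low halfword of b, shifted left by 16
    // (cf. SPU "mpyh").
    static uint32_t mpyh(uint32_t a, uint32_t b) {
        return ((a >> 16) * (b & 0xffffu)) << 16;
    }

    // 32-bit product from 16-bit partial products, mirroring the
    // A(A(MPYH(a,b), MPYH(b,a)), MPYU(a,b)) pattern above; applied per lane
    // for v4i32 and v2i32.
    uint32_t mul32(uint32_t a, uint32_t b) {
        return mpyh(a, b) + mpyh(b, a) + mpyu(a, b);
    }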