From ba67c2a4ee21ec86a86df56c230819d2bd85451d Mon Sep 17 00:00:00 2001
From: Matheus Almeida <matheus.almeida@imgtec.com>
Date: Wed, 29 Jan 2014 14:05:28 +0000
Subject: [mips][msa] Add copy_{u,s}.d.

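These instructions are only available on Mips64 cores
that implement the MSA ASE, so they are defined through a new
MSA64Inst class that requires both HasMSA and HasMips64.

On Mips64 the llvm.mips.copy.{s,u}.d intrinsics are now lowered
directly to VEXTRACT_SEXT_ELT/VEXTRACT_ZEXT_ELT, since i64 is legal
there. On 32-bit targets they are still lowered to the generic
EXTRACT_VECTOR_ELT node and left to the type legalizer.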


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200398 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MipsMSAInstrFormats.td | 17 ++++++++++++++++
 lib/Target/Mips/MipsMSAInstrInfo.td    | 12 +++++++++++
 lib/Target/Mips/MipsSEISelLowering.cpp | 37 +++++++++++++++++++++-------------
 test/CodeGen/Mips/msa/elm_copy.ll      | 26 ++++++++++++++++++++++++
 test/MC/Mips/msa/test_elm_msa64.s      | 14 +++++++++++++
 5 files changed, 92 insertions(+), 14 deletions(-)
 create mode 100644 test/MC/Mips/msa/test_elm_msa64.s

diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index 27f0bde766..d463d60354 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -15,6 +15,10 @@ class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
   let Inst{31-26} = 0b011110;
 }
 
+class MSA64Inst : MSAInst {
+  let Predicates = [HasMSA, HasMips64];
+}
+
 class MSACBranch : MSAInst {
   let Inst{31-26} = 0b010001;
 }
@@ -274,6 +278,19 @@ class MSA_ELM_COPY_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
   let Inst{5-0} = minor;
 }
 
+class MSA_ELM_COPY_D_FMT<bits<4> major, bits<6> minor>: MSA64Inst {
+  bits<4> n;
+  bits<5> ws;
+  bits<5> rd;
+
+  let Inst{25-22} = major;
+  let Inst{21-17} = 0b11100;
+  let Inst{16} = n{0};
+  let Inst{15-11} = ws;
+  let Inst{10-6} = rd;
+  let Inst{5-0} = minor;
+}
+
 class MSA_ELM_INSERT_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
   bits<6> n;
   bits<5> rs;
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index a788d60a57..1da8095f57 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -137,6 +137,8 @@ def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx),
                                 (MipsVExtractSExt node:$vec, node:$idx, i16)>;
 def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx),
                                 (MipsVExtractSExt node:$vec, node:$idx, i32)>;
+def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx),
+                                (MipsVExtractSExt node:$vec, node:$idx, i64)>;
 
 def vextract_zext_i8  : PatFrag<(ops node:$vec, node:$idx),
                                 (MipsVExtractZExt node:$vec, node:$idx, i8)>;
@@ -144,6 +146,8 @@ def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx),
                                 (MipsVExtractZExt node:$vec, node:$idx, i16)>;
 def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx),
                                 (MipsVExtractZExt node:$vec, node:$idx, i32)>;
+def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx),
+                                (MipsVExtractZExt node:$vec, node:$idx, i64)>;
 
 def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx),
     (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>;
@@ -614,10 +618,12 @@ class CLTI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000111>;
 class COPY_S_B_ENC : MSA_ELM_COPY_B_FMT<0b0010, 0b011001>;
 class COPY_S_H_ENC : MSA_ELM_COPY_H_FMT<0b0010, 0b011001>;
 class COPY_S_W_ENC : MSA_ELM_COPY_W_FMT<0b0010, 0b011001>;
+class COPY_S_D_ENC : MSA_ELM_COPY_D_FMT<0b0010, 0b011001>;
 
 class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>;
 class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>;
 class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>;
+class COPY_U_D_ENC : MSA_ELM_COPY_D_FMT<0b0011, 0b011001>;
 
 class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>;
 
@@ -1877,6 +1883,8 @@ class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", vextract_sext_i16, v8i16,
                                          GPR32Opnd, MSA128HOpnd>;
 class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", vextract_sext_i32, v4i32,
                                          GPR32Opnd, MSA128WOpnd>;
+class COPY_S_D_DESC : MSA_COPY_DESC_BASE<"copy_s.d", vextract_sext_i64, v2i64,
+                                         GPR64Opnd, MSA128DOpnd>;
 
 class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", vextract_zext_i8,  v16i8,
                                          GPR32Opnd, MSA128BOpnd>;
@@ -1884,6 +1892,8 @@ class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16,
                                          GPR32Opnd, MSA128HOpnd>;
 class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32,
                                          GPR32Opnd, MSA128WOpnd>;
+class COPY_U_D_DESC : MSA_COPY_DESC_BASE<"copy_u.d", vextract_zext_i64, v2i64,
+                                         GPR64Opnd, MSA128DOpnd>;
 
 class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v4f32, FGR32,
                                                  MSA128W>;
@@ -2898,10 +2908,12 @@ def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC;
 def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC;
 def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC;
 def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC;
+def COPY_S_D : COPY_S_D_ENC, COPY_S_D_DESC;
 
 def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC;
 def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC;
 def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC;
+def COPY_U_D : COPY_U_D_ENC, COPY_U_D_DESC;
 
 def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
 def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index cc6411fd88..5a116ec6a0 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1616,25 +1616,34 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_copy_s_w:
     return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
   case Intrinsic::mips_copy_s_d:
-    // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal.
-    // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
-    // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
-                       Op->getOperand(1), Op->getOperand(2));
+    if (HasMips64)
+      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
+      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
+    else {
+      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
+      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+                         Op->getValueType(0), Op->getOperand(1),
+                         Op->getOperand(2));
+    }
   case Intrinsic::mips_copy_u_b:
   case Intrinsic::mips_copy_u_h:
   case Intrinsic::mips_copy_u_w:
     return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
   case Intrinsic::mips_copy_u_d:
-    // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal.
-    // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
-    // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
-    //
-    // Note: When i64 is illegal, this results in copy_s.w instructions instead
-    // of copy_u.w instructions. This makes no difference to the behaviour
-    // since i64 is only illegal when the register file is 32-bit.
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
-                       Op->getOperand(1), Op->getOperand(2));
+    if (HasMips64)
+      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
+      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
+    else {
+      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
+      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
+      // Note: When i64 is illegal, this results in copy_s.w instructions
+      // instead of copy_u.w instructions. This makes no difference to the
+      // behaviour since i64 is only illegal when the register file is 32-bit.
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+                         Op->getValueType(0), Op->getOperand(1),
+                         Op->getOperand(2));
+    }
   case Intrinsic::mips_div_s_b:
   case Intrinsic::mips_div_s_h:
   case Intrinsic::mips_div_s_w:
diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll
index e268c2fd4e..0dd75fa3db 100644
--- a/test/CodeGen/Mips/msa/elm_copy.ll
+++ b/test/CodeGen/Mips/msa/elm_copy.ll
@@ -5,6 +5,10 @@
 ; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | \
 ; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS32
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+msa,+fp64 < %s | \
+; RUN:   FileCheck %s -check-prefix=MIPS-ANY -check-prefix=MIPS64
 
 @llvm_mips_copy_s_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_copy_s_b_RES  = global i32 0, align 16
@@ -21,9 +25,11 @@ declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_s_b_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_b_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_b_ARG1)
 ; MIPS-ANY-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS-ANY-DAG: copy_s.b [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_b_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_b_RES)
 ; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_s_b_test
 ;
@@ -42,9 +48,11 @@ declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_s_h_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_h_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_h_ARG1)
 ; MIPS-ANY-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS-ANY-DAG: copy_s.h [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_h_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_h_RES)
 ; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_s_h_test
 ;
@@ -63,9 +71,11 @@ declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_s_w_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_w_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_w_ARG1)
 ; MIPS-ANY-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS-ANY-DAG: copy_s.w [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_w_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_w_RES)
 ; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_s_w_test
 ;
@@ -84,12 +94,17 @@ declare i64 @llvm.mips.copy.s.d(<2 x i64>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_s_d_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_s_d_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_s_d_ARG1)
 ; MIPS32-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS64-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS32-DAG: copy_s.w [[RD1:\$[0-9]+]], [[WS]][2]
 ; MIPS32-DAG: copy_s.w [[RD2:\$[0-9]+]], [[WS]][3]
+; MIPS64-DAG: copy_s.d [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_s_d_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_s_d_RES)
 ; MIPS32-DAG: sw [[RD1]], 0([[RES]])
 ; MIPS32-DAG: sw [[RD2]], 4([[RES]])
+; MIPS64-DAG: sd [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_s_d_test
 ;
 @llvm_mips_copy_u_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@@ -107,9 +122,11 @@ declare i32 @llvm.mips.copy.u.b(<16 x i8>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_u_b_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_b_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_b_ARG1)
 ; MIPS-ANY-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS-ANY-DAG: copy_u.b [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_b_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_b_RES)
 ; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_u_b_test
 ;
@@ -128,9 +145,11 @@ declare i32 @llvm.mips.copy.u.h(<8 x i16>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_u_h_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_h_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_h_ARG1)
 ; MIPS-ANY-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS-ANY-DAG: copy_u.h [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_h_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_h_RES)
 ; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_u_h_test
 ;
@@ -149,9 +168,11 @@ declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_u_w_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_w_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_w_ARG1)
 ; MIPS-ANY-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS-ANY-DAG: copy_u.w [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_w_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_w_RES)
 ; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_u_w_test
 ;
@@ -170,11 +191,16 @@ declare i64 @llvm.mips.copy.u.d(<2 x i64>, i32) nounwind
 
 ; MIPS-ANY: llvm_mips_copy_u_d_test:
 ; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_d_ARG1)
+; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_d_ARG1)
 ; MIPS32-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
+; MIPS64-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
 ; MIPS32-DAG: copy_s.w [[RD1:\$[0-9]+]], [[WS]][2]
 ; MIPS32-DAG: copy_s.w [[RD2:\$[0-9]+]], [[WS]][3]
+; MIPS64-DAG: copy_u.d [[RD:\$[0-9]+]], [[WS]][1]
 ; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_d_RES)
+; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_d_RES)
 ; MIPS32-DAG: sw [[RD1]], 0([[RES]])
 ; MIPS32-DAG: sw [[RD2]], 4([[RES]])
+; MIPS64-DAG: sd [[RD]], 0([[RES]])
 ; MIPS-ANY: .size llvm_mips_copy_u_d_test
 ;
diff --git a/test/MC/Mips/msa/test_elm_msa64.s b/test/MC/Mips/msa/test_elm_msa64.s
new file mode 100644
index 0000000000..15bfcca247
--- /dev/null
+++ b/test/MC/Mips/msa/test_elm_msa64.s
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -show-encoding | FileCheck %s
+#
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -filetype=obj -o - | \
+# RUN:   llvm-objdump -d -arch=mips64 -mattr=+msa - | \
+# RUN:     FileCheck %s -check-prefix=CHECKOBJDUMP
+#
+# CHECK:        copy_s.d        $19, $w31[0]             # encoding: [0x78,0xb8,0xfc,0xd9]
+# CHECK:        copy_u.d        $18, $w29[1]             # encoding: [0x78,0xf9,0xec,0x99]
+
+# CHECKOBJDUMP:        copy_s.d        $19, $w31[0]
+# CHECKOBJDUMP:        copy_u.d        $18, $w29[1]
+
+        copy_s.d        $19, $w31[0]
+        copy_u.d        $18, $w29[1]
-- 
cgit v1.2.3