From 1bd15700a0eb3057d3e2d65070c3fc6b99e0d8a2 Mon Sep 17 00:00:00 2001
From: Nick Lewycky <nicholas@mxc.ca>
Date: Mon, 20 Jun 2011 18:33:26 +0000
Subject: Emit movq for 64-bit register to XMM register moves, but continue to
 accept movd when assembling.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133452 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td      | 27 +++++++++++++++++++++------
 test/CodeGen/X86/bitcast2.ll       | 20 +++++++++++++++++++-
 test/CodeGen/X86/vec_set-8.ll      |  2 +-
 test/CodeGen/X86/vec_set-9.ll      |  2 +-
 test/CodeGen/X86/vec_set-C.ll      |  2 +-
 test/CodeGen/X86/vec_shuffle-14.ll |  4 ++--
 test/CodeGen/X86/vec_shuffle-17.ll |  2 +-
 test/MC/X86/x86-64.s               |  8 ++++++++
 test/MC/X86/x86_64-avx-encoding.s  |  6 +++---
 9 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7774057d3d..e6d0e0912c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2850,11 +2850,11 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                       [(set VR128:$dst,
                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
 def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        "movq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (v2i64 (scalar_to_vector GR64:$src)))]>;
 def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}",
+                       "movq\t{$src, $dst|$dst, $src}",
                        [(set FR64:$dst, (bitconvert GR64:$src))]>;
 
 
@@ -2895,7 +2895,7 @@ def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
                                      (iPTR 0))), addr:$dst)]>;
 
 def MOVPQIto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
-                         "mov{d|q}\t{$src, $dst|$dst, $src}",
+                         "movq\t{$src, $dst|$dst, $src}",
                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
                                            (iPTR 0)))]>;
 def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
@@ -2903,7 +2903,7 @@ def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                        [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
 
 def MOVSDto64rr  : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
-                        "mov{d|q}\t{$src, $dst|$dst, $src}",
+                        "movq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))]>;
 def MOVSDto64mr  : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
@@ -2931,7 +2931,7 @@ def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                                       (v4i32 (scalar_to_vector GR32:$src)))))]>,
                                       VEX;
 def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+                       "movq\t{$src, $dst|$dst, $src}", // X86-64 only
                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                       (v2i64 (scalar_to_vector GR64:$src)))))]>,
                                       VEX, VEX_W;
@@ -2942,7 +2942,7 @@ def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        [(set VR128:$dst, (v4i32 (X86vzmovl
                                       (v4i32 (scalar_to_vector GR32:$src)))))]>;
 def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+                       "movq\t{$src, $dst|$dst, $src}", // X86-64 only
                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                       (v2i64 (scalar_to_vector GR64:$src)))))]>;
 }
@@ -2968,6 +2968,21 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
             (MOVZDI2PDIrm addr:$src)>;
 }
 
+// Accept the old "movd" spelling for 64-bit GPR <-> XMM moves when assembling.
+// We used to emit it to work around a Darwin assembler bug; we now print movq.
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+                (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+                (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+                (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+                (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
 //===---------------------------------------------------------------------===//
 // SSE2 - Move Quadword
 //===---------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/bitcast2.ll b/test/CodeGen/X86/bitcast2.ll
index 48922b5f5a..71ecedf8f1 100644
--- a/test/CodeGen/X86/bitcast2.ll
+++ b/test/CodeGen/X86/bitcast2.ll
@@ -1,13 +1,31 @@
-; RUN: llc < %s -march=x86-64 | grep movd | count 2
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; RUN: llc < %s -march=x86-64 | not grep rsp
 
 define i64 @test1(double %A) {
+; CHECK: test1
+; CHECK: movq
    %B = bitcast double %A to i64
    ret i64 %B
 }
 
 define double @test2(i64 %A) {
+; CHECK: test2
+; CHECK: movq
    %B = bitcast i64 %A to double
    ret double %B
 }
 
+define i32 @test3(float %A) {
+; CHECK: test3
+; CHECK: movd
+   %B = bitcast float %A to i32
+   ret i32 %B
+}
+
+define float @test4(i32 %A) {
+; CHECK: test4
+; CHECK: movd
+   %B = bitcast i32 %A to float
+   ret float %B
+}
+
diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll
index 66056d0add..aaf66adb09 100644
--- a/test/CodeGen/X86/vec_set-8.ll
+++ b/test/CodeGen/X86/vec_set-8.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 ; CHECK-NOT: movsd
-; CHECK: movd {{%rdi|%rcx}}, %xmm0
+; CHECK: movq {{%rdi|%rcx}}, %xmm0
 ; CHECK-NOT: movsd
 
 define <2 x i64> @test(i64 %i) nounwind  {
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index 3656e5f6ca..aaa3184cf3 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 | grep movq | count 1
 ; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0}
 
 define <2 x i64> @test3(i64 %A) nounwind {
diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll
index 7636ac3b37..0cf308c235 100644
--- a/test/CodeGen/X86/vec_set-C.ll
+++ b/test/CodeGen/X86/vec_set-C.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 1
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq
 
 define <2 x i64> @t1(i64 %x) nounwind  {
 	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
index f0cfc44ab1..a40f361a0c 100644
--- a/test/CodeGen/X86/vec_shuffle-14.ll
+++ b/test/CodeGen/X86/vec_shuffle-14.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd | count 1
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 2
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 3
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movd | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | grep movq | count 4
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xor
 
 define <4 x i32> @t1(i32 %a) nounwind  {
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
index ebc8c5b34a..e174c2a8ac 100644
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ b/test/CodeGen/X86/vec_shuffle-17.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 ; CHECK-NOT: xor
-; CHECK: movd {{%rdi|%rcx}}, %xmm0
+; CHECK: movq {{%rdi|%rcx}}, %xmm0
 ; CHECK-NOT: xor
 ; PR2108
 
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index a36ba25a6e..234e64915b 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -1128,3 +1128,11 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 // CHECK: strq
 // CHECK: encoding: [0x48,0x0f,0x00,0xc8]
 	str %rax
+
+// CHECK: movq %rdi, %xmm0
+// CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+	movq %rdi,%xmm0
+
+// CHECK: movq %rdi, %xmm0
+// CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+	movd %rdi,%xmm0
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index 7a96bb5a2b..7d0dcee81a 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -1444,9 +1444,9 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x79,0x7e,0x30]
           vmovd  %xmm14, (%rax)
 
-// CHECK: vmovd  %rax, %xmm14
-// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
-          vmovd  %rax, %xmm14
+// CHECK: vmovd  %eax, %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
+          vmovd  %eax, %xmm14
 
 // CHECK: vmovq  %xmm14, (%rax)
 // CHECK: encoding: [0xc5,0x79,0xd6,0x30]
-- 
cgit v1.2.3