-rw-r--r--   lib/Target/ARM/ARMInstrFormats.td    2
-rw-r--r--   lib/Target/ARM/ARMInstrNEON.td      82
-rw-r--r--   test/MC/ARM/neon-mul-encoding.s     65
3 files changed, 139 insertions, 10 deletions
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index a0d04c03fc..27f2b67fcd 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1996,7 +1996,7 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
// VFP/NEON Instruction aliases for type suffices.
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
- InstAlias<!strconcat(opc, dt, "\t", asm), Result>;
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> {
def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>;
def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3f27b0c2f6..a7a74de56d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -3669,15 +3669,6 @@ def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
-// Two-operand aliases.
-def : NEONInstAlias<"vmul${p}.f32 $Ddn $Dm$lane",
- (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-def : NEONInstAlias<"vmul${p}.f32 $Qdn $Dm$lane",
- (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
- VectorIndex32:$lane, pred:$p)>;
-
-
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
@@ -5620,6 +5611,79 @@ defm VEXTq : VFPDT32ReqInstAlias<"vext${p}", "$Vd, $Vn, $Vm, $index",
defm VEXTq : VFPDT64ReqInstAlias<"vext${p}", "$Vd, $Vn, $Vm, $index",
(VEXTq64 QPR:$Vd, QPR:$Vn, QPR:$Vm, imm0_1:$index, pred:$p)>;
+// VMUL data-type suffix aliases for the more-specific types.
+def : NEONInstAlias<"vmul${p}.s16 $Dd, $Dn $Dm$lane",
+ (VMULslv4i16 DPR:$Dd, DPR:$Dn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.s16 $Qd, $Qn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qd, QPR:$Qn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u16 $Dd, $Dn $Dm$lane",
+ (VMULslv4i16 DPR:$Dd, DPR:$Dn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u16 $Qd, $Qn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qd, QPR:$Qn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.s32 $Dd, $Dn $Dm$lane",
+ (VMULslv2i32 DPR:$Dd, DPR:$Dn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.s32 $Qd, $Qn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qd, QPR:$Qn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u32 $Dd, $Dn $Dm$lane",
+ (VMULslv2i32 DPR:$Dd, DPR:$Dn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u32 $Qd, $Qn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qd, QPR:$Qn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.s16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.s16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.s32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.s32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.u32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr",
diff --git a/test/MC/ARM/neon-mul-encoding.s b/test/MC/ARM/neon-mul-encoding.s
index 1c7caf2a55..990187e782 100644
--- a/test/MC/ARM/neon-mul-encoding.s
+++ b/test/MC/ARM/neon-mul-encoding.s
@@ -72,3 +72,68 @@
@ CHECK: vqdmull.s16 q8, d16, d17 @ encoding: [0xa1,0x0d,0xd0,0xf2]
@ CHECK: vqdmull.s32 q8, d16, d17 @ encoding: [0xa1,0x0d,0xe0,0xf2]
+
+
+ vmul.i16 d0, d4[2]
+ vmul.s16 d1, d7[3]
+ vmul.u16 d2, d1[1]
+ vmul.i32 d3, d2[0]
+ vmul.s32 d4, d3[1]
+ vmul.u32 d5, d4[0]
+ vmul.f32 d6, d5[1]
+
+ vmul.i16 q0, d4[2]
+ vmul.s16 q1, d7[3]
+ vmul.u16 q2, d1[1]
+ vmul.i32 q3, d2[0]
+ vmul.s32 q4, d3[1]
+ vmul.u32 q5, d4[0]
+ vmul.f32 q6, d5[1]
+
+ vmul.i16 d9, d0, d4[2]
+ vmul.s16 d8, d1, d7[3]
+ vmul.u16 d7, d2, d1[1]
+ vmul.i32 d6, d3, d2[0]
+ vmul.s32 d5, d4, d3[1]
+ vmul.u32 d4, d5, d4[0]
+ vmul.f32 d3, d6, d5[1]
+
+ vmul.i16 q9, q0, d4[2]
+ vmul.s16 q8, q1, d7[3]
+ vmul.u16 q7, q2, d1[1]
+ vmul.i32 q6, q3, d2[0]
+ vmul.s32 q5, q4, d3[1]
+ vmul.u32 q4, q5, d4[0]
+ vmul.f32 q3, q6, d5[1]
+
+@ CHECK: vmul.i16 d0, d0, d4[2] @ encoding: [0x64,0x08,0x90,0xf2]
+@ CHECK: vmul.i16 d1, d1, d7[3] @ encoding: [0x6f,0x18,0x91,0xf2]
+@ CHECK: vmul.i16 d2, d2, d1[1] @ encoding: [0x49,0x28,0x92,0xf2]
+@ CHECK: vmul.i32 d3, d3, d2[0] @ encoding: [0x42,0x38,0xa3,0xf2]
+@ CHECK: vmul.i32 d4, d4, d3[1] @ encoding: [0x63,0x48,0xa4,0xf2]
+@ CHECK: vmul.i32 d5, d5, d4[0] @ encoding: [0x44,0x58,0xa5,0xf2]
+@ CHECK: vmul.f32 d6, d6, d5[1] @ encoding: [0x65,0x69,0xa6,0xf2]
+
+@ CHECK: vmul.i16 q0, q0, d4[2] @ encoding: [0x64,0x08,0x90,0xf3]
+@ CHECK: vmul.i16 q1, q1, d7[3] @ encoding: [0x6f,0x28,0x92,0xf3]
+@ CHECK: vmul.i16 q2, q2, d1[1] @ encoding: [0x49,0x48,0x94,0xf3]
+@ CHECK: vmul.i32 q3, q3, d2[0] @ encoding: [0x42,0x68,0xa6,0xf3]
+@ CHECK: vmul.i32 q4, q4, d3[1] @ encoding: [0x63,0x88,0xa8,0xf3]
+@ CHECK: vmul.i32 q5, q5, d4[0] @ encoding: [0x44,0xa8,0xaa,0xf3]
+@ CHECK: vmul.f32 q6, q6, d5[1] @ encoding: [0x65,0xc9,0xac,0xf3]
+
+@ CHECK: vmul.i16 d9, d0, d4[2] @ encoding: [0x64,0x98,0x90,0xf2]
+@ CHECK: vmul.i16 d8, d1, d7[3] @ encoding: [0x6f,0x88,0x91,0xf2]
+@ CHECK: vmul.i16 d7, d2, d1[1] @ encoding: [0x49,0x78,0x92,0xf2]
+@ CHECK: vmul.i32 d6, d3, d2[0] @ encoding: [0x42,0x68,0xa3,0xf2]
+@ CHECK: vmul.i32 d5, d4, d3[1] @ encoding: [0x63,0x58,0xa4,0xf2]
+@ CHECK: vmul.i32 d4, d5, d4[0] @ encoding: [0x44,0x48,0xa5,0xf2]
+@ CHECK: vmul.f32 d3, d6, d5[1] @ encoding: [0x65,0x39,0xa6,0xf2]
+
+@ CHECK: vmul.i16 q9, q0, d4[2] @ encoding: [0x64,0x28,0xd0,0xf3]
+@ CHECK: vmul.i16 q8, q1, d7[3] @ encoding: [0x6f,0x08,0xd2,0xf3]
+@ CHECK: vmul.i16 q7, q2, d1[1] @ encoding: [0x49,0xe8,0x94,0xf3]
+@ CHECK: vmul.i32 q6, q3, d2[0] @ encoding: [0x42,0xc8,0xa6,0xf3]
+@ CHECK: vmul.i32 q5, q4, d3[1] @ encoding: [0x63,0xa8,0xa8,0xf3]
+@ CHECK: vmul.i32 q4, q5, d4[0] @ encoding: [0x44,0x88,0xaa,0xf3]
+@ CHECK: vmul.f32 q3, q6, d5[1] @ encoding: [0x65,0x69,0xac,0xf3]
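
Taken together, the new patterns let the assembler accept the more-specific .s16/.u16/.s32/.u32 suffixes as well as the two-operand by-lane forms, canonicalizing both to the three-operand .i16/.i32/.f32 encodings checked above. As a minimal sketch of how such an MC test is normally driven (the usual llvm-mc plus FileCheck RUN-line convention; the exact -mcpu and -triple values below are assumptions, not shown in this diff):

  @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm -show-encoding < %s | FileCheck %s

  vmul.s16 d1, d7[3]    @ accepted via the new alias; prints as: vmul.i16 d1, d1, d7[3]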