summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jan Sjödin <jan_sjodin@yahoo.com> 2012-01-11 15:20:20 +0000
committer Jan Sjödin <jan_sjodin@yahoo.com> 2012-01-11 15:20:20 +0000
commit 46df3adb4e12e7f607a5bd21335311604834ba7e (patch)
tree 1b2e0c130d06b57888461f9c5da463609507acc1
parent 394a1f53b90698486ac7c75724a6bda349cd0353 (diff)
download llvm-46df3adb4e12e7f607a5bd21335311604834ba7e.tar.gz
llvm-46df3adb4e12e7f607a5bd21335311604834ba7e.tar.bz2
llvm-46df3adb4e12e7f607a5bd21335311604834ba7e.tar.xz
Add XOP Intrinsics and tests
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147949 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/IntrinsicsX86.td 529
-rw-r--r-- lib/Target/X86/X86InstrXOP.td 735
-rw-r--r-- test/CodeGen/X86/xop-intrinsics-x86_64.ll 1059
3 files changed, 2250 insertions, 73 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 2d5d9ff0d2..0a14d66ee7 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1960,6 +1960,535 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// XOP
+
+ def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpermil2pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpermil2ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vfrcz_pd :
+ GCCBuiltin<"__builtin_ia32_vfrczpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ps :
+ GCCBuiltin<"__builtin_ia32_vfrczps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_sd :
+ GCCBuiltin<"__builtin_ia32_vfrczsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ss :
+ GCCBuiltin<"__builtin_ia32_vfrczss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vfrcz_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfrczpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfrczps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcmov :
+ GCCBuiltin<"__builtin_ia32_vpcmov">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v2di :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v2di">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v4si :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v4si">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v8hi :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v8hi">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v16qi :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v16qi">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v2df :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v2df">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v4sf :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v4sf">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v4di_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v4di256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v8si_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v8si256">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v16hi_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v16hi256">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v32qi_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v32qi256">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v4df_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v4df256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_v8sf_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_v8sf256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqb :
+ GCCBuiltin<"__builtin_ia32_vpcomeqb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqw :
+ GCCBuiltin<"__builtin_ia32_vpcomeqw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqd :
+ GCCBuiltin<"__builtin_ia32_vpcomeqd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqq :
+ GCCBuiltin<"__builtin_ia32_vpcomeqq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequb :
+ GCCBuiltin<"__builtin_ia32_vpcomequb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequd :
+ GCCBuiltin<"__builtin_ia32_vpcomequd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequq :
+ GCCBuiltin<"__builtin_ia32_vpcomequq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequw :
+ GCCBuiltin<"__builtin_ia32_vpcomequw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseb :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalsed :
+ GCCBuiltin<"__builtin_ia32_vpcomfalsed">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseq :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseub :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseud :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseuq :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseuw :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalsew :
+ GCCBuiltin<"__builtin_ia32_vpcomfalsew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeb :
+ GCCBuiltin<"__builtin_ia32_vpcomgeb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomged :
+ GCCBuiltin<"__builtin_ia32_vpcomged">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeq :
+ GCCBuiltin<"__builtin_ia32_vpcomgeq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeub :
+ GCCBuiltin<"__builtin_ia32_vpcomgeub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeud :
+ GCCBuiltin<"__builtin_ia32_vpcomgeud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeuq :
+ GCCBuiltin<"__builtin_ia32_vpcomgeuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeuw :
+ GCCBuiltin<"__builtin_ia32_vpcomgeuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgew :
+ GCCBuiltin<"__builtin_ia32_vpcomgew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtb :
+ GCCBuiltin<"__builtin_ia32_vpcomgtb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtd :
+ GCCBuiltin<"__builtin_ia32_vpcomgtd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtq :
+ GCCBuiltin<"__builtin_ia32_vpcomgtq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtub :
+ GCCBuiltin<"__builtin_ia32_vpcomgtub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtud :
+ GCCBuiltin<"__builtin_ia32_vpcomgtud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtuq :
+ GCCBuiltin<"__builtin_ia32_vpcomgtuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtuw :
+ GCCBuiltin<"__builtin_ia32_vpcomgtuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtw :
+ GCCBuiltin<"__builtin_ia32_vpcomgtw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleb :
+ GCCBuiltin<"__builtin_ia32_vpcomleb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomled :
+ GCCBuiltin<"__builtin_ia32_vpcomled">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleq :
+ GCCBuiltin<"__builtin_ia32_vpcomleq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleub :
+ GCCBuiltin<"__builtin_ia32_vpcomleub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleud :
+ GCCBuiltin<"__builtin_ia32_vpcomleud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleuq :
+ GCCBuiltin<"__builtin_ia32_vpcomleuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleuw :
+ GCCBuiltin<"__builtin_ia32_vpcomleuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomlew :
+ GCCBuiltin<"__builtin_ia32_vpcomlew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltb :
+ GCCBuiltin<"__builtin_ia32_vpcomltb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltd :
+ GCCBuiltin<"__builtin_ia32_vpcomltd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltq :
+ GCCBuiltin<"__builtin_ia32_vpcomltq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltub :
+ GCCBuiltin<"__builtin_ia32_vpcomltub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltud :
+ GCCBuiltin<"__builtin_ia32_vpcomltud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltuq :
+ GCCBuiltin<"__builtin_ia32_vpcomltuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltuw :
+ GCCBuiltin<"__builtin_ia32_vpcomltuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltw :
+ GCCBuiltin<"__builtin_ia32_vpcomltw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneb :
+ GCCBuiltin<"__builtin_ia32_vpcomneb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomned :
+ GCCBuiltin<"__builtin_ia32_vpcomned">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneq :
+ GCCBuiltin<"__builtin_ia32_vpcomneq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneub :
+ GCCBuiltin<"__builtin_ia32_vpcomneub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneud :
+ GCCBuiltin<"__builtin_ia32_vpcomneud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneuq :
+ GCCBuiltin<"__builtin_ia32_vpcomneuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneuw :
+ GCCBuiltin<"__builtin_ia32_vpcomneuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomnew :
+ GCCBuiltin<"__builtin_ia32_vpcomnew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueb :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrued :
+ GCCBuiltin<"__builtin_ia32_vpcomtrued">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueq :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueub :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueud :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueuq :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueuw :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtruew :
+ GCCBuiltin<"__builtin_ia32_vpcomtruew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vphaddbd :
+ GCCBuiltin<"__builtin_ia32_vphaddbd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddbq :
+ GCCBuiltin<"__builtin_ia32_vphaddbq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddbw :
+ GCCBuiltin<"__builtin_ia32_vphaddbw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadddq :
+ GCCBuiltin<"__builtin_ia32_vphadddq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubd :
+ GCCBuiltin<"__builtin_ia32_vphaddubd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubq :
+ GCCBuiltin<"__builtin_ia32_vphaddubq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubw :
+ GCCBuiltin<"__builtin_ia32_vphaddubw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddudq :
+ GCCBuiltin<"__builtin_ia32_vphaddudq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadduwd :
+ GCCBuiltin<"__builtin_ia32_vphadduwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadduwq :
+ GCCBuiltin<"__builtin_ia32_vphadduwq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddwd :
+ GCCBuiltin<"__builtin_ia32_vphaddwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddwq :
+ GCCBuiltin<"__builtin_ia32_vphaddwq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubbw :
+ GCCBuiltin<"__builtin_ia32_vphsubbw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubdq :
+ GCCBuiltin<"__builtin_ia32_vphsubdq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubwd :
+ GCCBuiltin<"__builtin_ia32_vphsubwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vpmacsdd :
+ GCCBuiltin<"__builtin_ia32_vpmacsdd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsdqh :
+ GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsdql :
+ GCCBuiltin<"__builtin_ia32_vpmacsdql">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdd :
+ GCCBuiltin<"__builtin_ia32_vpmacssdd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdqh :
+ GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdql :
+ GCCBuiltin<"__builtin_ia32_vpmacssdql">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsswd :
+ GCCBuiltin<"__builtin_ia32_vpmacsswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssww :
+ GCCBuiltin<"__builtin_ia32_vpmacssww">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacswd :
+ GCCBuiltin<"__builtin_ia32_vpmacswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsww :
+ GCCBuiltin<"__builtin_ia32_vpmacsww">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmadcsswd :
+ GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmadcswd :
+ GCCBuiltin<"__builtin_ia32_vpmadcswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpperm :
+ GCCBuiltin<"__builtin_ia32_vpperm">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotb :
+ GCCBuiltin<"__builtin_ia32_vprotb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotd :
+ GCCBuiltin<"__builtin_ia32_vprotd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotq :
+ GCCBuiltin<"__builtin_ia32_vprotq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotw :
+ GCCBuiltin<"__builtin_ia32_vprotw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshab :
+ GCCBuiltin<"__builtin_ia32_vpshab">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshad :
+ GCCBuiltin<"__builtin_ia32_vpshad">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshaq :
+ GCCBuiltin<"__builtin_ia32_vpshaq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshaw :
+ GCCBuiltin<"__builtin_ia32_vpshaw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlb :
+ GCCBuiltin<"__builtin_ia32_vpshlb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshld :
+ GCCBuiltin<"__builtin_ia32_vpshld">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlq :
+ GCCBuiltin<"__builtin_ia32_vpshlq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlw :
+ GCCBuiltin<"__builtin_ia32_vpshlw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+
+//===----------------------------------------------------------------------===//
// MMX
// Empty MMX state op.
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index aef2c3ac88..0734333837 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -1,89 +1,119 @@
-//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===----------------------------------------------------------------------===//
+//===-----------------------------------------------------------------------===//
//
// This file describes XOP (eXtended OPerations)
//
-//===----------------------------------------------------------------------===//
+//===-----------------------------------------------------------------------===//
-multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> {
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, VEX;
- def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, VEX;
+ [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
+ defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+ defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
+}
+
+// Scalar load 2 addr operand instructions
+let Constraints = "$src1 = $dst" in {
+multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ Operand memop, ComplexPattern mem_cpat> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1,
+ (bitconvert mem_cpat:$src2)))]>, VEX;
}
+} // Constraints = "$src1 = $dst"
+
let isAsmParserOnly = 1 in {
- defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>;
- defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>;
- defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>;
- defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>;
- defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>;
- defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>;
- defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>;
- defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>;
- defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>;
- defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>;
- defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>;
- defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>;
- defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>;
- defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>;
- defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>;
- defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>;
- defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>;
- defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>;
- defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>;
-}
-
-multiclass xop2op256<bits<8> opc, string OpcodeStr> {
+ defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+ defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
+}
+
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ PatFrag memop> {
def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, VEX, VEX_L;
+ [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, VEX;
+ [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
}
let isAsmParserOnly = 1 in {
- defm VFRCZPS : xop2op256<0x80, "vfrczps">;
- defm VFRCZPD : xop2op256<0x81, "vfrczpd">;
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
+ memopv8f32>;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
+ memopv4f64>;
}
-multiclass xop3op<bits<8> opc, string OpcodeStr> {
+multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4VOp3;
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_W;
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
+ VEX_4V, VEX_W;
def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4VOp3;
+ [(set VR128:$dst,
+ (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>,
+ VEX_4VOp3;
}
let isAsmParserOnly = 1 in {
- defm VPSHLW : xop3op<0x95, "vpshlw">;
- defm VPSHLQ : xop3op<0x97, "vpshlq">;
- defm VPSHLD : xop3op<0x96, "vpshld">;
- defm VPSHLB : xop3op<0x94, "vpshlb">;
- defm VPSHAW : xop3op<0x99, "vpshaw">;
- defm VPSHAQ : xop3op<0x9B, "vpshaq">;
- defm VPSHAD : xop3op<0x9A, "vpshad">;
- defm VPSHAB : xop3op<0x98, "vpshab">;
- defm VPROTW : xop3op<0x91, "vprotw">;
- defm VPROTQ : xop3op<0x93, "vprotq">;
- defm VPROTD : xop3op<0x92, "vprotd">;
- defm VPROTB : xop3op<0x90, "vprotb">;
+ defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+ defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+ defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+ defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+ defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+ defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+ defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
}
multiclass xop3opimm<bits<8> opc, string OpcodeStr> {
@@ -105,32 +135,35 @@ let isAsmParserOnly = 1 in {
}
// Instruction where second source can be memory, but third must be register
-multiclass xop4opm2<bits<8> opc, string OpcodeStr> {
+multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, VEX_4V, VEX_I8IMM;
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM;
def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, VEX_4V, VEX_I8IMM;
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>, VEX_4V, VEX_I8IMM;
}
let isAsmParserOnly = 1 in {
- defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">;
- defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">;
- defm VPMACSWW : xop4opm2<0x95, "vpmacsww">;
- defm VPMACSWD : xop4opm2<0x96, "vpmacswd">;
- defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">;
- defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">;
- defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">;
- defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">;
- defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">;
- defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">;
- defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">;
- defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">;
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
}
// Instruction where second source can be memory, third must be imm8
@@ -204,38 +237,594 @@ let isAsmParserOnly = 1 in {
defm VPCMOV : xop4op256<0xA2, "vpcmov">;
}
-multiclass xop5op<bits<8> opc, string OpcodeStr> {
+multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
+ Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> { // Int128/Int256: intrinsics matched by the 128-/256-bit forms; ld_128/ld_256: load fragments used by the memory forms
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>;
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>; // rr: all three 128-bit sources are registers
def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, MemOp4;
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>, // rm: $src3 is loaded through ld_128
+ VEX_W, MemOp4;
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>;
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>; // mr: $src2 is loaded through ld_128
def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>;
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>; // rrY: all three 256-bit sources are registers
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, MemOp4;
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>, // rmY: $src3 is loaded through ld_256
+ VEX_W, MemOp4;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>;
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>; // mrY: $src2 is loaded through ld_256
}
-defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">;
-defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">;
+defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
+ int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>;
+defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
+ int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>;
+
+// XOP Intrinsics patterns
+
+// VPCOM EQ: the "equal" intrinsics select VPCOM* with immediate 4
+def : Pat<(int_x86_xop_vpcomeqw VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomeqw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomequw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomequw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomequq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomequq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomequd VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomequd VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomequb VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomequb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomeqq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomeqq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomeqd VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomeqd VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 4))>;
+
+def : Pat<(int_x86_xop_vpcomeqb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 4))>;
+def : Pat<(int_x86_xop_vpcomeqb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 4))>;
+
+// VPCOM FALSE: the "false" intrinsics select VPCOM* with immediate 6
+def : Pat<(int_x86_xop_vpcomfalsew VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalsew VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalseuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalseuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalseuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalseuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalseud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalseud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalseub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalseub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalseq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalseq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalsed VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalsed VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 6))>;
+
+def : Pat<(int_x86_xop_vpcomfalseb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 6))>;
+def : Pat<(int_x86_xop_vpcomfalseb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 6))>;
+
+// VPCOM GE: the "greater or equal" intrinsics select VPCOM* with immediate 3
+def : Pat<(int_x86_xop_vpcomgew VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgew VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomgeuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgeuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomgeuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgeuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomgeud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgeud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomgeub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgeub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomgeq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgeq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomged VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomged VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 3))>;
+
+def : Pat<(int_x86_xop_vpcomgeb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 3))>;
+def : Pat<(int_x86_xop_vpcomgeb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 3))>;
+
+// VPCOM GT: the "greater than" intrinsics select VPCOM* with immediate 2
+def : Pat<(int_x86_xop_vpcomgtw VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtd VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtd VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 2))>;
+
+def : Pat<(int_x86_xop_vpcomgtb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 2))>;
+def : Pat<(int_x86_xop_vpcomgtb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 2))>;
+
+// VPCOM LE: the "less or equal" intrinsics select VPCOM* with immediate 1
+def : Pat<(int_x86_xop_vpcomlew VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomlew VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomleuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomleuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomleuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomleuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomleud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomleud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomleub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomleub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomleq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomleq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomled VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomled VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 1))>;
+
+def : Pat<(int_x86_xop_vpcomleb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 1))>;
+def : Pat<(int_x86_xop_vpcomleb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 1))>;
+
+// VPCOM LT: the "less than" intrinsics select VPCOM* with immediate 0
+def : Pat<(int_x86_xop_vpcomltw VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltd VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltd VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 0))>;
+
+def : Pat<(int_x86_xop_vpcomltb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 0))>;
+def : Pat<(int_x86_xop_vpcomltb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 0))>;
+
+// VPCOM NE: the "not equal" intrinsics select VPCOM* with immediate 5
+def : Pat<(int_x86_xop_vpcomnew VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomnew VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomneuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomneuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomneuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomneuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomneud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomneud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomneub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomneub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomneq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomneq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomned VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomned VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 5))>;
+
+def : Pat<(int_x86_xop_vpcomneb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 5))>;
+def : Pat<(int_x86_xop_vpcomneb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 5))>;
+
+// VPCOM TRUE: the "true" intrinsics select VPCOM* with immediate 7 (6 is FALSE)
+def : Pat<(int_x86_xop_vpcomtruew VR128:$src1, VR128:$src2),
+ (VPCOMWri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtruew VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMWmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrueuw VR128:$src1, VR128:$src2),
+ (VPCOMUWri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrueuw VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUWmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrueuq VR128:$src1, VR128:$src2),
+ (VPCOMUQri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrueuq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUQmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrueud VR128:$src1, VR128:$src2),
+ (VPCOMUDri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrueud VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUDmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrueub VR128:$src1, VR128:$src2),
+ (VPCOMUBri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrueub VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMUBmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrueq VR128:$src1, VR128:$src2),
+ (VPCOMQri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrueq VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMQmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrued VR128:$src1, VR128:$src2),
+ (VPCOMDri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrued VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMDmi VR128:$src1, addr:$src2, (i8 7))>;
+
+def : Pat<(int_x86_xop_vpcomtrueb VR128:$src1, VR128:$src2),
+ (VPCOMBri VR128:$src1, VR128:$src2, (i8 7))>;
+def : Pat<(int_x86_xop_vpcomtrueb VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))),
+ (VPCOMBmi VR128:$src1, addr:$src2, (i8 7))>;
+
+// VPPERM: all-register form, then forms folding a memopv2i64 load into $src3 (rm) or $src2 (mr)
+def : Pat<(int_x86_xop_vpperm VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPPERMrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpperm VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPPERMrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpperm VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPPERMmr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+// VPCMOV: 128-bit forms fold a memopv2i64 load, 256-bit (Y) forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VPCMOV di: v2di forms fold a memopv2i64 load, v4di_256 forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov_v2di VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v2di VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v2di VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4di_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4di_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4di_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VPCMOV si: v4si forms fold a memopv2i64 load, v8si_256 forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov_v4si VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4si VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4si VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8si_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8si_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8si_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+
+// VPCMOV hi: v8hi forms fold a memopv2i64 load, v16hi_256 forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov_v8hi VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8hi VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8hi VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v16hi_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v16hi_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v16hi_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VPCMOV qi: v16qi forms fold a memopv2i64 load, v32qi_256 forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov_v16qi VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v16qi VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v16qi VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v32qi_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v32qi_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v32qi_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VPCMOV df: v2df forms fold a memopv2i64 load, v4df_256 forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov_v2df VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v2df VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v2df VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4df_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4df_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4df_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VPCMOV sf: v4sf forms fold a memopv2i64 load, v8sf_256 forms fold a memopv4i64 load
+def : Pat<(int_x86_xop_vpcmov_v4sf VR128:$src1, VR128:$src2, VR128:$src3),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4sf VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))),
+ (VPCMOVrm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v4sf VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3),
+ (VPCMOVmr VR128:$src1, addr:$src2, VR128:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8sf_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8sf_256 VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))),
+ (VPCMOVrmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_xop_vpcmov_v8sf_256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3),
+ (VPCMOVmrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
new file mode 100644
index 0000000000..ca1651e495
--- /dev/null
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -0,0 +1,1059 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
+
+define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vpermil2pd
+ %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; all vector operands are passed by value
+ ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ %vec = load <2 x double>* %a1
+ %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; second vector operand is the loaded %vec; CHECK-NOT expects no separate vmovaps
+ ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ %vec = load <2 x double>* %a2
+ %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; third vector operand is the loaded %vec; CHECK-NOT expects no separate vmovaps
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vpermil2pd
+ ; CHECK: ymm
+ %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ; all vector operands are passed by value
+ ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ ; CHECK: ymm
+ %vec = load <4 x double>* %a1
+ %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ; second vector operand is the loaded %vec; CHECK-NOT expects no separate vmovaps
+ ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ ; CHECK: ymm
+ %vec = load <4 x double>* %a2
+ %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ; third vector operand is the loaded %vec; CHECK-NOT expects no separate vmovaps
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vpermil2ps
+ %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ; all vector operands are passed by value; no load-operand variant follows for this intrinsic
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vpermil2ps
+ ; CHECK: ymm
+ %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ; all vector operands are passed by value; no load-operand variant follows for this intrinsic
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+ ; CHECK: vpcmov
+ %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ; all operands by value; NOTE(review): no load-operand test is visible for the 128-bit vpcmov intrinsics
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov_v2di(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+ ; CHECK: vpcmov
+ %res = call <2 x i64> @llvm.x86.xop.vpcmov.v2di(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ; all operands by value
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov.v2di(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcmov_v4si(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ; CHECK: vpcmov
+ %res = call <4 x i32> @llvm.x86.xop.vpcmov.v4si(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ; all operands by value
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcmov.v4si(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcmov_v8hi(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ; CHECK: vpcmov
+ %res = call <8 x i16> @llvm.x86.xop.vpcmov.v8hi(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ; all operands by value
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcmov.v8hi(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcmov_v16qi(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ; CHECK: vpcmov
+ %res = call <16 x i8> @llvm.x86.xop.vpcmov.v16qi(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; all operands by value
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcmov.v16qi(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vpcmov_v2df(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vpcmov
+ %res = call <2 x double> @llvm.x86.xop.vpcmov.v2df(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; all operands by value
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpcmov.v2df(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpcmov_v4sf(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vpcmov
+ %res = call <4 x float> @llvm.x86.xop.vpcmov.v4sf(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; all operands by value
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpcmov.v4sf(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ; all vector operands are passed by value
+ ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %vec = load <4 x i64>* %a1
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ; second operand is the loaded %vec; CHECK-NOT expects no separate vmovaps
+ ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %vec = load <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ; third operand is the loaded %vec; CHECK-NOT expects no separate vmovaps
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_v4di_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.v4di.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ; all operands by value; NOTE(review): no load-operand test is visible for the typed 256-bit vpcmov intrinsics
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.v4di.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <8 x i32> @test_int_x86_xop_vpcmov_v8si_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <8 x i32> @llvm.x86.xop.vpcmov.v8si.256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) ; all operands by value
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.xop.vpcmov.v8si.256(<8 x i32>, <8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @test_int_x86_xop_vpcmov_v16hi_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <16 x i16> @llvm.x86.xop.vpcmov.v16hi.256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2) ; all operands by value
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.xop.vpcmov.v16hi.256(<16 x i16>, <16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @test_int_x86_xop_vpcmov_v32qi_256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <32 x i8> @llvm.x86.xop.vpcmov.v32qi.256(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; all operands by value
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.xop.vpcmov.v32qi.256(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpcmov_v4df_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <4 x double> @llvm.x86.xop.vpcmov.v4df.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; all operands by value
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpcmov.v4df.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpcmov_v8sf_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <8 x float> @llvm.x86.xop.vpcmov.v8sf.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; all operands by value
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpcmov.v8sf.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+; vpcomeqb in register and memory-operand form. Both tests only require the
+; vpcomb mnemonic; the _mem test additionally requires that no vmovaps appears
+; before it. The predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpcomb
+ %vec = load <16 x i8>* %a1
+ %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+
+; vpcom "eq" predicate, remaining element forms (w, d, q, ub, ud, uq, uw).
+; Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "false" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "ge" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "gt" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "le" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "lt" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "ne" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpcom "true" predicate, one test per element form (b, d, q, ub, ud, uq, uw,
+; w). Each test only requires the width-specific vpcom mnemonic in the output;
+; the predicate encoding itself is not verified here.
+define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomb
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomd
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %lhs, <16 x i8> %rhs) {
+ ; CHECK: vpcomub
+ %cmp = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %lhs, <16 x i8> %rhs)
+ ret <16 x i8> %cmp
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %lhs, <4 x i32> %rhs) {
+ ; CHECK: vpcomud
+ %cmp = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %lhs, <4 x i32> %rhs)
+ ret <4 x i32> %cmp
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %lhs, <2 x i64> %rhs) {
+ ; CHECK: vpcomuq
+ %cmp = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %lhs, <2 x i64> %rhs)
+ ret <2 x i64> %cmp
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomuw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %lhs, <8 x i16> %rhs) {
+ ; CHECK: vpcomw
+ %cmp = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %lhs, <8 x i16> %rhs)
+ ret <8 x i16> %cmp
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vphadd* intrinsics, one test per variant. Each pattern requires the mnemonic
+; followed by whitespace and an xmm operand on the same line, so it matches the
+; emitted instruction and not the identical substring in the test function's
+; own symbol name in the assembly output.
+define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
+ ; CHECK: vphaddbd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
+ ; CHECK: vphaddbq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
+ ; CHECK: vphaddbw {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
+ ; CHECK: vphadddq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
+ ; CHECK: vphaddubd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
+ ; CHECK: vphaddubq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
+ ; CHECK: vphaddubw {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
+ ; CHECK: vphaddudq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
+ ; CHECK: vphadduwd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
+ ; CHECK: vphadduwq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
+ ; CHECK: vphaddwd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
+ ; CHECK: vphaddwq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+; vphsub* intrinsics in register form, plus memory-operand (_mem) forms for
+; vphsubdq and vphsubwd that require no vmovaps before the instruction. Each
+; pattern requires the mnemonic followed by whitespace and an xmm operand, so
+; it matches the emitted instruction rather than the identical substring in
+; the test function's own symbol name; this also makes the vmovaps exclusion
+; extend up to the instruction itself.
+define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
+ ; CHECK: vphsubbw {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
+ ; CHECK: vphsubdq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
+ ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vphsubdq {{.*}}xmm
+ %vec = load <4 x i32>* %a0
+ %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
+ ; CHECK: vphsubwd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vphsubwd {{.*}}xmm
+ %vec = load <8 x i16>* %a0
+ %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+; vpmacs* intrinsics, one test per variant. Each pattern requires the mnemonic
+; followed by whitespace and an xmm operand on the same line, so it matches the
+; emitted instruction and not the identical substring in the test function's
+; own symbol name in the assembly output.
+define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacsdd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacsdqh {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacsdql {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacssdd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacssdqh {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacssdql {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacsswd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ; CHECK: vpmacssww {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacswd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ; CHECK: vpmacsww {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+; vpmadcsswd / vpmadcswd intrinsics; the _mem test loads the second source from
+; memory and requires no vmovaps before the instruction. Each pattern requires
+; the mnemonic followed by whitespace and an xmm operand, so it matches the
+; emitted instruction rather than the identical substring in the test
+; function's own symbol name in the assembly output.
+define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmadcsswd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmadcswd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpmadcswd {{.*}}xmm
+ %vec = load <8 x i16>* %a1
+ %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+; vpperm in register form and with the third (_rm) or second (_mr) source
+; loaded from memory; the memory forms require no vmovaps before the
+; instruction. Each pattern requires the mnemonic followed by whitespace and an
+; xmm operand, so it matches the emitted instruction rather than the "vpperm"
+; substring of the test function's own symbol name in the assembly output.
+define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ; CHECK: vpperm {{.*}}xmm
+ %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
+ ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpperm {{.*}}xmm
+ %vec = load <16 x i8>* %a2
+ %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
+ ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpperm {{.*}}xmm
+ %vec = load <16 x i8>* %a1
+ %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+; vprot{b,d,q,w} intrinsics. Each pattern requires the mnemonic followed by
+; whitespace and an xmm operand on the same line, so it matches the emitted
+; instruction and not the identical substring in the test function's own
+; symbol name in the assembly output.
+define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vprotb {{.*}}xmm
+ %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vprotd {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vprotq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vprotw {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+; vpsha{b,d,q,w} intrinsics. Each pattern requires the mnemonic followed by
+; whitespace and an xmm operand on the same line, so it matches the emitted
+; instruction and not the identical substring in the test function's own
+; symbol name in the assembly output.
+define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpshab {{.*}}xmm
+ %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpshad {{.*}}xmm
+ %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpshaq {{.*}}xmm
+ %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpshaw {{.*}}xmm
+ %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpshlb
+ %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpshld
+ %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpshlq
+ %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpshlw
+ %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) { ; Test: vpshlw where the SECOND operand is loaded through pointer %a1; the negative pattern forbids 'vmovaps' before the 'vpshlw' match.
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpshlw
+ %vec = load <8 x i16>* %a1 ; second operand read from memory; no explicit alignment is given on the load
+ %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ; NOTE(review): presumably verifies the load is folded into the instruction's memory operand - confirm against the RUN line, which is not visible here
+ ret <8 x i16> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) { ; Test: vpshlw where the FIRST operand is loaded through pointer %a0; the negative pattern forbids 'vmovaps' before the 'vpshlw' match.
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpshlw
+ %vec = load <8 x i16>* %a0 ; first operand read from memory; no explicit alignment is given on the load
+ %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ; NOTE(review): presumably verifies the load is folded into the instruction's memory operand - confirm against the RUN line, which is not visible here
+ ret <8 x i16> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) { ; Test: llvm.x86.xop.vfrcz.ss with two by-value <4 x float> operands; forbids any text containing 'mov' before the 'vfrczss' match.
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczss
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ; as declared in this file, the intrinsic takes two vector operands
+ ret <4 x float> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) { ; Test: vfrcz.ss where the second operand is built from a scalar float loaded through %a1; forbids 'mov' before the 'vfrczss' match.
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczss
+ %elem = load float* %a1 ; scalar read from memory
+ %vec = insertelement <4 x float> undef, float %elem, i32 0 ; place the scalar in lane 0; the remaining lanes stay undef
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ; NOTE(review): presumably verifies the scalar load is folded into a memory operand - confirm against the RUN line, which is not visible here
+ ret <4 x float> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) { ; Test: llvm.x86.xop.vfrcz.sd with two by-value <2 x double> operands; forbids any text containing 'mov' before the 'vfrczsd' match.
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczsd
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ; as declared in this file, the intrinsic takes two vector operands
+ ret <2 x double> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) { ; Test: vfrcz.sd where the second operand is built from a scalar double loaded through %a1; forbids 'mov' before the 'vfrczsd' match.
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczsd
+ %elem = load double* %a1 ; scalar read from memory
+ %vec = insertelement <2 x double> undef, double %elem, i32 0 ; place the scalar in lane 0; the other lane stays undef
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ; NOTE(review): presumably verifies the scalar load is folded into a memory operand - confirm against the RUN line, which is not visible here
+ ret <2 x double> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) { ; Test: llvm.x86.xop.vfrcz.pd with a single by-value <2 x double> operand; the pattern below expects the text 'vfrczpd' in the checked output.
+ ; CHECK: vfrczpd
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ; unary intrinsic: one vector in, one vector of the same type out
+ ret <2 x double> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) { ; Test: vfrcz.pd with its operand loaded through pointer %a0; the negative pattern forbids 'vmovaps' before the 'vfrczpd' match.
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczpd
+ %vec = load <2 x double>* %a0 ; operand read from memory; no explicit alignment is given on the load
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ; NOTE(review): presumably verifies the load is folded into the instruction's memory operand - confirm against the RUN line, which is not visible here
+ ret <2 x double> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) { ; Test: llvm.x86.xop.vfrcz.pd.256 with a single by-value <4 x double> operand; expects 'vfrczpd' and then 'ymm' in the checked output.
+ ; CHECK: vfrczpd
+ ; CHECK: ymm
+ %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ; NOTE(review): the two patterns are separate, so 'ymm' only has to appear somewhere after the mnemonic match, not necessarily on the same line
+ ret <4 x double> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) { ; Test: vfrcz.pd.256 with its operand loaded through pointer %a0; forbids 'vmovaps' before 'vfrczpd', then expects 'ymm' afterwards.
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczpd
+ ; CHECK: ymm
+ %vec = load <4 x double>* %a0 ; operand read from memory; no explicit alignment is given on the load
+ %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ; NOTE(review): presumably verifies the load is folded into the instruction's memory operand - confirm against the RUN line, which is not visible here
+ ret <4 x double> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) { ; Test: llvm.x86.xop.vfrcz.ps with a single by-value <4 x float> operand; the pattern below expects the text 'vfrczps' in the checked output.
+ ; CHECK: vfrczps
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ; unary intrinsic: one vector in, one vector of the same type out
+ ret <4 x float> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) { ; Test: vfrcz.ps with its operand loaded through pointer %a0; the negative pattern forbids 'vmovaps' before the 'vfrczps' match.
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczps
+ %vec = load <4 x float>* %a0 ; operand read from memory; no explicit alignment is given on the load
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ; NOTE(review): presumably verifies the load is folded into the instruction's memory operand - confirm against the RUN line, which is not visible here
+ ret <4 x float> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) { ; Test: llvm.x86.xop.vfrcz.ps.256 with a single by-value <8 x float> operand; expects 'vfrczps' and then 'ymm' in the checked output.
+ ; CHECK: vfrczps
+ ; CHECK: ymm
+ %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ; NOTE(review): the two patterns are separate, so 'ymm' only has to appear somewhere after the mnemonic match, not necessarily on the same line
+ ret <8 x float> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) { ; Test: vfrcz.ps.256 with its operand loaded through pointer %a0; forbids 'vmovaps' before 'vfrczps', then expects 'ymm' afterwards.
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczps
+ ; CHECK: ymm
+ %vec = load <8 x float>* %a0 ; operand read from memory; no explicit alignment is given on the load
+ %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ; NOTE(review): presumably verifies the load is folded into the instruction's memory operand - confirm against the RUN line, which is not visible here
+ ret <8 x float> %res ; return the intrinsic result directly so the call cannot be dropped as unused
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+