summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElena Demikhovsky <elena.demikhovsky@intel.com>2013-10-27 08:18:37 +0000
committerElena Demikhovsky <elena.demikhovsky@intel.com>2013-10-27 08:18:37 +0000
commit62d66cbec5b2d4e00e86457762df0127ae234e6f (patch)
treee3f9fcdc99261abadac90a0feaf567198db89657
parentcba7d7d579c85f23f2849e906d93b5dc147edf26 (diff)
downloadllvm-62d66cbec5b2d4e00e86457762df0127ae234e6f.tar.gz
llvm-62d66cbec5b2d4e00e86457762df0127ae234e6f.tar.bz2
llvm-62d66cbec5b2d4e00e86457762df0127ae234e6f.tar.xz
AVX-512: PMIN/PMAX intrinsics and patterns
Patch by Cameron McInally <cameron.mcinally@nyu.edu> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193497 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/IR/IntrinsicsX86.td26
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp18
-rw-r--r--lib/Target/X86/X86InstrAVX512.td28
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll56
4 files changed, 127 insertions, 1 deletions
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 537089d4e3..e39eea8da4 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -2719,6 +2719,32 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
llvm_v8f64_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmaxu_q : GCCBuiltin<"__builtin_ia32_pmaxuq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmaxs_q : GCCBuiltin<"__builtin_ia32_pmaxsq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_pminu_d : GCCBuiltin<"__builtin_ia32_pminud512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pminu_q : GCCBuiltin<"__builtin_ia32_pminuq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmins_q : GCCBuiltin<"__builtin_ia32_pminsq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty], [IntrNoMem]>;
}
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0d7818943c..5dbef0f6fc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11173,24 +11173,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
+ case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -11200,6 +11208,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
@@ -11208,6 +11218,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
@@ -11216,6 +11228,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
@@ -11224,6 +11238,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index ed90a4bca3..1ac563010f 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1599,6 +1599,34 @@ defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
(VPMULUDQZrr VR512:$src1, VR512:$src2)>;
+defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 5cd7311eeb..29b508302d 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -233,3 +233,59 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
+
+define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
+ ; CHECK: vpmaxud
+ %res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) {
+ ; CHECK: vpmaxuq
+ %res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) {
+ ; CHECK: vpmaxsd
+ %res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) {
+ ; CHECK: vpmaxsq
+ %res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) {
+ ; CHECK: vpminud
+ %res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) {
+ ; CHECK: vpminuq
+ %res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly
+
+define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) {
+ ; CHECK: vpminsd
+ %res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly
+
+define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
+ ; CHECK: vpminsq
+ %res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly