diff options
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 13 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 40 | ||||
-rw-r--r-- | test/CodeGen/X86/avx2-intrinsics-x86.ll | 16 |
3 files changed, 65 insertions, 4 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index fc9525a37b..345c2803f4 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1731,10 +1731,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; } +// Vector extract and insert +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; +} // Misc. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index de7326a280..fc36884bf1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7130,14 +7130,17 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values // +let neverHasSideEffects = 1 in { def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +let mayLoad = 1 in def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f128mem:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +} def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; @@ -7174,14 +7177,17 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // +let neverHasSideEffects = 1 in { def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +let mayStore = 1 in def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), (ins f128mem:$dst, VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +} def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; @@ -7514,16 +7520,46 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // -def VPERM2I128rr : AVXAIi8<0x46, MRMSrcReg, (outs VR256:$dst), +def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>, VEX_4V; -def VPERM2I128rm : AVXAIi8<0x46, MRMSrcMem, (outs VR256:$dst), +def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2), imm:$src3))]>, VEX_4V; + +//===----------------------------------------------------------------------===// +// VINSERTI128 - Insert packed integer values +// +def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR128:$src2, i8imm:$src3), + "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>, + VEX_4V; +def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i128mem:$src2, i8imm:$src3), + "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), + imm:$src3))]>, VEX_4V; + +//===----------------------------------------------------------------------===// +// VEXTRACTI128 - Extract packed integer values +// +def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i8imm:$src2), + "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>, + VEX; +let neverHasSideEffects = 1, mayStore = 1 in +def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), + (ins i128mem:$dst, VR256:$src1, i8imm:$src2), + "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 7d67b998d8..4ab0884b5b 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -886,3 +886,19 @@ define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) { ret <4 x i64> %res } declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly + + +define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) { + ; CHECK: vextracti128 + %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone + + +define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) { + ; CHECK: vinserti128 + %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone |