summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2011-11-29 05:37:58 +0000
committerCraig Topper <craig.topper@gmail.com>2011-11-29 05:37:58 +0000
commitfe2a6c584a62508e7e7ab990a16bf84af51ce52e (patch)
treee4122f8146dd1199c50cd763bb4ab30f63674f21
parent108126cfc6eddf1e0c9c7db39e25323403f04bbc (diff)
downloadllvm-fe2a6c584a62508e7e7ab990a16bf84af51ce52e.tar.gz
llvm-fe2a6c584a62508e7e7ab990a16bf84af51ce52e.tar.bz2
llvm-fe2a6c584a62508e7e7ab990a16bf84af51ce52e.tar.xz
Fix VINSERTF128/VEXTRACTF128 to be marked as FP instructions. Allow execution dependency fix pass to convert them to their integer equivalents when AVX2 is enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145376 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp10
-rw-r--r--lib/Target/X86/X86InstrSSE.td4
-rw-r--r--test/CodeGen/X86/avx-cast.ll2
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll3
-rw-r--r--test/CodeGen/X86/avx-splat.ll2
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll10
6 files changed, 23 insertions, 8 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5d310af3eb..b28f9c1e7c 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3568,10 +3568,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
{ X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
- { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
- { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0be59ccd87..1d8e3ce284 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7147,7 +7147,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7194,7 +7194,7 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index d6d2415ea0..32d450cac9 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -16,7 +16,7 @@ entry:
ret <4 x double> %shuffle.i
}
-; CHECK: vpxor
+; CHECK: vxorps
; CHECK-NEXT: vinsertf128 $0
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 57e7312154..eccc842406 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -2140,7 +2140,8 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
- ; CHECK: vmovdqu
+ ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
+ ; CHECK: vmovups
; add operation forces the execution domain.
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index af20b90322..f8522c2695 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -47,7 +47,7 @@ entry:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
; shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
+; CHECK: vmovaps
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index bab7fb81e5..a0f351de76 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1046,3 +1046,13 @@ define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; CHECK: vmovdqu
+ ; add operation forces the execution domain.
+ %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind