diff options
author | Craig Topper <craig.topper@gmail.com> | 2012-04-12 07:23:00 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2012-04-12 07:23:00 +0000 |
commit | bf596c9c61c8d9ad6e69ead78555902d569c650a (patch) | |
tree | 7ef5aed54459ce5081023abeb0f5c8235a22388d | |
parent | b54efe809f258af2bd1cfbde6e196f70a8a33081 (diff) | |
download | llvm-bf596c9c61c8d9ad6e69ead78555902d569c650a.tar.gz llvm-bf596c9c61c8d9ad6e69ead78555902d569c650a.tar.bz2 llvm-bf596c9c61c8d9ad6e69ead78555902d569c650a.tar.xz |
Fix 128-bit ptest intrinsics to take v2i64 instead of v4f32 since these are integer instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154580 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 8 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-intrinsics-x86.ll | 18 | ||||
-rw-r--r-- | test/CodeGen/X86/sse41.ll | 18 |
4 files changed, 25 insertions, 25 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index f4abba98c0..a6fda4a3af 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -904,13 +904,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Test instruction with bitwise comparison. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7741f409db..408ab16778 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6331,11 +6331,11 @@ def : Pat<(f64 (ftrunc FR64:$src)), let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, OpSize, VEX; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize, VEX; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), @@ -6351,11 +6351,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; } diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index 616601a943..b33493252a 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1078,33 +1078,33 @@ define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone -define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { +define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vptest ; CHECK: sbbl - %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] ret i32 %res } -declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone -define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { +define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vptest ; CHECK: seta ; CHECK: movzbl - %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] ret i32 %res } -declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone -define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { +define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK: vptest ; CHECK: sete ; CHECK: movzbl - %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1] + %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] ret i32 %res } -declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 2ac4cb435a..54264b16ae 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -183,8 +183,8 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: insertps $0, %xmm1, %xmm0 } -define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind { - %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone +define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 ; X32: _ptestz_1: ; X32: ptest %xmm1, %xmm0 @@ -195,8 +195,8 @@ define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: sete %al } -define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind { - %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone +define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 ; X32: _ptestz_2: ; X32: ptest %xmm1, %xmm0 @@ -207,8 +207,8 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind { ; X64: sbbl %eax } -define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind { - %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone +define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind { + %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 ; X32: _ptestz_3: ; X32: ptest %xmm1, %xmm0 @@ -220,9 +220,9 @@ define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind { } -declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone -declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone -declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone ; This used to compile to insertps $0 + insertps $16. insertps $0 is always ; pointless. |