diff options
author | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-18 02:11:34 +0000 |
---|---|---|
committer | Bruno Cardoso Lopes <bruno.cardoso@gmail.com> | 2011-08-18 02:11:34 +0000 |
commit | 24b90e2287289d1756dafb8d935f0252738594ba (patch) | |
tree | 866f08a953976ad51b6e3763d269ced54465fa8d | |
parent | 3f0e2377609e916d77e192fff2badc7f6c05be4f (diff) | |
download | llvm-24b90e2287289d1756dafb8d935f0252738594ba.tar.gz llvm-24b90e2287289d1756dafb8d935f0252738594ba.tar.bz2 llvm-24b90e2287289d1756dafb8d935f0252738594ba.tar.xz |
Cleanup vector logical ops in AVX and add use int versions for simple
v2i64
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137919 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 40 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-logic.ll | 18 |
2 files changed, 38 insertions, 20 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3a75b0e166..333dd607e5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1613,21 +1613,22 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { - let Pattern = []<dag> in { - defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f128mem, - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], - [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, VEX_4V; - - defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f128mem, - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))], - [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, - OpSize, VEX_4V; - } + // In AVX no need to add a pattern for 128-bit logical rr ps, because they + // are all promoted to v2i64, and the patterns are covered by the int + // version. This is needed in SSE only, because v2i64 isn't supported on + // SSE1, but only on SSE2. + defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, [], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, @@ -2546,15 +2547,14 @@ let ExeDomain = SSEPackedInt in { def VPANDNrr : PDI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>, VEX_4V; + [(set VR128:$dst, + (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V; def VPANDNrm : PDI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>, - VEX_4V; + [(set VR128:$dst, (X86andnp VR128:$src1, + (memopv2i64 addr:$src2)))]>, VEX_4V; } } diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll index d9e5d081fb..518c09c869 100644 --- a/test/CodeGen/X86/avx-logic.ll +++ b/test/CodeGen/X86/avx-logic.ll @@ -159,3 +159,21 @@ entry: %2 = bitcast <8 x i32> %and.i to <8 x float> ret <8 x float> %2 } + +;;; Test that basic 2 x i64 logic use the integer version on AVX + +; CHECK: vpandn %xmm +define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { +entry: + %y = xor <2 x i64> %a, <i64 -1, i64 -1> + %x = and <2 x i64> %a, %y + ret <2 x i64> %x +} + +; CHECK: vpand %xmm +define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { +entry: + %x = and <2 x i64> %a, %b + ret <2 x i64> %x +} + |