summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2014-04-09 23:39:25 +0000
committerJim Grosbach <grosbach@apple.com>2014-04-09 23:39:25 +0000
commitafb4ef3549c25bb3d4808be484aa0163bd829492 (patch)
treeca5f7f6b887855944d0563a81337802c696e7e7a /lib
parente9915738beed0d8b4b50dca8c0ed8c76e7d320db (diff)
downloadllvm-afb4ef3549c25bb3d4808be484aa0163bd829492.tar.gz
llvm-afb4ef3549c25bb3d4808be484aa0163bd829492.tar.bz2
llvm-afb4ef3549c25bb3d4808be484aa0163bd829492.tar.xz
Add support for load folding of avx1 logical instructions
AVX supports logical operations using an operand from memory. Unfortunately, because integer operations were not added until AVX2, the AVX1 logical operations' types were preventing the isel from folding the loads. In a limited number of cases the peephole optimizer would fold the loads, but most were missed. This patch adds explicit patterns with appropriate casts in order for these loads to be folded. The included test cases run on reduced examples and disable the peephole optimizer to ensure the folds are being pattern matched. Patch by Louis Gerbarg <lgg@apple.com> rdar://16355124 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205938 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/X86/X86InstrSSE.td13
1 files changed, 13 insertions, 0 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f2f3967944..72a18e7d79 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2973,6 +2973,19 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
+// AVX1 lacks 256-bit integer ops (added in AVX2), so in these logical nodes
+// the load is typed v4i64 beneath a v8f32 bitcast; match that shape to fold it.
+let Predicates = [HasAVX1Only] in {
+ def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//