summary | refs | log | tree | commit | diff
path: root/lib/Target
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2012-09-05 06:58:39 +0000
committer: Craig Topper <craig.topper@gmail.com> 2012-09-05 06:58:39 +0000
commit: c17177f89319465ee1700a65a4a44d4b84c815aa (patch)
tree: 883cd6da6be84fa3057fdbd243470d9194bd74d2 /lib/Target
parent: 27b25c2076cc6bcc5c319053e2cfd71f5a847010 (diff)
download: llvm-c17177f89319465ee1700a65a4a44d4b84c815aa.tar.gz
llvm-c17177f89319465ee1700a65a4a44d4b84c815aa.tar.bz2
llvm-c17177f89319465ee1700a65a4a44d4b84c815aa.tar.xz
Add patterns for integer forms of VINSERTF128/VINSERTI128 folded with loads. Also add patterns to turn subvector inserts with loads to index 0 of an undef into VMOVAPS.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163196 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 80
1 files changed, 76 insertions, 4 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 214d624e98..be5ae96dbe 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1017,6 +1017,48 @@ let Predicates = [HasAVX] in {
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+ // Special patterns for handling subvector inserts folded with loads
+ def : Pat<(insert_subvector undef, (alignedloadv4f32 addr:$src), (i32 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (v4f32 (VMOVAPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (alignedloadv2f64 addr:$src), (i32 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (v2f64 (VMOVAPDrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (alignedloadv2i64 addr:$src), (i32 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
+ (v2i64 (VMOVAPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef,
+ (bc_v4i32 (alignedloadv2i64 addr:$src)), (i32 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ (v4i32 (VMOVAPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef,
+ (bc_v8i16 (alignedloadv2i64 addr:$src)), (i32 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
+ (v8i16 (VMOVAPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef,
+ (bc_v16i8 (alignedloadv2i64 addr:$src)), (i32 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)),
+ (v16i8 (VMOVAPSrm addr:$src)), sub_xmm)>;
+
+ def : Pat<(insert_subvector undef, (loadv4f32 addr:$src), (i32 0)),
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (v4f32 (VMOVUPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (loadv2f64 addr:$src), (i32 0)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (v2f64 (VMOVUPDrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (loadv2i64 addr:$src), (i32 0)),
+ (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
+ (v2i64 (VMOVUPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (bc_v4i32 (loadv2i64 addr:$src)), (i32 0)),
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ (v4i32 (VMOVUPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (bc_v8i16 (loadv2i64 addr:$src)), (i32 0)),
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
+ (v8i16 (VMOVUPSrm addr:$src)), sub_xmm)>;
+ def : Pat<(insert_subvector undef, (bc_v16i8 (loadv2i64 addr:$src)), (i32 0)),
+ (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)),
+ (v16i8 (VMOVUPSrm addr:$src)), sub_xmm)>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
@@ -7221,11 +7263,11 @@ def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
@@ -7249,7 +7291,22 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
@@ -7809,7 +7866,22 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (i32 imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
+ (bc_v16i8 (memopv2i64 addr:$src2)),
+ (i32 imm)),
+ (VINSERTI128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
+ (bc_v8i16 (memopv2i64 addr:$src2)),
(i32 imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;