diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-10-22 09:19:28 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-10-22 09:19:28 +0000 |
commit | ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d (patch) | |
tree | 71244718d2df517eb48a0908ef8b22ef5e4522b7 /lib/Target/X86/X86InstrAVX512.td | |
parent | 3ebe47ee13fa29f1fdcb74f82ca42770e101b40e (diff) | |
download | llvm-ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d.tar.gz llvm-ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d.tar.bz2 llvm-ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d.tar.xz |
AVX-512: aligned / unaligned load and store for 512-bit integer vectors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193156 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 65 |
1 files changed, 35 insertions, 30 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 05e346dec5..8cf5bb4371 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1067,23 +1067,6 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr SSEPackedDouble>, EVEX, EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; -// Use vmovaps/vmovups for AVX-512 integer load/store. -// 512-bit load/store -def : Pat<(alignedloadv8i64 addr:$src), - (VMOVAPSZrm addr:$src)>; -def : Pat<(loadv8i64 addr:$src), - (VMOVUPSZrm addr:$src)>; - -def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; -def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; - -def : Pat<(store (v8i64 VR512:$src), addr:$dst), - (VMOVUPDZmr addr:$dst, VR512:$src)>; -def : Pat<(store (v16i32 VR512:$src), addr:$dst), - (VMOVUPSZmr addr:$dst, VR512:$src)>; - let neverHasSideEffects = 1 in { def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), @@ -1115,25 +1098,36 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), } } -multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC, - RegisterClass KRC, +// 512-bit aligned load/store +def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>; +def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>; + +def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), + (VMOVDQA64mr addr:$dst, VR512:$src)>; +def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), + (VMOVDQA32mr addr:$dst, VR512:$src)>; + +multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm, + RegisterClass RC, RegisterClass KRC, PatFrag ld_frag, X86MemOperand x86memop> { let neverHasSideEffects = 1 in - def rr : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, - EVEX; + def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX; let canFoldAsLoad = 1 in - def rm : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))]>, - EVEX; + def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (ld_frag addr:$src))]>, EVEX; +let mayStore = 1 in + def mr : AVX512XSI<store_opc, MRMDestMem, (outs), + (ins x86memop:$dst, VR512:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX; let Constraints = "$src1 = $dst" in { - def rrk : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), + def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, KRC:$mask, RC:$src2), !strconcat(asm, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>, EVEX, EVEX_K; - def rmk : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), + def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, KRC:$mask, x86memop:$src2), !strconcat(asm, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), @@ -1141,11 +1135,22 @@ let Constraints = "$src1 = $dst" in { } } -defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>, +defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM, + memopv16i32, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, +defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM, + memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +// 512-bit unaligned load/store +def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>; +def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>; + +def : Pat<(store (v8i64 VR512:$src), addr:$dst), + (VMOVDQU64mr addr:$dst, VR512:$src)>; +def : Pat<(store (v16i32 VR512:$src), addr:$dst), + (VMOVDQU32mr addr:$dst, VR512:$src)>; + let AddedComplexity = 20 in { def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), (v16f32 VR512:$src2))), |