From fc678719d935ffb893a3e08905f0d00b649f9d4f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 22 Oct 2013 04:35:20 +0000 Subject: Replace (V)MOVZDI2PDIrr/rm instructions with patterns that select (V)MOVDI2PDIrr/rm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193146 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 2 -- lib/Target/X86/X86InstrSSE.td | 57 ++++++++++++++++------------------------- 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 32d2e16fed..7f66c6ef11 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -441,7 +441,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 }, { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 }, { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, @@ -502,7 +501,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 }, { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, - { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, { X86::VPABSBrr128, X86::VPABSBrm128, 0 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f1bb9f84a7..bf09191954 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4535,24 +4535,12 @@ let isCodeGenOnly = 1 in { // let isCodeGenOnly = 1, SchedRW = [WriteMove] in { let AddedComplexity = 15 in { -def VMOVZDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector GR32:$src)))))], - IIC_SSE_MOVDQ>, VEX; def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>, VEX, VEX_W; -} -let AddedComplexity = 15 in { -def MOVZDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector GR32:$src)))))], - IIC_SSE_MOVDQ>; def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl @@ -4561,43 +4549,42 @@ def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), } } // isCodeGenOnly, SchedRW -let isCodeGenOnly = 1, AddedComplexity = 20, SchedRW = [WriteLoad] in { -def VMOVZDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4i32 (X86vzmovl (v4i32 (scalar_to_vector - (loadi32 addr:$src))))))], - IIC_SSE_MOVDQ>, VEX; -def MOVZDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4i32 (X86vzmovl (v4i32 (scalar_to_vector - (loadi32 addr:$src))))))], - IIC_SSE_MOVDQ>; -} // isCodeGenOnly, AddedComplexity, SchedRW - let Predicates = [UseAVX] in { + let AddedComplexity = 15 in + def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), + (VMOVDI2PDIrr GR32:$src)>; + // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. let AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), - (VMOVZDI2PDIrm addr:$src)>; + (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), - (VMOVZDI2PDIrm addr:$src)>; + (VMOVDI2PDIrm addr:$src)>; } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } -let Predicates = [UseSSE2], AddedComplexity = 20 in { - def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), - (MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), - (MOVZDI2PDIrm addr:$src)>; +let Predicates = [UseSSE2] in { + let AddedComplexity = 15 in + def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), + (MOVDI2PDIrr GR32:$src)>; + + let AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (MOVDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (MOVDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (MOVDI2PDIrm addr:$src)>; + } } // These are the correct encodings of the instructions so that we know how to -- cgit v1.2.3