diff options
Diffstat (limited to 'lib/Target/X86/X86InstrSSE.td')
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 121 |
1 files changed, 47 insertions, 74 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 15b9b91583..759b0e6fe6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2706,14 +2706,10 @@ let Predicates = [UseSSE2] in { /// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { - def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, - Sched<[WriteVecLogic]>; - let isAsmParserOnly = 1, hasSideEffects = 0 in - def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; + def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2730,15 +2726,15 @@ let Predicates = [HasAVX] in { OpSize, VEX, VEX_L; def : Pat<(i32 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; + (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR32:$src)), (SUBREG_TO_REG (i64 0), - (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>; + (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>; + (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR64:$src)), (SUBREG_TO_REG (i64 0), - (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>; + (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2747,18 +2743,18 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, + (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>, Requires<[UseSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), (SUBREG_TO_REG (i64 0), - (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>, + (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>, Requires<[UseSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, + (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>, Requires<[UseSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), (SUBREG_TO_REG (i64 0), - (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>, + (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>, Requires<[UseSSE2]>; //===---------------------------------------------------------------------===// @@ -4248,13 +4244,13 @@ let ExeDomain = SSEPackedInt in { multiclass sse2_pinsrw<bit Is2Addr = 1> { def rri : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, - GR32:$src2, i32i8imm:$src3), + GR32orGR64:$src2, i32i8imm:$src3), !if(Is2Addr, "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, - Sched<[WriteShuffle]>; + (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))], + IIC_SSE_PINSRW>, Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4270,36 +4266,24 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { // Extract let Predicates = [HasAVX] in def VPEXTRWri : Ii8<0xC5, MRMSrcReg, - (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX, + [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), + imm:$src2))]>, TB, OpSize, VEX, Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, - (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>, + [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), + imm:$src2))], IIC_SSE_PEXTRW>, Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert -let Predicates = [HasAVX] in { - defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; - let isAsmParserOnly = 1, hasSideEffects = 0 in - def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), - "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; -} - -let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in { - defm PINSRW : sse2_pinsrw, TB, OpSize; - let isAsmParserOnly = 1, hasSideEffects = 0 in - def PINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), - "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, TB, OpSize, Sched<[WriteShuffle]>; -} // Predicates = [UseSSE2], Constraints = "$src1 = $dst" +let Predicates = [HasAVX] in +defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; + +let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in +defm PINSRW : sse2_pinsrw, TB, OpSize; } // ExeDomain = SSEPackedInt @@ -4309,27 +4293,24 @@ let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { -def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), +def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), + (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], + [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], IIC_SSE_MOVMSK>, VEX; -def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX; let Predicates = [HasAVX2] in { -def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), +def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), + (ins VR256:$src), "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L; -def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; + [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, + VEX, VEX_L; } -def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), +def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], + [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], IIC_SSE_MOVMSK>; -def PMOVMSKBr64r : PDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>; } // ExeDomain = SSEPackedInt @@ -6024,29 +6005,26 @@ let Predicates = [UseSSE41] in { /// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), + def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1), + imm:$src2))]>, OpSize; let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // FIXME: // There's an AssertZext in the way of writing the store pattern // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX] in defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; - def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX; -} defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -6054,7 +6032,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">; /// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { let isCodeGenOnly = 1, hasSideEffects = 0 in - def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), + def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -6123,11 +6101,11 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; /// destination multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr, OpndItins itins = DEFAULT_ITINS> { - def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst), + def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set GR32:$dst, + [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))], itins.rr>, OpSize; @@ -6140,13 +6118,8 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr, } let ExeDomain = SSEPackedSingle in { - let Predicates = [UseAVX] in { + let Predicates = [UseAVX] in defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; - def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, OpSize, VEX; - } defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>; } @@ -6168,13 +6141,13 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), + (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize; + (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, OpSize; def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), !if(Is2Addr, |