diff options
-rw-r--r-- | include/llvm/IntrinsicsX86.td | 65 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 25 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 91 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/mmx-shift.ll | 5 |
5 files changed, 107 insertions, 81 deletions
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 5bcfa7922b..0484926d07 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -324,9 +324,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">, Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">, - Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, - llvm_i32_ty], [IntrNoMem]>; def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; @@ -336,15 +333,44 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">, Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">, - Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, - llvm_i32_ty], [IntrNoMem]>; def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">, Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">, + 
Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">, + Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">, + Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">, + Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; } // Integer comparison ops @@ -939,6 +965,33 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, llvm_v1i64_ty], [IntrNoMem]>; + + def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">, + Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">, + Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">, + Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty, + llvm_i32_ty], 
[IntrNoMem]>; + def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">, + Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, + llvm_i32_ty], [IntrNoMem]>; } // Pack ops. diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index d4413e9fbc..d484695f60 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -118,7 +118,8 @@ let isTwoAddress = 1 in { } multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, - string OpcodeStr, Intrinsic IntId> { + string OpcodeStr, Intrinsic IntId, + Intrinsic IntId2> { def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), @@ -131,11 +132,7 @@ let isTwoAddress = 1 in { def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst), (ins VR64:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId VR64:$src1, - (v1i64 (bitconvert - (v2i32 (vector_shuffle immAllZerosV, - (v2i32 (scalar_to_vector (i32 imm:$src2))), - MMX_MOVL_shuffle_mask))))))]>; + [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>; } } @@ -283,23 +280,23 @@ let isTwoAddress = 1 in { // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", - int_x86_mmx_psrl_w>; + int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>; defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", - int_x86_mmx_psrl_d>; + int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>; defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", - int_x86_mmx_psrl_q>; + int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>; defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", - int_x86_mmx_psll_w>; + int_x86_mmx_psll_w, int_x86_mmx_pslli_w>; defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", - int_x86_mmx_psll_d>; + int_x86_mmx_psll_d, int_x86_mmx_pslli_d>; defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", - int_x86_mmx_psll_q>; + int_x86_mmx_psll_q, int_x86_mmx_pslli_q>; defm MMX_PSRAW : 
MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", - int_x86_mmx_psra_w>; + int_x86_mmx_psra_w, int_x86_mmx_psrai_w>; defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", - int_x86_mmx_psra_d>; + int_x86_mmx_psra_d, int_x86_mmx_psrai_d>; // Comparison Instructions defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e8ee9dc1c4..9c07de6fd0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1780,6 +1780,21 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, (bitconvert (memopv2i64 addr:$src2))))]>; } +multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, + string OpcodeStr, + Intrinsic IntId, Intrinsic IntId2> { + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (IntId VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))))]>; + def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; +} + /// PDI_binop_rm - Simple SSE2 binary operator. 
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { @@ -1854,64 +1869,24 @@ defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>; -defm PSLLW : PDI_binop_rm_int<0xF1, "psllw", int_x86_sse2_psll_w>; -defm PSLLD : PDI_binop_rm_int<0xF2, "pslld", int_x86_sse2_psll_d>; -defm PSLLQ : PDI_binop_rm_int<0xF3, "psllq", int_x86_sse2_psll_q>; - -defm PSRLW : PDI_binop_rm_int<0xD1, "psrlw", int_x86_sse2_psrl_w>; -defm PSRLD : PDI_binop_rm_int<0xD2, "psrld", int_x86_sse2_psrl_d>; -defm PSRLQ : PDI_binop_rm_int<0xD3, "psrlq", int_x86_sse2_psrl_q>; - -defm PSRAW : PDI_binop_rm_int<0xE1, "psraw", int_x86_sse2_psra_w>; -defm PSRAD : PDI_binop_rm_int<0xE2, "psrad", int_x86_sse2_psra_d>; - -// Some immediate variants need to match a bit_convert. -let Constraints = "$src1 = $dst" in { -def PSLLWri : PDIi8<0x71, MRM6r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psllw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1, - (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>; -def PSLLDri : PDIi8<0x72, MRM6r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "pslld\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1, - (scalar_to_vector (i32 imm:$src2))))]>; -def PSLLQri : PDIi8<0x73, MRM6r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psllq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1, - (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>; - -def PSRLWri : PDIi8<0x71, MRM2r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psrlw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1, - (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>; -def PSRLDri : PDIi8<0x72, MRM2r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psrld\t{$src2, $dst|$dst, 
$src2}", - [(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1, - (scalar_to_vector (i32 imm:$src2))))]>; -def PSRLQri : PDIi8<0x73, MRM2r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psrlq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1, - (bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>; - -def PSRAWri : PDIi8<0x71, MRM4r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psraw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1, - (bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>; -def PSRADri : PDIi8<0x72, MRM4r, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "psrad\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1, - (scalar_to_vector (i32 imm:$src2))))]>; -} - -// PSRAQ doesn't exist in SSE[1-3]. +defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", + int_x86_sse2_psll_w, int_x86_sse2_pslli_w>; +defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", + int_x86_sse2_psll_d, int_x86_sse2_pslli_d>; +defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", + int_x86_sse2_psll_q, int_x86_sse2_pslli_q>; + +defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>; +defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>; +defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>; + +defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", + int_x86_sse2_psra_w, int_x86_sse2_psrai_w>; +defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", + int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; // 128-bit logical shifts. 
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 2fb0e8079a..7faff56509 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -122,7 +122,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name.compare(5,10,"x86.mmx.ps",10) == 0 && (Name.compare(13,4,"psll", 4) == 0 || Name.compare(13,4,"psra", 4) == 0 || - Name.compare(13,4,"psrl", 4) == 0)) { + Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') { const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1); diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll index d68af2d1d3..82eeafd075 100644 --- a/test/CodeGen/X86/mmx-shift.ll +++ b/test/CodeGen/X86/mmx-shift.ll @@ -1,14 +1,15 @@ ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32 +; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad define i64 @t1(<1 x i64> %mm1) nounwind { entry: - %tmp6 = tail call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %mm1, <1 x i64> <i64 32> ) ; <<1 x i64>> [#uses=1] + %tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 ) ; <<1 x i64>> [#uses=1] %retval1112 = bitcast <1 x i64> %tmp6 to i64 ; <i64> [#uses=1] ret i64 %retval1112 } -declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone +declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind { entry: |