diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-04-26 01:03:22 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-04-26 01:03:22 +0000 |
commit | 96db9b8ed87e502801e3dda7d13896acd17d8128 (patch) | |
tree | ac2e58413693d984ca219b617bf84947bb5f008b /lib | |
parent | f3a9eb107c079a407962b34ebe5232a41dbf8146 (diff) | |
download | llvm-96db9b8ed87e502801e3dda7d13896acd17d8128.tar.gz llvm-96db9b8ed87e502801e3dda7d13896acd17d8128.tar.bz2 llvm-96db9b8ed87e502801e3dda7d13896acd17d8128.tar.xz |
[InstCombine][X86] Teach how to fold calls to SSE2/AVX2 packed logical shift
right intrinsics.
A packed logical shift right by a constant shift count greater than or equal to
the element size in bits always produces a zero vector. For any smaller constant
shift count, the call can be safely replaced by a 'lshr' instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207299 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 50 |
1 file changed, 41 insertions, 9 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 17ada47d2b..df217f19ac 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -570,8 +570,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_psll_w: case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_pslli_w: { - // Simplify if count is constant. To 0 if > BitWidth, otherwise to shl. + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: { + // Simplify if count is constant. To 0 if >= BitWidth, + // otherwise to shl/lshr. auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1)); auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1)); if (!CDV && !CInt) @@ -588,14 +601,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { VT->getElementType()->getPrimitiveSizeInBits() - 1) return ReplaceInstUsesWith( CI, ConstantAggregateZero::get(Vec->getType())); - else { - unsigned VWidth = VT->getNumElements(); - // Get a constant vector of the same type as the first operand. 
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); - return BinaryOperator::CreateShl( - Vec, Builder->CreateVectorSplat(VWidth, VTCI)); + + bool isPackedShiftLeft = true; + switch (II->getIntrinsicID()) { + default : break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; } - break; + + unsigned VWidth = VT->getNumElements(); + // Get a constant vector of the same type as the first operand. + auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + if (isPackedShiftLeft) + return BinaryOperator::CreateShl(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + + return BinaryOperator::CreateLShr(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); } case Intrinsic::x86_sse41_pmovsxbw: |