summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-04-26 01:03:22 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-04-26 01:03:22 +0000
commit96db9b8ed87e502801e3dda7d13896acd17d8128 (patch)
treeac2e58413693d984ca219b617bf84947bb5f008b /lib
parentf3a9eb107c079a407962b34ebe5232a41dbf8146 (diff)
downloadllvm-96db9b8ed87e502801e3dda7d13896acd17d8128.tar.gz
llvm-96db9b8ed87e502801e3dda7d13896acd17d8128.tar.bz2
llvm-96db9b8ed87e502801e3dda7d13896acd17d8128.tar.xz
[InstCombine][X86] Teach how to fold calls to SSE2/AVX2 packed logical shift
right intrinsics. A packed logical shift right with a shift count bigger than or equal to the element size always produces a zero vector. In all other cases, it can be safely replaced by a 'lshr' instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207299 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp50
1 files changed, 41 insertions, 9 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 17ada47d2b..df217f19ac 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -570,8 +570,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_avx2_pslli_w: {
- // Simplify if count is constant. To 0 if > BitWidth, otherwise to shl.
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: {
+ // Simplify if count is constant. To 0 if >= BitWidth,
+ // otherwise to shl/lshr.
auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
if (!CDV && !CInt)
@@ -588,14 +601,33 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
VT->getElementType()->getPrimitiveSizeInBits() - 1)
return ReplaceInstUsesWith(
CI, ConstantAggregateZero::get(Vec->getType()));
- else {
- unsigned VWidth = VT->getNumElements();
- // Get a constant vector of the same type as the first operand.
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
- return BinaryOperator::CreateShl(
- Vec, Builder->CreateVectorSplat(VWidth, VTCI));
+
+ bool isPackedShiftLeft = true;
+ switch (II->getIntrinsicID()) {
+ default : break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
}
- break;
+
+ unsigned VWidth = VT->getNumElements();
+ // Get a constant vector of the same type as the first operand.
+ auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
+ if (isPackedShiftLeft)
+ return BinaryOperator::CreateShl(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
+
+ return BinaryOperator::CreateLShr(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
}
case Intrinsic::x86_sse41_pmovsxbw: