summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2014-01-27 11:12:14 +0000
committerChandler Carruth <chandlerc@gmail.com>2014-01-27 11:12:14 +0000
commit3fa842d791ff114d13159282933c7c15c5579cd7 (patch)
tree4de09be2412e99e311c663da438c4b2f2ad26ab3
parent4bfa6fecc1348f120e463138d4fb435b40d7b650 (diff)
downloadllvm-3fa842d791ff114d13159282933c7c15c5579cd7.tar.gz
llvm-3fa842d791ff114d13159282933c7c15c5579cd7.tar.bz2
llvm-3fa842d791ff114d13159282933c7c15c5579cd7.tar.xz
[vectorizer] Fix a trivial oversight where we always requested the number of vector registers rather than toggling between the vector and scalar register counts based on VF.

I don't have a test case, as I spotted this by inspection, and on X86 it only makes a difference if your target is lacking SSE and thus has *no* vector registers. If someone wants to add a test case for this for ARM or somewhere else where this is more significant, that would be awesome.

Also made the variable name a bit more sensible while I'm here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200211 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 66134bd95d..f904765f41 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4962,9 +4962,9 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
if (TC > 1 && TC < TinyTripCountUnrollThreshold)
return 1;
- unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true);
- DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters <<
- " vector registers\n");
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
+ DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters <<
+ " registers\n");
LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
// We divide by these constants so assume that we have at least one
@@ -4978,7 +4978,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// Next, divide the remaining registers by the number of registers that is
// required by the loop, in order to estimate how many parallel instances
// fit without causing spills.
- unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
+ unsigned UF = (TargetNumRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
// Clamp the unroll factor ranges to reasonable factors.
unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();