summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp18
-rw-r--r--test/Transforms/LoopVectorize/ARM/width-detect.ll52
2 files changed, 64 insertions, 6 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4803a9d038..4bb8c43656 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2635,7 +2635,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
" into one vector.");
-
+
unsigned VF = MaxVectorSize;
// If we optimize the program for size, avoid creating the tail loop.
@@ -2697,17 +2697,23 @@ unsigned LoopVectorizationCostModel::getWidestType() {
// For each instruction in the loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- if (Legal->isUniformAfterVectorization(it))
+ Type *T = it->getType();
+
+ // Only examine Loads, Stores and PHINodes.
+ if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
continue;
- Type *T = it->getType();
+ // Examine PHI nodes that are reduction variables.
+ if (PHINode *PN = dyn_cast<PHINode>(it))
+ if (!Legal->getReductionVars()->count(PN))
+ continue;
+ // Examine the stored values.
if (StoreInst *ST = dyn_cast<StoreInst>(it))
T = ST->getValueOperand()->getType();
- // PHINodes and pointers are difficult to analyze, but we catch all other
- // uses of the types in other instructions.
- if (isa<PHINode>(it) || T->isPointerTy() || T->isVoidTy())
+ // Ignore stored/loaded pointer types.
+ if (T->isPointerTy())
continue;
MaxWidth = std::max(MaxWidth, T->getScalarSizeInBits());
diff --git a/test/Transforms/LoopVectorize/ARM/width-detect.ll b/test/Transforms/LoopVectorize/ARM/width-detect.ll
new file mode 100644
index 0000000000..c0795b6a79
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+;CHECK:foo_F64
+;CHECK: <2 x double>
+;CHECK:ret
+define double @foo_F64(double* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %prod.01 = phi double [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
+ %2 = getelementptr inbounds double* %A, i64 %indvars.iv
+ %3 = load double* %2, align 8
+ %4 = fmul fast double %prod.01, %3
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %prod.0.lcssa = phi double [ 0.000000e+00, %0 ], [ %4, %.lr.ph ]
+ ret double %prod.0.lcssa
+}
+
+;CHECK:foo_I8
+;CHECK: xor <16 x i8>
+;CHECK:ret
+define signext i8 @foo_I8(i8* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i8* %A, i64 %indvars.iv
+ %3 = load i8* %2, align 1
+ %4 = xor i8 %3, %red.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %red.0.lcssa = phi i8 [ 0, %0 ], [ %4, %.lr.ph ]
+ ret i8 %red.0.lcssa
+}
+
+