summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp35
-rw-r--r--test/Transforms/LoopVectorize/global_alias.ll7
-rw-r--r--test/Transforms/LoopVectorize/induction.ll38
-rw-r--r--test/Transforms/LoopVectorize/induction_plus.ll2
4 files changed, 76 insertions, 6 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index ee94173eb1..7f77784d28 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2272,8 +2272,41 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
-
+
fixLCSSAPHIs();
+
+ // Perform a simple CSE (common subexpression elimination) over the vector body.
+ SmallPtrSet<Instruction*, 16> Visited;
+ SmallVector<Instruction*, 16> ToRemove;
+ for (BasicBlock::iterator I = LoopVectorBody->begin(),
+ E = LoopVectorBody->end(); I != E; ++I) {
+ Instruction *In = I;
+
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
+ !isa<ShuffleVectorInst>(In) && !isa<GetElementPtrInst>(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
+ ve = Visited.end(); v != ve; ++v) {
+ if (In->isIdenticalTo(*v)) {
+ In->replaceAllUsesWith(*v);
+ ToRemove.push_back(In);
+ In = 0;
+ break;
+ }
+ }
+ if (In)
+ Visited.insert(In);
+
+ }
+ // Erase all of the instructions that we RAUWed.
+ for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
+ ve = ToRemove.end(); v != ve; ++v) {
+ assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
+ (*v)->eraseFromParent();
+ }
}
void InnerLoopVectorizer::fixLCSSAPHIs() {
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index 4fd4c989de..0118fb4741 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -336,9 +336,8 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias07(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
; CHECK: ret
-
define i32 @noAlias07(i32 %a) #0 {
entry:
%a.addr = alloca i32, align 4
@@ -552,7 +551,7 @@ for.end: ; preds = %for.cond
; return Bar.A[N][a];
; }
; CHECK-LABEL: define i32 @noAlias11(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
; CHECK: ret
define i32 @noAlias11(i32 %a) #0 {
@@ -612,7 +611,7 @@ for.end: ; preds = %for.cond
; return Bar.A[N][a];
; }
; CHECK-LABEL: define i32 @noAlias12(
-; CHECK: sub nsw <4 x i32>
+; CHECK: store <4 x i32>
; CHECK: ret
define i32 @noAlias12(i32 %a) #0 {
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index d4cc50e986..2471c52ac2 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -28,3 +28,41 @@ for.end:
ret void
}
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+
+; Make sure we remove unneeded vectorization of induction variables.
+; For instcombine to clean up the vectorized induction variables that the loop
+; vectorizer creates, we need to perform some form of redundancy elimination
+; to get rid of multiple uses.
+
+; IND-LABEL: scalar_use
+
+; IND: br label %vector.body
+; IND: vector.body:
+; Vectorized induction variable.
+; IND-NOT: insertelement <2 x i64>
+; IND-NOT: shufflevector <2 x i64>
+; IND: br {{.*}}, label %vector.body
+
+define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %ind.sum = add i64 %iv, %offset
+ %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+ %l1 = load float* %arr.idx, align 4
+ %ind.sum2 = add i64 %iv, %offset2
+ %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+ %l2 = load float* %arr.idx2, align 4
+ %m = fmul fast float %b, %l2
+ %ad = fadd fast float %l1, %m
+ store float %ad, float* %arr.idx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index 6141c39462..9c8201ab78 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0"
@array = common global [1024 x i32] zeroinitializer, align 16
;CHECK-LABEL: @array_at_plus_one(
-;CHECK: trunc i64
;CHECK: add i64 %index, 12
+;CHECK: trunc i64
;CHECK: ret i32
define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0