summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp36
-rw-r--r--test/Transforms/SLPVectorizer/X86/diamond.ll4
-rw-r--r--test/Transforms/SLPVectorizer/X86/external_user.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/rgb_phi.ll2
4 files changed, 29 insertions, 15 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2cf843da5d..12316b4e59 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1320,6 +1320,9 @@ void BoUpSLP::vectorizeTree() {
it != e; ++it) {
Value *Scalar = it->Scalar;
llvm::User *User = it->User;
+
+ // Skip users whose use of this scalar has already been replaced. This
+ // happens when one instruction uses the same value more than once.
if (std::find(Scalar->use_begin(), Scalar->use_end(), User) ==
Scalar->use_end())
continue;
@@ -1337,8 +1340,18 @@ void BoUpSLP::vectorizeTree() {
Instruction *Loc = 0;
if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
Loc = PN->getParent()->getFirstInsertionPt();
- } else if (Instruction *Iv = dyn_cast<Instruction>(Vec)){
- Loc = ++((BasicBlock::iterator)*Iv);
+ } else if (isa<Instruction>(Vec)){
+ if (PHINode *PH = dyn_cast<PHINode>(User)) {
+ for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
+ if (PH->getIncomingValue(i) == Scalar) {
+ Loc = PH->getIncomingBlock(i)->getTerminator();
+ break;
+ }
+ }
+ assert(Loc && "Unable to find incoming value for the PHI");
+ } else {
+ Loc = cast<Instruction>(User);
+ }
} else {
Loc = F->getEntryBlock().begin();
}
@@ -1433,24 +1446,25 @@ void BoUpSLP::optimizeGatherSequence() {
BasicBlock *BB = *I;
// For all instructions in the function:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- InsertElementInst *Insert = dyn_cast<InsertElementInst>(it);
- if (!Insert || !GatherSeq.count(Insert))
+ Instruction *In = it;
+ if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) ||
+ !GatherSeq.count(In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
ve = Visited.end(); v != ve; ++v) {
- if (Insert->isIdenticalTo(*v) &&
- DT->dominates((*v)->getParent(), Insert->getParent())) {
- Insert->replaceAllUsesWith(*v);
- ToRemove.push_back(Insert);
- Insert = 0;
+ if (In->isIdenticalTo(*v) &&
+ DT->dominates((*v)->getParent(), In->getParent())) {
+ In->replaceAllUsesWith(*v);
+ ToRemove.push_back(In);
+ In = 0;
break;
}
}
- if (Insert)
- Visited.insert(Insert);
+ if (In)
+ Visited.insert(In);
}
}
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll
index 2a237eaffd..099f7cfb97 100644
--- a/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -51,8 +51,8 @@ entry:
; CHECK: @extr_user
; CHECK: load <4 x i32>
-; CHECK-NEXT: extractelement <4 x i32>
; CHECK: store <4 x i32>
+; CHECK: extractelement <4 x i32>
; CHECK-NEXT: ret
define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
entry:
@@ -81,8 +81,8 @@ entry:
; In this example we have an external user that is not the first element in the vector.
; CHECK: @extr_user1
; CHECK: load <4 x i32>
-; CHECK-NEXT: extractelement <4 x i32>
; CHECK: store <4 x i32>
+; CHECK: extractelement <4 x i32>
; CHECK-NEXT: ret
define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
entry:
diff --git a/test/Transforms/SLPVectorizer/X86/external_user.ll b/test/Transforms/SLPVectorizer/X86/external_user.ll
index 7f032b5b19..22f0e64be0 100644
--- a/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -26,9 +26,9 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: phi <2 x double>
;CHECK: fadd <2 x double>
;CHECK: fmul <2 x double>
-;CHECK: extractelement <2 x double>
;CHECK: br
;CHECK: store <2 x double>
+;CHECK: extractelement <2 x double>
;CHECK: ret double
define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) {
diff --git a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
index 6a9243560e..9f5a6213c2 100644
--- a/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
+++ b/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -23,10 +23,10 @@ target triple = "i386-apple-macosx10.9.0"
;CHECK: fmul <3 x float>
;CHECK: fadd <3 x float>
; At the moment we don't sink extractelements.
+;CHECK: br
;CHECK: extractelement
;CHECK: extractelement
;CHECK: extractelement
-;CHECK: br
;CHECK: ret
define float @foo(float* nocapture readonly %A) {