author    Michael Zolotukhin <mzolotukhin@apple.com>  2014-06-06 15:34:24 +0000
committer Michael Zolotukhin <mzolotukhin@apple.com>  2014-06-06 15:34:24 +0000
commit    afbe82d590b14b304102013aba14c5f608b5b5ab (patch)
tree      f0ba92b8edf09db6e8edb63c1b2179557f559628
parent    6c9eb10784646b7025011e3ebf4a34b4241c79c2 (diff)
[SLP] Enable vectorization of GEP expressions.
The use cases look like the following:

    x->a = y->a + 10
    x->b = y->b + 12

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210342 91177308-0d34-0410-b5e6-96231b3b80d8
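A hedged C++ sketch of that pattern (the struct and function names are illustrative, not from the commit), matching foo1 in the new test below: two adjacent pointer fields updated by the same constant offset, which the SLP vectorizer can now turn into a single vector GEP.

    struct Pair { int *first; int *second; };

    // Two isomorphic field updates: same operand types, constant index on
    // each GEP -- exactly the shape the new buildTree_rec case accepts.
    void updatePointers(Pair *x, Pair *y) {
      x->first  = y->first  + 16;
      x->second = y->second + 16;
    }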
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp  87
-rw-r--r--  test/Transforms/SLPVectorizer/X86/gep.ll     41
2 files changed, 128 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d862167095..f18202be16 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -941,6 +941,51 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
return;
}
+ case Instruction::GetElementPtr: {
+ // We don't combine GEPs with complicated (nested) indexing.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
+ DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ // We can't combine several GEPs into one vector if they operate on
+ // different types.
+ Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
+ if (Ty0 != CurTy) {
+ DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ // We don't combine GEPs with non-constant indexes.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ auto Op = cast<Instruction>(VL[j])->getOperand(1);
+ if (!isa<ConstantInt>(Op)) {
+ DEBUG(
+ dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+ for (unsigned i = 0, e = 2; i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth + 1);
+ }
+ return;
+ }
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
@@ -1146,6 +1191,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return VecCost - ScalarCost;
}
+ case Instruction::GetElementPtr: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_UniformConstantValue;
+
+ int ScalarCost =
+ VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+ int VecCost =
+ TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
+
+ return VecCost - ScalarCost;
+ }
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
int ScalarLdCost = VecTy->getNumElements() *
@@ -1674,6 +1733,34 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = S;
return propagateMetadata(S, E->Scalars);
}
+ case Instruction::GetElementPtr: {
+ setInsertPointAfterBundle(E->Scalars);
+
+ ValueList Op0VL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
+
+ Value *Op0 = vectorizeTree(Op0VL);
+
+ std::vector<Value *> OpVecs;
+ for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
+ ++j) {
+ ValueList OpVL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
+
+ Value *OpVec = vectorizeTree(OpVL);
+ OpVecs.push_back(OpVec);
+ }
+
+ Value *V = Builder.CreateGEP(Op0, OpVecs);
+ E->VectorizedValue = V;
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
+ return V;
+ }
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);
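The getEntryCost hunk above prices a GEP bundle as pointer-arithmetic adds: n scalar GEPs cost n scalar integer adds, while the vectorized bundle costs one vector add. A minimal standalone sketch of that arithmetic (illustrative names, not the LLVM API):

    // Cost delta used to decide profitability: a negative result means the
    // single vector add is cheaper than the bundle of scalar adds.
    int gepBundleCostDelta(int NumElements, int ScalarAddCost,
                           int VectorAddCost) {
      int ScalarCost = NumElements * ScalarAddCost; // one add per scalar GEP
      int VecCost = VectorAddCost;                  // one vector add for all lanes
      return VecCost - ScalarCost;
    }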
diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll
new file mode 100644
index 0000000000..d6eedad9dd
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test if SLP can handle GEP expressions.
+; The test performs the following actions:
+; x->first = y->first + 16
+; x->second = y->second + 16
+
+; CHECK-LABEL: foo1
+; CHECK: <2 x i32*>
+define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
+ %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i64 16
+ %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+ store i32* %3, i32** %4, align 8
+ %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i64 16
+ %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+ store i32* %7, i32** %8, align 8
+ ret void
+}
+
+; Test that we don't vectorize GEP expressions if indexes are not constants.
+; We can't produce efficient code in that case.
+; CHECK-LABEL: foo2
+; CHECK-NOT: <2 x i32*>
+define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
+ %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i32 %i
+ %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
+ store i32* %3, i32** %4, align 8
+ %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i32 %i
+ %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
+ store i32* %7, i32** %8, align 8
+ ret void
+}
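For intuition, a plain C++ model (an assumed example, no LLVM involved) of what the <2 x i32*> GEP checked for in foo1 computes: element-wise pointer arithmetic over two base pointers at once. In foo2 the index would be the runtime value %i, and the non-constant-index bailout in buildTree_rec keeps those GEPs scalar.

    #include <array>

    // Scalar model of a 2-lane vector GEP: both base pointers advance by the
    // same index in one conceptual operation.
    std::array<int *, 2> vectorGep(std::array<int *, 2> Bases, long Idx) {
      return {Bases[0] + Idx, Bases[1] + Idx};
    }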