From 7208b0763c9fca2c6364c642d28fd014a496cc37 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 1 Nov 2013 14:09:50 +0000 Subject: LoopVectorize: Look for consecutive acces in GEPs with trailing zero indices If we have a pointer to a single-element struct we can still build wide loads and stores to it (if there is no padding). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193860 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 49 ++++++++++++++++++++------ test/Transforms/LoopVectorize/struct_access.ll | 42 ++++++++++++++++++++++ 2 files changed, 80 insertions(+), 11 deletions(-) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f18707c7f1..ee94173eb1 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1069,6 +1069,31 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, return Builder.CreateAdd(Val, Cv, "induction"); } +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +static unsigned getGEPInductionOperand(DataLayout *DL, + const GetElementPtrInst *Gep) { + unsigned LastOperand = Gep->getNumOperands() - 1; + unsigned GEPAllocSize = DL->getTypeAllocSize( + cast(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); // Make sure that the pointer does not point to structs. @@ -1090,8 +1115,6 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return 0; unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = Gep->getOperand(NumOperands - 1); - Value *GpPtr = Gep->getPointerOperand(); // If this GEP value is a consecutive pointer induction variable and all of // the indices are constant then we know it is consecutive. We can @@ -1115,14 +1138,18 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return -1; } - // Check that all of the gep indices are uniform except for the last. - for (unsigned i = 0; i < NumOperands - 1; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + unsigned InductionOperand = getGEPInductionOperand(DL, Gep); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0; i != NumOperands; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) return 0; - // We can emit wide load/stores only if the last index is the induction - // variable. - const SCEV *Last = SE->getSCEV(LastIndex); + // We can emit wide load/stores only if the last non-zero index is the + // induction variable. + const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand)); if (const SCEVAddRecExpr *AR = dyn_cast(Last)) { const SCEV *Step = AR->getStepRecurrence(*SE); @@ -1219,7 +1246,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); - unsigned LastOperand = NumOperands - 1; + unsigned InductionOperand = getGEPInductionOperand(DL, Gep); // Create the new GEP with the new induction variable. GetElementPtrInst *Gep2 = cast(Gep->clone()); @@ -1228,9 +1255,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Instruction *GepOperandInst = dyn_cast(GepOperand); // Update last index or loop invariant instruction anchored in loop. - if (i == LastOperand || + if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { - assert((i == LastOperand || + assert((i == InductionOperand || SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && "Must be last index or loop invariant"); diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index 0cfaabe177..75beae82f1 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -44,3 +44,45 @@ for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ret i32 %sum.0.lcssa } + +%struct.lit = type { i32 } + +; Verify that we still vectorize the access if the struct has the same size as +; the loaded element. +; struct lit { +; int x; +; }; +; +; +; int bar(struct lit *A, int n) { +; +; int sum = 0; +; for (int i = 0; i < n; ++i) +; sum += A[i].x; +; +; return sum; +; } + +;CHECK-LABEL: @bar( +;CHECK: load <4 x i32> +;CHECK: ret +define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %x = getelementptr inbounds %struct.lit* %A, i64 %indvars.iv, i32 0 + %0 = load i32* %x, align 4 + %add = add nsw i32 %0, %sum.05 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} -- cgit v1.2.3