From 191bd64a39490fa75d35b9aaecdd57b00c7a8b5f Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 1 Sep 2010 01:45:53 +0000 Subject: Revert 112442 and 112440 until the compile time problems introduced by 112440 are resolved. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112692 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/IVUsers.h | 13 +-- lib/Analysis/IVUsers.cpp | 167 +++++++++++++-------------- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 156 ++++++++----------------- test/CodeGen/X86/lsr-reuse.ll | 2 +- 4 files changed, 134 insertions(+), 204 deletions(-) diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 7aa34f1cff..578e6aba83 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -27,7 +27,6 @@ class Value; class IVUsers; class ScalarEvolution; class SCEV; -class SCEVAddRecExpr; class IVUsers; /// IVStrideUse - Keep track of one use of a strided induction variable. @@ -123,7 +122,7 @@ class IVUsers : public LoopPass { LoopInfo *LI; DominatorTree *DT; ScalarEvolution *SE; - SmallPtrSet Processed; + SmallPtrSet Processed; /// IVUses - A list of all tracked IV uses of induction variable expressions /// we are interested in. @@ -135,16 +134,14 @@ class IVUsers : public LoopPass { virtual void releaseMemory(); - const SCEVAddRecExpr *findInterestingAddRec(const SCEV *S) const; - bool isInterestingUser(const Instruction *User) const; - public: static char ID; // Pass ID, replacement for typeid IVUsers(); - /// AddUsersIfInteresting - Inspect the def-use graph starting at the - /// specified Instruction and add IVUsers. - void AddUsersIfInteresting(Instruction *I); + /// AddUsersIfInteresting - Inspect the specified Instruction. If it is a + /// reducible SCEV, recursively add its users to the IVUsesByStride set and + /// return true. Otherwise, return false. + bool AddUsersIfInteresting(Instruction *I); IVStrideUse &AddUser(Instruction *User, Value *Operand); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index d9bdf5cbb0..fd514336be 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -35,123 +35,112 @@ Pass *llvm::createIVUsersPass() { return new IVUsers(); } -/// findInterestingAddRec - Test whether the given expression is interesting. -/// Return the addrec with the current loop which makes it interesting, or -/// null if it is not interesting. -const SCEVAddRecExpr *IVUsers::findInterestingAddRec(const SCEV *S) const { +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, + ScalarEvolution *SE) { // An addrec is interesting if it's affine or if it has an interesting start. if (const SCEVAddRecExpr *AR = dyn_cast(S)) { // Keep things simple. Don't touch loop-variant strides. if (AR->getLoop() == L) - return AR; - // We don't yet know how to do effective SCEV expansions for addrecs - // with interesting steps. - if (findInterestingAddRec(AR->getStepRecurrence(*SE))) - return 0; - // Otherwise recurse to see if the start value is interesting. - return findInterestingAddRec(AR->getStart()); + return AR->isAffine() || !L->contains(I); + // Otherwise recurse to see if the start value is interesting, and that + // the step value is not interesting, since we don't yet know how to + // do effective SCEV expansions for addrecs with interesting steps. + return isInteresting(AR->getStart(), I, L, SE) && + !isInteresting(AR->getStepRecurrence(*SE), I, L, SE); } // An add is interesting if exactly one of its operands is interesting. if (const SCEVAddExpr *Add = dyn_cast(S)) { + bool AnyInterestingYet = false; for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); OI != OE; ++OI) - if (const SCEVAddRecExpr *AR = findInterestingAddRec(*OI)) - return AR; - return 0; + if (isInteresting(*OI, I, L, SE)) { + if (AnyInterestingYet) + return false; + AnyInterestingYet = true; + } + return AnyInterestingYet; } // Nothing else is interesting here. - return 0; + return false; } -bool IVUsers::isInterestingUser(const Instruction *User) const { - // Void and FP expressions cannot be reduced. - if (!SE->isSCEVable(User->getType())) - return false; +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. // LSR is not APInt clean, do not touch integers bigger than 64-bits. - if (SE->getTypeSizeInBits(User->getType()) > 64) + if (SE->getTypeSizeInBits(I->getType()) > 64) return false; - // Don't descend into PHI nodes outside the current loop. - if (LI->getLoopFor(User->getParent()) != L && - isa(User)) - return false; + if (!Processed.insert(I)) + return true; // Instruction already handled. - // Otherwise, it may be interesting. - return true; -} + // Get the symbolic expression for this instruction. + const SCEV *ISE = SE->getSCEV(I); -/// AddUsersIfInteresting - Inspect the specified instruction. If it is a -/// reducible SCEV, recursively add its users to the IVUsesByStride set and -/// return true. Otherwise, return false. -void IVUsers::AddUsersIfInteresting(Instruction *I) { - // Stop if we've seen this before. - if (!Processed.insert(I)) - return; + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L, SE)) + return false; - // If this PHI node is not SCEVable, ignore it. - if (!SE->isSCEVable(I->getType())) - return; - - // If this PHI node is not an addrec for this loop, ignore it. - const SCEVAddRecExpr *Expr = findInterestingAddRec(SE->getSCEV(I)); - if (!Expr) - return; - - // Walk the def-use graph. - SmallVector, 16> Worklist; - Worklist.push_back(std::make_pair(I, Expr)); - do { - std::pair P = - Worklist.pop_back_val(); - Instruction *Op = P.first; - const SCEVAddRecExpr *OpAR = P.second; - - // Visit Op's users. - SmallPtrSet VisitedUsers; - for (Value::use_iterator UI = Op->use_begin(), E = Op->use_end(); - UI != E; ++UI) { - // Don't visit any individual user more than once. - Instruction *User = cast(*UI); - if (!VisitedUsers.insert(User)) - continue; - - // If it's an affine addrec (which we can pretty safely re-expand) inside - // the loop, or a potentially non-affine addrec outside the loop (which - // we can evaluate outside of the loop), follow it. - if (OpAR->isAffine() || !L->contains(User)) { - if (isInterestingUser(User)) { - const SCEV *UserExpr = SE->getSCEV(User); - - if (const SCEVAddRecExpr *AR = findInterestingAddRec(UserExpr)) { - // Interesting. Keep searching. - if (Processed.insert(User)) - Worklist.push_back(std::make_pair(User, AR)); - continue; - } - } + SmallPtrSet UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa(User) && Processed.count(User)) + continue; + + // Descend recursively, but not into PHI nodes outside the current loop. + // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references outside the loop in all cases. + // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa(User) || Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; } + } else if (Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } - // Otherwise, this is the point where the def-use chain - // becomes uninteresting. Call it an IV User. - AddUser(User, Op); + if (AddUserToIVUsers) { + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. + ISE = TransformForPostIncUse(NormalizeAutodetect, + ISE, User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); } - } while (!Worklist.empty()); + } + return true; } IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { IVUses.push_back(new IVStrideUse(this, User, Operand)); - IVStrideUse &NewUse = IVUses.back(); - - // Auto-detect and remember post-inc loops for this expression. - const SCEV *S = SE->getSCEV(Operand); - (void)TransformForPostIncUse(NormalizeAutodetect, - S, User, Operand, - NewUse.PostIncLoops, - *SE, *DT); - return NewUse; + return IVUses.back(); } IVUsers::IVUsers() @@ -176,7 +165,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) - AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(I); return false; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4fe93b9d41..e8dc5d3a64 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -113,7 +113,6 @@ class RegUseTracker { public: void CountRegister(const SCEV *Reg, size_t LUIdx); void DropRegister(const SCEV *Reg, size_t LUIdx); - void DropUse(size_t LUIdx, size_t NewLUIdx); void DropUse(size_t LUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -152,24 +151,6 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { RSD.UsedByIndices.reset(LUIdx); } -/// DropUse - Clear out reference by use LUIdx, and prepare for use NewLUIdx -/// to be swapped into LUIdx's position. -void -RegUseTracker::DropUse(size_t LUIdx, size_t NewLUIdx) { - // Remove the use index from every register's use list. - for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); - I != E; ++I) { - SmallBitVector &UsedByIndices = I->second.UsedByIndices; - UsedByIndices.resize(std::max(UsedByIndices.size(), NewLUIdx + 1)); - if (LUIdx < UsedByIndices.size()) { - UsedByIndices[LUIdx] = UsedByIndices[NewLUIdx]; - UsedByIndices.reset(NewLUIdx); - } else - UsedByIndices.reset(LUIdx); - } -} - -/// DropUse - Clear out reference by use LUIdx. void RegUseTracker::DropUse(size_t LUIdx) { // Remove the use index from every register's use list. @@ -1353,9 +1334,7 @@ class LSRInstance { UseMapDenseMapInfo> UseMapTy; UseMapTy UseMap; - bool reconcileNewOffset(LSRUse &LU, - int64_t NewMinOffset, int64_t NewMaxOffset, - bool HasBaseReg, + bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy); std::pair getUse(const SCEV *&Expr, @@ -1364,8 +1343,7 @@ class LSRInstance { void DeleteUse(LSRUse &LU); - LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU, - int64_t &NewBaseOffs); + LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); @@ -1866,13 +1844,11 @@ LSRInstance::OptimizeLoopTermCond() { /// at the given offset and other details. If so, update the use and /// return true. bool -LSRInstance::reconcileNewOffset(LSRUse &LU, - int64_t NewMinOffset, int64_t NewMaxOffset, - bool HasBaseReg, +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy) { - int64_t ResultMinOffset = LU.MinOffset; - int64_t ResultMaxOffset = LU.MaxOffset; - const Type *ResultAccessTy = AccessTy; + int64_t NewMinOffset = LU.MinOffset; + int64_t NewMaxOffset = LU.MaxOffset; + const Type *NewAccessTy = AccessTy; // Check for a mismatched kind. It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of @@ -1880,27 +1856,29 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, if (LU.Kind != Kind) return false; // Conservatively assume HasBaseReg is true for now. - if (NewMinOffset < LU.MinOffset) { - if (!isAlwaysFoldable(LU.MaxOffset - NewMinOffset, 0, HasBaseReg, + if (NewOffset < LU.MinOffset) { + if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, Kind, AccessTy, TLI)) return false; - ResultMinOffset = NewMinOffset; - } else if (NewMaxOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(NewMaxOffset - LU.MinOffset, 0, HasBaseReg, + NewMinOffset = NewOffset; + } else if (NewOffset > LU.MaxOffset) { + if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, Kind, AccessTy, TLI)) return false; - ResultMaxOffset = NewMaxOffset; + NewMaxOffset = NewOffset; } // Check for a mismatched access type, and fall back conservatively as needed. // TODO: Be less conservative when the type is similar and can use the same // addressing modes. if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - ResultAccessTy = Type::getVoidTy(AccessTy->getContext()); + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); // Update the use. - LU.MinOffset = ResultMinOffset; - LU.MaxOffset = ResultMaxOffset; - LU.AccessTy = ResultAccessTy; + LU.MinOffset = NewMinOffset; + LU.MaxOffset = NewMaxOffset; + LU.AccessTy = NewAccessTy; + if (NewOffset != LU.Offsets.back()) + LU.Offsets.push_back(NewOffset); return true; } @@ -1925,12 +1903,9 @@ LSRInstance::getUse(const SCEV *&Expr, // A use already existed with this base. size_t LUIdx = P.first->second; LSRUse &LU = Uses[LUIdx]; - if (reconcileNewOffset(LU, Offset, Offset, - /*HasBaseReg=*/true, Kind, AccessTy)) { - LU.Offsets.push_back(Offset); + if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) // Reuse this use. return std::make_pair(LUIdx, Offset); - } } // Create a new use. @@ -1939,7 +1914,11 @@ LSRInstance::getUse(const SCEV *&Expr, Uses.push_back(LSRUse(Kind, AccessTy)); LSRUse &LU = Uses[LUIdx]; - LU.Offsets.push_back(Offset); + // We don't need to track redundant offsets, but we don't need to go out + // of our way here to avoid them. + if (LU.Offsets.empty() || Offset != LU.Offsets.back()) + LU.Offsets.push_back(Offset); + LU.MinOffset = Offset; LU.MaxOffset = Offset; return std::make_pair(LUIdx, Offset); @@ -1947,12 +1926,8 @@ LSRInstance::getUse(const SCEV *&Expr, /// DeleteUse - Delete the given use from the Uses list. void LSRInstance::DeleteUse(LSRUse &LU) { - if (&LU != &Uses.back()) { + if (&LU != &Uses.back()) std::swap(LU, Uses.back()); - RegUses.DropUse(&LU - Uses.begin(), Uses.size() - 1); - } else { - RegUses.DropUse(&LU - Uses.begin()); - } Uses.pop_back(); } @@ -1960,9 +1935,8 @@ void LSRInstance::DeleteUse(LSRUse &LU) { /// a formula that has the same registers as the given formula. LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, - const LSRUse &OrigLU, - int64_t &NewBaseOffs) { - // Search all uses for a formula similar to OrigF. This could be more clever. + const LSRUse &OrigLU) { + // Search all uses for the formula. This could be more clever. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; // Check whether this use is close enough to OrigLU, to see whether it's @@ -1985,15 +1959,8 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && F.AM.Scale == OrigF.AM.Scale) { - // Ok, all the registers and symbols matched. Check to see if the - // immediate looks nicer than our old one. - if (OrigF.AM.BaseOffs == INT64_MIN || - (F.AM.BaseOffs != INT64_MIN && - abs64(F.AM.BaseOffs) < abs64(OrigF.AM.BaseOffs))) { - // Looks good. Take it. - NewBaseOffs = F.AM.BaseOffs; + if (F.AM.BaseOffs == 0) return &LU; - } // This is the formula where all the registers and symbols matched; // there aren't going to be any others. Since we declined it, we // can skip the rest of the formulae and procede to the next LSRUse. @@ -2634,17 +2601,6 @@ struct WorkItem { WorkItem(size_t LI, int64_t I, const SCEV *R) : LUIdx(LI), Imm(I), OrigReg(R) {} - bool operator==(const WorkItem &that) const { - return LUIdx == that.LUIdx && Imm == that.Imm && OrigReg == that.OrigReg; - } - bool operator<(const WorkItem &that) const { - if (LUIdx != that.LUIdx) - return LUIdx < that.LUIdx; - if (Imm != that.Imm) - return Imm < that.Imm; - return OrigReg < that.OrigReg; - } - void print(raw_ostream &OS) const; void dump() const; }; @@ -2684,7 +2640,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // Now examine each set of registers with the same base value. Build up // a list of work to do and do the work in a separate step so that we're // not adding formulae and register counts while we're searching. - SmallSetVector WorkItems; + SmallVector WorkItems; + SmallSet, 32> UniqueItems; for (SmallVectorImpl::const_iterator I = Sequence.begin(), E = Sequence.end(); I != E; ++I) { const SCEV *Reg = *I; @@ -2727,10 +2684,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // Compute the difference between the two. int64_t Imm = (uint64_t)JImm - M->first; for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1; - LUIdx = UsedByIndices.find_next(LUIdx)) { + LUIdx = UsedByIndices.find_next(LUIdx)) // Make a memo of this use, offset, and register tuple. - WorkItems.insert(WorkItem(LUIdx, Imm, OrigReg)); - } + if (UniqueItems.insert(std::make_pair(LUIdx, Imm))) + WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); } } } @@ -2738,6 +2695,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { Map.clear(); Sequence.clear(); UsedByIndicesMap.clear(); + UniqueItems.clear(); // Now iterate through the worklist and add new formulae. for (SmallVectorImpl::const_iterator I = WorkItems.begin(), @@ -3034,12 +2992,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { - int64_t NewBaseOffs; - if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU, - NewBaseOffs)) { - if (reconcileNewOffset(*LUThatHas, - F.AM.BaseOffs + LU.MinOffset - NewBaseOffs, - F.AM.BaseOffs + LU.MaxOffset - NewBaseOffs, + if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { + if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, /*HasBaseReg=*/false, LU.Kind, LU.AccessTy)) { DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); @@ -3047,30 +3001,6 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - // Update the relocs to reference the new use. - // Do this first so that MinOffset and MaxOffset are updated - // before we begin to determine which formulae to delete. - for (SmallVectorImpl::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs - NewBaseOffs; - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - // DeleteUse will do a swap+pop_back, so if this fixup is - // now pointing to the last LSRUse, update it to point to the - // position it'll be swapped to. - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } - // Delete formulae from the new use which are no longer legal. bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { @@ -3089,6 +3019,20 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (Any) LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + // Update the relocs to reference the new use. + for (SmallVectorImpl::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.AM.BaseOffs; + DEBUG(dbgs() << "New fixup has offset " + << Fixup.Offset << '\n'); + } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; + } + // Delete the old use. DeleteUse(LU); --LUIdx; diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll index 98afc3e215..d2ff58be10 100644 --- a/test/CodeGen/X86/lsr-reuse.ll +++ b/test/CodeGen/X86/lsr-reuse.ll @@ -452,8 +452,8 @@ bb5: ; preds = %bb3, %entry ; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}} ; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}} ; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}}) -; CHECK-NEXT: decq %r{{.*}} ; CHECK-NEXT: addq $4, %r{{.*}} +; CHECK-NEXT: decq %r{{.*}} ; CHECK-NEXT: addq $4, %r{{.*}} ; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}} ; CHECK-NEXT: BB10_2: -- cgit v1.2.3