diff options
author | Abdoulaye Walsimou Gaye <awg@embtoolkit.org> | 2012-11-24 21:08:10 +0100 |
---|---|---|
committer | Abdoulaye Walsimou Gaye <awg@embtoolkit.org> | 2012-11-24 21:08:10 +0100 |
commit | 41ef0ec9e25a34ffb598ce0d5749b3bfa3b0b929 (patch) | |
tree | 1eac7e3d956942eeba5fed895cd9e6514ee905d5 | |
parent | feadf1055966dff37453b11a22c3e22907a651e6 (diff) | |
parent | 12c349d44181f8083ee9120ddd3b2664c0a1fe8f (diff) | |
download | llvm-41ef0ec9e25a34ffb598ce0d5749b3bfa3b0b929.tar.gz llvm-41ef0ec9e25a34ffb598ce0d5749b3bfa3b0b929.tar.bz2 llvm-41ef0ec9e25a34ffb598ce0d5749b3bfa3b0b929.tar.xz |
Merge branch 'release-3.2' into embtk-support-release-3.2
29 files changed, 479 insertions, 351 deletions
diff --git a/autoconf/configure.ac b/autoconf/configure.ac index f1842a6d8a..7715531a33 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -685,9 +685,9 @@ AC_ARG_ENABLE(backtraces, [Enable embedding backtraces on crash (default is YES)]),, enableval=default) case "$enableval" in - yes) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;; - no) AC_SUBST(ENABLE_TIMESTAMPS,[0]) ;; - default) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;; + yes) AC_SUBST(ENABLE_BACKTRACES,[1]) ;; + no) AC_SUBST(ENABLE_BACKTRACES,[0]) ;; + default) AC_SUBST(ENABLE_BACKTRACES,[1]) ;; *) AC_MSG_ERROR([Invalid setting for --enable-backtraces. Use "yes" or "no"]) ;; esac AC_DEFINE_UNQUOTED([ENABLE_BACKTRACES],$ENABLE_BACKTRACES, @@ -704,6 +704,7 @@ ENABLE_PIC ENABLE_SHARED ENABLE_EMBED_STDCXX ENABLE_TIMESTAMPS +ENABLE_BACKTRACES TARGETS_TO_BUILD LLVM_ENUM_TARGETS LLVM_ENUM_ASM_PRINTERS @@ -5390,11 +5391,11 @@ else fi case "$enableval" in - yes) ENABLE_TIMESTAMPS=1 + yes) ENABLE_BACKTRACES=1 ;; - no) ENABLE_TIMESTAMPS=0 + no) ENABLE_BACKTRACES=0 ;; - default) ENABLE_TIMESTAMPS=1 + default) ENABLE_BACKTRACES=1 ;; *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-backtraces. Use \"yes\" or \"no\"" >&5 echo "$as_me: error: Invalid setting for --enable-backtraces. Use \"yes\" or \"no\"" >&2;} @@ -10315,7 +10316,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10318 "configure" +#line 10317 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -21971,6 +21972,7 @@ ENABLE_PIC!$ENABLE_PIC$ac_delim ENABLE_SHARED!$ENABLE_SHARED$ac_delim ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim +ENABLE_BACKTRACES!$ENABLE_BACKTRACES$ac_delim TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim @@ -22062,7 +22064,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 94; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index 5ace200803..a9c2d743ff 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -318,21 +318,26 @@ public: FunctionIndex = ~0U }; private: - /// AttrList - The attributes that we are managing. This can be null to - /// represent the empty attributes list. + /// @brief The attributes that we are managing. This can be null to represent + /// the empty attributes list. AttributeListImpl *AttrList; + + /// @brief The attributes for the specified index are returned. Attributes + /// for the result are denoted with Idx = 0. + Attributes getAttributes(unsigned Idx) const; + + explicit AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {} public: AttrListPtr() : AttrList(0) {} - AttrListPtr(const AttrListPtr &P); + AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {} const AttrListPtr &operator=(const AttrListPtr &RHS); - ~AttrListPtr(); //===--------------------------------------------------------------------===// // Attribute List Construction and Mutation //===--------------------------------------------------------------------===// /// get - Return a Attributes list with the specified parameters in it. - static AttrListPtr get(ArrayRef<AttributeWithIndex> Attrs); + static AttrListPtr get(LLVMContext &C, ArrayRef<AttributeWithIndex> Attrs); /// addAttr - Add the specified attribute at the specified index to this /// attribute list. Since attribute lists are immutable, this @@ -419,13 +424,6 @@ public: const AttributeWithIndex &getSlot(unsigned Slot) const; void dump() const; - -private: - explicit AttrListPtr(AttributeListImpl *L); - - /// getAttributes - The attributes for the specified index are - /// returned. Attributes for the result are denoted with Idx = 0. - Attributes getAttributes(unsigned Idx) const; }; } // End llvm namespace diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index ac803c5783..b24291ffb3 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -779,7 +779,9 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, 0, Name); + GlobalValue::ExternalWeakLinkage, 0, Name, + 0, GlobalVariable::NotThreadLocal, + PTy->getAddressSpace()); ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -2792,7 +2794,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Attributes::get(RetType->getContext(), FuncAttrs))); - AttrListPtr PAL = AttrListPtr::get(Attrs); + AttrListPtr PAL = AttrListPtr::get(Context, Attrs); if (PAL.getParamAttributes(1).hasAttribute(Attributes::StructRet) && !RetType->isVoidTy()) @@ -3349,7 +3351,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { FnAttrs))); // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Attrs); + AttrListPtr PAL = AttrListPtr::get(Context, Attrs); InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args); II->setCallingConv(CC); @@ -3751,7 +3753,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, FnAttrs))); // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Attrs); + AttrListPtr PAL = AttrListPtr::get(Context, Attrs); CallInst *CI = CallInst::Create(Callee, Args); CI->setTailCall(isTail); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 279343c48c..4ec9da12dd 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -487,7 +487,7 @@ bool BitcodeReader::ParseAttributeBlock() { Attributes::get(Context, B))); } - MAttributes.push_back(AttrListPtr::get(Attrs)); + MAttributes.push_back(AttrListPtr::get(Context, Attrs)); Attrs.clear(); break; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index c3bf2d234c..8585cbb30d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -59,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { - assert(I->start == Def && "Cannot insert def, already live"); - assert(I->valno->def == Def && "Inconsistent existing value def"); + assert(I->valno->def == I->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, I->start); + if (Def != I->start) + I->start = I->valno->def = Def; return I->valno; } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 0f3efd8345..5c909903f9 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -518,7 +518,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, Out << "Attrs.push_back(PAWI);"; nl(Out); } - Out << name << "_PAL = AttrListPtr::get(Attrs);"; + Out << name << "_PAL = AttrListPtr::get(mod->getContext(), Attrs);"; nl(Out); out(); nl(Out); Out << '}'; nl(Out); diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 8a0274b5ff..be48b2063f 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -611,7 +611,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Recompute the parameter attributes list based on the new arguments for // the function. - NF->setAttributes(AttrListPtr::get(AttributesVec)); + NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec)); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); @@ -731,11 +731,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); - cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec)); + cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(), + AttributesVec)); } else { New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); - cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec)); + cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(), + AttributesVec)); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index fc22548db7..4cfd0b235a 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -280,7 +280,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (FnAttrs.hasAttributes()) AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, FnAttrs)); - PAL = AttrListPtr::get(AttributesVec); + PAL = AttrListPtr::get(Fn.getContext(), AttributesVec); } Instruction *New; @@ -806,7 +806,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. - AttrListPtr NewPAL = AttrListPtr::get(AttributesVec); + AttrListPtr NewPAL = AttrListPtr::get(F->getContext(), AttributesVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); @@ -874,7 +874,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. - AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec); + AttrListPtr NewCallPAL = AttrListPtr::get(F->getContext(), AttributesVec); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5ad6f9111c..48f270429e 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1177,7 +1177,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec); + const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(), + attrVec); Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -1355,7 +1356,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs); + const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8cb4a59cba..e223a049f0 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2356,8 +2356,20 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Try not to increase register pressure. BO0->hasOneUse() && BO1->hasOneUse()) { // Determine Y and Z in the form icmp (X+Y), (X+Z). - Value *Y = (A == C || A == D) ? B : A; - Value *Z = (C == A || C == B) ? D : C; + Value *Y, *Z; + if (A == C) { + Y = B; + Z = D; + } else if (A == D) { + Y = B; + Z = C; + } else if (B == C) { + Y = A; + Z = D; + } else if (B == D) { + Y = A; + Z = C; + } return new ICmpInst(Pred, Y, Z); } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4ab5b6e4a0..4d106fc188 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -164,7 +164,8 @@ static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) { return TD.getPreferredAlignment(GV); if (PointerType *PT = dyn_cast<PointerType>(V->getType())) - return TD.getABITypeAlignment(PT->getElementType()); + if (PT->getElementType()->isSized()) + return TD.getABITypeAlignment(PT->getElementType()); return 0; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 09687d8909..7a4079784b 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -339,36 +339,6 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { } } -/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C -/// is repeated Weight times. -static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C, - APInt Weight) { - // For addition the result can be efficiently computed as the product of the - // constant and the weight. - if (Opcode == Instruction::Add) - return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight)); - - // The weight might be huge, so compute by repeated squaring to ensure that - // compile time is proportional to the logarithm of the weight. - Constant *Result = 0; - Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc. - // Visit the bits in Weight. - while (Weight != 0) { - // If the current bit in Weight is non-zero do Result = Result op Power. - if (Weight[0]) - Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power; - // Move on to the next bit if any more are non-zero. - Weight = Weight.lshr(1); - if (Weight.isMinValue()) - break; - // Square the power. - Power = ConstantExpr::get(Opcode, Power, Power); - } - - assert(Result && "Only positive weights supported!"); - return Result; -} - typedef std::pair<Value*, APInt> RepeatedValue; /// LinearizeExprTree - Given an associative binary expression, return the leaf @@ -382,9 +352,7 @@ typedef std::pair<Value*, APInt> RepeatedValue; /// op /// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times /// -/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and -/// they are all non-constant except possibly for the last one, which if it is -/// constant will have weight one (Ops[N].second === 1). +/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct. /// /// This routine may modify the function, in which case it returns 'true'. The /// changes it makes may well be destructive, changing the value computed by 'I' @@ -604,7 +572,6 @@ static bool LinearizeExprTree(BinaryOperator *I, // The leaves, repeated according to their weights, represent the linearized // form of the expression. - Constant *Cst = 0; // Accumulate constants here. for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) { Value *V = LeafOrder[i]; LeafMap::iterator It = Leaves.find(V); @@ -618,31 +585,14 @@ static bool LinearizeExprTree(BinaryOperator *I, continue; // Ensure the leaf is only output once. It->second = 0; - // Glob all constants together into Cst. - if (Constant *C = dyn_cast<Constant>(V)) { - C = EvaluateRepeatedConstant(Opcode, C, Weight); - Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C; - continue; - } - // Add non-constant Ops.push_back(std::make_pair(V, Weight)); } - // Add any constants back into Ops, all globbed together and reduced to having - // weight 1 for the convenience of users. - Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); - if (Cst && Cst != Identity) { - // If combining multiple constants resulted in the absorber then the entire - // expression must evaluate to the absorber. - if (Cst == Absorber) - Ops.clear(); - Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1))); - } - // For nilpotent operations or addition there may be no operands, for example // because the expression was "X xor X" or consisted of 2^Bitwidth additions: // in both cases the weight reduces to 0 causing the value to be skipped. if (Ops.empty()) { + Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); assert(Identity && "Associative operation without identity!"); Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1))); } @@ -656,8 +606,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops) { assert(Ops.size() > 1 && "Single values should be used directly!"); - // Since our optimizations never increase the number of operations, the new - // expression can always be written by reusing the existing binary operators + // Since our optimizations should never increase the number of operations, the + // new expression can usually be written reusing the existing binary operators // from the original expression tree, without creating any new instructions, // though the rewritten expression may have a completely different topology. // We take care to not change anything if the new expression will be the same @@ -671,6 +621,20 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, unsigned Opcode = I->getOpcode(); BinaryOperator *Op = I; + /// NotRewritable - The operands being written will be the leaves of the new + /// expression and must not be used as inner nodes (via NodesToRewrite) by + /// mistake. Inner nodes are always reassociable, and usually leaves are not + /// (if they were they would have been incorporated into the expression and so + /// would not be leaves), so most of the time there is no danger of this. But + /// in rare cases a leaf may become reassociable if an optimization kills uses + /// of it, or it may momentarily become reassociable during rewriting (below) + /// due it being removed as an operand of one of its uses. Ensure that misuse + /// of leaf nodes as inner nodes cannot occur by remembering all of the future + /// leaves and refusing to reuse any of them as inner nodes. + SmallPtrSet<Value*, 8> NotRewritable; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + NotRewritable.insert(Ops[i].Op); + // ExpressionChanged - Non-null if the rewritten expression differs from the // original in some non-trivial way, requiring the clearing of optional flags. // Flags are cleared from the operator in ExpressionChanged up to I inclusive. @@ -703,12 +667,14 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // the old operands with the new ones. DEBUG(dbgs() << "RA: " << *Op << '\n'); if (NewLHS != OldLHS) { - if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode)) + BinaryOperator *BO = isReassociableOp(OldLHS, Opcode); + if (BO && !NotRewritable.count(BO)) NodesToRewrite.push_back(BO); Op->setOperand(0, NewLHS); } if (NewRHS != OldRHS) { - if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode)) + BinaryOperator *BO = isReassociableOp(OldRHS, Opcode); + if (BO && !NotRewritable.count(BO)) NodesToRewrite.push_back(BO); Op->setOperand(1, NewRHS); } @@ -732,7 +698,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, Op->swapOperands(); } else { // Overwrite with the new right-hand side. - if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode)) + BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode); + if (BO && !NotRewritable.count(BO)) NodesToRewrite.push_back(BO); Op->setOperand(1, NewRHS); ExpressionChanged = Op; @@ -745,7 +712,8 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // Now deal with the left-hand side. If this is already an operation node // from the original expression then just rewrite the rest of the expression // into it. - if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) { + BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode); + if (BO && !NotRewritable.count(BO)) { Op = BO; continue; } @@ -1446,9 +1414,26 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. - if (Ops.size() == 1) return Ops[0].Op; - + Constant *Cst = 0; unsigned Opcode = I->getOpcode(); + while (!Ops.empty() && isa<Constant>(Ops.back().Op)) { + Constant *C = cast<Constant>(Ops.pop_back_val().Op); + Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C; + } + // If there was nothing but constants then we are done. + if (Ops.empty()) + return Cst; + + // Put the combined constant back at the end of the operand list, except if + // there is no point. For example, an add of 0 gets dropped here, while a + // multiplication by zero turns the whole expression into zero. + if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) { + if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType())) + return Cst; + Ops.push_back(ValueEntry(0, Cst)); + } + + if (Ops.size() == 1) return Ops[0].Op; // Handle destructive annihilation due to identities between elements in the // argument list here. diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d95c855ce7..ccc2f7a77b 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -568,6 +568,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. + // NOTE! This may appear superficially to be something we could ignore + // entirely, but that is not so! There may be PHI-node uses where some + // instructions are dead but not others. We can't completely ignore the + // PHI node, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -1382,11 +1386,7 @@ class SROA : public FunctionPass { /// \brief A collection of instructions to delete. /// We try to batch deletions to simplify code and make things a bit more /// efficient. - SmallVector<Instruction *, 8> DeadInsts; - - /// \brief A set to prevent repeatedly marking an instruction split into many - /// uses as dead. Only used to guard insertion into DeadInsts. - SmallPtrSet<Instruction *, 4> DeadSplitInsts; + SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts; /// \brief Post-promotion worklist. /// @@ -1573,7 +1573,7 @@ private: do { LoadInst *LI = Loads.pop_back_val(); LI->replaceAllUsesWith(NewPN); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } while (!Loads.empty()); // Inject loads into all of the pred blocks. @@ -1717,7 +1717,7 @@ private: DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI->replaceAllUsesWith(V); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } } }; @@ -2134,8 +2134,13 @@ static bool isVectorPromotionViable(const DataLayout &TD, } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (!isa<LoadInst>(I->U->getUser()) && - !isa<StoreInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LI->isVolatile()) + return false; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + if (SI->isVolatile()) + return false; + } else { return false; } } @@ -2241,18 +2246,23 @@ static bool isIntegerWideningViable(const DataLayout &TD, static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { + DEBUG(dbgs() << " start: " << *V << "\n"); IntegerType *IntTy = cast<IntegerType>(V->getType()); assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element extends past full value"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!"); - if (Ty != IntTy) + if (Ty != IntTy) { V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); + DEBUG(dbgs() << " trunced: " << *V << "\n"); + } return V; } @@ -2262,20 +2272,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, IntegerType *Ty = cast<IntegerType>(V->getType()); assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!"); - if (Ty != IntTy) + DEBUG(dbgs() << " start: " << *V << "\n"); + if (Ty != IntTy) { V = IRB.CreateZExt(V, IntTy, Name + ".ext"); + DEBUG(dbgs() << " extended: " << *V << "\n"); + } assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element store outside of alloca store"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateShl(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); + DEBUG(dbgs() << " masked: " << *Old << "\n"); V = IRB.CreateOr(Old, V, Name + ".insert"); + DEBUG(dbgs() << " inserted: " << *V << "\n"); } return V; } @@ -2442,30 +2459,21 @@ private: void deleteIfTriviallyDead(Value *V) { Instruction *I = cast<Instruction>(V); if (isInstructionTriviallyDead(I)) - Pass.DeadInsts.push_back(I); + Pass.DeadInsts.insert(I); } - bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *Result; + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); if (LI.getType() == VecTy->getElementType() || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - getIndex(IRB, BeginOffset), getName(".extract")); - } else { - Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset), + getName(".extract")); } - if (Result->getType() != LI.getType()) - Result = convertValue(TD, IRB, Result, LI.getType()); - LI.replaceAllUsesWith(Result); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *Result << "\n"); - return true; + return V; } - bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2473,12 +2481,10 @@ private: V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); - LI.replaceAllUsesWith(V); - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return true; + if (Offset > 0 || EndOffset < NewAllocaEndOffset) + V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, + getName(".extract")); + return V; } bool visitLoadInst(LoadInst &LI) { @@ -2488,7 +2494,46 @@ private: IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(LI.getType())) { + bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of the original allocation it's behavior is undefined. Rather + // than trying to transform it, just replace it with undef. + // FIXME: We should do something more clever for functions being + // instrumented by asan. + // FIXME: Eventually, once ASan and friends can flush out bugs here, this + // should be transformed to a load of null making it unreachable. + uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); + if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { + LI.replaceAllUsesWith(UndefValue::get(LI.getType())); + Pass.DeadInsts.insert(&LI); + deleteIfTriviallyDead(OldOp); + DEBUG(dbgs() << " to: undef!!\n"); + return true; + } + + Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); + bool IsPtrAdjusted = false; + Value *V; + if (VecTy) { + V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + } else if (IntTy && LI.getType()->isIntegerTy()) { + V = rewriteIntegerLoad(IRB, LI); + } else if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, NewAllocaTy, LI.getType())) { + V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), getName(".load")); + } else { + Type *LTy = TargetTy->getPointerTo(); + V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), + getPartitionTypeAlign(TargetTy), + LI.isVolatile(), getName(".load")); + IsPtrAdjusted = true; + } + V = convertValue(TD, IRB, V, TargetTy); + + if (IsSplitIntLoad) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -2498,21 +2543,8 @@ private: assert(LI.getType()->getIntegerBitWidth() == TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide loads can be split and recomposed"); - IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8); - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NewAllocaTy, NarrowTy); - Value *V; // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); - if (IsConvertable) - V = convertValue(TD, IRB, - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")), - NarrowTy); - else - V = IRB.CreateAlignedLoad( - getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy), getName(".load")); // Create a placeholder value with the same type as LI to use as the // basis for the new value. This allows us to replace the uses of LI with // the computed value, and then replace the placeholder with LI, leaving @@ -2524,44 +2556,18 @@ private: LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; - if (Pass.DeadSplitInsts.insert(&LI)) - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return IsConvertable; - } - - if (VecTy) - return rewriteVectorizedLoadInst(IRB, LI, OldOp); - if (IntTy && LI.getType()->isIntegerTy()) - return rewriteIntegerLoad(IRB, LI); - - if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, NewAllocaTy, LI.getType())) { - Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); - Value *NewV = convertValue(TD, IRB, NewLI, LI.getType()); - LI.replaceAllUsesWith(NewV); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *NewLI << "\n"); - return !LI.isVolatile(); + } else { + LI.replaceAllUsesWith(V); } - assert(!IntTy && "Invalid load found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - LI.getPointerOperand()->getType()); - LI.setOperand(0, NewPtr); - LI.setAlignment(getPartitionTypeAlign(LI.getType())); - DEBUG(dbgs() << " to: " << LI << "\n"); - + Pass.DeadInsts.insert(&LI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !LI.isVolatile(); + DEBUG(dbgs() << " to: " << *V << "\n"); + return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI, - Value *OldOp) { - Value *V = SI.getValueOperand(); + bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + StoreInst &SI, Value *OldOp) { if (V->getType() == ElementTy || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { if (V->getType() != ElementTy) @@ -2574,17 +2580,16 @@ private: V = convertValue(TD, IRB, V, VecTy); } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) { + bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); - Value *V = SI.getValueOperand(); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); @@ -2596,7 +2601,7 @@ private: } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2608,74 +2613,53 @@ private: assert(OldOp == OldPtr); IRBuilder<> IRB(&SI); - if (VecTy) - return rewriteVectorizedStoreInst(IRB, SI, OldOp); - Type *ValueTy = SI.getValueOperand()->getType(); + Value *V = SI.getValueOperand(); + + // Strip all inbounds GEPs and pointer casts to try to dig out any root + // alloca that should be re-examined after promoting this alloca. + if (V->getType()->isPointerTy()) + if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) + Pass.PostPromotionWorklist.insert(AI); uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(ValueTy)) { + if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); - assert(ValueTy->isIntegerTy() && + assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); - assert(ValueTy->getIntegerBitWidth() == - TD.getTypeStoreSizeInBits(ValueTy) && + assert(V->getType()->getIntegerBitWidth() == + TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(ValueTy->getIntegerBitWidth() == + assert(V->getType()->getIntegerBitWidth() == TD.getTypeSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); - Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy, - BeginOffset, getName(".extract")); - StoreInst *NewSI; - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NarrowTy, NewAllocaTy); - if (IsConvertable) - NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy), - &NewAI, NewAI.getAlignment()); - else - NewSI = IRB.CreateAlignedStore( - V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy)); - (void)NewSI; - if (Pass.DeadSplitInsts.insert(&SI)) - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return IsConvertable; + V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, + getName(".extract")); } - if (IntTy && ValueTy->isIntegerTy()) - return rewriteIntegerStore(IRB, SI); - - // Strip all inbounds GEPs and pointer casts to try to dig out any root - // alloca that should be re-examined after promoting this alloca. - if (ValueTy->isPointerTy()) - if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand() - ->stripInBoundsOffsets())) - Pass.PostPromotionWorklist.insert(AI); + if (VecTy) + return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + if (IntTy && V->getType()->isIntegerTy()) + return rewriteIntegerStore(IRB, V, SI); + StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, ValueTy, NewAllocaTy)) { - Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy); - StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(), - SI.isVolatile()); - (void)NewSI; - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return !SI.isVolatile(); + canConvertValue(TD, V->getType(), NewAllocaTy)) { + V = convertValue(TD, IRB, V, NewAllocaTy); + NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + SI.isVolatile()); + } else { + Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo()); + NewSI = IRB.CreateAlignedStore(V, NewPtr, + getPartitionTypeAlign(V->getType()), + SI.isVolatile()); } - - assert(!IntTy && "Invalid store found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - SI.getPointerOperand()->getType()); - SI.setOperand(1, NewPtr); - SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType())); - DEBUG(dbgs() << " to: " << SI << "\n"); - + (void)NewSI; + Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !SI.isVolatile(); + + DEBUG(dbgs() << " to: " << *NewSI << "\n"); + return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } bool visitMemSetInst(MemSetInst &II) { @@ -2695,8 +2679,7 @@ private: } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); Type *AllocaTy = NewAI.getAllocatedType(); Type *ScalarTy = AllocaTy->getScalarType(); @@ -2852,8 +2835,7 @@ private: return false; } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && EndOffset == NewAllocaEndOffset; @@ -2963,8 +2945,7 @@ private: assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); ConstantInt *Size = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), @@ -3533,7 +3514,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { DI != DE; ++DI) { Changed = true; (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType())); - DeadInsts.push_back(*DI); + DeadInsts.insert(*DI); } for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(), DE = P.dead_op_end(); @@ -3544,7 +3525,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (Instruction *OldI = dyn_cast<Instruction>(OldV)) if (isInstructionTriviallyDead(OldI)) { Changed = true; - DeadInsts.push_back(OldI); + DeadInsts.insert(OldI); } } @@ -3565,7 +3546,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) { /// We also record the alloca instructions deleted here so that they aren't /// subsequently handed to mem2reg to promote. void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { - DeadSplitInsts.clear(); while (!DeadInsts.empty()) { Instruction *I = DeadInsts.pop_back_val(); DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); @@ -3577,7 +3557,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { // Zero out the operand and see if it becomes trivially dead. *OI = 0; if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); + DeadInsts.insert(U); } if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index fa2faa2dad..74b2ee10e0 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -47,7 +47,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), + Constant *StrLen = M->getOrInsertFunction("strlen", + AttrListPtr::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -74,7 +76,9 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + Constant *StrNLen = M->getOrInsertFunction("strnlen", + AttrListPtr::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -102,7 +106,9 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI), + Constant *StrChr = M->getOrInsertFunction("strchr", + AttrListPtr::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -127,7 +133,9 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), + Value *StrNCmp = M->getOrInsertFunction("strncmp", + AttrListPtr::get(M->getContext(), + AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -155,7 +163,8 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrCpy = M->getOrInsertFunction(Name, + AttrListPtr::get(M->getContext(), AWI), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -178,7 +187,9 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrNCpy = M->getOrInsertFunction(Name, + AttrListPtr::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -203,7 +214,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, Attributes::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttrListPtr::get(AWI), + AttrListPtr::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -231,7 +242,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), + Value *MemChr = M->getOrInsertFunction("memchr", + AttrListPtr::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -261,7 +273,8 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, ArrayRef<Attributes::AttrVal>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), + Value *MemCmp = M->getOrInsertFunction("memcmp", + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -338,7 +351,8 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), + Value *PutS = M->getOrInsertFunction("puts", + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -362,7 +376,8 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Attributes::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI), + F = M->getOrInsertFunction("fputc", + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -396,7 +411,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FPutsName, + AttrListPtr::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -429,7 +445,8 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FWriteName, + AttrListPtr::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 5a552c34e1..f1268e6ef8 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -355,62 +355,8 @@ uint64_t AttributesImpl::getStackAlignment() const { // AttributeListImpl Definition //===----------------------------------------------------------------------===// -namespace llvm { - class AttributeListImpl; -} - -static ManagedStatic<FoldingSet<AttributeListImpl> > AttributesLists; - -namespace llvm { -static ManagedStatic<sys::SmartMutex<true> > ALMutex; - -class AttributeListImpl : public FoldingSetNode { - sys::cas_flag RefCount; - - // AttributesList is uniqued, these should not be publicly available. - void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION; - AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION; - ~AttributeListImpl(); // Private implementation -public: - SmallVector<AttributeWithIndex, 4> Attrs; - - AttributeListImpl(ArrayRef<AttributeWithIndex> attrs) - : Attrs(attrs.begin(), attrs.end()) { - RefCount = 0; - } - - void AddRef() { - sys::SmartScopedLock<true> Lock(*ALMutex); - ++RefCount; - } - void DropRef() { - sys::SmartScopedLock<true> Lock(*ALMutex); - if (!AttributesLists.isConstructed()) - return; - sys::cas_flag new_val = --RefCount; - if (new_val == 0) - delete this; - } - - void Profile(FoldingSetNodeID &ID) const { - Profile(ID, Attrs); - } - static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeWithIndex> Attrs){ - for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - ID.AddInteger(Attrs[i].Attrs.Raw()); - ID.AddInteger(Attrs[i].Index); - } - } -}; - -} // end llvm namespace - -AttributeListImpl::~AttributeListImpl() { - // NOTE: Lock must be acquired by caller. - AttributesLists->RemoveNode(this); -} - -AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) { +AttrListPtr AttrListPtr::get(LLVMContext &C, + ArrayRef<AttributeWithIndex> Attrs) { // If there are no attributes then return a null AttributesList pointer. if (Attrs.empty()) return AttrListPtr(); @@ -425,51 +371,36 @@ AttrListPtr AttrListPtr::get(ArrayRef<AttributeWithIndex> Attrs) { #endif // Otherwise, build a key to look up the existing attributes. + LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; AttributeListImpl::Profile(ID, Attrs); - void *InsertPos; - - sys::SmartScopedLock<true> Lock(*ALMutex); - AttributeListImpl *PAL = - AttributesLists->FindNodeOrInsertPos(ID, InsertPos); + void *InsertPoint; + AttributeListImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID, + InsertPoint); // If we didn't find any existing attributes of the same shape then // create a new one and insert it. - if (!PAL) { - PAL = new AttributeListImpl(Attrs); - AttributesLists->InsertNode(PAL, InsertPos); + if (!PA) { + PA = new AttributeListImpl(Attrs); + pImpl->AttrsLists.InsertNode(PA, InsertPoint); } // Return the AttributesList that we found or created. - return AttrListPtr(PAL); + return AttrListPtr(PA); } //===----------------------------------------------------------------------===// // AttrListPtr Method Implementations //===----------------------------------------------------------------------===// -AttrListPtr::AttrListPtr(AttributeListImpl *LI) : AttrList(LI) { - if (LI) LI->AddRef(); -} - -AttrListPtr::AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) { - if (AttrList) AttrList->AddRef(); -} - const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) { - sys::SmartScopedLock<true> Lock(*ALMutex); if (AttrList == RHS.AttrList) return *this; - if (AttrList) AttrList->DropRef(); + AttrList = RHS.AttrList; - if (AttrList) AttrList->AddRef(); return *this; } -AttrListPtr::~AttrListPtr() { - if (AttrList) AttrList->DropRef(); -} - /// getNumSlots - Return the number of slots used in this attribute list. /// This is the number of arguments that have an attribute set on them /// (including the function itself). @@ -507,6 +438,7 @@ bool AttrListPtr::hasAttrSomewhere(Attributes::AttrVal Attr) const { for (unsigned i = 0, e = Attrs.size(); i != e; ++i) if (Attrs[i].Attrs.hasAttribute(Attr)) return true; + return false; } @@ -562,7 +494,7 @@ AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx, OldAttrList.begin()+i, OldAttrList.end()); } - return get(NewAttrList); + return get(C, NewAttrList); } AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx, @@ -601,7 +533,7 @@ AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx, NewAttrList.insert(NewAttrList.end(), OldAttrList.begin()+i, OldAttrList.end()); - return get(NewAttrList); + return get(C, NewAttrList); } void AttrListPtr::dump() const { diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h index b4a0f615f3..5c107e1ebb 100644 --- a/lib/VMCore/AttributesImpl.h +++ b/lib/VMCore/AttributesImpl.h @@ -15,12 +15,11 @@ #ifndef LLVM_ATTRIBUTESIMPL_H #define LLVM_ATTRIBUTESIMPL_H +#include "llvm/Attributes.h" #include "llvm/ADT/FoldingSet.h" namespace llvm { -class Attributes; - class AttributesImpl : public FoldingSetNode { uint64_t Bits; // FIXME: We will be expanding this. public: @@ -46,6 +45,27 @@ public: } }; +class AttributeListImpl : public FoldingSetNode { + // AttributesList is uniqued, these should not be publicly available. + void operator=(const AttributeListImpl &) LLVM_DELETED_FUNCTION; + AttributeListImpl(const AttributeListImpl &) LLVM_DELETED_FUNCTION; +public: + SmallVector<AttributeWithIndex, 4> Attrs; + + AttributeListImpl(ArrayRef<AttributeWithIndex> attrs) + : Attrs(attrs.begin(), attrs.end()) {} + + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, Attrs); + } + static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeWithIndex> Attrs){ + for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { + ID.AddInteger(Attrs[i].Attrs.Raw()); + ID.AddInteger(Attrs[i].Index); + } + } +}; + } // end llvm namespace #endif diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index 74247bdde1..d35d2844b8 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -97,11 +97,18 @@ LLVMContextImpl::~LLVMContextImpl() { // Destroy attributes. for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(), - E = AttrsSet.end(); I != E;) { + E = AttrsSet.end(); I != E; ) { FoldingSetIterator<AttributesImpl> Elem = I++; delete &*Elem; } + // Destroy attribute lists. + for (FoldingSetIterator<AttributeListImpl> I = AttrsLists.begin(), + E = AttrsLists.end(); I != E; ) { + FoldingSetIterator<AttributeListImpl> Elem = I++; + delete &*Elem; + } + // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. SmallVector<MDNode*, 8> MDNodes; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index ee31814c05..90cf424a3c 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -256,7 +256,8 @@ public: FPMapTy FPConstants; FoldingSet<AttributesImpl> AttrsSet; - + FoldingSet<AttributeListImpl> AttrsLists; + StringMap<Value*> MDStringCache; FoldingSet<MDNode> MDNodeSet; diff --git a/test/Assembler/global-addrspace-forwardref.ll b/test/Assembler/global-addrspace-forwardref.ll new file mode 100644 index 0000000000..f0f094a224 --- /dev/null +++ b/test/Assembler/global-addrspace-forwardref.ll @@ -0,0 +1,8 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; Make sure the address space of forward decls is preserved + +; CHECK: @a2 = global i8 addrspace(1)* @a +; CHECK: @a = addrspace(1) global i8 0 +@a2 = global i8 addrspace(1)* @a +@a = addrspace(1) global i8 0 diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll index e6eb9efd8c..d201ebdc85 100644 --- a/test/CodeGen/X86/inline-asm.ll +++ b/test/CodeGen/X86/inline-asm.ll @@ -52,3 +52,10 @@ entry: %0 = call { i32, i32, i32, i32, i32 } asm sideeffect "", "=&r,=&r,=&r,=&r,=&q,r,~{ecx},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %h) nounwind ret void } + +; Mix normal and EC defs of the same register. +define i32 @pr14376() nounwind noinline { +entry: + %asm = tail call i32 asm sideeffect "", "={ax},i,~{eax},~{flags},~{rax}"(i64 61) nounwind + ret i32 %asm +} diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg index 7bcb9ae82c..fc29f651aa 100644 --- a/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -19,3 +19,6 @@ if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips', 'PowerPC']: if root.host_os in ['Darwin']: config.unsupported = True + +if 'powerpc' in root.target_triple and not 'powerpc64' in root.target_triple: + config.unsupported = True diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll index 50e03479f6..68a671cec8 100644 --- a/test/Transforms/InstCombine/alloca.ll +++ b/test/Transforms/InstCombine/alloca.ll @@ -94,3 +94,19 @@ entry: tail call void @f(i32* %b) ret void } + +; PR14371 +%opaque_type = type opaque +%real_type = type { { i32, i32* } } + +@opaque_global = external constant %opaque_type, align 4 + +define void @test7() { +entry: + %0 = alloca %real_type, align 4 + %1 = bitcast %real_type* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll index eaff87d695..8e064a4f2f 100644 --- a/test/Transforms/InstCombine/icmp.ll +++ b/test/Transforms/InstCombine/icmp.ll @@ -659,3 +659,21 @@ define i1 @test64(i8 %a, i32 %b) nounwind { ; CHECK-NEXT: %c = icmp eq i8 %1, %a ; CHECK-NEXT: ret i1 %c } + +define i1 @test65(i64 %A, i64 %B) { + %s1 = add i64 %A, %B + %s2 = add i64 %A, %B + %cmp = icmp eq i64 %s1, %s2 +; CHECK: @test65 +; CHECK-NEXT: ret i1 true + ret i1 %cmp +} + +define i1 @test66(i64 %A, i64 %B) { + %s1 = add i64 %A, %B + %s2 = add i64 %B, %A + %cmp = icmp eq i64 %s1, %s2 +; CHECK: @test66 +; CHECK-NEXT: ret i1 true + ret i1 %cmp +} diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll index ce586e15fb..e29b5dc9c0 100644 --- a/test/Transforms/Reassociate/crash.ll +++ b/test/Transforms/Reassociate/crash.ll @@ -144,3 +144,31 @@ define i32 @sozefx_(i32 %x, i32 %y) { %t6 = add i32 %t4, %t5 ret i32 %t6 } + +define i32 @bar(i32 %arg, i32 %arg1, i32 %arg2) { + %tmp1 = mul i32 %arg1, 2 + %tmp2 = mul i32 %tmp1, 3 + %tmp3 = mul i32 %arg2, 2 + %tmp4 = add i32 %tmp1, 1 ; dead code + %ret = add i32 %tmp2, %tmp3 + ret i32 %ret +} + +; PR14060 +define i8 @hang(i8 %p, i8 %p0, i8 %p1, i8 %p2, i8 %p3, i8 %p4, i8 %p5, i8 %p6, i8 %p7, i8 %p8, i8 %p9) { + %tmp = zext i1 false to i8 + %tmp16 = or i8 %tmp, 1 + %tmp22 = or i8 %p7, %p0 + %tmp23 = or i8 %tmp16, %tmp22 + %tmp28 = or i8 %p9, %p1 + %tmp31 = or i8 %tmp23, %p2 + %tmp32 = or i8 %tmp31, %tmp28 + %tmp38 = or i8 %p8, %p3 + %tmp39 = or i8 %tmp16, %tmp38 + %tmp43 = or i8 %tmp39, %p4 + %tmp44 = or i8 %tmp43, 1 + %tmp47 = or i8 %tmp32, %p5 + %tmp50 = or i8 %tmp47, %p6 + %tmp51 = or i8 %tmp44, %tmp50 + ret i8 %tmp51 +} diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index 110950f76a..b363eefb3f 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1100,12 +1100,12 @@ entry: %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1 store float %phi.real, float* %real store float %phi.imag, float* %imag + ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32 ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32 ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64 ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32 ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295 ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]] - ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32 ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64 ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296 ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]] diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll index d95e48f303..921016a9c2 100644 --- a/test/Transforms/SROA/phi-and-select.ll +++ b/test/Transforms/SROA/phi-and-select.ll @@ -390,3 +390,38 @@ if.then: %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0 br label %for.cond } + +define i64 @PR14132(i1 %flag) { +; CHECK: @PR14132 +; Here we form a PHI-node by promoting the pointer alloca first, and then in +; order to promote the other two allocas, we speculate the load of the +; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8 +; alloca, which is completely bogus. However, we were asserting on trying to +; rewrite it. Now it is replaced with undef. Eventually we may replace it with +; unrechable and even the CFG will go away here. +entry: + %a = alloca i64 + %b = alloca i8 + %ptr = alloca i64* +; CHECK-NOT: alloca + + %ptr.cast = bitcast i64** %ptr to i8** + store i64 0, i64* %a + store i8 1, i8* %b + store i64* %a, i64** %ptr + br i1 %flag, label %if.then, label %if.end + +if.then: + store i8* %b, i8** %ptr.cast + br label %if.end + +if.end: + %tmp = load i64** %ptr + %result = load i64* %tmp +; CHECK-NOT: store +; CHECK-NOT: load +; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ] + + ret i64 %result +; CHECK-NEXT: ret i64 %[[result]] +} diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll index 02e084bf11..ea28f5d1a6 100644 --- a/test/Transforms/SROA/vector-promotion.ll +++ b/test/Transforms/SROA/vector-promotion.ll @@ -220,3 +220,48 @@ entry: ret i32 %load ; CHECK: ret i32 } + +define <2 x i8> @PR14349.1(i32 %x) { +; CEHCK: @PR14349.1 +; The first testcase for broken SROA rewriting of split integer loads and +; stores due to smaller vector loads and stores. This particular test ensures +; that we can rewrite a split store of an integer to a store of a vector. +entry: + %a = alloca i32 +; CHECK-NOT: alloca + + store i32 %x, i32* %a +; CHECK-NOT: store + + %cast = bitcast i32* %a to <2 x i8>* + %vec = load <2 x i8>* %cast +; CHECK-NOT: load + + ret <2 x i8> %vec +; CHECK: %[[trunc:.*]] = trunc i32 %x to i16 +; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8> +; CHECK: ret <2 x i8> %[[cast]] +} + +define i32 @PR14349.2(<2 x i8> %x) { +; CEHCK: @PR14349.2 +; The first testcase for broken SROA rewriting of split integer loads and +; stores due to smaller vector loads and stores. This particular test ensures +; that we can rewrite a split load of an integer to a load of a vector. +entry: + %a = alloca i32 +; CHECK-NOT: alloca + + %cast = bitcast i32* %a to <2 x i8>* + store <2 x i8> %x, <2 x i8>* %cast +; CHECK-NOT: store + + %int = load i32* %a +; CHECK-NOT: load + + ret i32 %int +; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16 +; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32 +; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]] +; CHECK: ret i32 %[[insert]] +} diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index e1910784a6..fe55242930 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -621,7 +621,7 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) { OS << " }\n"; OS << " }\n"; - OS << " return AttrListPtr::get(ArrayRef<AttributeWithIndex>(AWI, " + OS << " return AttrListPtr::get(C, ArrayRef<AttributeWithIndex>(AWI, " "NumAttrs));\n"; OS << "}\n"; OS << "#endif // GET_INTRINSIC_ATTRIBUTES\n\n"; |