diff options
Diffstat (limited to 'lib/Transforms/IPO/GlobalOpt.cpp')
-rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 141 |
1 files changed, 72 insertions, 69 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0378231e08..408ac3c91b 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -812,31 +812,41 @@ static void ConstantPropUsersOf(Value *V) { /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, - BitCastInst *BCI, + const Type *AllocTy, Value* NElems, TargetData* TD) { - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV - << " CALL = " << *CI << " BCI = " << *BCI << '\n'); + DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); + // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have + // returned NULL and we would not be here). + BitCastInst *BCI = NULL; + for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) + if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) + break; + ConstantInt *NElements = cast<ConstantInt>(NElems); if (NElements->getZExtValue() != 1) { // If we have an array allocation, transform it to a single element // allocation to make the code below simpler. - Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), - NElements->getZExtValue()); - Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); - Instruction* NewMI = cast<Instruction>(NewM); + Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue()); + unsigned TypeSize = TD->getTypeAllocSize(NewTy); + if (const StructType *ST = dyn_cast<StructType>(NewTy)) + TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); + Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy, + ConstantInt::get(IntPtrTy, TypeSize)); Value* Indices[2]; Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); - Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, - NewMI->getName()+".el0", CI); - BCI->replaceAllUsesWith(NewGEP); - BCI->eraseFromParent(); + Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2, + NewCI->getName()+".el0", CI); + Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI); + if (BCI) BCI->replaceAllUsesWith(NewGEP); + CI->replaceAllUsesWith(Cast); + if (BCI) BCI->eraseFromParent(); CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + BCI = dyn_cast<BitCastInst>(NewCI); + CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI); } // Create the new global variable. The contents of the malloc'd memory is @@ -850,8 +860,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, GV, GV->isThreadLocal()); - // Anything that used the malloc now uses the global directly. - BCI->replaceAllUsesWith(NewGV); + // Anything that used the malloc or its bitcast now uses the global directly. + if (BCI) BCI->replaceAllUsesWith(NewGV); + CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI)); Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) @@ -919,9 +930,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, GV->getParent()->getGlobalList().insert(GV, InitBool); - // Now the GV is dead, nuke it and the malloc. + // Now the GV is dead, nuke it and the malloc (both CI and BCI). GV->eraseFromParent(); - BCI->eraseFromParent(); + if (BCI) BCI->eraseFromParent(); CI->eraseFromParent(); // To further other optimizations, loop over all users of NewGV and try to @@ -1255,12 +1266,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, - CallInst *CI, BitCastInst* BCI, - Value* NElems, - TargetData *TD) { - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI - << " BITCAST = " << *BCI << '\n'); +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, + Value* NElems, TargetData *TD) { + DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); @@ -1268,8 +1276,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, // it into GV). If there are other uses, change them to be uses of // the global to simplify later code. This also deletes the store // into GV. - ReplaceUsesOfMallocWithGlobal(BCI, GV); - + ReplaceUsesOfMallocWithGlobal(CI, GV); + // Okay, at this point, there are no users of the malloc. Insert N // new mallocs at the same place as CI, and N globals. std::vector<Value*> FieldGlobals; @@ -1287,11 +1295,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(CI->getContext()), - FieldTy, NElems, - BCI->getName() + ".f" + Twine(FieldNo)); + unsigned TypeSize = TD->getTypeAllocSize(FieldTy); + if (const StructType* ST = dyn_cast<StructType>(FieldTy)) + TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); + const Type* IntPtrTy = TD->getIntPtrType(CI->getContext()); + Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, + ConstantInt::get(IntPtrTy, TypeSize), + NElems, + CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, BCI); + new StoreInst(NMI, NGV, CI); } // The tricky aspect of this transformation is handling the case when malloc @@ -1308,18 +1321,18 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, // } Value *RunningOr = 0; for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); + Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); if (!RunningOr) RunningOr = Cond; // First seteq else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); } // Split the basic block at the old malloc. - BasicBlock *OrigBB = BCI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); + BasicBlock *OrigBB = CI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. @@ -1356,9 +1369,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, } BranchInst::Create(ContBB, NullPtrBlock); - - // CI and BCI are no longer needed, remove them. - BCI->eraseFromParent(); + + // CI is no longer needed, remove it. CI->eraseFromParent(); /// InsertedScalarizedLoads - As we process loads, if we can't immediately @@ -1444,13 +1456,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, - BitCastInst *BCI, + const Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { - // If we can't figure out the type being malloced, then we can't optimize. - const Type *AllocTy = getMallocAllocatedType(CI); - assert(AllocTy); - // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1471,7 +1479,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // for. { SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) return false; } @@ -1479,16 +1487,15 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - Value *NElems = getMallocArraySize(CI, TD); // We cannot optimize the malloc if we cannot determine malloc array size. - if (NElems) { + if (Value *NElems = getMallocArraySize(CI, TD)) { if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. if (TD && NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, TD); + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD); return true; } @@ -1506,28 +1513,28 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This the structure has an unreasonable number of fields, leave it // alone. if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - Value *NumElements = - ConstantInt::get(Type::getInt32Ty(CI->getContext()), - AT->getNumElements()); - Value *NewMI = CallInst::CreateMalloc(CI, - TD->getIntPtrType(CI->getContext()), - AllocSTy, NumElements, - BCI->getName()); - Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); - BCI->replaceAllUsesWith(Cast); - BCI->eraseFromParent(); + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + CI = dyn_cast<BitCastInst>(Malloc) ? + extractMallocCallFromBitCast(Malloc): + cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, TD); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD), TD); return true; } } @@ -1559,14 +1566,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - if (getMallocAllocatedType(CI)) { - BitCastInst* BCI = NULL; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); - UI != E; ) - BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)); - if (BCI && TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD)) - return true; - } + const Type* MallocType = getMallocAllocatedType(CI); + if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + GVI, TD)) + return true; } } |