summaryrefslogtreecommitdiff
path: root/lib/Transforms/IPO/GlobalOpt.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/IPO/GlobalOpt.cpp')
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp141
1 files changed, 72 insertions, 69 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 0378231e08..408ac3c91b 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -812,31 +812,41 @@ static void ConstantPropUsersOf(Value *V) {
/// malloc into a global, and any loads of GV as uses of the new global.
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
- BitCastInst *BCI,
+ const Type *AllocTy,
Value* NElems,
TargetData* TD) {
- DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
- << " CALL = " << *CI << " BCI = " << *BCI << '\n');
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
+ // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
+ // returned NULL and we would not be here).
+ BitCastInst *BCI = NULL;
+ for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
+ if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+ break;
+
ConstantInt *NElements = cast<ConstantInt>(NElems);
if (NElements->getZExtValue() != 1) {
// If we have an array allocation, transform it to a single element
// allocation to make the code below simpler.
- Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
- NElements->getZExtValue());
- Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
- Instruction* NewMI = cast<Instruction>(NewM);
+ Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
+ unsigned TypeSize = TD->getTypeAllocSize(NewTy);
+ if (const StructType *ST = dyn_cast<StructType>(NewTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
+ ConstantInt::get(IntPtrTy, TypeSize));
Value* Indices[2];
Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
- Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
- NewMI->getName()+".el0", CI);
- BCI->replaceAllUsesWith(NewGEP);
- BCI->eraseFromParent();
+ Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
+ NewCI->getName()+".el0", CI);
+ Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
+ if (BCI) BCI->replaceAllUsesWith(NewGEP);
+ CI->replaceAllUsesWith(Cast);
+ if (BCI) BCI->eraseFromParent();
CI->eraseFromParent();
- BCI = cast<BitCastInst>(NewMI);
- CI = extractMallocCallFromBitCast(NewMI);
+ BCI = dyn_cast<BitCastInst>(NewCI);
+ CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);
}
// Create the new global variable. The contents of the malloc'd memory is
@@ -850,8 +860,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
GV,
GV->isThreadLocal());
- // Anything that used the malloc now uses the global directly.
- BCI->replaceAllUsesWith(NewGV);
+ // Anything that used the malloc or its bitcast now uses the global directly.
+ if (BCI) BCI->replaceAllUsesWith(NewGV);
+ CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
@@ -919,9 +930,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
GV->getParent()->getGlobalList().insert(GV, InitBool);
- // Now the GV is dead, nuke it and the malloc.
+ // Now the GV is dead, nuke it and the malloc (both CI and BCI).
GV->eraseFromParent();
- BCI->eraseFromParent();
+ if (BCI) BCI->eraseFromParent();
CI->eraseFromParent();
// To further other optimizations, loop over all users of NewGV and try to
@@ -1255,12 +1266,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
-static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
- CallInst *CI, BitCastInst* BCI,
- Value* NElems,
- TargetData *TD) {
- DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI
- << " BITCAST = " << *BCI << '\n');
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
+ Value* NElems, TargetData *TD) {
+ DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
@@ -1268,8 +1276,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
// it into GV). If there are other uses, change them to be uses of
// the global to simplify later code. This also deletes the store
// into GV.
- ReplaceUsesOfMallocWithGlobal(BCI, GV);
-
+ ReplaceUsesOfMallocWithGlobal(CI, GV);
+
// Okay, at this point, there are no users of the malloc. Insert N
// new mallocs at the same place as CI, and N globals.
std::vector<Value*> FieldGlobals;
@@ -1287,11 +1295,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
GV->isThreadLocal());
FieldGlobals.push_back(NGV);
- Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(CI->getContext()),
- FieldTy, NElems,
- BCI->getName() + ".f" + Twine(FieldNo));
+ unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ if (const StructType* ST = dyn_cast<StructType>(FieldTy))
+ TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
+ const Type* IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
+ ConstantInt::get(IntPtrTy, TypeSize),
+ NElems,
+ CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
- new StoreInst(NMI, NGV, BCI);
+ new StoreInst(NMI, NGV, CI);
}
// The tricky aspect of this transformation is handling the case when malloc
@@ -1308,18 +1321,18 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
// }
Value *RunningOr = 0;
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
- Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
- Constant::getNullValue(FieldMallocs[i]->getType()),
- "isnull");
+ Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+ Constant::getNullValue(FieldMallocs[i]->getType()),
+ "isnull");
if (!RunningOr)
RunningOr = Cond; // First seteq
else
- RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
}
// Split the basic block at the old malloc.
- BasicBlock *OrigBB = BCI->getParent();
- BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
+ BasicBlock *OrigBB = CI->getParent();
+ BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
@@ -1356,9 +1369,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
}
BranchInst::Create(ContBB, NullPtrBlock);
-
- // CI and BCI are no longer needed, remove them.
- BCI->eraseFromParent();
+
+ // CI is no longer needed, remove it.
CI->eraseFromParent();
/// InsertedScalarizedLoads - As we process loads, if we can't immediately
@@ -1444,13 +1456,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
/// cast of malloc.
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
- BitCastInst *BCI,
+ const Type *AllocTy,
Module::global_iterator &GVI,
TargetData *TD) {
- // If we can't figure out the type being malloced, then we can't optimize.
- const Type *AllocTy = getMallocAllocatedType(CI);
- assert(AllocTy);
-
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1471,7 +1479,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// for.
{
SmallPtrSet<PHINode*, 8> PHIs;
- if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+ if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
return false;
}
@@ -1479,16 +1487,15 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// transform the program to use global memory instead of malloc'd memory.
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
- Value *NElems = getMallocArraySize(CI, TD);
// We cannot optimize the malloc if we cannot determine malloc array size.
- if (NElems) {
+ if (Value *NElems = getMallocArraySize(CI, TD)) {
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD);
return true;
}
@@ -1506,28 +1513,28 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This the structure has an unreasonable number of fields, leave it
// alone.
if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
- AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
+ AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (const ArrayType *AT =
dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- Value *NumElements =
- ConstantInt::get(Type::getInt32Ty(CI->getContext()),
- AT->getNumElements());
- Value *NewMI = CallInst::CreateMalloc(CI,
- TD->getIntPtrType(CI->getContext()),
- AllocSTy, NumElements,
- BCI->getName());
- Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
- BCI->replaceAllUsesWith(Cast);
- BCI->eraseFromParent();
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
+ Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
+ Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
+ AllocSize, NumElements,
+ CI->getName());
+ Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
+ CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
- BCI = cast<BitCastInst>(NewMI);
- CI = extractMallocCallFromBitCast(NewMI);
+ CI = dyn_cast<BitCastInst>(Malloc) ?
+ extractMallocCallFromBitCast(Malloc):
+ cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD), TD);
return true;
}
}
@@ -1559,14 +1566,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- if (getMallocAllocatedType(CI)) {
- BitCastInst* BCI = NULL;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; )
- BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
- if (BCI && TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD))
- return true;
- }
+ const Type* MallocType = getMallocAllocatedType(CI);
+ if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
+ GVI, TD))
+ return true;
}
}