summaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/MemCpyOptimizer.cpp
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-11-21 00:28:59 +0000
committerChris Lattner <sabre@nondot.org>2010-11-21 00:28:59 +0000
commit2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3 (patch)
tree0368003df16ef9b625afc9cdca10357bf6f22268 /lib/Transforms/Scalar/MemCpyOptimizer.cpp
parenta6fd81dd7f6039fbc1a55f6f4d45659fffdd81fb (diff)
downloadllvm-2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3.tar.gz
llvm-2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3.tar.bz2
llvm-2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3.tar.xz
Implement PR8644: forwarding a memcpy value to a byval,
allowing the memcpy to be eliminated. Unfortunately, the requirements on byval's without explicit alignment are really weak and impossible to predict in the mid-level optimizer, so this doesn't kick in much with current frontends. The fix is to change clang to set alignment on all byval arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119916 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Scalar/MemCpyOptimizer.cpp')
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp171
1 files changed, 127 insertions, 44 deletions
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 6f53c322ef..4f1cfd622f 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -301,11 +301,13 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
namespace {
class MemCpyOpt : public FunctionPass {
+ MemoryDependenceAnalysis *MD;
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
+ MD = 0;
}
private:
@@ -327,6 +329,7 @@ namespace {
uint64_t cpyLen, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
uint64_t MSize);
+ bool processByValArgument(CallSite CS, unsigned ArgNo);
bool iterateOnFunction(Function &F);
};
@@ -359,9 +362,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
if (!LI->isVolatile() && LI->hasOneUse()) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
- MemDepResult dep = MD.getDependency(LI);
+ MemDepResult dep = MD->getDependency(LI);
CallInst *C = 0;
if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
C = dyn_cast<CallInst>(dep.getInst());
@@ -372,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LI->getPointerOperand()->stripPointerCasts(),
TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
if (changed) {
- MD.removeInstruction(SI);
+ MD->removeInstruction(SI);
SI->eraseFromParent();
LI->eraseFromParent();
++NumMemCpyInstr;
@@ -505,8 +506,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
DEBUG(dbgs() << "Replace stores:\n";
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
- dbgs() << *Range.TheStores[i];
- dbgs() << "With: " << *C); C=C;
+ dbgs() << *Range.TheStores[i] << '\n';
+ dbgs() << "With: " << *C << '\n'); C=C;
// Don't invalidate the iterator
BBI = BI;
@@ -657,11 +658,10 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
- MD.removeInstruction(C);
+ MD->removeInstruction(C);
- // Remove the memcpy
- MD.removeInstruction(cpy);
+ // Remove the memcpy.
+ MD->removeInstruction(cpy);
++NumMemCpyInstr;
return true;
@@ -675,7 +675,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
uint64_t MSize) {
// We can only transforms memcpy's where the dest of one is the source of the
// other.
- if (M->getSource() != MDep->getDest())
+ if (M->getSource() != MDep->getDest() || MDep->isVolatile())
return false;
// Second, the length of the memcpy's must be the same, or the preceeding one
@@ -684,27 +684,26 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
if (!C1) return false;
uint64_t DepSize = C1->getValue().getZExtValue();
-
if (DepSize < MSize)
return false;
- Intrinsic::ID ResultFn = Intrinsic::memcpy;
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. We still want to eliminate the intermediate
// value, but we have to generate a memmove instead of memcpy.
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ Intrinsic::ID ResultFn = Intrinsic::memcpy;
if (!AA.isNoAlias(M->getRawDest(), MSize, MDep->getRawSource(), DepSize))
ResultFn = Intrinsic::memmove;
- // If all checks passed, then we can transform these memcpy's
+ // If all checks passed, then we can transform M.
const Type *ArgTys[3] = {
M->getRawDest()->getType(),
MDep->getRawSource()->getType(),
M->getLength()->getType()
};
Function *MemCpyFun =
- Intrinsic::getDeclaration(M->getParent()->getParent()->getParent(),
+ Intrinsic::getDeclaration(MDep->getParent()->getParent()->getParent(),
ResultFn, ArgTys, 3);
// Make sure to use the lesser of the alignment of the source and the dest
@@ -717,29 +716,30 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
M->getRawDest(),
MDep->getRawSource(),
M->getLength(),
- ConstantInt::get(Type::getInt32Ty(M->getContext()), Align),
+ ConstantInt::get(Type::getInt32Ty(MemCpyFun->getContext()), Align),
M->getVolatileCst()
};
CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
// memcpy(a <- b)
// *b = 42;
// memcpy(c <- a)
// It would be invalid to transform the second memcpy into memcpy(c <- b).
- MemDepResult NewDep = MD.getDependency(C);
+ //
+ // TODO: If the code between M and MDep is transparent to the destination "c",
+ // then we could still perform the xform by moving M up to the first memcpy.
+ MemDepResult NewDep = MD->getDependency(C);
if (!NewDep.isClobber() || NewDep.getInst() != MDep) {
- MD.removeInstruction(C);
+ MD->removeInstruction(C);
C->eraseFromParent();
return false;
}
// Otherwise we're good! Nuke the instruction we're replacing.
- MD.removeInstruction(M);
+ MD->removeInstruction(M);
M->eraseFromParent();
++NumMemCpyInstr;
return true;
@@ -752,25 +752,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
-
- // We can only optimize statically-sized memcpy's.
- ConstantInt *cpyLen = dyn_cast<ConstantInt>(M->getLength());
- if (!cpyLen) return false;
+ // We can only optimize statically-sized memcpy's that are non-volatile.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0 || M->isVolatile()) return false;
// The are two possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
- MemDepResult dep = MD.getDependency(M);
- if (!dep.isClobber())
+ MemDepResult DepInfo = MD->getDependency(M);
+ if (!DepInfo.isClobber())
return false;
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(dep.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, cpyLen->getZExtValue());
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
- if (CallInst *C = dyn_cast<CallInst>(dep.getInst())) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
bool changed = performCallSlotOptzn(M, M->getDest(), M->getSource(),
- cpyLen->getZExtValue(), C);
+ CopySize->getZExtValue(), C);
if (changed) M->eraseFromParent();
return changed;
}
@@ -805,33 +803,116 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
- getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
+ MD->removeInstruction(M);
++NumMoveToCpy;
return true;
}
+/// processByValArgument - This is called on every byval argument in call sites.
+bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD) return false;
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
+ Value *ByValArg = CS.getArgument(ArgNo);
+
+ // MemDep doesn't have a way to do a local query with a memory location.
+ // Instead, just insert a load and ask for its dependences.
+ LoadInst *TmpLoad = new LoadInst(ByValArg, "", CS.getInstruction());
+ MemDepResult DepInfo = MD->getDependency(TmpLoad);
+
+ MD->removeInstruction(TmpLoad);
+ TmpLoad->eraseFromParent();
+
+ if (!DepInfo.isClobber())
+ return false;
+
+ // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, see if we can byval from the source of the memcpy instead of the
+ // result.
+ MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+ if (MDep == 0 || MDep->isVolatile() ||
+ ByValArg->stripPointerCasts() != MDep->getDest())
+ return false;
+
+ // The length of the memcpy must be larger or equal to the size of the byval.
+ // must be larger than the following one.
+ ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ if (C1 == 0 ||
+ C1->getValue().getZExtValue() < TD->getTypeAllocSize(ByValArg->getType()))
+ return false;
+
+ // Get the alignment of the byval. If it is greater than the memcpy, then we
+ // can't do the substitution. If the call doesn't specify the alignment, then
+ // it is some target specific value that we can't know.
+ unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
+ if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
+ return false;
+
+ // Verify that the copied-from memory doesn't change in between the memcpy and
+ // the byval call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the second memcpy into foo(*b).
+ Value *TmpCast = MDep->getSource();
+ if (MDep->getSource()->getType() != ByValArg->getType())
+ TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+ "tmpcast", CS.getInstruction());
+ Value *TmpVal =
+ UndefValue::get(cast<PointerType>(TmpCast->getType())->getElementType());
+ Instruction *TmpStore = new StoreInst(TmpVal, TmpCast, false,
+ CS.getInstruction());
+ DepInfo = MD->getDependency(TmpStore);
+ bool isUnsafe = !DepInfo.isClobber() || DepInfo.getInst() != MDep;
+ MD->removeInstruction(TmpStore);
+ TmpStore->eraseFromParent();
+
+ if (isUnsafe) {
+ // Clean up the inserted cast instruction.
+ if (TmpCast != MDep->getSource())
+ cast<Instruction>(TmpCast)->eraseFromParent();
+ return false;
+ }
+
+ DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
+ << " " << *MDep << "\n"
+ << " " << *CS.getInstruction() << "\n");
+
+ // Otherwise we're good! Update the byval argument.
+ CS.setArgument(ArgNo, TmpCast);
+ ++NumMemCpyInstr;
+ return true;
+}
+
+/// iterateOnFunction - Executes one iteration of MemCpyOpt.
bool MemCpyOpt::iterateOnFunction(Function &F) {
bool MadeChange = false;
// Walk all instruction in the function.
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE;) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
+ bool RepeatInstruction = false;
+
if (StoreInst *SI = dyn_cast<StoreInst>(I))
MadeChange |= processStore(SI, BI);
- else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
- MadeChange |= processMemCpy(M);
- else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
- if (processMemMove(M)) {
- --BI; // Reprocess the new memcpy.
- MadeChange = true;
- }
+ else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) {
+ RepeatInstruction = processMemCpy(M);
+ } else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
+ RepeatInstruction = processMemMove(M);
+ } else if (CallSite CS = (Value*)I) {
+ for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
+ if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ MadeChange |= processByValArgument(CS, i);
+ }
+
+ // Reprocess the instruction if desired.
+ if (RepeatInstruction) {
+ --BI;
+ MadeChange = true;
}
}
}
@@ -844,12 +925,14 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
//
bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
while (1) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
}
+ MD = 0;
return MadeChange;
}