diff options
author | Peter Collingbourne <peter@pcc.me.uk> | 2012-05-19 22:52:10 +0000 |
---|---|---|
committer | Peter Collingbourne <peter@pcc.me.uk> | 2012-05-19 22:52:10 +0000 |
commit | 9012c57e18d76d562b1f3e60bf19cccefa7b793e (patch) | |
tree | af18137e2c1f21f54c5bd534d9d53efc1185cc96 | |
parent | b8f2f29467b86a11e777e2ce071caf15ae6fcf75 (diff) | |
download | llvm-9012c57e18d76d562b1f3e60bf19cccefa7b793e.tar.gz llvm-9012c57e18d76d562b1f3e60bf19cccefa7b793e.tar.bz2 llvm-9012c57e18d76d562b1f3e60bf19cccefa7b793e.tar.xz |
Do not eliminate allocas whose alignment exceeds that of the
copied-in constant, as a subsequent user may rely on over-alignment.
Fixes PR12885.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157134 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 47 | ||||
-rw-r--r-- | test/Transforms/ScalarRepl/memcpy-from-global.ll | 36 |
2 files changed, 71 insertions, 12 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 026fea117b..113397fc11 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -29,6 +29,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/DIBuilder.h" @@ -1346,6 +1347,25 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { return false; } +/// getPointeeAlignment - Compute the minimum alignment of the value pointed +/// to by the given pointer. +static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast<GEPOperator>(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast<PointerType>(V->getType())) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; +} + // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the alloca instructions in the function, removing them @@ -1379,23 +1399,26 @@ bool SROA::performScalarRepl(Function &F) { continue; // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global. If this is the case, we can change all users to use + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. 
SmallVector<Instruction *, 4> ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { - DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - ToDelete[i]->eraseFromParent(); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - Copy->eraseFromParent(); // Don't mutate the global. - AI->eraseFromParent(); - ++NumGlobals; - Changed = true; - continue; + if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + ToDelete[i]->eraseFromParent(); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); + Copy->eraseFromParent(); // Don't mutate the global. + AI->eraseFromParent(); + ++NumGlobals; + Changed = true; + continue; + } } // Check to see if we can perform the core SROA transformation. 
We cannot diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll index 59475adc77..5557a8fd87 100644 --- a/test/Transforms/ScalarRepl/memcpy-from-global.ll +++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll @@ -45,8 +45,10 @@ declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind %T = type { i8, [123 x i8] } +%U = type { i32, i32, i32, i32, i32 } @G = constant %T {i8 1, [123 x i8] zeroinitializer } +@H = constant [2 x %U] zeroinitializer, align 16 define void @test2() { %A = alloca %T @@ -108,3 +110,37 @@ define void @test5() { declare void @baz(i8* byval) + + +define void @test6() { + %A = alloca %U, align 16 + %a = bitcast %U* %A to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false) + call void @bar(i8* %a) readonly +; CHECK: @test6 +; CHECK-NEXT: %a = bitcast +; CHECK-NEXT: call void @bar(i8* %a) + ret void +} + +define void @test7() { + %A = alloca %U, align 16 + %a = bitcast %U* %A to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false) + call void @bar(i8* %a) readonly +; CHECK: @test7 +; CHECK-NEXT: %a = bitcast +; CHECK-NEXT: call void @bar(i8* %a) + ret void +} + +define void @test8() { + %A = alloca %U, align 16 + %a = bitcast %U* %A to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false) + call void @bar(i8* %a) readonly +; CHECK: @test8 +; CHECK: llvm.memcpy +; CHECK: bar + ret void +} |