summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Collingbourne <peter@pcc.me.uk>2012-05-19 22:52:10 +0000
committerPeter Collingbourne <peter@pcc.me.uk>2012-05-19 22:52:10 +0000
commit9012c57e18d76d562b1f3e60bf19cccefa7b793e (patch)
treeaf18137e2c1f21f54c5bd534d9d53efc1185cc96
parentb8f2f29467b86a11e777e2ce071caf15ae6fcf75 (diff)
downloadllvm-9012c57e18d76d562b1f3e60bf19cccefa7b793e.tar.gz
llvm-9012c57e18d76d562b1f3e60bf19cccefa7b793e.tar.bz2
llvm-9012c57e18d76d562b1f3e60bf19cccefa7b793e.tar.xz
Do not eliminate allocas whose alignment exceeds that of the
copied-in constant, as a subsequent user may rely on over alignment. Fixes PR12885. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157134 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp47
-rw-r--r--test/Transforms/ScalarRepl/memcpy-from-global.ll36
2 files changed, 71 insertions, 12 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 026fea117b..113397fc11 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -29,6 +29,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
@@ -1346,6 +1347,25 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
return false;
}
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ (CE->getOpcode() == Instruction::GetElementPtr &&
+ cast<GEPOperator>(CE)->hasAllZeroIndices()))
+ return getPointeeAlignment(CE->getOperand(0), TD);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->isDeclaration())
+ return TD.getPreferredAlignment(GV);
+
+ if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+ return TD.getABITypeAlignment(PT->getElementType());
+
+ return 0;
+}
+
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
@@ -1379,23 +1399,26 @@ bool SROA::performScalarRepl(Function &F) {
continue;
// Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global. If this is the case, we can change all users to use
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
// the constant global instead. This is commonly produced by the CFE by
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- ToDelete[i]->eraseFromParent();
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- Copy->eraseFromParent(); // Don't mutate the global.
- AI->eraseFromParent();
- ++NumGlobals;
- Changed = true;
- continue;
+ if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ ToDelete[i]->eraseFromParent();
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
+ Copy->eraseFromParent(); // Don't mutate the global.
+ AI->eraseFromParent();
+ ++NumGlobals;
+ Changed = true;
+ continue;
+ }
}
// Check to see if we can perform the core SROA transformation. We cannot
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index 59475adc77..5557a8fd87 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -45,8 +45,10 @@ declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
%T = type { i8, [123 x i8] }
+%U = type { i32, i32, i32, i32, i32 }
@G = constant %T {i8 1, [123 x i8] zeroinitializer }
+@H = constant [2 x %U] zeroinitializer, align 16
define void @test2() {
%A = alloca %T
@@ -108,3 +110,37 @@ define void @test5() {
declare void @baz(i8* byval)
+
+
+define void @test6() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK: @test6
+; CHECK-NEXT: %a = bitcast
+; CHECK-NEXT: call void @bar(i8* %a)
+ ret void
+}
+
+define void @test7() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK: @test7
+; CHECK-NEXT: %a = bitcast
+; CHECK-NEXT: call void @bar(i8* %a)
+ ret void
+}
+
+define void @test8() {
+ %A = alloca %U, align 16
+ %a = bitcast %U* %A to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+; CHECK: @test8
+; CHECK: llvm.memcpy
+; CHECK: bar
+ ret void
+}