summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp81
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--test/CodeGen/AArch64/global-merge.ll30
-rw-r--r--test/CodeGen/ARM/global-merge-1.ll85
-rw-r--r--test/Transforms/GlobalMerge/AArch64/arm64.ll21
-rw-r--r--test/Transforms/GlobalMerge/AArch64/global-merge-1.ll22
-rw-r--r--test/Transforms/GlobalMerge/AArch64/global-merge-2.ll30
-rw-r--r--test/Transforms/GlobalMerge/AArch64/global-merge-3.ll27
-rw-r--r--test/Transforms/GlobalMerge/ARM/arm.ll62
9 files changed, 284 insertions, 75 deletions
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 5222712631..ecf9be861e 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -81,6 +81,13 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
cl::desc("Enable global merge pass on constants"),
cl::init(false));
+// FIXME: this could be a transitional option, and we probably need to remove
+// it if only we are sure this optimization could always benefit all targets.
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+ cl::desc("Enable global merge pass on external linkage"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -129,9 +136,19 @@ namespace {
} // end anonymous namespace
char GlobalMerge::ID = 0;
-INITIALIZE_PASS(GlobalMerge, "global-merge",
- "Global Merge", false, false)
+static void *initializeGlobalMergePassOnce(PassRegistry &Registry) {
+ PassInfo *PI = new PassInfo(
+ "Merge global variables", "global-merge", &GlobalMerge::ID,
+ PassInfo::NormalCtor_t(callDefaultCtor<GlobalMerge>), false, false,
+ PassInfo::TargetMachineCtor_t(callTargetMachineCtor<GlobalMerge>));
+ Registry.registerPass(*PI, true);
+ return PI;
+}
+
+void llvm::initializeGlobalMergePass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializeGlobalMergePassOnce)
+}
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
@@ -154,11 +171,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ assert(Globals.size() > 1);
+
+ // FIXME: This simple solution merges globals all together as maximum as
+ // possible. However, with this solution it would be hard to remove dead
+ // global symbols at link-time. An alternative solution could be checking
+ // global symbols references function by function, and make the symbols
+ // being referred in the same function merged and we would probably need
+ // to introduce heuristic algorithm to solve the merge conflict from
+ // different functions.
for (size_t i = 0, e = Globals.size(); i != e; ) {
size_t j = 0;
uint64_t MergedSize = 0;
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
+
+ bool HasExternal = false;
+ GlobalVariable *TheFirstExternal = 0;
for (j = i; j != e; ++j) {
Type *Ty = Globals[j]->getType()->getElementType();
MergedSize += DL->getTypeAllocSize(Ty);
@@ -167,17 +196,37 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
}
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
+
+ if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+ HasExternal = true;
+ TheFirstExternal = Globals[j];
+ }
}
+ // If merged variables doesn't have external linkage, we needn't to expose
+ // the symbol after merging.
+ GlobalValue::LinkageTypes Linkage = HasExternal
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+
+ // If merged variables have external linkage, we use symbol name of the
+ // first variable merged as the suffix of global symbol name. This would
+ // be able to avoid the link-time naming conflict for globalm symbols.
+ Twine MergedGVName = HasExternal
+ ? "_MergedGlobals_" + TheFirstExternal->getName()
+ : "_MergedGlobals";
+
StructType *MergedTy = StructType::get(M.getContext(), Tys);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
- GlobalValue::InternalLinkage,
- MergedInit, "_MergedGlobals",
- nullptr,
- GlobalVariable::NotThreadLocal,
- AddrSpace);
+
+ GlobalVariable *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, Linkage, MergedInit, MergedGVName, nullptr,
+ GlobalVariable::NotThreadLocal, AddrSpace);
+
for (size_t k = i; k < j; ++k) {
+ GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+ std::string Name = Globals[k]->getName();
+
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, k-i)
@@ -185,6 +234,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
+
+ if (Linkage != GlobalValue::InternalLinkage) {
+ // Generate a new alias...
+ auto *PTy = cast<PointerType>(GEP->getType());
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ Linkage, Name, GEP, &M);
+ }
+
NumMerged++;
}
i = j;
@@ -245,8 +302,12 @@ bool GlobalMerge::doInitialization(Module &M) {
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
- // Merge is safe for "normal" internal globals only
- if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ // Merge is safe for "normal" internal or external globals only
+ if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
+ !I->hasInternalLinkage())
continue;
PointerType *PT = dyn_cast<PointerType>(I->getType());
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index edf012d811..5c7db5b16e 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -38,6 +38,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDSEPass(Registry);
initializeGVNPass(Registry);
initializeEarlyCSEPass(Registry);
+ initializeGlobalMergePass(Registry);
initializeIndVarSimplifyPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLICMPass(Registry);
diff --git a/test/CodeGen/AArch64/global-merge.ll b/test/CodeGen/AArch64/global-merge.ll
new file mode 100644
index 0000000000..aed1dc4d1c
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s
+
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
+; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
+
+; FIXME: add O1/O2 test for aarch64-none-linux-gnu and aarch64-apple-ios
+
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: f1:
+; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
+; CHECK-NOT: adrp
+
+; CHECK-APPLE-IOS-LABEL: f1:
+; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals
+; CHECK-APPLE-IOS-NOT: adrp
+ store i32 %a1, i32* @m, align 4
+ store i32 %a2, i32* @n, align 4
+ ret void
+}
+
+; CHECK: .local _MergedGlobals
+; CHECK: .comm _MergedGlobals,8,8
+; NO-MERGE-NOT: .local _MergedGlobals
+
+; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3
+; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3
diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll
new file mode 100644
index 0000000000..341597e618
--- /dev/null
+++ b/test/CodeGen/ARM/global-merge-1.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
+
+; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
+; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
+; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
+
+; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
+; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
+; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
+; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+@bar = internal global [5 x i32] zeroinitializer, align 4
+@baz = internal global [5 x i32] zeroinitializer, align 4
+@foo = internal global [5 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp
+define internal void @initialize() #0 {
+ %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
+ %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
+ %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
+ %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
+ %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
+ %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
+ %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
+ %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
+ %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+ %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
+ store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+ ret void
+}
+
+declare i32 @calc(...) #1
+
+; Function Attrs: nounwind ssp
+define internal void @calculate() #0 {
+ %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
+ %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
+ %3 = mul <4 x i32> %2, %1
+ store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
+ %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+ %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+ %6 = mul nsw i32 %5, %4
+ store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
+ ret void
+}
+
+; Function Attrs: nounwind readnone ssp
+define internal i32* @returnFoo() #2 {
+ ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0)
+}
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"LLVM version 3.4 "}
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"int", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/GlobalMerge/AArch64/arm64.ll b/test/Transforms/GlobalMerge/AArch64/arm64.ll
index 4715fd8de2..9174f72574 100644
--- a/test/Transforms/GlobalMerge/AArch64/arm64.ll
+++ b/test/Transforms/GlobalMerge/AArch64/arm64.ll
@@ -1,23 +1,6 @@
-; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
+; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -S -o - | FileCheck %s
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
target triple = "arm64-apple-ios7.0.0"
diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll
new file mode 100644
index 0000000000..f8854dd545
--- /dev/null
+++ b/test/Transforms/GlobalMerge/AArch64/global-merge-1.ll
@@ -0,0 +1,22 @@
+; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s
+
+; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s
+
+; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s
+
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+; CHECK: @_MergedGlobals = internal global { i32, i32 } zeroinitializer
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: @f1
+; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 0)
+; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 1)
+ store i32 %a1, i32* @m, align 4
+ store i32 %a2, i32* @n, align 4
+ ret void
+}
diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll
new file mode 100644
index 0000000000..1876483882
--- /dev/null
+++ b/test/Transforms/GlobalMerge/AArch64/global-merge-2.ll
@@ -0,0 +1,30 @@
+; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s
+
+@x = global i32 0, align 4
+@y = global i32 0, align 4
+@z = global i32 0, align 4
+
+; CHECK: @_MergedGlobals_x = global { i32, i32, i32 } zeroinitializer
+; CHECK: @x = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0)
+; CHECK: @y = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1)
+; CHECK: @z = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2)
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: @f1
+; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0)
+; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1)
+ store i32 %a1, i32* @x, align 4
+ store i32 %a2, i32* @y, align 4
+ ret void
+}
+
+define void @g1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: @g1
+; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1)
+; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2)
+ store i32 %a1, i32* @y, align 4
+ store i32 %a2, i32* @z, align 4
+ ret void
+}
diff --git a/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll b/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll
new file mode 100644
index 0000000000..811c3eef13
--- /dev/null
+++ b/test/Transforms/GlobalMerge/AArch64/global-merge-3.ll
@@ -0,0 +1,27 @@
+; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s
+; RUN: opt %s -mtriple=aarch64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s
+
+@x = global [1000 x i32] zeroinitializer, align 1
+@y = global [1000 x i32] zeroinitializer, align 1
+@z = internal global i32 1, align 4
+
+; CHECK: @_MergedGlobals_x = global { i32, [1000 x i32] } { i32 1, [1000 x i32] zeroinitializer }
+; CHECK: @_MergedGlobals_y = global { [1000 x i32] } zeroinitializer
+
+; CHECK: @x = alias getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1)
+; CHECK: @y = alias getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, i32 0)
+
+define void @f1(i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: @f1
+; CHECK: %x3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 1), i32 0, i64 3
+; CHECK: %y3 = getelementptr inbounds [1000 x i32]* getelementptr inbounds ({ [1000 x i32] }* @_MergedGlobals_y, i32 0, i32 0), i32 0, i64 3
+; CHECK: store i32 %a3, i32* getelementptr inbounds ({ i32, [1000 x i32] }* @_MergedGlobals_x, i32 0, i32 0), align 4
+
+ %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3
+ %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3
+ store i32 %a1, i32* %x3, align 4
+ store i32 %a2, i32* %y3, align 4
+ store i32 %a3, i32* @z, align 4
+ ret void
+}
diff --git a/test/Transforms/GlobalMerge/ARM/arm.ll b/test/Transforms/GlobalMerge/ARM/arm.ll
index 341597e618..e7553e9183 100644
--- a/test/Transforms/GlobalMerge/ARM/arm.ll
+++ b/test/Transforms/GlobalMerge/ARM/arm.ll
@@ -1,23 +1,4 @@
-; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
-
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
-; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
-; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
-; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; RUN: opt %s -mtriple=arm-linux-gnuabi -global-merge -S -o - | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios3.0.0"
@@ -26,28 +7,30 @@ target triple = "thumbv7-apple-ios3.0.0"
@baz = internal global [5 x i32] zeroinitializer, align 4
@foo = internal global [5 x i32] zeroinitializer, align 4
+; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer
+
; Function Attrs: nounwind ssp
define internal void @initialize() #0 {
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
+ store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4
%2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
+ store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4
%3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
+ store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4
%4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
+ store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4
%5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
+ store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4
%6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
+ store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4
%7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
+ store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4
%8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
+ store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4
%9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+ store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4
%10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+ store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4
ret void
}
@@ -59,10 +42,10 @@ define internal void @calculate() #0 {
%2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
%3 = mul <4 x i32> %2, %1
store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
- %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
- %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+ %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4
+ %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4
%6 = mul nsw i32 %5, %4
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
+ store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4
ret void
}
@@ -70,16 +53,3 @@ define internal void @calculate() #0 {
define internal i32* @returnFoo() #2 {
ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0)
}
-
-attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = metadata !{metadata !"LLVM version 3.4 "}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}