summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2013-07-26 08:20:39 +0000
committerChandler Carruth <chandlerc@gmail.com>2013-07-26 08:20:39 +0000
commit8d93d41027b6f71b33b8da82c69766498bb1519a (patch)
treedac86b502fd820855cc69d64b4133efe284209f8 /test
parent6ee1464ba599f1afbed502fa1b3ac18c8577fd97 (diff)
downloadllvm-8d93d41027b6f71b33b8da82c69766498bb1519a.tar.gz
llvm-8d93d41027b6f71b33b8da82c69766498bb1519a.tar.bz2
llvm-8d93d41027b6f71b33b8da82c69766498bb1519a.tar.xz
Re-implement the analysis of uses in mem2reg to be significantly more
robust. It now uses an InstVisitor and worklist to actually walk the uses of the Alloca transitively and detect the pattern which we can directly promote: loads & stores of the whole alloca and instructions we can completely ignore. Also, with this new implementation, teach both the predicate for testing whether we can promote and the promotion engine itself to use the same code so we no longer have strange divergence between the two code paths. I've added some silly test cases to demonstrate that we can handle slightly more degenerate code patterns now. See below for why this is even interesting. Performance impact: roughly 1% regression in the performance of SROA or ScalarRepl on a large C++-ish test case where most of the allocas are basically ready for promotion. The cause is silly redundant work that I've left FIXMEs for and which I'll address in the next commit. I wanted to separate this commit as it changes the behavior. Once the redundant work in removing the dead uses of the alloca is fixed, this code appears to be faster than the old version. =] So why is this useful? Because the previous requirement for promotion required a *specific* visit pattern of the uses of the alloca to verify: we *had* to look for no more than 1 intervening use. The end goal is to have SROA automatically detect when an alloca is already promotable and directly hand it to the mem2reg machinery rather than trying to partition and rewrite it. This is a 25% or more performance improvement for SROA, and a significant chunk of the delta between it and ScalarRepl. To get there, we need to make mem2reg actually capable of promoting allocas which *look* promotable to SROA without having SROA do tons of work to massage the code into just the right form. This is actually the tip of the iceberg. There are tremendous potential savings we can realize here by de-duplicating work between mem2reg and SROA. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187191 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r--test/Transforms/Mem2Reg/ignore-lifetime.ll26
-rw-r--r--test/Transforms/Mem2Reg/use-analysis.ll70
2 files changed, 70 insertions, 26 deletions
diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll
deleted file mode 100644
index 5e4f9bfd8c..0000000000
--- a/test/Transforms/Mem2Reg/ignore-lifetime.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
-
-declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
-declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
-
-define void @test1() {
-; CHECK: test1
-; CHECK-NOT: alloca
- %A = alloca i32
- %B = bitcast i32* %A to i8*
- call void @llvm.lifetime.start(i64 2, i8* %B)
- store i32 1, i32* %A
- call void @llvm.lifetime.end(i64 2, i8* %B)
- ret void
-}
-
-define void @test2() {
-; CHECK: test2
-; CHECK-NOT: alloca
- %A = alloca {i8, i16}
- %B = getelementptr {i8, i16}* %A, i32 0, i32 0
- call void @llvm.lifetime.start(i64 2, i8* %B)
- store {i8, i16} zeroinitializer, {i8, i16}* %A
- call void @llvm.lifetime.end(i64 2, i8* %B)
- ret void
-}
diff --git a/test/Transforms/Mem2Reg/use-analysis.ll b/test/Transforms/Mem2Reg/use-analysis.ll
new file mode 100644
index 0000000000..b08b1f191b
--- /dev/null
+++ b/test/Transforms/Mem2Reg/use-analysis.ll
@@ -0,0 +1,70 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {
+; Ensure we can look through a bitcast to i8* and the addition of lifetime
+; markers.
+;
+; CHECK-LABEL: @test1(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+ %A = alloca i32
+ %B = bitcast i32* %A to i8*
+ call void @llvm.lifetime.start(i64 2, i8* %B)
+ store i32 1, i32* %A
+ call void @llvm.lifetime.end(i64 2, i8* %B)
+ ret void
+}
+
+define void @test2() {
+; Ensure we can look through a GEP to i8* and the addition of lifetime
+; markers.
+;
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+ %A = alloca {i8, i16}
+ %B = getelementptr {i8, i16}* %A, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 2, i8* %B)
+ store {i8, i16} zeroinitializer, {i8, i16}* %A
+ call void @llvm.lifetime.end(i64 2, i8* %B)
+ ret void
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: @test3(
+;
+; Check that we recursively walk the uses of the alloca and thus can see
+; through round trip bitcasts, dead bitcasts, GEPs, multiple GEPs, and lifetime
+; markers.
+entry:
+ %a = alloca i32
+; CHECK-NOT: alloca
+
+ %b = bitcast i32* %a to i8*
+ %b2 = getelementptr inbounds i8* %b, i32 0
+ %b3 = getelementptr inbounds i8* %b2, i32 0
+ call void @llvm.lifetime.start(i64 -1, i8* %b3)
+; CHECK-NOT: call void @llvm.lifetime.start
+
+ store i32 %x, i32* %a
+; CHECK-NOT: store
+
+ %dead = bitcast i32* %a to i4096*
+ %dead1 = bitcast i4096* %dead to i42*
+ %dead2 = getelementptr inbounds i32* %a, i32 %x
+; CHECK-NOT: bitcast
+; CHECK-NOT: getelementptr
+
+ %ret = load i32* %a
+; CHECK-NOT: load
+
+ ret i32 %ret
+; CHECK: ret i32 %x
+}