From 96c8735e28f2f89be37cdd907f680c6c1bf16052 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sat, 27 Oct 2012 14:25:44 +0000
Subject: LoopIdiom: Replace custom dependence analysis with DependenceAnalysis.

Requires a lot less code and complexity on loop-idiom's side and the more
precise analysis can catch more cases, like the one I included as a test case.

This also fixes the edge-case miscompilation from PR9481.

Compile time performance seems to be slightly worse, but this is mostly due to
an extra LCSSA run scheduled by the PassManager and should be fixed there.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166874 91177308-0d34-0410-b5e6-96231b3b80d8
---
; Reviewer notes. Free-form text in this region (between the "---" separator
; and the first "diff --git" header) is not part of any hunk.
;
; multi-dimensional.ll:
;   @test1 is a two-level loop nest: bb1 is the outer header, bb2 the inner
;   body. The inner loop exits when its truncated counter equals 8, the outer
;   loop when its truncated counter equals 16. Every inner iteration stores
;   i32 0 through four GEPs that share the %tmp / %tmp3 induction variables
;   and differ only in two constant indices (1,1 / 1,0 / 0,1 / 0,0).
;   The CHECK lines require, after the "bb1:" label, four llvm.memset calls on
;   consecutive lines, with no "store" between the label and the first call
;   nor between the last call and the following "br".
;
; sideeffect.ll:
;   Marked "PR9481" in the test itself. Both loops call @bar() on every
;   iteration, index a [10 x i8] alloca, and exit when the truncated counter
;   equals 1000000. The CHECK-NOT line requires that no "@llvm.memset" appears
;   after the "@test1" match.
;   NOTE(review): presumably the opaque call to @bar is what must block the
;   memset transformation here (hence the file name) -- confirm against PR9481.
 test/Transforms/LoopIdiom/multi-dimensional.ll | 49 ++++++++++++++++++++++++
 test/Transforms/LoopIdiom/sideeffect.ll | 53 ++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 test/Transforms/LoopIdiom/multi-dimensional.ll
 create mode 100644 test/Transforms/LoopIdiom/sideeffect.ll

(limited to 'test')

diff --git a/test/Transforms/LoopIdiom/multi-dimensional.ll b/test/Transforms/LoopIdiom/multi-dimensional.ll
new file mode 100644
index 0000000000..991f2688d7
--- /dev/null
+++ b/test/Transforms/LoopIdiom/multi-dimensional.ll
@@ -0,0 +1,49 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.ham = type { [2 x [2 x [2 x [16 x [8 x i32]]]]], i32, %struct.zot }
+%struct.zot = type { i32, i16, i16, [2 x [1152 x i32]] }
+
+define void @test1(%struct.ham* nocapture %arg) nounwind {
+bb:
+  br label %bb1
+
+bb1: ; preds = %bb11, %bb
+  %tmp = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
+  br label %bb2
+
+bb2: ; preds = %bb2, %bb1
+  %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp8, %bb2 ]
+  %tmp4 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 1, i64 %tmp, i64 %tmp3
+  store i32 0, i32* %tmp4, align 4
+  %tmp5 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 1, i64 0, i64 %tmp, i64 %tmp3
+  store i32 0, i32* %tmp5, align 4
+  %tmp6 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 1, i64 %tmp, i64 %tmp3
+  store i32 0, i32* %tmp6, align 4
+  %tmp7 = getelementptr inbounds %struct.ham* %arg, i64 0, i32 0, i64 0, i64 0, i64 0, i64 %tmp, i64 %tmp3
+  store i32 0, i32* %tmp7, align 4
+  %tmp8 = add i64 %tmp3, 1
+  %tmp9 = trunc i64 %tmp8 to i32
+  %tmp10 = icmp eq i32 %tmp9, 8
+  br i1 %tmp10, label %bb11, label %bb2
+
+bb11: ; preds = %bb2
+  %tmp12 = add i64 %tmp, 1
+  %tmp13 = trunc i64 %tmp12 to i32
+  %tmp14 = icmp eq i32 %tmp13, 16
+  br i1 %tmp14, label %bb15, label %bb1
+
+bb15: ; preds = %bb11
+  ret void
+
+; CHECK: @test1
+; CHECK: bb1:
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: store
+; CHECK: br
+}
diff --git a/test/Transforms/LoopIdiom/sideeffect.ll b/test/Transforms/LoopIdiom/sideeffect.ll
new file mode 100644
index 0000000000..460e5233f9
--- /dev/null
+++ b/test/Transforms/LoopIdiom/sideeffect.ll
@@ -0,0 +1,53 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; PR9481
+define i32 @test1() nounwind uwtable ssp {
+entry:
+  %a = alloca [10 x i8], align 1
+  br label %for.body
+
+for.cond1.preheader: ; preds = %for.body
+  %arrayidx5.phi.trans.insert = getelementptr inbounds [10 x i8]* %a, i64 0, i64 0
+  %.pre = load i8* %arrayidx5.phi.trans.insert, align 1
+  br label %for.body3
+
+for.body: ; preds = %for.body, %entry
+  %indvars.iv29 = phi i64 [ 0, %entry ], [ %indvars.iv.next30, %for.body ]
+  call void (...)* @bar() nounwind
+  %arrayidx = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv29
+  store i8 23, i8* %arrayidx, align 1
+  %indvars.iv.next30 = add i64 %indvars.iv29, 1
+  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
+  %exitcond32 = icmp eq i32 %lftr.wideiv31, 1000000
+  br i1 %exitcond32, label %for.cond1.preheader, label %for.body
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+  %0 = phi i8 [ %.pre, %for.cond1.preheader ], [ %add, %for.body3 ]
+  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  call void (...)* @bar() nounwind
+  %arrayidx7 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 %indvars.iv
+  %1 = load i8* %arrayidx7, align 1
+  %add = add i8 %1, %0
+  store i8 %add, i8* %arrayidx7, align 1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1000000
+  br i1 %exitcond, label %for.end12, label %for.body3
+
+for.end12: ; preds = %for.body3
+  %arrayidx13 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 2
+  %2 = load i8* %arrayidx13, align 1
+  %conv14 = sext i8 %2 to i32
+  %arrayidx15 = getelementptr inbounds [10 x i8]* %a, i64 0, i64 6
+  %3 = load i8* %arrayidx15, align 1
+  %conv16 = sext i8 %3 to i32
+  %add17 = add nsw i32 %conv16, %conv14
+  ret i32 %add17
+
+; CHECK: @test1
+; CHECK-NOT: @llvm.memset
+}
+
+declare void @bar(...)
-- 
cgit v1.2.3