-rw-r--r--   lib/Transforms/Scalar/ObjCARC.cpp |  10
-rw-r--r--   test/Transforms/ObjCARC/basic.ll  |  76
-rw-r--r--   test/Transforms/ObjCARC/nested.ll | 141
3 files changed, 221 insertions(+), 6 deletions(-)
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 1c7f036232..91dc23c56a 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -2929,11 +2929,17 @@ ComputePostOrders(Function &F,
   Visited.clear();
 
   // Compute the exits, which are the starting points for reverse-CFG DFS.
+  // This includes blocks where all the successors are backedges that
+  // we're skipping.
   SmallVector<BasicBlock *, 4> Exits;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
     BasicBlock *BB = I;
-    if (cast<TerminatorInst>(&BB->back())->getNumSuccessors() == 0)
-      Exits.push_back(BB);
+    TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
+      if (!Backedges.count(std::make_pair(BB, *SI)))
+        goto HasNonBackedgeSucc;
+    Exits.push_back(BB);
+  HasNonBackedgeSucc:;
   }
 
   // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
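
The change above generalizes what counts as an "exit" block: previously only blocks with no successors at all seeded the reverse-CFG DFS, while now a block whose every outgoing edge is a skipped backedge also qualifies. A minimal standalone sketch of that rule (using a plain adjacency-list CFG in place of LLVM's Function/BasicBlock/succ_iterator API, and a bool flag where the patch uses a goto; the types and block numbering are illustrative, not from the patch):

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

// Blocks are ints, Succs[B] lists B's successors, and Backedges holds the
// (From, To) edges being skipped -- stand-ins for the types in ObjCARC.cpp.
static std::vector<int> ComputeExits(
    const std::vector<std::vector<int> > &Succs,
    const std::set<std::pair<int, int> > &Backedges) {
  std::vector<int> Exits;
  for (int BB = 0, E = (int)Succs.size(); BB != E; ++BB) {
    bool AllSuccsAreBackedges = true;
    for (size_t i = 0, e = Succs[BB].size(); i != e; ++i)
      if (!Backedges.count(std::make_pair(BB, Succs[BB][i]))) {
        AllSuccsAreBackedges = false; // found a real (non-backedge) successor
        break;
      }
    // A block with no successors, or whose successors are all skipped
    // backedges, is a starting point for the reverse-CFG DFS.
    if (AllSuccsAreBackedges)
      Exits.push_back(BB);
  }
  return Exits;
}

int main() {
  // The CFG of test62 below: entry(0) -> loop(1); loop -> loop.more(2) and
  // exit(3); loop.more -> loop is the backedge.
  std::vector<std::vector<int> > Succs(4);
  Succs[0].push_back(1);
  Succs[1].push_back(2);
  Succs[1].push_back(3);
  Succs[2].push_back(1);
  std::set<std::pair<int, int> > Backedges;
  Backedges.insert(std::make_pair(2, 1));
  std::vector<int> Exits = ComputeExits(Succs, Backedges);
  for (size_t i = 0, e = Exits.size(); i != e; ++i)
    std::printf("exit block: %d\n", Exits[i]); // prints 2 and 3
  return 0;
}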
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 552f4e0b96..08bd8c0601 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -1721,6 +1721,82 @@ define void @test61() {
 ret void
 }
+; Delete a retain matched by releases when one is inside the loop and the
+; other is outside the loop.
+
+; CHECK: define void @test62(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test62(i8* %x, i1* %p) nounwind {
+entry:
+ br label %loop
+
+loop:
+ call i8* @objc_retain(i8* %x)
+ %q = load i1* %p
+ br i1 %q, label %loop.more, label %exit
+
+loop.more:
+ call void @objc_release(i8* %x)
+ br label %loop
+
+exit:
+ call void @objc_release(i8* %x)
+ ret void
+}
+
+; Like test62 but with no release in exit.
+; Don't delete anything!
+
+; CHECK: define void @test63(
+; CHECK: loop:
+; CHECK: tail call i8* @objc_retain(i8* %x)
+; CHECK: loop.more:
+; CHECK: call void @objc_release(i8* %x)
+; CHECK: }
+define void @test63(i8* %x, i1* %p) nounwind {
+entry:
+ br label %loop
+
+loop:
+ call i8* @objc_retain(i8* %x)
+ %q = load i1* %p
+ br i1 %q, label %loop.more, label %exit
+
+loop.more:
+ call void @objc_release(i8* %x)
+ br label %loop
+
+exit:
+ ret void
+}
+
+; Like test62 but with no release in loop.more.
+; Don't delete anything!
+
+; CHECK: define void @test64(
+; CHECK: loop:
+; CHECK: tail call i8* @objc_retain(i8* %x)
+; CHECK: exit:
+; CHECK: call void @objc_release(i8* %x)
+; CHECK: }
+define void @test64(i8* %x, i1* %p) nounwind {
+entry:
+ br label %loop
+
+loop:
+ call i8* @objc_retain(i8* %x)
+ %q = load i1* %p
+ br i1 %q, label %loop.more, label %exit
+
+loop.more:
+ br label %loop
+
+exit:
+ call void @objc_release(i8* %x)
+ ret void
+}
+
 declare void @bar(i32 ()*)
 ; A few real-world testcases.
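
For reference, test62 corresponds roughly to the following source-level shape (an illustrative reconstruction, not code from the patch). test63 and test64 each drop one of the two releases, leaving the retain unmatched along one path, which is why nothing may be deleted in those two:

// Stub definitions so the sketch is self-contained; the real functions
// live in the Objective-C runtime.
extern "C" void *objc_retain(void *x) { return x; }
extern "C" void objc_release(void *) {}

// The retain at the top of each iteration is balanced either by the
// release on the backedge path (loop.more) or by the release after the
// loop (exit), so the pass may delete the whole set.
void test62_shape(void *x, volatile bool *p) {
  for (;;) {
    objc_retain(x);
    if (!*p)
      break;          // leave the loop still holding one reference
    objc_release(x);  // hand the reference back before looping again
  }
  objc_release(x);    // balance the retain from the final iteration
}

int main() {
  int obj = 0;             // stands in for the Objective-C object
  volatile bool q = false; // take the exit path on the first check
  test62_shape(&obj, &q);
  return 0;
}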
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 9eada8a2dd..a618a21d8b 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -484,12 +484,14 @@ forcoll.empty:
 ret void
 }
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test9b for the same testcase without a split backedge.
 ; CHECK: define void @test9(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: call i8* @objc_retain
 ; CHECK: }
 define void @test9() nounwind {
 entry:
@@ -551,13 +553,79 @@ forcoll.empty:
 ret void
 }
-; Delete a nested retain+release pair.
+; Like test9, but without a split backedge. This we can optimize.
-; CHECK: define void @test10(
+; CHECK: define void @test9b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
+define void @test9b() nounwind {
+entry:
+ %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+ %items.ptr = alloca [16 x i8*], align 8
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ %call1 = call i8* @returner()
+ %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+ %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+ %2 = call i8* @objc_retain(i8* %0) nounwind
+ %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %iszero = icmp eq i64 %call4, 0
+ br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+ %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+ %mutationsptr = load i64** %mutationsptr.ptr, align 8
+ %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+ br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+ %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+ %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+ %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+ br label %forcoll.loopbody
+
+forcoll.loopbody:
+ %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+ %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+ %statemutations = load i64* %mutationsptr5, align 8
+ %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+ br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+ call void @objc_enumerationMutation(i8* %2)
+ br label %forcoll.notmutated
+
+forcoll.notmutated:
+ %phitmp = add i64 %forcoll.index, 1
+ %exitcond = icmp eq i64 %phitmp, %umax
+ br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+ %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %4 = icmp eq i64 %call7, 0
+ br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+ call void @objc_release(i8* %2) nounwind
+ call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+ call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test10b for the same testcase without a split backedge.
+
+; CHECK: define void @test10(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: }
 define void @test10() nounwind {
 entry:
 %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
@@ -618,3 +686,68 @@ forcoll.empty:
 call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
 ret void
 }
+
+; Like test10, but without a split backedge. This we can optimize.
+
+; CHECK: define void @test10b(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: @objc_retain
+; CHECK: }
+define void @test10b() nounwind {
+entry:
+ %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+ %items.ptr = alloca [16 x i8*], align 8
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ %call1 = call i8* @returner()
+ %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+ call void @callee()
+ %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+ %2 = call i8* @objc_retain(i8* %0) nounwind
+ %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %iszero = icmp eq i64 %call4, 0
+ br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+ %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+ %mutationsptr = load i64** %mutationsptr.ptr, align 8
+ %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+ br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+ %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+ %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+ %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+ br label %forcoll.loopbody
+
+forcoll.loopbody:
+ %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+ %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+ %statemutations = load i64* %mutationsptr5, align 8
+ %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+ br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+ call void @objc_enumerationMutation(i8* %2)
+ br label %forcoll.notmutated
+
+forcoll.notmutated:
+ %phitmp = add i64 %forcoll.index, 1
+ %exitcond = icmp eq i64 %phitmp, %umax
+ br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+ %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %4 = icmp eq i64 %call7, 0
+ br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+ call void @objc_release(i8* %2) nounwind
+ call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+ call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
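
A note on the "split backedge" wording in the test9/test10 comments above: in the variants the pass can't yet handle, the loop latch doesn't branch straight back to the header; the backedge is routed through an extra forwarding block (the kind critical-edge splitting creates), whereas test9b and test10b branch directly back. A toy illustration of the term, reusing the stand-in CFG encoding from the sketch after the first hunk (not the pass's actual detection logic):

#include <vector>

// Does Latch reach Header through a trivial forwarding block (one that
// does nothing but jump to Header) instead of directly?
static bool HasSplitBackedge(const std::vector<std::vector<int> > &Succs,
                             int Latch, int Header) {
  for (size_t i = 0, e = Succs[Latch].size(); i != e; ++i) {
    int S = Succs[Latch][i];
    if (S == Header)
      return false; // direct backedge, as in test9b/test10b
    if (Succs[S].size() == 1 && Succs[S][0] == Header)
      return true;  // Latch -> S -> Header, as in test9/test10
  }
  return false;     // Latch doesn't reach Header at all
}

int main() {
  // Header = 0, Latch = 1. Split: 1 -> 2 -> 0. Direct: 1 -> 0.
  std::vector<std::vector<int> > Split(3), Direct(2);
  Split[0].push_back(1); Split[1].push_back(2); Split[2].push_back(0);
  Direct[0].push_back(1); Direct[1].push_back(0);
  return (HasSplitBackedge(Split, 1, 0) && !HasSplitBackedge(Direct, 1, 0))
             ? 0 : 1;
}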