author     Dan Gohman <gohman@apple.com>    2010-06-19 21:29:59 +0000
committer  Dan Gohman <gohman@apple.com>    2010-06-19 21:29:59 +0000
commit     1e3121c80a02359fda87cf77ce1fd7bbd5066991 (patch)
tree       9139b8e9176ac3fce7372447078ce25a34a3b9a5 /test/CodeGen/X86/lsr-reuse.ll
parent     b6211710acdf558b3b45c2d198e74aa602496893 (diff)
Include the use kind along with the expression in the key of the use
sharing map.

The reconcileNewOffset logic already forces a separate use if the
kinds differ, so incorporating the kind in the key means we can track
more sharing opportunities. More sharing means fewer total uses to
track, which means smaller problem sizes, which means the conservative
throttles don't kick in as often.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106396 91177308-0d34-0410-b5e6-96231b3b80d8
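
To illustrate the keying change, here is a minimal standalone C++ sketch; UseKind, Expr, UseMap, and getUse are hypothetical stand-ins for exposition (the kind names are loosely modeled on LSRUse's kinds), not the pass's actual data structures:

    // Minimal sketch (not LSR's actual code): key the use-sharing map
    // on the (expression, kind) pair instead of the expression alone.
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <utility>

    // Hypothetical stand-ins for exposition only.
    enum class UseKind { Basic, Special, Address, ICmpZero };
    using Expr = std::uint64_t; // a canonicalized use expression, as an opaque id

    // Old scheme: map<Expr, size_t>.  Two uses with the same expression
    // but different kinds collide; reconcileNewOffset rejects the match,
    // so the second use gets a fresh LSRUse that is never tracked for
    // sharing.  New scheme: different kinds simply get distinct keys.
    std::map<std::pair<Expr, UseKind>, std::size_t> UseMap;

    std::size_t getUse(Expr E, UseKind K, std::size_t &NextUseIdx) {
      auto [It, Inserted] = UseMap.try_emplace({E, K}, NextUseIdx);
      if (Inserted)
        ++NextUseIdx;    // first (expr, kind) sighting: allocate a new use
      return It->second; // repeat sighting: share the existing use
    }

    int main() {
      std::size_t NextUseIdx = 0;
      std::cout << getUse(42, UseKind::Address, NextUseIdx) << '\n';  // 0
      std::cout << getUse(42, UseKind::Address, NextUseIdx) << '\n';  // 0 (shared)
      std::cout << getUse(42, UseKind::ICmpZero, NextUseIdx) << '\n'; // 1 (own entry)
    }

With the kind folded into the key, a lookup can only ever return a use that reconcileNewOffset would accept, so same-kind repeats share one tracked entry instead of colliding with an incompatible-kind entry.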
Diffstat (limited to 'test/CodeGen/X86/lsr-reuse.ll')
-rw-r--r--  test/CodeGen/X86/lsr-reuse.ll  309
1 file changed, 309 insertions(+), 0 deletions(-)
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index b80ee0897d..3f4f9ec1d1 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -440,3 +440,312 @@ bb5: ; preds = %bb3, %entry
%s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1]
ret i32 %s.1.lcssa
}
+
+; Two loops here are of particular interest: the one at %bb21, where
+; we don't want to leave extra induction variables around, or use an
+; lea to compute an exit condition inside the loop:
+
+; CHECK: test:
+
+; CHECK: BB10_4:
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT: addq $4, %r{{.*}}
+; CHECK-NEXT: decq %r{{.*}}
+; CHECK-NEXT: addq $4, %r{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: BB10_2:
+; CHECK-NEXT: testq %r{{.*}}, %r{{.*}}
+; CHECK-NEXT: jle
+; CHECK-NEXT: testb $15, %r{{.*}}
+; CHECK-NEXT: jne
+
+; And the one at %bb68, where we want to be sure to use superhero
+; mode, the 4x-unrolled vector loop with aligned loads and stores:
+
+; CHECK: BB10_10:
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps 48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps 32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps 16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 48(%r{{[^,]*}})
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addq $64, %r{{.*}}
+; CHECK-NEXT: addq $64, %r{{.*}}
+; CHECK-NEXT: addq $-16, %r{{.*}}
+; CHECK-NEXT: BB10_11:
+; CHECK-NEXT: cmpq $15, %r{{.*}}
+; CHECK-NEXT: jg
+
+define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind {
+bb:
+ %t = alloca float, align 4 ; <float*> [#uses=3]
+ %t7 = alloca float, align 4 ; <float*> [#uses=2]
+ %t8 = load float* %arg3 ; <float> [#uses=8]
+ %t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1]
+ %t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1]
+ %t11 = xor i64 %t10, %t9 ; <i64> [#uses=1]
+ %t12 = and i64 %t11, 15 ; <i64> [#uses=1]
+ %t13 = icmp eq i64 %t12, 0 ; <i1> [#uses=1]
+ %t14 = xor i64 %arg1, 1 ; <i64> [#uses=1]
+ %t15 = xor i64 %arg5, 1 ; <i64> [#uses=1]
+ %t16 = or i64 %t15, %t14 ; <i64> [#uses=1]
+ %t17 = trunc i64 %t16 to i32 ; <i32> [#uses=1]
+ %t18 = icmp eq i32 %t17, 0 ; <i1> [#uses=1]
+ br i1 %t18, label %bb19, label %bb213
+
+bb19: ; preds = %bb
+ %t20 = load float* %arg2 ; <float> [#uses=1]
+ br label %bb21
+
+bb21: ; preds = %bb32, %bb19
+ %t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21]
+ %t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6]
+ %t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4]
+ %t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4]
+ %t26 = getelementptr float* %arg, i64 %t22 ; <float*> [#uses=3]
+ %t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1]
+ br i1 %t27, label %bb28, label %bb37
+
+bb28: ; preds = %bb21
+ %t29 = ptrtoint float* %t25 to i64 ; <i64> [#uses=1]
+ %t30 = and i64 %t29, 15 ; <i64> [#uses=1]
+ %t31 = icmp eq i64 %t30, 0 ; <i1> [#uses=1]
+ br i1 %t31, label %bb37, label %bb32
+
+bb32: ; preds = %bb28
+ %t33 = load float* %t26 ; <float> [#uses=1]
+ %t34 = fmul float %t23, %t33 ; <float> [#uses=1]
+ store float %t34, float* %t25
+ %t35 = fadd float %t23, %t8 ; <float> [#uses=1]
+ %t36 = add i64 %t22, 1 ; <i64> [#uses=1]
+ br label %bb21
+
+bb37: ; preds = %bb28, %bb21
+ %t38 = fmul float %t8, 4.000000e+00 ; <float> [#uses=1]
+ store float %t38, float* %t
+ %t39 = fmul float %t8, 1.600000e+01 ; <float> [#uses=1]
+ store float %t39, float* %t7
+ %t40 = fmul float %t8, 0.000000e+00 ; <float> [#uses=1]
+ %t41 = fadd float %t23, %t40 ; <float> [#uses=1]
+ %t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1]
+ %t43 = fadd float %t23, %t8 ; <float> [#uses=1]
+ %t44 = insertelement <4 x float> %t42, float %t43, i32 1 ; <<4 x float>> [#uses=1]
+ %t45 = fmul float %t8, 2.000000e+00 ; <float> [#uses=1]
+ %t46 = fadd float %t23, %t45 ; <float> [#uses=1]
+ %t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1]
+ %t48 = fmul float %t8, 3.000000e+00 ; <float> [#uses=1]
+ %t49 = fadd float %t23, %t48 ; <float> [#uses=1]
+ %t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5]
+ %t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3]
+ %t52 = fadd <4 x float> %t50, %t51 ; <<4 x float>> [#uses=3]
+ %t53 = fadd <4 x float> %t52, %t51 ; <<4 x float>> [#uses=3]
+ %t54 = fadd <4 x float> %t53, %t51 ; <<4 x float>> [#uses=2]
+ %t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8]
+ %t56 = icmp sgt i64 %t24, 15 ; <i1> [#uses=2]
+ br i1 %t13, label %bb57, label %bb118
+
+bb57: ; preds = %bb37
+ br i1 %t56, label %bb61, label %bb112
+
+bb58: ; preds = %bb68
+ %t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1]
+ %t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1]
+ br label %bb112
+
+bb61: ; preds = %bb57
+ %t62 = add i64 %t22, 16 ; <i64> [#uses=1]
+ %t63 = add i64 %t22, 4 ; <i64> [#uses=1]
+ %t64 = add i64 %t22, 8 ; <i64> [#uses=1]
+ %t65 = add i64 %t22, 12 ; <i64> [#uses=1]
+ %t66 = add i64 %arg6, -16 ; <i64> [#uses=1]
+ %t67 = sub i64 %t66, %t22 ; <i64> [#uses=1]
+ br label %bb68
+
+bb68: ; preds = %bb68, %bb61
+ %t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ] ; <i64> [#uses=3]
+ %t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2]
+ %t74 = shl i64 %t69, 4 ; <i64> [#uses=5]
+ %t75 = add i64 %t22, %t74 ; <i64> [#uses=2]
+ %t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1]
+ %t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t78 = add i64 %t62, %t74 ; <i64> [#uses=2]
+ %t79 = add i64 %t63, %t74 ; <i64> [#uses=2]
+ %t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1]
+ %t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t82 = add i64 %t64, %t74 ; <i64> [#uses=2]
+ %t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1]
+ %t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t85 = add i64 %t65, %t74 ; <i64> [#uses=2]
+ %t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1]
+ %t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1]
+ %t89 = bitcast float* %t88 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1]
+ %t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1]
+ %t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1]
+ %t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t96 = mul i64 %t69, -16 ; <i64> [#uses=1]
+ %t97 = add i64 %t67, %t96 ; <i64> [#uses=2]
+ %t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1]
+ %t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1]
+ %t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1]
+ %t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1]
+ %t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1]
+ %t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2]
+ %t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1]
+ %t105 = fmul <4 x float> %t100, %t72 ; <<4 x float>> [#uses=1]
+ %t106 = fmul <4 x float> %t101, %t70 ; <<4 x float>> [#uses=1]
+ store <4 x float> %t102, <4 x float>* %t89
+ store <4 x float> %t104, <4 x float>* %t91
+ store <4 x float> %t105, <4 x float>* %t93
+ store <4 x float> %t106, <4 x float>* %t95
+ %t107 = fadd <4 x float> %t70, %t55 ; <<4 x float>> [#uses=1]
+ %t108 = fadd <4 x float> %t72, %t55 ; <<4 x float>> [#uses=1]
+ %t109 = fadd <4 x float> %t73, %t55 ; <<4 x float>> [#uses=1]
+ %t110 = icmp sgt i64 %t97, 15 ; <i1> [#uses=1]
+ %t111 = add i64 %t69, 1 ; <i64> [#uses=1]
+ br i1 %t110, label %bb68, label %bb58
+
+bb112: ; preds = %bb58, %bb57
+ %t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1]
+ %t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1]
+ %t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1]
+ %t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1]
+ %t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0]
+ br label %bb194
+
+bb118: ; preds = %bb37
+ br i1 %t56, label %bb122, label %bb194
+
+bb119: ; preds = %bb137
+ %t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1]
+ %t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1]
+ br label %bb194
+
+bb122: ; preds = %bb118
+ %t123 = add i64 %t22, -1 ; <i64> [#uses=1]
+ %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1]
+ %t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1]
+ %t127 = add i64 %t22, 16 ; <i64> [#uses=1]
+ %t128 = add i64 %t22, 3 ; <i64> [#uses=1]
+ %t129 = add i64 %t22, 7 ; <i64> [#uses=1]
+ %t130 = add i64 %t22, 11 ; <i64> [#uses=1]
+ %t131 = add i64 %t22, 15 ; <i64> [#uses=1]
+ %t132 = add i64 %t22, 4 ; <i64> [#uses=1]
+ %t133 = add i64 %t22, 8 ; <i64> [#uses=1]
+ %t134 = add i64 %t22, 12 ; <i64> [#uses=1]
+ %t135 = add i64 %arg6, -16 ; <i64> [#uses=1]
+ %t136 = sub i64 %t135, %t22 ; <i64> [#uses=1]
+ br label %bb137
+
+bb137: ; preds = %bb137, %bb122
+ %t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3]
+ %t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2]
+ %t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1]
+ %t144 = shl i64 %t138, 4 ; <i64> [#uses=9]
+ %t145 = add i64 %t127, %t144 ; <i64> [#uses=2]
+ %t146 = add i64 %t128, %t144 ; <i64> [#uses=1]
+ %t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1]
+ %t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t149 = add i64 %t129, %t144 ; <i64> [#uses=1]
+ %t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1]
+ %t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t152 = add i64 %t130, %t144 ; <i64> [#uses=1]
+ %t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1]
+ %t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t155 = add i64 %t131, %t144 ; <i64> [#uses=1]
+ %t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1]
+ %t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t158 = add i64 %t22, %t144 ; <i64> [#uses=1]
+ %t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1]
+ %t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t161 = add i64 %t132, %t144 ; <i64> [#uses=1]
+ %t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1]
+ %t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t164 = add i64 %t133, %t144 ; <i64> [#uses=1]
+ %t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1]
+ %t166 = bitcast float* %t165 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t167 = add i64 %t134, %t144 ; <i64> [#uses=1]
+ %t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1]
+ %t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ %t170 = mul i64 %t138, -16 ; <i64> [#uses=1]
+ %t171 = add i64 %t136, %t170 ; <i64> [#uses=2]
+ %t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2]
+ %t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2]
+ %t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2]
+ %t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2]
+ %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+ %t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+ %t184 = fmul <4 x float> %t177, %t140 ; <<4 x float>> [#uses=1]
+ %t185 = fadd <4 x float> %t140, %t55 ; <<4 x float>> [#uses=2]
+ %t186 = fmul <4 x float> %t179, %t142 ; <<4 x float>> [#uses=1]
+ %t187 = fmul <4 x float> %t181, %t141 ; <<4 x float>> [#uses=1]
+ %t188 = fmul <4 x float> %t183, %t139 ; <<4 x float>> [#uses=1]
+ store <4 x float> %t184, <4 x float>* %t160
+ store <4 x float> %t186, <4 x float>* %t163
+ store <4 x float> %t187, <4 x float>* %t166
+ store <4 x float> %t188, <4 x float>* %t169
+ %t189 = fadd <4 x float> %t139, %t55 ; <<4 x float>> [#uses=1]
+ %t190 = fadd <4 x float> %t141, %t55 ; <<4 x float>> [#uses=1]
+ %t191 = fadd <4 x float> %t142, %t55 ; <<4 x float>> [#uses=1]
+ %t192 = icmp sgt i64 %t171, 15 ; <i1> [#uses=1]
+ %t193 = add i64 %t138, 1 ; <i64> [#uses=1]
+ br i1 %t192, label %bb137, label %bb119
+
+bb194: ; preds = %bb119, %bb118, %bb112
+ %t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2]
+ %t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1]
+ %t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1]
+ %t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1]
+ %t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2]
+ %t200 = icmp sgt i64 %t195, 0 ; <i1> [#uses=1]
+ br i1 %t200, label %bb201, label %bb211
+
+bb201: ; preds = %bb201, %bb194
+ %t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3]
+ %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
+ %t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1]
+ %t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1]
+ %t206 = load float* %t204 ; <float> [#uses=1]
+ %t207 = fmul float %t203, %t206 ; <float> [#uses=1]
+ store float %t207, float* %t205
+ %t208 = fadd float %t203, %t8 ; <float> [#uses=2]
+ %t209 = add i64 %t202, 1 ; <i64> [#uses=2]
+ %t210 = icmp eq i64 %t209, %t195 ; <i1> [#uses=1]
+ br i1 %t210, label %bb211, label %bb201
+
+bb211: ; preds = %bb201, %bb194
+ %t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1]
+ store float %t212, float* %arg2
+ ret void
+
+bb213: ; preds = %bb
+ ret void
+}