author     Dan Gohman <gohman@apple.com>  2010-01-21 02:09:26 +0000
committer  Dan Gohman <gohman@apple.com>  2010-01-21 02:09:26 +0000
commit     a10756ee657a4d43a48cca5c166919093930ed6b (patch)
tree       52a9cf0b867521026963ca127f6a46d19f0f8f78 /test/CodeGen/X86/lsr-reuse.ll
parent     ac8b4bf66b7263018fe6c133604a30780c24982e (diff)
Re-implement the main strength-reduction portion of LoopStrengthReduction.
This new version is much more aggressive about doing "full" reduction in cases where it reduces register pressure, and also more aggressive about rewriting induction variables to count down (or up) to zero when doing so reduces register pressure. It currently uses fairly simplistic algorithms for finding reuse opportunities, but it introduces a new framework that allows it to combine multiple strategies at once to form hybrid solutions, instead of doing all full-reduction or all base+index.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94061 91177308-0d34-0410-b5e6-96231b3b80d8
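As a sketch of the count-down rewrite described above (illustrative only, not part of this commit; the names %i, %c, and %done are invented for the example): counting up keeps %n live across the loop body solely for the exit test, while counting the same number of iterations down to zero reads %n only when initializing the phi, freeing a register inside the loop.

  ; Counting up: %i and %n are both live across the loop body.
  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    %i.next = add i64 %i, 1
    %done = icmp eq i64 %i.next, %n
    br i1 %done, label %exit, label %loop

  ; Counting down: %n is read only at loop entry; the body needs just %c.
  loop:
    %c = phi i64 [ %n, %entry ], [ %c.next, %loop ]
    %c.next = add i64 %c, -1
    %done = icmp eq i64 %c.next, 0
    br i1 %done, label %exit, label %loop

This is the shape visible in full_me below, where the count is simply decremented in %rcx (decq %rcx; jne) with no separate bound register.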
Diffstat (limited to 'test/CodeGen/X86/lsr-reuse.ll')
-rw-r--r--  test/CodeGen/X86/lsr-reuse.ll | 159
1 file changed, 159 insertions, 0 deletions
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
new file mode 100644
index 0000000000..a1919bab38
--- /dev/null
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -0,0 +1,159 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+target datalayout = "e-p:64:64:64"
+target triple = "x86_64-unknown-unknown"
+
+; Full strength reduction reduces register pressure from 5 to 4 here.
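+;
+; In base+index form this loop needs five registers: the three pointer bases
+; (%rdi, %rsi, %rdx), the index (%rax), and the loop bound (%rcx). Fully
+; reduced, the three pointers are advanced directly and a single counter is
+; decremented to zero, for four registers in total, as the CHECK lines below
+; verify.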
+
+; CHECK: full_me:
+; CHECK: movsd (%rsi), %xmm0
+; CHECK: mulsd (%rdx), %xmm0
+; CHECK: movsd %xmm0, (%rdi)
+; CHECK: addq $8, %rsi
+; CHECK: addq $8, %rdx
+; CHECK: addq $8, %rdi
+; CHECK: decq %rcx
+; CHECK: jne
+
+define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
+
+; In this test, the counting IV exit value is used, so full strength reduction
+; would not reduce register pressure. IndVarSimplify ought to simplify such
+; cases away, but it's useful here to verify that LSR's register pressure
+; heuristics are working as expected.
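+;
+; (The counting IV's final value feeds the return value here, so an
+; up-counting register is needed no matter what; reduced pointers would be
+; added on top of it rather than replacing it.)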
+
+; CHECK: count_me_0:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ %q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ ret i64 %q
+}
+
+; In this test, the trip count value is used, so full strength reduction
+; would not reduce register pressure.
+; (though it would reduce register pressure inside the loop...)
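+;
+; (%n is returned after the loop, so it stays live across the loop either
+; way; removing the in-loop comparison against it would not free a register
+; overall.)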
+
+; CHECK: count_me_1:
+; CHECK: movsd (%rsi,%rax,8), %xmm0
+; CHECK: mulsd (%rdx,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdi,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq %rax, %rcx
+; CHECK: jne
+
+define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
+entry:
+ %t0 = icmp sgt i64 %n, 0
+ br i1 %t0, label %loop, label %return
+
+loop:
+ %i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
+ %Ai = getelementptr inbounds double* %A, i64 %i
+ %Bi = getelementptr inbounds double* %B, i64 %i
+ %Ci = getelementptr inbounds double* %C, i64 %i
+ %t1 = load double* %Bi
+ %t2 = load double* %Ci
+ %m = fmul double %t1, %t2
+ store double %m, double* %Ai
+ %i.next = add nsw i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, %n
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ %q = phi i64 [ 0, %entry ], [ %n, %loop ]
+ ret i64 %q
+}
+
+; This should be fully strength-reduced to reduce register pressure; however,
+; the current heuristics get distracted by all the reuse with the stride-1
+; induction variable first.
+
+; But even so, be clever and start the stride-1 variable at a non-zero value
+; to eliminate an in-loop immediate value.
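+;
+; (Starting the counter at 5 folds the constant i+5 offset into the index:
+; the first group of accesses then needs no displacement, the i+10 group
+; uses a fixed 40-byte displacement (5 elements * 8 bytes), and the exit
+; compare becomes 5005 = 5000 + 5, as the CHECK lines below verify.)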
+
+; CHECK: count_me_2:
+; CHECK: movl $5, %eax
+; CHECK: align
+; CHECK: BB4_1:
+; CHECK: movsd (%rdi,%rax,8), %xmm0
+; CHECK: addsd (%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, (%rdx,%rax,8)
+; CHECK: movsd 40(%rdi,%rax,8), %xmm0
+; CHECK: addsd 40(%rsi,%rax,8), %xmm0
+; CHECK: movsd %xmm0, 40(%rdx,%rax,8)
+; CHECK: incq %rax
+; CHECK: cmpq $5005, %rax
+; CHECK: jne
+
+define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %i5 = add i64 %i, 5
+ %Ai = getelementptr double* %A, i64 %i5
+ %t2 = load double* %Ai
+ %Bi = getelementptr double* %B, i64 %i5
+ %t4 = load double* %Bi
+ %t5 = fadd double %t2, %t4
+ %Ci = getelementptr double* %C, i64 %i5
+ store double %t5, double* %Ci
+ %i10 = add i64 %i, 10
+ %Ai10 = getelementptr double* %A, i64 %i10
+ %t9 = load double* %Ai10
+ %Bi10 = getelementptr double* %B, i64 %i10
+ %t11 = load double* %Bi10
+ %t12 = fadd double %t9, %t11
+ %Ci10 = getelementptr double* %C, i64 %i10
+ store double %t12, double* %Ci10
+ %i.next = add i64 %i, 1
+ %exitcond = icmp eq i64 %i.next, 5000
+ br i1 %exitcond, label %return, label %loop
+
+return:
+ ret void
+}
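Usage note: under lit, the RUN line above expands to approximately "llc < test/CodeGen/X86/lsr-reuse.ll -march=x86-64 | FileCheck test/CodeGen/X86/lsr-reuse.ll", with %s substituted by the test file's path.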