summary | refs | log | tree | commit | diff
path: root/test/CodeGen/SystemZ
diff options
context:
space:
mode:
author: Richard Sandiford <rsandifo@linux.vnet.ibm.com> 2013-08-05 11:23:46 +0000
committer: Richard Sandiford <rsandifo@linux.vnet.ibm.com> 2013-08-05 11:23:46 +0000
commit: 93795574785de252703591e7fcc8f052c762f25e (patch)
tree: de693d743c5334444b688797de354cdc279bbdbe /test/CodeGen/SystemZ
parent: f8e16c6f5a3a0d2cc6f7ae6dae0a8f55a89cfb2f (diff)
download: llvm-93795574785de252703591e7fcc8f052c762f25e.tar.gz
download: llvm-93795574785de252703591e7fcc8f052c762f25e.tar.bz2
download: llvm-93795574785de252703591e7fcc8f052c762f25e.tar.xz
[SystemZ] Use BRCT and BRCTG to eliminate add-&-compare sequences

This patch just uses a peephole test for "add; compare; branch" sequences within a single block. The IR optimizers already convert loops to decrement-and-branch-on-nonzero form in some cases, so even this simplistic test triggers many times during a clang bootstrap and projects/test-suite run.

It looks like there are still cases where we need to more strongly prefer branches on nonzero though. E.g. I saw a case where a loop that started out with a check for 0 ended up with a check for -1. I'll try to look at that sometime.

I ended up adding the Reference class because MachineInstr::readsRegister() doesn't check for subregisters (by design, as far as I could tell).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187723 91177308-0d34-0410-b5e6-96231b3b80d8

Diffstat (limited to 'test/CodeGen/SystemZ')
-rw-r--r-- test/CodeGen/SystemZ/Large/branch-range-07.py | 68
-rw-r--r-- test/CodeGen/SystemZ/Large/branch-range-08.py | 69
-rw-r--r-- test/CodeGen/SystemZ/loop-01.ll | 101
3 files changed, 237 insertions, 1 deletions
diff --git a/test/CodeGen/SystemZ/Large/branch-range-07.py b/test/CodeGen/SystemZ/Large/branch-range-07.py
new file mode 100644
index 0000000000..90c442092e
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-07.py
@@ -0,0 +1,68 @@
+# Test 32-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
+# of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# loopN:
+# load of countN
+# ...
+# loop0:
+# 0xffd8 bytes, from MVIY instructions
+# conditional branch to loop0
+# after0:
+# ...
+# decrement of countN
+# conditional branch to loopN
+# afterN:
+#
+# Each load occupies 4 bytes. Each decrement and branch occupies 4
+# bytes if BRCT can be used, otherwise it occupies 10 bytes (AHI + BRCL).
+# This means that the sixth loop (loop5) contains 5 * 4 + 0xffd8 + 5 * 4 ==
+# 0x10000 bytes and is (just) in range. The seventh (loop6) is out of range.
+#
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: ahi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: ahi {{%r[0-9]+}}, -1
+# CHECK: jglh
+
+branch_blocks = 8
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i32 *%counts) {'
+print 'entry:'
+
+for i in xrange(branch_blocks - 1, -1, -1):
+ print ' %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i)
+ print ' %%initcount%d = load i32 *%%countptr%d' % (i, i)
+ print ' br label %%loop%d' % i
+
+ print 'loop%d:' % i
+ block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
+ block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
+ print (' %%count%d = phi i32 [ %%initcount%d, %%%s ],'
+ ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
+
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+ a, b = b, a + b
+ offset = 4096 + b % 500000
+ value = a % 256
+ print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+ print ' %%nextcount%d = add i32 %%count%d, -1' % (i, i)
+ print ' %%test%d = icmp ne i32 %%nextcount%d, 0' % (i, i)
+ print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
+ print ''
+ print 'after%d:' % i
+
+print ' ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/Large/branch-range-08.py b/test/CodeGen/SystemZ/Large/branch-range-08.py
new file mode 100644
index 0000000000..ac1b1370a3
--- /dev/null
+++ b/test/CodeGen/SystemZ/Large/branch-range-08.py
@@ -0,0 +1,69 @@
+# Test 64-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
+# of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# loopN:
+# load of countN
+# ...
+# loop0:
+# 0xffd8 bytes, from MVIY instructions
+# conditional branch to loop0
+# after0:
+# ...
+# decrement of countN
+# conditional branch to loopN
+# afterN:
+#
+# Each load occupies 6 bytes. Each decrement and branch occupies 4
+# bytes if BRCTG can be used, otherwise it occupies 10 bytes (AGHI + BRCL).
+# This means that the fifth loop (loop4) contains 4 * 6 + 0xffd8 + 4 * 4 ==
+# 0x10000 bytes and is (just) in range. The sixth (loop5) is out of range.
+#
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+
+branch_blocks = 8
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i64 *%counts) {'
+print 'entry:'
+
+for i in xrange(branch_blocks - 1, -1, -1):
+ print ' %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i)
+ print ' %%initcount%d = load i64 *%%countptr%d' % (i, i)
+ print ' br label %%loop%d' % i
+
+ print 'loop%d:' % i
+ block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
+ block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
+ print (' %%count%d = phi i64 [ %%initcount%d, %%%s ],'
+ ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
+
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+ a, b = b, a + b
+ offset = 4096 + b % 500000
+ value = a % 256
+ print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+ print ' %%nextcount%d = add i64 %%count%d, -1' % (i, i)
+ print ' %%test%d = icmp ne i64 %%nextcount%d, 0' % (i, i)
+ print ' br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
+ print ''
+ print 'after%d:' % i
+
+print ' ret void'
+print '}'
diff --git a/test/CodeGen/SystemZ/loop-01.ll b/test/CodeGen/SystemZ/loop-01.ll
index 025a34eaf5..5800801735 100644
--- a/test/CodeGen/SystemZ/loop-01.ll
+++ b/test/CodeGen/SystemZ/loop-01.ll
@@ -5,7 +5,7 @@
; Test that strength reduction is applied to addresses with a scale factor,
; but that indexed addressing can still be used.
define void @f1(i32 *%dest, i32 %a) {
-; CHECK-LABEL: f1
+; CHECK-LABEL: f1:
; CHECK-NOT: sllg
; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
; CHECK: br %r14
@@ -23,3 +23,102 @@ loop:
exit:
ret void
}
+
+; Test a loop that should be converted into dbr form and then use BRCT.
+define void @f2(i32 *%src, i32 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[REG:%r[0-5]]], 100
+; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
+; CHECK: brct [[REG]], [[LABEL]]
+; CHECK: br %r14
+entry:
+ br label %loop
+
+loop:
+ %count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
+ %next = add i32 %count, 1
+ %val = load volatile i32 *%src
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+ %add = add i32 %val, 1
+ store volatile i32 %add, i32 *%dest
+ br label %loop.next
+
+loop.next:
+ %cont = icmp ne i32 %next, 100
+ br i1 %cont, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Like f2, but for BRCTG.
+define void @f3(i64 *%src, i64 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: lghi [[REG:%r[0-5]]], 100
+; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
+; CHECK: brctg [[REG]], [[LABEL]]
+; CHECK: br %r14
+entry:
+ br label %loop
+
+loop:
+ %count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
+ %next = add i64 %count, 1
+ %val = load volatile i64 *%src
+ %cmp = icmp eq i64 %val, 0
+ br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+ %add = add i64 %val, 1
+ store volatile i64 %add, i64 *%dest
+ br label %loop.next
+
+loop.next:
+ %cont = icmp ne i64 %next, 100
+ br i1 %cont, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Test a loop with a 64-bit decremented counter in which the 32-bit
+; low part of the counter is used after the decrement. This is an example
+; of a subregister use being the only thing that blocks a conversion to BRCTG.
+define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) {
+; CHECK-LABEL: f4:
+; CHECK: aghi [[REG:%r[0-5]]], -1
+; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
+; CHECK: stg [[REG2]],
+; CHECK: jne {{\..*}}
+; CHECK: br %r14
+entry:
+ br label %loop
+
+loop:
+ %left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
+ store volatile i64 %left, i64 *%dest2
+ %val = load volatile i32 *%src
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+ %add = add i32 %val, 1
+ store volatile i32 %add, i32 *%dest
+ br label %loop.next
+
+loop.next:
+ %next = add i64 %left, -1
+ %ext = zext i32 %val to i64
+ %shl = shl i64 %ext, 32
+ %and = and i64 %next, 4294967295
+ %or = or i64 %shl, %and
+ store volatile i64 %or, i64 *%dest2
+ %cont = icmp ne i64 %next, 0
+ br i1 %cont, label %loop, label %exit
+
+exit:
+ ret void
+}