summaryrefslogtreecommitdiff
path: root/test/Transforms/LoopStrengthReduce
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2014-03-29 10:18:08 +0000
committerTim Northover <tnorthover@apple.com>2014-03-29 10:18:08 +0000
commit7b837d8c75f78fe55c9b348b9ec2281169a48d2a (patch)
treee8e01e73cf4d0723a13e49e4b5d8a66f896d184f /test/Transforms/LoopStrengthReduce
parent69bd9577fc423edea13479eaacf7b1844faa6c6a (diff)
downloadllvm-7b837d8c75f78fe55c9b348b9ec2281169a48d2a.tar.gz
llvm-7b837d8c75f78fe55c9b348b9ec2281169a48d2a.tar.bz2
llvm-7b837d8c75f78fe55c9b348b9ec2281169a48d2a.tar.xz
ARM64: initial backend import
This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205090 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopStrengthReduce')
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg5
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll33
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll101
3 files changed, 139 insertions, 0 deletions
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg
new file mode 100644
index 0000000000..a49957999f
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM64/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM64' in targets:
+ config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll
new file mode 100644
index 0000000000..16f6afa6f5
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memcpy.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=arm64 -mcpu=cyclone -pre-RA-sched=list-hybrid < %s | FileCheck %s
+; rdar://10232252
+; Prevent LSR of doing poor choice that cannot be folded in addressing mode
+
+; Remove the -pre-RA-sched=list-hybrid option after fixing:
+; <rdar://problem/12702735> [ARM64][coalescer] need better register
+; coalescing for simple unit tests.
+
+; CHECK: testCase
+; CHECK: %while.body{{$}}
+; CHECK: ldr [[STREG:x[0-9]+]], [{{x[0-9]+}}], #8
+; CHECK-NEXT: str [[STREG]], [{{x[0-9]+}}], #8
+; CHECK: %while.end
+define i32 @testCase() nounwind ssp {
+entry:
+ br label %while.body
+
+while.body: ; preds = %while.body, %entry
+ %len.06 = phi i64 [ 1288, %entry ], [ %sub, %while.body ]
+ %pDst.05 = phi i64* [ inttoptr (i64 6442450944 to i64*), %entry ], [ %incdec.ptr1, %while.body ]
+ %pSrc.04 = phi i64* [ inttoptr (i64 4294967296 to i64*), %entry ], [ %incdec.ptr, %while.body ]
+ %incdec.ptr = getelementptr inbounds i64* %pSrc.04, i64 1
+ %tmp = load volatile i64* %pSrc.04, align 8
+ %incdec.ptr1 = getelementptr inbounds i64* %pDst.05, i64 1
+ store volatile i64 %tmp, i64* %pDst.05, align 8
+ %sub = add i64 %len.06, -8
+ %cmp = icmp sgt i64 %sub, -1
+ br i1 %cmp, label %while.body, label %while.end
+
+while.end: ; preds = %while.body
+ tail call void inttoptr (i64 6442450944 to void ()*)() nounwind
+ ret i32 0
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll
new file mode 100644
index 0000000000..19208025a4
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM64/lsr-memset.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -O3 -march=arm64 -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s
+; <rdar://problem/11635990> [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys
+;
+; LSR on loop %while.cond should reassociate non-address mode
+; expressions at use %cmp16 to avoid sinking computation into %while.body18.
+;
+; Remove the -pre-RA-sched=list-hybrid option after fixing:
+; <rdar://problem/12702735> [ARM64][coalescer] need better register
+; coalescing for simple unit tests.
+
+; CHECK: @memset
+; CHECK: %while.body18{{$}}
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8
+; First set the IVREG variable, then use it
+; CHECK-NEXT: sub [[IVREG:x[0-9]+]],
+; CHECK: [[IVREG]], #8
+; CHECK-NEXT: cmp [[IVREG]], #7
+; CHECK-NEXT: b.hi
+define i8* @memset(i8* %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat {
+entry:
+ %cmp = icmp eq i64 %len, 0
+ br i1 %cmp, label %done, label %while.cond.preheader
+
+while.cond.preheader: ; preds = %entry
+ %conv = trunc i32 %val to i8
+ br label %while.cond
+
+while.cond: ; preds = %while.body, %while.cond.preheader
+ %ptr.0 = phi i8* [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ]
+ %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ]
+ %cond = icmp eq i64 %len.addr.0, 0
+ br i1 %cond, label %done, label %land.rhs
+
+land.rhs: ; preds = %while.cond
+ %0 = ptrtoint i8* %ptr.0 to i64
+ %and = and i64 %0, 7
+ %cmp5 = icmp eq i64 %and, 0
+ br i1 %cmp5, label %if.end9, label %while.body
+
+while.body: ; preds = %land.rhs
+ %incdec.ptr = getelementptr inbounds i8* %ptr.0, i64 1
+ store i8 %conv, i8* %ptr.0, align 1, !tbaa !0
+ %dec = add i64 %len.addr.0, -1
+ br label %while.cond
+
+if.end9: ; preds = %land.rhs
+ %conv.mask = and i32 %val, 255
+ %1 = zext i32 %conv.mask to i64
+ %2 = shl nuw nsw i64 %1, 8
+ %ins18 = or i64 %2, %1
+ %3 = shl nuw nsw i64 %1, 16
+ %ins15 = or i64 %ins18, %3
+ %4 = shl nuw nsw i64 %1, 24
+ %5 = shl nuw nsw i64 %1, 32
+ %mask8 = or i64 %ins15, %4
+ %6 = shl nuw nsw i64 %1, 40
+ %mask5 = or i64 %mask8, %5
+ %7 = shl nuw nsw i64 %1, 48
+ %8 = shl nuw i64 %1, 56
+ %mask2.masked = or i64 %mask5, %6
+ %mask = or i64 %mask2.masked, %7
+ %ins = or i64 %mask, %8
+ %9 = bitcast i8* %ptr.0 to i64*
+ %cmp1636 = icmp ugt i64 %len.addr.0, 7
+ br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph
+
+while.body18: ; preds = %if.end9, %while.body18
+ %wideptr.038 = phi i64* [ %incdec.ptr19, %while.body18 ], [ %9, %if.end9 ]
+ %len.addr.137 = phi i64 [ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ]
+ %incdec.ptr19 = getelementptr inbounds i64* %wideptr.038, i64 1
+ store i64 %ins, i64* %wideptr.038, align 8, !tbaa !2
+ %sub = add i64 %len.addr.137, -8
+ %cmp16 = icmp ugt i64 %sub, 7
+ br i1 %cmp16, label %while.body18, label %while.end20
+
+while.end20: ; preds = %while.body18
+ %cmp21 = icmp eq i64 %sub, 0
+ br i1 %cmp21, label %done, label %while.body29.lr.ph
+
+while.body29.lr.ph: ; preds = %while.end20, %if.end9
+ %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ]
+ %wideptr.0.lcssa48 = phi i64* [ %incdec.ptr19, %while.end20 ], [ %9, %if.end9 ]
+ %10 = bitcast i64* %wideptr.0.lcssa48 to i8*
+ br label %while.body29
+
+while.body29: ; preds = %while.body29, %while.body29.lr.ph
+ %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ]
+ %ptr.134 = phi i8* [ %10, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ]
+ %dec26 = add i64 %len.addr.235, -1
+ %incdec.ptr31 = getelementptr inbounds i8* %ptr.134, i64 1
+ store i8 %conv, i8* %ptr.134, align 1, !tbaa !0
+ %cmp27 = icmp eq i64 %dec26, 0
+ br i1 %cmp27, label %done, label %while.body29
+
+done: ; preds = %while.cond, %while.body29, %while.end20, %entry
+ ret i8* %dest
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
+!2 = metadata !{metadata !"long long", metadata !0}