summaryrefslogtreecommitdiff
path: root/test/CodeGen/ARM
diff options
context:
space:
mode:
author: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-09-17 23:03:25 +0000
committer: Jakob Stoklund Olesen <stoklund@2pi.dk> 2012-09-17 23:03:25 +0000
commit: 87f7864c6d81ae134335b8271ac12c937c81dffc (patch)
tree: 775f7cdefe682b919193144641cebe372ce0326c /test/CodeGen/ARM
parent: 98279e8d65fe5c86d0370b3e2a62f244985bec33 (diff)
downloadllvm-87f7864c6d81ae134335b8271ac12c937c81dffc.tar.gz
llvm-87f7864c6d81ae134335b8271ac12c937c81dffc.tar.bz2
llvm-87f7864c6d81ae134335b8271ac12c937c81dffc.tar.xz
Merge into undefined lanes under -new-coalescer.
Add LIS::pruneValue() and extendToIndices().

These two functions are used by the register coalescer when merging two live ranges requires more than the trivial value mapping supported by LiveInterval::join().

The pruneValue() function can remove the part of a value number that is going to conflict in join(). Afterwards, extendToIndices() can restore the live range, using any new dominating value numbers and updating the SSA form.

Use this complex value mapping to support merging a register into a vector lane that has a conflicting value, but where the clobbered lane is undef.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164074 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r-- test/CodeGen/ARM/coalesce-subregs.ll | 50
1 files changed, 49 insertions, 1 deletions
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index fb0f4c67c9..dfb5b17306 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a9 -new-coalescer | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios0.0.0"
@@ -66,3 +66,51 @@ do.end: ; preds = %do.body
declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+; CHECK: f3
+; This function has lane insertions that span basic blocks.
+; The trivial REG_SEQUENCE lowering can't handle that, but the coalescer can.
+;
+; void f3(float *p, float *q) {
+; float32x2_t x;
+; x[1] = p[3];
+; if (q)
+; x[0] = q[0] + q[1];
+; else
+; x[0] = p[2];
+; vst1_f32(p+4, x);
+; }
+;
+; CHECK-NOT: vmov
+; CHECK-NOT: vorr
+define void @f3(float* %p, float* %q) nounwind ssp { ; builds a <2 x float> one lane at a time across blocks, then stores it at p+4
+entry:
+ %arrayidx = getelementptr inbounds float* %p, i32 3 ; &p[3]
+ %0 = load float* %arrayidx, align 4
+ %vecins = insertelement <2 x float> undef, float %0, i32 1 ; x[1] = p[3]; lane 0 is still undef at this point
+ %tobool = icmp eq float* %q, null
+ br i1 %tobool, label %if.else, label %if.then ; q == null goes to if.else, otherwise if.then
+
+if.then: ; preds = %entry
+ %1 = load float* %q, align 4 ; q[0]
+ %arrayidx2 = getelementptr inbounds float* %q, i32 1 ; &q[1]
+ %2 = load float* %arrayidx2, align 4
+ %add = fadd float %1, %2 ; q[0] + q[1]
+ %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0 ; x[0] = q[0] + q[1], inserted into the vector defined in entry
+ br label %if.end
+
+if.else: ; preds = %entry
+ %arrayidx4 = getelementptr inbounds float* %p, i32 2 ; &p[2]
+ %3 = load float* %arrayidx4, align 4
+ %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0 ; x[0] = p[2], inserted into the vector defined in entry
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ] ; merge the two fully-populated vectors
+ %add.ptr = getelementptr inbounds float* %p, i32 4 ; p + 4
+ %4 = bitcast float* %add.ptr to i8* ; the vst1 intrinsic is declared with an i8* address operand
+ tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4) ; vst1_f32(p+4, x)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind