diff options
author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2011-03-09 19:27:06 +0000 |
---|---|---|
committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2011-03-09 19:27:06 +0000 |
commit | 5d96e5a1ccbdc4d64db4e15418392bb7c61e4d6f (patch) | |
tree | b48fc27fb9e25bb4271021437c25a3d3f5281d1f /test/CodeGen | |
parent | dda386c44d994d59593f5281155378fbe865d364 (diff) | |
download | llvm-5d96e5a1ccbdc4d64db4e15418392bb7c61e4d6f.tar.gz llvm-5d96e5a1ccbdc4d64db4e15418392bb7c61e4d6f.tar.bz2 llvm-5d96e5a1ccbdc4d64db4e15418392bb7c61e4d6f.tar.xz |
Make physreg coalescing independent of the number of uses of the virtual register.
The damage done by physreg coalescing only depends on the number of instructions
the extended physreg live range covers. This fixes PR9438.
The heuristic is still luck-based, and physreg coalescing really should be
disabled completely. We need a register allocator with better hinting support
before that is possible.
Convert a test to FileCheck and force spilling by inserting an extra call. The
previous spilling behavior was dependent on misguided physreg coalescing
decisions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@127351 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen')
-rw-r--r-- | test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll | 22 | ||||
-rw-r--r-- | test/CodeGen/X86/fold-pcmpeqd-2.ll | 17 |
2 files changed, 36 insertions, 3 deletions
diff --git a/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll new file mode 100644 index 0000000000..e48edf7e30 --- /dev/null +++ b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll @@ -0,0 +1,22 @@ +; RUN: llc -mcpu=yonah < %s +; PR9438 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-unknown-freebsd9.0" + +; The 'call fastcc' ties down %ebx, %ecx, and %edx. +; A MUL8r ties down %al, leaving no GR32_ABCD registers available. +; The coalescer can easily overallocate physical registers, +; and register allocation fails. + +declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind + +define i32 @cvtchar(i8* nocapture %sp) nounwind { + %temp.i = alloca [2 x i8], align 1 + %tmp1 = load i8* %sp, align 1 + %div = udiv i8 %tmp1, 10 + %rem = urem i8 %div, 10 + %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0 + store i8 %rem, i8* %arrayidx.i, align 1 + %call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind + ret i32 undef +} diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll index 49f879504e..c85a97a3fa 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -1,10 +1,20 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -; This testcase should need to spill the -1 value on x86-32, +; This testcase should need to spill the -1 value on both x86-32 and x86-64, ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it ; should use a constant-pool load instead. 
+; Constant pool all-ones vector: +; CHECK: .long 4294967295 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .long 4294967295 +; CHECK-NEXT: .long 4294967295 + +; No pcmpeqd instructions, everybody uses the constant pool. +; CHECK: program_1: +; CHECK-NOT: pcmpeqd + %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }> %struct._cl_image_format_t = type <{ i32, i32, i32 }> %struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }> @@ -57,6 +67,7 @@ forbody: ; preds = %forcond %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] |