diff options
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r-- | test/CodeGen/X86/MergeConsecutiveStores.ll | 150 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce-2.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce-3.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/loop-strength-reduce.ll | 5 |
4 files changed, 158 insertions, 12 deletions
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll new file mode 100644 index 0000000000..435f38c8ad --- /dev/null +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -0,0 +1,150 @@ +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } + +@a = common global [10000 x %struct.A] zeroinitializer, align 8 + +; Move all of the constants using a single vector store. +; CHECK: merge_const_store +; CHECK: movq %xmm0 +; CHECK: ret +define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge +.lr.ph: + %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] + %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] + %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 1, i8* %2, align 1 + %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + store i8 2, i8* %3, align 1 + %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2 + store i8 3, i8* %4, align 1 + %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3 + store i8 4, i8* %5, align 1 + %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4 + store i8 5, i8* %6, align 1 + %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5 + store i8 6, i8* %7, align 1 + %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6 + store i8 7, i8* %8, align 1 + %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7 + store i8 8, i8* %9, align 1 + %10 = add nsw i32 %i.02, 1 + %11 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %10, %count + br i1 %exitcond, label %._crit_edge, label %.lr.ph +._crit_edge: + ret void +} + +; Move the first 4 constants as a 
single vector. Move the rest as scalars. +; CHECK: merge_nonconst_store +; CHECK: movd %xmm0 +; CHECK: movb +; CHECK: movb +; CHECK: movb +; CHECK: movb +; CHECK: ret +define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge +.lr.ph: + %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] + %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] + %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 1, i8* %2, align 1 + %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + store i8 2, i8* %3, align 1 + %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2 + store i8 3, i8* %4, align 1 + %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3 + store i8 4, i8* %5, align 1 + %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4 + store i8 %zz, i8* %6, align 1 ; <----------- Not a const; + %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5 + store i8 6, i8* %7, align 1 + %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6 + store i8 7, i8* %8, align 1 + %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7 + store i8 8, i8* %9, align 1 + %10 = add nsw i32 %i.02, 1 + %11 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %10, %count + br i1 %exitcond, label %._crit_edge, label %.lr.ph +._crit_edge: + ret void +} + + +;CHECK: merge_loads +; load: +;CHECK: movw +; store: +;CHECK: movw +;CHECK: ret +define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 + %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 + br label %4 + +; <label>:4 ; preds = %4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ] + %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 
] + %5 = load i8* %2, align 1 + %6 = load i8* %3, align 1 + %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 %5, i8* %7, align 1 + %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + store i8 %6, i8* %8, align 1 + %9 = add nsw i32 %i.02, 1 + %10 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %9, %count + br i1 %exitcond, label %._crit_edge, label %4 + +._crit_edge: ; preds = %4, %0 + ret void +} + +; The loads and the stores are interleaved. Can't merge them. +;CHECK: no_merge_loads +;CHECK: movb +;CHECK: movb +;CHECK: movb +;CHECK: movb +;CHECK: ret +define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { + %1 = icmp sgt i32 %count, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 + %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 + br label %a4 + +a4: ; preds = %4, %.lr.ph + %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] + %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] + %a5 = load i8* %2, align 1 + %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 + store i8 %a5, i8* %a7, align 1 + %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1 + %a6 = load i8* %3, align 1 + store i8 %a6, i8* %a8, align 1 + %a9 = add nsw i32 %i.02, 1 + %a10 = getelementptr inbounds %struct.A* %.01, i64 1 + %exitcond = icmp eq i32 %a9, %count + br i1 %exitcond, label %._crit_edge, label %a4 + +._crit_edge: ; preds = %4, %0 + ret void +} + + diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll index b546462b68..b094fed2f6 100644 --- a/test/CodeGen/X86/loop-strength-reduce-2.ll +++ b/test/CodeGen/X86/loop-strength-reduce-2.ll @@ -1,20 +1,18 @@ -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC -; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
-check-prefix=STATIC +; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=pic | FileCheck %s -check-prefix=PIC +; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s -check-prefix=STATIC ; ; Make sure the common loop invariant A is hoisted up to preheader, ; since too many registers are needed to subsume it into the addressing modes. ; It's safe to sink A in when it's not pic. ; PIC: align -; PIC: movl $4, -4([[REG:%e[a-z]+]]) -; PIC: movl $5, ([[REG]]) +; PIC: movlpd %xmm0, -4([[REG:%e[a-z]+]]) ; PIC: addl $4, [[REG]] ; PIC: decl {{%e[[a-z]+}} ; PIC: jne ; STATIC: align -; STATIC: movl $4, -4(%ecx) -; STATIC: movl $5, (%ecx) +; STATIC: movlpd %xmm0, -4(%ecx) ; STATIC: addl $4, %ecx ; STATIC: decl %eax ; STATIC: jne diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll index b1c9fb9c07..e2d77f2a6f 100644 --- a/test/CodeGen/X86/loop-strength-reduce-3.ll +++ b/test/CodeGen/X86/loop-strength-reduce-3.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck %s ; CHECK: align -; CHECK: movl $4, -4(%ecx) -; CHECK: movl $5, (%ecx) +; CHECK: movlpd %xmm0, -4(%ecx) ; CHECK: addl $4, %ecx ; CHECK: decl %eax ; CHECK: jne diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll index 42c6ac4983..d197451eee 100644 --- a/test/CodeGen/X86/loop-strength-reduce.ll +++ b/test/CodeGen/X86/loop-strength-reduce.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s ; CHECK: align -; CHECK: movl $4, -4(%ecx) -; CHECK: movl $5, (%ecx) +; CHECK: movlpd %xmm0, -4(%ecx) ; CHECK: addl $4, %ecx ; CHECK: decl %eax ; CHECK: jne |