diff options
author | Jim Grosbach <grosbach@apple.com> | 2013-11-22 19:57:47 +0000 |
---|---|---|
committer | Jim Grosbach <grosbach@apple.com> | 2013-11-22 19:57:47 +0000 |
commit | e1af5f6ad178e76429b58759042f061247d90435 (patch) | |
tree | 1d793a0ec2a79a8a05dc9d445d48bdf998050808 /test | |
parent | 22bc1320b5e8e9ac8007686e73f0c55493254188 (diff) | |
download | llvm-e1af5f6ad178e76429b58759042f061247d90435.tar.gz llvm-e1af5f6ad178e76429b58759042f061247d90435.tar.bz2 llvm-e1af5f6ad178e76429b58759042f061247d90435.tar.xz |
X86: Perform integer comparisons at i32 or larger.
Utilizing the 8- and 16-bit comparison instructions, even when an input can
be folded into the comparison instruction itself, is typically not worth it.
There are too many partial-register stalls as a result, leading to significant
slowdowns. By always performing comparisons on at least 32-bit
registers, performance of the calculation chain leading to the
comparison improves. Continue to use the smaller comparisons when
minimizing size, as that allows better folding of loads into the
comparison instructions.
rdar://15386341
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195496 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/2007-10-17-IllegalAsm.ll | 87 | ||||
-rw-r--r-- | test/CodeGen/X86/3addr-16bit.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/X86/codegen-prepare-extload.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/ctpop-combine.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/memcmp.ll | 22 | ||||
-rw-r--r-- | test/CodeGen/X86/shrink-compare.ll | 8 |
6 files changed, 21 insertions, 106 deletions
diff --git a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll b/test/CodeGen/X86/2007-10-17-IllegalAsm.ll deleted file mode 100644 index c0bb55ed14..0000000000 --- a/test/CodeGen/X86/2007-10-17-IllegalAsm.ll +++ /dev/null @@ -1,87 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep addb | not grep x -; RUN: llc < %s -mtriple=x86_64-linux-gnu | grep cmpb | not grep x -; PR1734 - -target triple = "x86_64-unknown-linux-gnu" - %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } - %struct.eh_status = type opaque - %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** } - %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } - %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, %struct.tree_node*, i8, i8, i8 } - %struct.initial_value_struct = type opaque - %struct.lang_decl = type opaque - %struct.language_function = type opaque - %struct.location_t = type { i8*, i32 } - %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 } - %struct.rtunion = type { 
i8* } - %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] } - %struct.rtx_def = type { i16, i8, i8, %struct.u } - %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* } - %struct.stack_local_entry = type opaque - %struct.temp_slot = type opaque - %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %union.tree_ann_d*, i8, i8, i8, i8, i8 } - %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } - %struct.tree_decl_u1 = type { i64 } - %struct.tree_decl_u2 = type { %struct.function* } - %struct.tree_node = type { %struct.tree_decl } - %struct.u = type { [1 x %struct.rtunion] } - %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } - %struct.varasm_status = type opaque - %struct.varray_data = type { [1 x i64] } - %struct.varray_head_tag = type { i64, i64, i32, i8*, %struct.varray_data } - %union.tree_ann_d = type opaque - -define void @layout_type(%struct.tree_node* %type) { -entry: - %tmp32 = load i32* null, align 8 ; <i32> [#uses=3] - %tmp3435 = trunc i32 %tmp32 to i8 ; <i8> [#uses=1] - %tmp53 = icmp eq %struct.tree_node* null, null ; <i1> [#uses=1] - br i1 %tmp53, label %cond_next57, label %UnifiedReturnBlock - -cond_next57: ; preds = %entry - %tmp65 = and i32 %tmp32, 255 ; <i32> [#uses=1] - switch i32 %tmp65, label %UnifiedReturnBlock [ - i32 6, label %bb140 - i32 7, label %bb140 - i32 8, label %bb140 - i32 13, label %bb478 - ] - -bb140: ; preds = %cond_next57, %cond_next57, %cond_next57 - %tmp219 = load i32* null, align 8 ; <i32> [#uses=1] - %tmp221222 = trunc i32 %tmp219 
to i8 ; <i8> [#uses=1] - %tmp223 = icmp eq i8 %tmp221222, 24 ; <i1> [#uses=1] - br i1 %tmp223, label %cond_true226, label %cond_next340 - -cond_true226: ; preds = %bb140 - switch i8 %tmp3435, label %cond_true288 [ - i8 6, label %cond_next340 - i8 9, label %cond_next340 - i8 7, label %cond_next340 - i8 8, label %cond_next340 - i8 10, label %cond_next340 - ] - -cond_true288: ; preds = %cond_true226 - unreachable - -cond_next340: ; preds = %cond_true226, %cond_true226, %cond_true226, %cond_true226, %cond_true226, %bb140 - ret void - -bb478: ; preds = %cond_next57 - br i1 false, label %cond_next500, label %cond_true497 - -cond_true497: ; preds = %bb478 - unreachable - -cond_next500: ; preds = %bb478 - %tmp513 = load i32* null, align 8 ; <i32> [#uses=1] - %tmp545 = and i32 %tmp513, 8192 ; <i32> [#uses=1] - %tmp547 = and i32 %tmp32, -8193 ; <i32> [#uses=1] - %tmp548 = or i32 %tmp547, %tmp545 ; <i32> [#uses=1] - store i32 %tmp548, i32* null, align 8 - ret void - -UnifiedReturnBlock: ; preds = %cond_next57, %entry - ret void -} diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll index fafdfdb748..2d6a5e7665 100644 --- a/test/CodeGen/X86/3addr-16bit.ll +++ b/test/CodeGen/X86/3addr-16bit.ll @@ -34,7 +34,7 @@ entry: ; 64BIT-LABEL: t2: ; 64BIT-NOT: movw %si, %ax -; 64BIT: decl %eax +; 64BIT: leal -1(%rsi), %eax ; 64BIT: movzwl %ax %0 = icmp eq i16 %k, %c ; <i1> [#uses=1] %1 = add i16 %k, -1 ; <i16> [#uses=3] @@ -59,7 +59,7 @@ entry: ; 64BIT-LABEL: t3: ; 64BIT-NOT: movw %si, %ax -; 64BIT: addl $2, %eax +; 64BIT: leal 2(%rsi), %eax %0 = add i16 %k, 2 ; <i16> [#uses=3] %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] br i1 %1, label %bb, label %bb1 @@ -82,7 +82,7 @@ entry: ; 64BIT-LABEL: t4: ; 64BIT-NOT: movw %si, %ax -; 64BIT: addl %edi, %eax +; 64BIT: leal (%rsi,%rdi), %eax %0 = add i16 %k, %c ; <i16> [#uses=3] %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] br i1 %1, label %bb, label %bb1 diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll 
b/test/CodeGen/X86/codegen-prepare-extload.ll index 14df815663..9320706d97 100644 --- a/test/CodeGen/X86/codegen-prepare-extload.ll +++ b/test/CodeGen/X86/codegen-prepare-extload.ll @@ -5,7 +5,7 @@ ; CodeGenPrepare should move the zext into the block with the load ; so that SelectionDAG can select it with the load. -; CHECK: movzbl ({{%rdi|%rcx}}), %eax +; CHECK: movsbl ({{%rdi|%rcx}}), %eax define void @foo(i8* %p, i32* %q) { entry: diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll index 786f7f9b1c..463505bd95 100644 --- a/test/CodeGen/X86/ctpop-combine.ll +++ b/test/CodeGen/X86/ctpop-combine.ll @@ -35,6 +35,6 @@ define i32 @test3(i64 %x) nounwind readnone { %conv = zext i1 %cmp to i32 ret i32 %conv ; CHECK-LABEL: test3: -; CHECK: cmpb $2 +; CHECK: cmpl $2 ; CHECK: ret } diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index cb0797d3eb..0a534926c6 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -22,8 +22,9 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp2: -; CHECK: movw ([[A0:%rdi|%rcx]]), %ax -; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax +; CHECK: movzwl +; CHECK-NEXT: movzwl +; CHECK-NEXT: cmpl ; NOBUILTIN-LABEL: memcmp2: ; NOBUILTIN: callq } @@ -41,7 +42,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp2a: -; CHECK: cmpw $28527, ([[A0]]) +; CHECK: movzwl +; CHECK-NEXT: cmpl $28527, } @@ -58,8 +60,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp4: -; CHECK: movl ([[A0]]), %eax -; CHECK: cmpl ([[A1]]), %eax +; CHECK: movl +; CHECK-NEXT: cmpl } define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind { @@ -75,7 +77,7 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp4a: -; CHECK: cmpl $1869573999, ([[A0]]) +; CHECK: cmpl $1869573999, } define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { @@ -91,8 +93,8 @@ bb: ; preds = %entry return: ; preds = %entry 
ret void ; CHECK-LABEL: memcmp8: -; CHECK: movq ([[A0]]), %rax -; CHECK: cmpq ([[A1]]), %rax +; CHECK: movq +; CHECK: cmpq } define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind { @@ -108,7 +110,7 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp8a: -; CHECK: movabsq $8029759185026510694, %rax -; CHECK: cmpq %rax, ([[A0]]) +; CHECK: movabsq $8029759185026510694, +; CHECK: cmpq } diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll index bb892011e2..fc7ee061f3 100644 --- a/test/CodeGen/X86/shrink-compare.ll +++ b/test/CodeGen/X86/shrink-compare.ll @@ -2,7 +2,7 @@ declare void @bar() -define void @test1(i32* nocapture %X) nounwind { +define void @test1(i32* nocapture %X) nounwind minsize { entry: %tmp1 = load i32* %X, align 4 %and = and i32 %tmp1, 255 @@ -19,7 +19,7 @@ if.end: ; CHECK: cmpb $47, (%{{rdi|rcx}}) } -define void @test2(i32 %X) nounwind { +define void @test2(i32 %X) nounwind minsize { entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 47 @@ -35,7 +35,7 @@ if.end: ; CHECK: cmpb $47, %{{dil|cl}} } -define void @test3(i32 %X) nounwind { +define void @test3(i32 %X) nounwind minsize { entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 255 @@ -70,7 +70,7 @@ lor.end: ; preds = %lor.rhs, %entry @x = global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 1 }, align 4 ; PR16551 -define void @test5(i32 %X) nounwind { +define void @test5(i32 %X) nounwind minsize { entry: %bf.load = load i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4 %bf.lshr = lshr i56 %bf.load, 32 |