From 42febc6e9963f82d5c56c3c7e6afe5e00769af41 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 11 May 2011 14:40:50 +0000 Subject: Fixes a bug in the DAGCombiner. LoadSDNodes have two values (data, chain). If there is a store after the load node, then there is a chain, which means that there is another user. Thus, asking hasOneUse would fail. Instead we ask hasNUsesOfValue on the 'data' value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@131183 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- test/CodeGen/ARM/vstlane.ll | 12 ++++++++---- test/CodeGen/X86/2011-05-09-loaduse.ll | 13 +++++++++++++ test/CodeGen/X86/vec_extract-sse4.ll | 8 ++++---- test/CodeGen/X86/vec_extract.ll | 6 +++--- 5 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/X86/2011-05-09-loaduse.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9a0afa9eee..131e43cd1c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6566,7 +6566,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index d1bc15ad57..08b72325ed 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -54,7 +54,8 @@ define void @vst1lanef(float* %A, <2 x float>* %B) nounwind { define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1laneQi8: -;CHECK: vst1.8 {d17[1]}, [r0] +; // Can use scalar load. No need to use vectors. 
+; // CHE-CK: vst1.8 {d17[1]}, [r0] %tmp1 = load <16 x i8>* %B %tmp2 = extractelement <16 x i8> %tmp1, i32 9 store i8 %tmp2, i8* %A, align 8 @@ -72,7 +73,8 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst1laneQi32: -;CHECK: vst1.32 {d17[1]}, [r0, :32] +; // Can use scalar load. No need to use vectors. +; // CHE-CK: vst1.32 {d17[1]}, [r0, :32] %tmp1 = load <4 x i32>* %B %tmp2 = extractelement <4 x i32> %tmp1, i32 3 store i32 %tmp2, i32* %A, align 8 @@ -82,7 +84,8 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ;Check for a post-increment updating store. define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { ;CHECK: vst1laneQi32_update: -;CHECK: vst1.32 {d17[1]}, [r1, :32]! +; // Can use scalar load. No need to use vectors. +; // CHE-CK: vst1.32 {d17[1]}, [r1, :32]! %A = load i32** %ptr %tmp1 = load <4 x i32>* %B %tmp2 = extractelement <4 x i32> %tmp1, i32 3 @@ -94,7 +97,8 @@ define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst1laneQf: -;CHECK: vst1.32 {d17[1]}, [r0] +; // Can use scalar load. No need to use vectors. 
+; // CHE-CK: vst1.32 {d17[1]}, [r0] %tmp1 = load <4 x float>* %B %tmp2 = extractelement <4 x float> %tmp1, i32 3 store float %tmp2, float* %A diff --git a/test/CodeGen/X86/2011-05-09-loaduse.ll b/test/CodeGen/X86/2011-05-09-loaduse.ll new file mode 100644 index 0000000000..8673d7433f --- /dev/null +++ b/test/CodeGen/X86/2011-05-09-loaduse.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s + +;CHECK: test +;CHECK-not: pshufd +;CHECK: ret +define float @test(<4 x float>* %A) nounwind { +entry: + %T = load <4 x float>* %A + %R = extractelement <4 x float> %T, i32 3 + store <4 x float>, <4 x float>* %A + ret float %R +} + diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll index dab5dd144f..f4876543d3 100644 --- a/test/CodeGen/X86/vec_extract-sse4.ll +++ b/test/CodeGen/X86/vec_extract-sse4.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t -; RUN: grep extractps %t | count 1 -; RUN: grep pextrd %t | count 1 +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t +; RUN: not grep extractps %t +; RUN: not grep pextrd %t ; RUN: not grep pshufd %t -; RUN: not grep movss %t +; RUN: grep movss %t | count 2 define void @t1(float* %R, <4 x float>* %P1) nounwind { %X = load <4 x float>* %P1 diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll index b0137304e8..2c8796bc4f 100644 --- a/test/CodeGen/X86/vec_extract.ll +++ b/test/CodeGen/X86/vec_extract.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t -; RUN: grep movss %t | count 3 +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse41 -o %t +; RUN: grep movss %t | count 4 ; RUN: grep movhlps %t | count 1 -; RUN: grep pshufd %t | count 1 +; RUN: not grep pshufd %t ; RUN: grep unpckhpd %t | count 1 define void @test1(<4 x float>* %F, float* %f) nounwind { -- cgit v1.2.3