From 16f81f783207fa359f3afc589e2135d4805c9b98 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 25 Nov 2013 05:20:10 +0000 Subject: Merging r195476: ------------------------------------------------------------------------ r195476 | hliao | 2013-11-22 09:56:57 -0800 (Fri, 22 Nov 2013) | 6 lines Fix PR18014 - When simplifying the mask generation for BLEND, check whether that mask is also consumed by other non-BLEND insns. If true, skip that simplification. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 +++++++++ test/CodeGen/X86/pr18014.ll | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 test/CodeGen/X86/pr18014.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9df0232a34..627aa86e75 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17007,6 +17007,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (BitWidth == 1) return SDValue(); + // Check all uses of that condition operand to check whether it will be + // consumed by non-BLEND instructions, which may depend on all bits are set + // properly. + for (SDNode::use_iterator I = Cond->use_begin(), + E = Cond->use_end(); I != E; ++I) + if (I->getOpcode() != ISD::VSELECT) + // TODO: Add other opcodes eventually lowered into BLEND. 
+ return SDValue(); + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); diff --git a/test/CodeGen/X86/pr18014.ll b/test/CodeGen/X86/pr18014.ll new file mode 100644 index 0000000000..e3860b88bf --- /dev/null +++ b/test/CodeGen/X86/pr18014.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=penryn | FileCheck %s + +; Ensure PSRAD is generated as the condition is consumed by both PADD and +; BLENDVPS. PADD requires all bits to be set properly. + +define <4 x i32> @foo(<4 x i32>* %p, <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { + %sext_cond = sext <4 x i1> %cond to <4 x i32> + %t1 = add <4 x i32> %v1, %sext_cond + %t2 = select <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2 + store <4 x i32> %t2, <4 x i32>* %p + ret <4 x i32> %t1 +; CHECK: foo +; CHECK: pslld +; CHECK: psrad +; CHECK: ret +} -- cgit v1.2.3