diff options
author | Nadav Rotem <nadav.rotem@intel.com> | 2012-01-15 19:27:55 +0000 |
---|---|---|
committer | Nadav Rotem <nadav.rotem@intel.com> | 2012-01-15 19:27:55 +0000 |
commit | cc6165695fa1713230184d743368b8b3642faa5d (patch) | |
tree | 51c2d38fe6efb00576d04673458ac01d90c09ba5 /lib | |
parent | ed4c8c633c52a40ad1a3e8687f290be4aeb1f0e8 (diff) | |
download | llvm-cc6165695fa1713230184d743368b8b3642faa5d.tar.gz llvm-cc6165695fa1713230184d743368b8b3642faa5d.tar.bz2 llvm-cc6165695fa1713230184d743368b8b3642faa5d.tar.xz |
[AVX] Optimize x86 VSELECT instructions using SimplifyDemandedBits.
We know that the blend instructions only use the MSB, so if the mask is
sign-extended then we can convert it into a SHL instruction. This is a
common pattern because the type-legalizer sign-extends the i1 type which
is used by the LLVM-IR for the condition.
Added a new optimization in SimplifyDemandedBits for SIGN_EXTEND_INREG -> SHL.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148225 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 23 |
2 files changed, 44 insertions, 6 deletions
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index bd930d8ef1..5574b21441 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1608,23 +1608,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + EVT InVT = Op.getOperand(0).getValueType(); + EVT ShTy = getShiftAmountTy(InVT); + // In this code we may handle vector types. We can't use the + // getShiftAmountTy API because it only works on scalars. + // We use the shift value type because we know that its an integer + // with enough bits. + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, + Op.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1642,7 +1659,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 68cd44116d..b5198c8510 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12868,6 +12868,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT /// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); SDValue Cond = N->getOperand(0); @@ -13144,6 +13145,26 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } + // If we know that this node is legal then we know that it is going to be + // matched by one of the SSE/AVX BLEND instructions. These instructions only + // depend on the highest bit in each word. Try to use SimplifyDemandedBits + // to simplify previous instructions. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && + !DCI.isBeforeLegalize() && + TLI.isOperationLegal(ISD::VSELECT, VT)) { + unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); + + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), + DCI.isBeforeLegalizeOps()); + if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || + TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) + DCI.CommitTargetLoweringOpt(TLO); + } + return SDValue(); } @@ -14609,7 +14630,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::VSELECT: - case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); + case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); |