From d0976a3d200016d49448d0b08234220a7f07eb3d Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Mon, 23 Jun 2014 21:55:40 +0000 Subject: [FastISel][X86] Add support for floating-point select. This extends the select lowering to support floating-point selects. The lowering depends on SSE instructions and that the conditon comes from a floating-point compare. Under this conditions it is possible to emit an optimized instruction sequence that doesn't require any branches to simulate the select. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211544 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 128 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) (limited to 'lib/Target/X86/X86FastISel.cpp') diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3c9acba5cb..af9eaf32a3 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -113,6 +113,8 @@ private: bool X86FastEmitCMoveSelect(const Instruction *I); + bool X86FastEmitSSESelect(const Instruction *I); + bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); @@ -235,6 +237,41 @@ getX86ConditonCode(CmpInst::Predicate Predicate) { return std::make_pair(CC, NeedSwap); } +static std::pair +getX86SSECondtionCode(CmpInst::Predicate Predicate) { + unsigned CC; + bool NeedSwap = false; + + // SSE Condition code mapping: + // 0 - EQ + // 1 - LT + // 2 - LE + // 3 - UNORD + // 4 - NEQ + // 5 - NLT + // 6 - NLE + // 7 - ORD + switch (Predicate) { + default: llvm_unreachable("Unexpected predicate"); + case CmpInst::FCMP_OEQ: CC = 0; break; + case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through + case CmpInst::FCMP_OLT: CC = 1; break; + case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through + case CmpInst::FCMP_OLE: CC = 2; break; + case CmpInst::FCMP_UNO: CC = 3; break; + case CmpInst::FCMP_UNE: CC = 4; break; + case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through + case CmpInst::FCMP_UGE: CC = 5; break; + case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through + case CmpInst::FCMP_UGT: CC = 6; break; + case CmpInst::FCMP_ORD: CC = 7; break; + case CmpInst::FCMP_UEQ: + case CmpInst::FCMP_ONE: CC = 8; break; + } + + return std::make_pair(CC, NeedSwap); +} + bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) @@ -1728,6 +1765,93 @@ bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) { return true; } +/// \brief Emit SSE instructions to lower the select. +/// +/// Try to use SSE1/SSE2 instructions to simulate a select without branches. +/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary +/// SSE instructions are available. +bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) { + MVT RetVT; + if (!isTypeLegal(I->getType(), RetVT)) + return false; + + const auto *CI = dyn_cast(I->getOperand(0)); + if (!CI) + return false; + + if (I->getType() != CI->getOperand(0)->getType() || + !((Subtarget->hasSSE1() && RetVT == MVT::f32) || + (Subtarget->hasSSE2() && RetVT == MVT::f64) )) + return false; + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. + // We don't have to materialize a zero constant for this case and can just use + // %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *CmpRHSC = dyn_cast(CmpRHS); + if (CmpRHSC && CmpRHSC->isNullValue()) + CmpRHS = CmpLHS; + } + + unsigned CC; + bool NeedSwap; + std::tie(CC, NeedSwap) = getX86SSECondtionCode(Predicate); + if (CC > 7) + return false; + + if (NeedSwap) + std::swap(CmpLHS, CmpRHS); + + static unsigned OpcTable[2][2][4] = { + { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, + { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } }, + { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }, + { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } } + }; + + bool HasAVX = Subtarget->hasAVX(); + unsigned *Opc = nullptr; + switch (RetVT.SimpleTy) { + default: return false; + case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break; + case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break; + } + + const Value *LHS = I->getOperand(1); + const Value *RHS = I->getOperand(2); + + unsigned LHSReg = getRegForValue(LHS); + bool LHSIsKill = hasTrivialKill(LHS); + + unsigned RHSReg = getRegForValue(RHS); + bool RHSIsKill = hasTrivialKill(RHS); + + unsigned CmpLHSReg = getRegForValue(CmpLHS); + bool CmpLHSIsKill = hasTrivialKill(CmpLHS); + + unsigned CmpRHSReg = getRegForValue(CmpRHS); + bool CmpRHSIsKill = hasTrivialKill(CmpRHS); + + if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) + return false; + + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, + LHSReg, LHSIsKill); + unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, + RHSReg, RHSIsKill); + unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, + AndReg, /*IsKill=*/true); + UpdateValueMap(I, ResultReg); + return true; +} + bool X86FastISel::X86SelectSelect(const Instruction *I) { MVT RetVT; if (!isTypeLegal(I->getType(), RetVT)) @@ -1762,6 +1886,10 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { if (X86FastEmitCMoveSelect(I)) return true; + // Try to use a sequence of SSE instructions to simulate a conditonal move. + if (X86FastEmitSSESelect(I)) + return true; + return false; } -- cgit v1.2.3