summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86FastISel.cpp
diff options
context:
space:
mode:
authorJuergen Ributzka <juergen@apple.com>2014-06-23 21:55:40 +0000
committerJuergen Ributzka <juergen@apple.com>2014-06-23 21:55:40 +0000
commitd0976a3d200016d49448d0b08234220a7f07eb3d (patch)
tree7da6ee41d338bfcb90942cff3c95b94ba32c6641 /lib/Target/X86/X86FastISel.cpp
parent5f4e6e1ec0154137efb15ea497ebd8cbe1850ea3 (diff)
downloadllvm-d0976a3d200016d49448d0b08234220a7f07eb3d.tar.gz
llvm-d0976a3d200016d49448d0b08234220a7f07eb3d.tar.bz2
llvm-d0976a3d200016d49448d0b08234220a7f07eb3d.tar.xz
[FastISel][X86] Add support for floating-point select.
This extends the select lowering to support floating-point selects. The lowering depends on SSE instructions and that the conditon comes from a floating-point compare. Under this conditions it is possible to emit an optimized instruction sequence that doesn't require any branches to simulate the select. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211544 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r--lib/Target/X86/X86FastISel.cpp128
1 files changed, 128 insertions, 0 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3c9acba5cb..af9eaf32a3 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -113,6 +113,8 @@ private:
bool X86FastEmitCMoveSelect(const Instruction *I);
+ bool X86FastEmitSSESelect(const Instruction *I);
+
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -235,6 +237,41 @@ getX86ConditonCode(CmpInst::Predicate Predicate) {
return std::make_pair(CC, NeedSwap);
}
+static std::pair<unsigned, bool>
+getX86SSECondtionCode(CmpInst::Predicate Predicate) {
+ unsigned CC;
+ bool NeedSwap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (Predicate) {
+ default: llvm_unreachable("Unexpected predicate");
+ case CmpInst::FCMP_OEQ: CC = 0; break;
+ case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: CC = 1; break;
+ case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: CC = 2; break;
+ case CmpInst::FCMP_UNO: CC = 3; break;
+ case CmpInst::FCMP_UNE: CC = 4; break;
+ case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: CC = 5; break;
+ case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: CC = 6; break;
+ case CmpInst::FCMP_ORD: CC = 7; break;
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_ONE: CC = 8; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
@@ -1728,6 +1765,93 @@ bool X86FastISel::X86FastEmitCMoveSelect(const Instruction *I) {
return true;
}
+/// \brief Emit SSE instructions to lower the select.
+///
+/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
+/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
+/// SSE instructions are available.
+bool X86FastISel::X86FastEmitSSESelect(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
+ if (!CI)
+ return false;
+
+ if (I->getType() != CI->getOperand(0)->getType() ||
+ !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
+ (Subtarget->hasSSE2() && RetVT == MVT::f64) ))
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
+ }
+
+ unsigned CC;
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86SSECondtionCode(Predicate);
+ if (CC > 7)
+ return false;
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ static unsigned OpcTable[2][2][4] = {
+ { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
+ { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
+ { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ };
+
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned *Opc = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned CmpLHSReg = getRegForValue(CmpLHS);
+ bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
+
+ unsigned CmpRHSReg = getRegForValue(CmpRHS);
+ bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
+
+ if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT RetVT;
if (!isTypeLegal(I->getType(), RetVT))
@@ -1762,6 +1886,10 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
if (X86FastEmitCMoveSelect(I))
return true;
+ // Try to use a sequence of SSE instructions to simulate a conditonal move.
+ if (X86FastEmitSSESelect(I))
+ return true;
+
return false;
}