From 59889f7f496f549965764820a0150c4068c02f5b Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 7 Apr 2013 22:11:09 +0000 Subject: Cleanup and improve PPC fsel generation First, we should not cheat: fsel-based lowering of select_cc is a finite-math-only optimization (the ISA manual, section F.3 of v2.06, makes this clear, as does a note in our own README). This also adds fsel-based lowering of EQ and NE condition codes. As it turned out, fsel generation was covered by a grand total of zero regression test cases. I've added some test cases to cover the existing behavior (which is now finite-math only), as well as the new EQ cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179000 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 40 ++++++++-- test/CodeGen/PowerPC/fsel.ll | 137 +++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/PowerPC/fsel.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index cec0b8f3c7..333976b06c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4673,10 +4673,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; - ISD::CondCode CC = cast(Op.getOperand(4))->get(); + // We might be able to do better than this under some circumstances, but in + // general, fsel-based lowering of select is a finite-math-only optimization. + // For more information, see section F.3 of the 2.06 ISA specification. + if (!DAG.getTarget().Options.NoInfsFPMath || + !DAG.getTarget().Options.NoNaNsFPMath) + return Op; - // Cannot handle SETEQ/SETNE. - if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; + ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); @@ -4686,9 +4690,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. + SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. + case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt @@ -4711,30 +4726,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. + case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } diff --git a/test/CodeGen/PowerPC/fsel.ll b/test/CodeGen/PowerPC/fsel.ll new file mode 100644 index 0000000000..8cd43e616b --- /dev/null +++ b/test/CodeGen/PowerPC/fsel.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=CHECK-FM %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @zerocmp1(double %a, double %y, double %z) #0 { +entry: + %cmp = fcmp ult double %a, 0.000000e+00 + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y + +; CHECK: @zerocmp1 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @zerocmp1 +; CHECK-FM: fsel 1, 1, 2, 3 +; CHECK-FM: blr +} + +define double @zerocmp2(double %a, double %y, double %z) #0 { +entry: + %cmp = fcmp ogt double %a, 0.000000e+00 + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @zerocmp2 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @zerocmp2 +; CHECK-FM: fneg [[REG:[0-9]+]], 1 +; CHECK-FM: fsel 1, [[REG]], 3, 2 +; CHECK-FM: blr +} + +define double @zerocmp3(double %a, double %y, double %z) #0 { +entry: + %cmp = fcmp oeq double %a, 0.000000e+00 + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @zerocmp3 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @zerocmp3 +; CHECK-FM: fsel [[REG:[0-9]+]], 1, 2, 3 +; CHECK-FM: fneg [[REG2:[0-9]+]], 1 +; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3 +; CHECK-FM: blr +} + +define double @min1(double %a, double %b) #0 { +entry: + %cmp = fcmp ole double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond + +; CHECK: @min1 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @min1 +; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1 +; CHECK-FM: fsel 1, [[REG]], 1, 2 +; CHECK-FM: blr +} + +define double @max1(double %a, double %b) #0 { +entry: + %cmp = fcmp oge double %a, %b + %cond = select i1 %cmp, double %a, double %b + ret double %cond + +; CHECK: @max1 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @max1 +; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 +; CHECK-FM: fsel 1, [[REG]], 1, 2 +; CHECK-FM: blr +} + +define double @cmp1(double %a, double %b, double %y, double %z) #0 { +entry: + %cmp = fcmp ult double %a, %b + %z.y = select i1 %cmp, double %z, double %y + ret double %z.y + +; CHECK: @cmp1 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @cmp1 +; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 +; CHECK-FM: fsel 1, [[REG]], 3, 4 +; CHECK-FM: blr +} + +define double @cmp2(double %a, double %b, double %y, double %z) #0 { +entry: + %cmp = fcmp ogt double %a, %b + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @cmp2 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @cmp2 +; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1 +; CHECK-FM: fsel 1, [[REG]], 4, 3 +; CHECK-FM: blr +} + +define double @cmp3(double %a, double %b, double %y, double %z) #0 { +entry: + %cmp = fcmp oeq double %a, %b + %y.z = select i1 %cmp, double %y, double %z + ret double %y.z + +; CHECK: @cmp3 +; CHECK-NOT: fsel +; CHECK: blr + +; CHECK-FM: @cmp3 +; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 +; CHECK-FM: fsel [[REG2:[0-9]+]], [[REG]], 3, 4 +; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]] +; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4 +; CHECK-FM: blr +} + +attributes #0 = { nounwind readnone } + -- cgit v1.2.3