Add PPC FP rounding instructions fri[mnpz]

These instructions are available on the P5x (and later) and on the A2. They implement the standard floating-point rounding operations (floor, trunc, etc.). One caveat: frin (round to nearest) does not implement "ties to even", and so is only enabled in fast-math mode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178337 91177308-0d34-0410-b5e6-96231b3b80d8
author: Hal Finkel <hfinkel@anl.gov> 2013-03-29 08:57:48 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2013-03-29 08:57:48 +0000
commit: f5d5c434606161fb017a34cb656fa4aa5a3e076b (patch)
tree: 4641a3e130c65183f13495d0228b9454e7bf7b60 /lib/Target/PowerPC
parent: ef484a376cce3729b45ad86eab5724aa83a61823 (diff)
download: llvm-f5d5c434606161fb017a34cb656fa4aa5a3e076b.tar.gz
llvm-f5d5c434606161fb017a34cb656fa4aa5a3e076b.tar.bz2
llvm-f5d5c434606161fb017a34cb656fa4aa5a3e076b.tar.xz
5 files changed, 64 insertions, 12 deletions
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 27f7157a28..01cd55ee27 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -59,6 +59,8 @@ def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
                                         "Enable the fsqrt instruction">;
 def FeatureSTFIWX    : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
                                         "Enable the stfiwx instruction">;
+def FeatureFPRND     : SubtargetFeature<"fprnd", "HasFPRND", "true",
+                                        "Enable the fri[mnpz] instructions">;
 def FeatureISEL      : SubtargetFeature<"isel","HasISEL", "true",
                                         "Enable the isel instruction">;
 def FeaturePOPCNTD   : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
@@ -76,7 +78,6 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 // CMPB         p6, p6x, p7        cmpb
 // DFP          p6, p6x, p7        decimal floating-point instructions
 // FLT_CVT      p7                 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
-// FPRND        p5x, p6, p6x, p7   frim, frin, frip, friz
 // FRE          p5 through p7      fre (vs. fres, available since p3)
 // FRSQRTES     p5 through p7      frsqrtes (vs. frsqrte, available since p3)
 // LFIWAX       p6, p6x, p7        lfiwax
@@ -132,14 +133,14 @@ def : ProcessorModel<"e5500", PPCE5500Model,
                    FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
 def : Processor<"a2", PPCA2Itineraries,
                   [DirectiveA2, FeatureBookE, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
-                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
-               /*, Feature64BitRegs */]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
+                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"a2q", PPCA2Itineraries,
                   [DirectiveA2, FeatureBookE, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, FeatureISEL,
-                   FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
-               /*, Feature64BitRegs */, FeatureQPX]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
+                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>;
 def : Processor<"pwr3", G5Itineraries,
                   [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
                    FeatureSTFIWX, Feature64Bit]>;
@@ -151,19 +152,21 @@ def : Processor<"pwr5", G5Itineraries,
                    FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
 def : Processor<"pwr5x", G5Itineraries,
                   [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
+                   Feature64Bit]>;
 def : Processor<"pwr6", G5Itineraries,
                   [DirectivePwr6, FeatureAltivec,
                    FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"pwr6x", G5Itineraries,
                   [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
-                   FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+                   FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
+                   Feature64Bit]>;
 def : Processor<"pwr7", G5Itineraries,
                   [DirectivePwr7, FeatureAltivec,
                    FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
-                   FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
-                   Feature64Bit /*, Feature64BitRegs */]>;
+                   FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
+                   FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
 def : Processor<"ppc64", G5Itineraries,
                   [Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 74e811b35c..84af0bbb31 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -158,6 +158,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 
+  if (Subtarget->hasFPRND()) {
+    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+
+    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+
+    // frin does not implement "ties to even." Thus, this is safe only in
+    // fast-math mode.
+    if (TM.Options.UnsafeFPMath) {
+      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+    }
+  }
+
   // PowerPC does not have BSWAP, CTPOP or CTTZ
   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index ea0be97e0d..af3b57f2ed 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1128,9 +1128,38 @@ let Uses = [RM] in {
   def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
                         "fctiwz $frD, $frB", FPGeneral,
                         [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
   def FRSP   : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
                         "frsp $frD, $frB", FPGeneral,
                         [(set f32:$frD, (fround f64:$frB))]>;
+
+  // The frin -> nearbyint mapping is valid only in fast-math mode.
+  def FRIND  : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "frin $frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fnearbyint f64:$frB))]>;
+  def FRINS  : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "frin $frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fnearbyint f32:$frB))]>;
+
+  def FRIPD  : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "frip $frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fceil f64:$frB))]>;
+  def FRIPS  : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "frip $frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fceil f32:$frB))]>;
+  def FRIZD  : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "friz $frD, $frB", FPGeneral,
+                        [(set f64:$frD, (ftrunc f64:$frB))]>;
+  def FRIZS  : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "friz $frD, $frB", FPGeneral,
+                        [(set f32:$frD, (ftrunc f32:$frB))]>;
+  def FRIMD  : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
+                        "frim $frD, $frB", FPGeneral,
+                        [(set f64:$frD, (ffloor f64:$frB))]>;
+  def FRIMS  : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
+                        "frim $frD, $frB", FPGeneral,
+                        [(set f32:$frD, (ffloor f32:$frB))]>;
+
   def FSQRT  : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
                         "fsqrt $frD, $frB", FPSqrt,
                         [(set f64:$frD, (fsqrt f64:$frB))]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index be64700d8c..b793c37de8 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
   , HasQPX(false)
   , HasFSQRT(false)
   , HasSTFIWX(false)
+  , HasFPRND(false)
   , HasISEL(false)
   , HasPOPCNTD(false)
   , HasLDBRX(false)
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 36436f6e40..bf5bd844c0 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -78,6 +78,7 @@ protected:
   bool HasQPX;
   bool HasFSQRT;
   bool HasSTFIWX;
+  bool HasFPRND;
   bool HasISEL;
   bool HasPOPCNTD;
   bool HasLDBRX;
@@ -157,6 +158,7 @@ public:
   // Specific obvious features.
   bool hasFSQRT() const { return HasFSQRT; }
   bool hasSTFIWX() const { return HasSTFIWX; }
+  bool hasFPRND() const { return HasFPRND; }
   bool hasAltivec() const { return HasAltivec; }
   bool hasQPX() const { return HasQPX; }
   bool hasMFOCRF() const { return HasMFOCRF; }
author	Hal Finkel <hfinkel@anl.gov>	2013-03-29 08:57:48 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2013-03-29 08:57:48 +0000
commit	f5d5c434606161fb017a34cb656fa4aa5a3e076b (patch)
tree	4641a3e130c65183f13495d0228b9454e7bf7b60 /lib/Target/PowerPC
parent	ef484a376cce3729b45ad86eab5724aa83a61823 (diff)
download	llvm-f5d5c434606161fb017a34cb656fa4aa5a3e076b.tar.gz llvm-f5d5c434606161fb017a34cb656fa4aa5a3e076b.tar.bz2 llvm-f5d5c434606161fb017a34cb656fa4aa5a3e076b.tar.xz