[PowerPC] Add handling for conversions to fast-isel.

Yet another chunk of fast-isel code. This one handles various conversions involving floating-point. (It also includes some miscellaneous handling throughout the back end for LWA_32 and LWAX_32 that should have been part of the load-store patch.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189677 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2013-08-30 15:18:11 +0000
committer: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2013-08-30 15:18:11 +0000
commit: 9bc94276e796d644cb425a7c7d38cc44dbf4e9c1 (patch)
tree: d294503b8a5692ef83cd100c8133c4fb39f8e10c /lib/Target/PowerPC
parent: 2cc396bfb06a8e4df2fae6450277e7a05ebf6e4b (diff)
download: llvm-9bc94276e796d644cb425a7c7d38cc44dbf4e9c1.tar.gz
llvm-9bc94276e796d644cb425a7c7d38cc44dbf4e9c1.tar.bz2
llvm-9bc94276e796d644cb425a7c7d38cc44dbf4e9c1.tar.xz
4 files changed, 288 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 08d7665065..79af2ef1e6 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
 using namespace llvm;
 
 #include "PPCGenAsmWriter.inc"
@@ -78,6 +79,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
     }
   }
   
+  // For fast-isel, a COPY_TO_REGCLASS may survive this long.  This is
+  // used when converting a 32-bit float to a 64-bit float as part of
+  // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()),
+  // as otherwise we have problems with incorrect register classes
+  // in machine instruction verification.  For now, just avoid trying
+  // to print it, as such an instruction has no effect (a 32-bit float
+  // in a register is already in 64-bit form, just with lower
+  // precision).  FIXME: Is there a better solution?
+  if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+    return;
+  
   printInstruction(MI, O);
   printAnnotation(O, Annot);
 }
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bbfad87f24..e4a631a9cf 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -704,6 +704,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     break;
   case PPC::LD:
   case PPC::STD:
+  case PPC::LWA_32:
   case PPC::LWA: {
     // Verify alignment is legal, so we don't create relocations
     // that can't be supported.
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index aeda78bf6b..3bceed418a 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -109,6 +109,10 @@ class PPCFastISel : public FastISel {
     bool SelectBranch(const Instruction *I);
     bool SelectIndirectBr(const Instruction *I);
     bool SelectCmp(const Instruction *I);
+    bool SelectFPExt(const Instruction *I);
+    bool SelectFPTrunc(const Instruction *I);
+    bool SelectIToFP(const Instruction *I, bool IsSigned);
+    bool SelectFPToI(const Instruction *I, bool IsSigned);
     bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
     bool SelectRet(const Instruction *I);
     bool SelectIntExt(const Instruction *I);
@@ -135,6 +139,9 @@ class PPCFastISel : public FastISel {
                                     const TargetRegisterClass *RC);
     unsigned PPCMaterialize64BitInt(int64_t Imm,
                                     const TargetRegisterClass *RC);
+    unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+                             unsigned SrcReg, bool IsSigned);
+    unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
 
   // Call handling routines.
   private:
@@ -786,6 +793,260 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
   return true;
 }
 
+// Try to fast-select a floating-point extend.  Only the f32 -> f64 case
+// is handled; anything else is declined by returning false.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+  Value *Operand = I->getOperand(0);
+  EVT FromVT = TLI.getValueType(Operand->getType(), true);
+  EVT ToVT   = TLI.getValueType(I->getType(), true);
+
+  bool IsF32ToF64 = (FromVT == MVT::f32) && (ToVT == MVT::f64);
+  if (!IsF32ToF64)
+    return false;
+
+  unsigned OperandReg = getRegForValue(Operand);
+  if (OperandReg == 0)
+    return false;
+
+  // The extend emits no instruction: the result is simply mapped to the
+  // register that already holds the source value.
+  UpdateValueMap(I, OperandReg);
+  return true;
+}
+
+// Try to fast-select a floating-point truncate.  Only the f64 -> f32 case
+// is handled; anything else is declined by returning false.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+  Value *Operand = I->getOperand(0);
+  EVT FromVT = TLI.getValueType(Operand->getType(), true);
+  EVT ToVT   = TLI.getValueType(I->getType(), true);
+
+  bool IsF64ToF32 = (FromVT == MVT::f64) && (ToVT == MVT::f32);
+  if (!IsF64ToF32)
+    return false;
+
+  unsigned OperandReg = getRegForValue(Operand);
+  if (OperandReg == 0)
+    return false;
+
+  // Emit an FRSP from the source register into a fresh F4RC register to
+  // round the value to single precision.
+  unsigned RoundedReg = createResultReg(&PPC::F4RCRegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+          RoundedReg)
+    .addReg(OperandReg);
+
+  UpdateValueMap(I, RoundedReg);
+  return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+// Returns the FPR that receives the value, or zero if any step fails.
+// An i32 source is first extended to 64 bits; the 64-bit value is then
+// stored to a fresh 8-byte stack slot and reloaded into an FPR.
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case.  Can use a 4-byte
+// stack slot and 4-byte store/load sequence.  Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+                                     bool IsSigned) {
+
+  // If necessary, extend 32-bit int to 64-bit.  The last argument is
+  // !IsSigned (presumably a zero-extend flag -- confirm against
+  // PPCEmitIntExt).
+  if (SrcVT == MVT::i32) {
+    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+      return 0;
+    SrcReg = TmpReg;
+  }
+
+  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+  Address Addr;
+  Addr.BaseType = Address::FrameIndexBase;
+  Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+  // Store the (possibly extended) 64-bit value from the GPR.
+  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+    return 0;
+
+  // Load the integer value into an FPR.  The default is a full 8-byte
+  // LFD from the start of the slot, which reads back the 64-bit value
+  // that was just stored.
+  unsigned LoadOpc = PPC::LFD;
+
+  // For an i32 source a 4-byte word load from offset 4 of the slot may be
+  // used instead.  The offset is adjusted only when such a word load is
+  // actually selected: if we fall back to LFD (signed source on a
+  // subtarget without LFIWAX), the whole doubleword must be read from the
+  // start of the slot, since an 8-byte load at offset 4 would run past
+  // the end of the 8-byte slot and yield the wrong value.
+  if (SrcVT == MVT::i32) {
+    if (!IsSigned) {
+      LoadOpc = PPC::LFIWZX;
+      Addr.Offset = 4;
+    } else if (PPCSubTarget.hasLFIWAX()) {
+      LoadOpc = PPC::LFIWAX;
+      Addr.Offset = 4;
+    }
+  }
+
+  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+  unsigned ResultReg = 0;
+  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+    return 0;
+
+  return ResultReg;
+}
+
+// Try to fast-select an integer-to-floating-point conversion.  IsSigned
+// distinguishes the signed from the unsigned form.  Returns false when
+// the conversion is not handled here.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+  // The result type must be legal, and either f32 or f64.
+  MVT ResVT;
+  if (!isTypeLegal(I->getType(), ResVT))
+    return false;
+
+  const bool ToSingle = (ResVT == MVT::f32);
+  if (!ToSingle && ResVT != MVT::f64)
+    return false;
+
+  // The operand type must be a simple i8, i16, i32 or i64.
+  Value *Operand = I->getOperand(0);
+  EVT OperandEVT = TLI.getValueType(Operand->getType(), true);
+  if (!OperandEVT.isSimple())
+    return false;
+
+  MVT IntVT = OperandEVT.getSimpleVT();
+  switch (IntVT.SimpleTy) {
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+    break;
+  default:
+    return false;
+  }
+
+  unsigned IntReg = getRegForValue(Operand);
+  if (!IntReg)
+    return false;
+
+  // Without the newer floating-point conversion operations we can lower
+  // neither an unsigned convert nor a convert to single precision.
+  // FIXME: For single precision this is a temporary restriction.  The
+  // newer operations are present only on P7 and A2 server models; without
+  // them a lot of fiddly code is needed to avoid double rounding.  If
+  // necessary, that code can be found in
+  // PPCTargetLowering::LowerINT_TO_FP().
+  if (!PPCSubTarget.hasFPCVT() && (!IsSigned || ToSingle))
+    return false;
+
+  // Widen i8 and i16 inputs to i64 before the move.
+  if (IntVT == MVT::i8 || IntVT == MVT::i16) {
+    unsigned WideReg = createResultReg(&PPC::G8RCRegClass);
+    if (!PPCEmitIntExt(IntVT, IntReg, MVT::i64, WideReg, !IsSigned))
+      return false;
+    IntVT = MVT::i64;
+    IntReg = WideReg;
+  }
+
+  // Get the integer bits into a floating-point register.
+  unsigned BitsReg = PPCMoveToFPReg(IntVT, IntReg, IsSigned);
+  if (!BitsReg)
+    return false;
+
+  // Pick the convert opcode from signedness and result precision.
+  unsigned ConvReg = createResultReg(&PPC::F8RCRegClass);
+  unsigned ConvOpc;
+  if (IsSigned)
+    ConvOpc = ToSingle ? PPC::FCFIDS : PPC::FCFID;
+  else
+    ConvOpc = ToSingle ? PPC::FCFIDUS : PPC::FCFIDU;
+
+  // Emit the convert and record its result for this instruction.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ConvOpc), ConvReg)
+    .addReg(BitsReg);
+
+  UpdateValueMap(I, ConvReg);
+  return true;
+}
+
+// Transfer the floating-point value held in SrcReg to an integer
+// register by way of a stack slot, and return that register (or zero if
+// we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+                                      unsigned SrcReg, bool IsSigned) {
+  // An 8-byte slot aligned on an 8-byte boundary serves both i32 and i64.
+  // If we have STFIWX available, a 4-byte slot would do for i32, but this
+  // being fast-isel we go with the easiest code gen possible.
+  Address Slot;
+  Slot.BaseType = Address::FrameIndexBase;
+  Slot.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+  // Spill the FPR to the slot as an f64.
+  if (!PPCEmitStore(MVT::f64, SrcReg, Slot))
+    return 0;
+
+  // An i32 result is reloaded from a 4-byte offset into the slot so that
+  // we load from the right place.
+  if (VT == MVT::i32)
+    Slot.Offset = 4;
+
+  // If a register is currently assigned to this instruction, its register
+  // class is the one required for the reload; otherwise none is passed.
+  const TargetRegisterClass *LoadRC = 0;
+  if (unsigned Assigned = FuncInfo.ValueMap[I])
+    LoadRC = MRI.getRegClass(Assigned);
+
+  unsigned IntReg = 0;
+  if (!PPCEmitLoad(VT, IntReg, Slot, LoadRC, !IsSigned))
+    return 0;
+
+  return IntReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+// IsSigned distinguishes the signed from the unsigned form.  Handles f32
+// and f64 sources with legal i32 or i64 results; returns false for
+// anything else so that another selector can handle it.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+  MVT DstVT, SrcVT;
+  Type *DstTy = I->getType();
+  if (!isTypeLegal(DstTy, DstVT))
+    return false;
+
+  if (DstVT != MVT::i32 && DstVT != MVT::i64)
+    return false;
+
+  // An unsigned conversion to i64 needs FCTIDUZ, which is one of the
+  // newer floating-point conversion operations.  Unlike the i32 case
+  // there is no older instruction to fall back on, so decline before any
+  // code is emitted when the subtarget lacks those operations.
+  if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget.hasFPCVT())
+    return false;
+
+  Value *Src = I->getOperand(0);
+  Type *SrcTy = Src->getType();
+  if (!isTypeLegal(SrcTy, SrcVT))
+    return false;
+
+  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+    return false;
+
+  unsigned SrcReg = getRegForValue(Src);
+  if (SrcReg == 0)
+    return false;
+
+  // Convert f32 to f64 if necessary.  This is just a meaningless copy
+  // to get the register class right.  COPY_TO_REGCLASS is needed since
+  // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+  if (InRC == &PPC::F4RCRegClass) {
+    unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+      .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+    SrcReg = TmpReg;
+  }
+
+  // Determine the opcode for the conversion, which takes place
+  // entirely within FPRs.
+  unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+  unsigned Opc;
+
+  if (DstVT == MVT::i32) {
+    if (IsSigned)
+      Opc = PPC::FCTIWZ;
+    else
+      // Without the newer conversion operations, an unsigned i32 result
+      // is produced with the signed 64-bit convert instead.
+      Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+  } else {
+    // The unsigned case is only reached when FPCVT is available (see the
+    // check near the top of this function).
+    Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+  }
+
+  // Generate the convert.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+    .addReg(SrcReg);
+
+  // Now move the integer value from a float register to an integer register.
+  unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+  if (IntReg == 0)
+    return false;
+
+  UpdateValueMap(I, IntReg);
+  return true;
+}
+
 // Attempt to fast-select a binary integer operation that isn't already
 // handled automatically.
 bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
@@ -1135,6 +1396,18 @@ bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
       return SelectBranch(I);
     case Instruction::IndirectBr:
       return SelectIndirectBr(I);
+    case Instruction::FPExt:
+      return SelectFPExt(I);
+    case Instruction::FPTrunc:
+      return SelectFPTrunc(I);
+    case Instruction::SIToFP:
+      return SelectIToFP(I, /*IsSigned*/ true);
+    case Instruction::UIToFP:
+      return SelectIToFP(I, /*IsSigned*/ false);
+    case Instruction::FPToSI:
+      return SelectFPToI(I, /*IsSigned*/ true);
+    case Instruction::FPToUI:
+      return SelectFPToI(I, /*IsSigned*/ false);
     case Instruction::Add:
       return SelectBinaryIntOp(I, ISD::ADD);
     case Instruction::Or:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index adba613d6e..aee28302d6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -69,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
   ImmToIdxMap[PPC::STH]  = PPC::STHX;   ImmToIdxMap[PPC::STW]  = PPC::STWX;
   ImmToIdxMap[PPC::STFS] = PPC::STFSX;  ImmToIdxMap[PPC::STFD] = PPC::STFDX;
   ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+  ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;
 
   // 64-bit
   ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
@@ -532,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) {
   default:
     return false;
   case PPC::LWA:
+  case PPC::LWA_32:
   case PPC::LD:
   case PPC::STD:
     return true;
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	2013-08-30 15:18:11 +0000
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>	2013-08-30 15:18:11 +0000
commit	9bc94276e796d644cb425a7c7d38cc44dbf4e9c1 (patch)
tree	d294503b8a5692ef83cd100c8133c4fb39f8e10c /lib/Target/PowerPC
parent	2cc396bfb06a8e4df2fae6450277e7a05ebf6e4b (diff)
download	llvm-9bc94276e796d644cb425a7c7d38cc44dbf4e9c1.tar.gz llvm-9bc94276e796d644cb425a7c7d38cc44dbf4e9c1.tar.bz2 llvm-9bc94276e796d644cb425a7c7d38cc44dbf4e9c1.tar.xz