summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2013-03-31 10:12:51 +0000
committerHal Finkel <hfinkel@anl.gov>2013-03-31 10:12:51 +0000
commit8049ab15e4b638a07d6f230329945c2310eca27b (patch)
tree89ddcf3d670a9c7f4fafe2db694222cabdd48485
parent9ad0f4907b3ba0916a8b6cdb95d298d2ddb7d405 (diff)
downloadllvm-8049ab15e4b638a07d6f230329945c2310eca27b.tar.gz
llvm-8049ab15e4b638a07d6f230329945c2310eca27b.tar.bz2
llvm-8049ab15e4b638a07d6f230329945c2310eca27b.tar.xz
Add the PPC lfiwax instruction
This instruction is available on modern PPC64 CPUs, and is now used to improve the SINT_TO_FP lowering (by eliminating the need for the separate sign extension instruction and decreasing the amount of needed stack space). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178446 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPC.td28
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp43
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h5
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td10
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp1
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp1
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h2
-rw-r--r--test/CodeGen/PowerPC/i32-to-float.ll14
8 files changed, 82 insertions, 22 deletions
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 01cd55ee27..602e33cd29 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -59,6 +59,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
"Enable the fri[mnpz] instructions">;
def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
@@ -80,7 +82,6 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz
// FRE p5 through p7 fre (vs. fres, available since p3)
// FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3)
-// LFIWAX p6, p6x, p7 lfiwax
// LFIWZX p7 lfiwzx
// POPCNTB p5 through p7 popcntb and related instructions
// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates
@@ -133,14 +134,15 @@ def : ProcessorModel<"e5500", PPCE5500Model,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
def : Processor<"a2", PPCA2Itineraries,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
- FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
+ FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"a2q", PPCA2Itineraries,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
- FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeatureQPX]>;
+ FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
+ FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */,
+ FeatureQPX]>;
def : Processor<"pwr3", G5Itineraries,
[DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
FeatureSTFIWX, Feature64Bit]>;
@@ -157,16 +159,18 @@ def : Processor<"pwr5x", G5Itineraries,
def : Processor<"pwr6", G5Itineraries,
[DirectivePwr6, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureLFIWAX, FeatureFPRND, Feature64Bit
+ /*, Feature64BitRegs */]>;
def : Processor<"pwr6x", G5Itineraries,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
- Feature64Bit]>;
+ FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit]>;
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureFPRND, FeatureISEL, FeaturePOPCNTD,
- FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureLFIWAX, FeatureFPRND, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bf31029a1d..12269212af 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4809,20 +4809,43 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
- Op.getOperand(0));
+ SDValue Ld;
+ if (PPCSubTarget.hasLFIWAX()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- // STD the extended value into the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
- MachinePointerInfo(), false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
- // Load the value as a double.
- SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, false, 0);
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other), Ops, 2,
+ MVT::i32, MMO);
+ } else {
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
// FCFID it and return it.
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index bce05a16c6..a924ac4e00 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -242,6 +242,11 @@ namespace llvm {
/// or i32.
LBRX,
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
/// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
/// produces an ADDIS8 instruction that adds the TOC base register to
/// sym@toc@ha.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index d3d7dc66c9..478e6127ab 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -20,6 +20,10 @@ include "PPCInstrFormats.td"
def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
+def SDT_PPClfiwax : SDTypeProfile<1, 1, [ // lfiwax
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
@@ -63,6 +67,8 @@ def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwax,
+ [SDNPHasChain, SDNPMayLoad]>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
@@ -843,6 +849,10 @@ def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
[(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 482a3bc939..67cf13603b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -528,6 +528,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool noImmForm = false;
switch (OpC) {
+ case PPC::LFIWAX:
case PPC::LVEBX:
case PPC::LVEHX:
case PPC::LVEWX:
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index b793c37de8..57e18ed07e 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -39,6 +39,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasQPX(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , HasLFIWAX(false)
, HasFPRND(false)
, HasISEL(false)
, HasPOPCNTD(false)
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index bf5bd844c0..3958bc9c00 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -78,6 +78,7 @@ protected:
bool HasQPX;
bool HasFSQRT;
bool HasSTFIWX;
+ bool HasLFIWAX;
bool HasFPRND;
bool HasISEL;
bool HasPOPCNTD;
@@ -158,6 +159,7 @@ public:
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
bool hasFPRND() const { return HasFPRND; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll
index 0807717e50..bed940c5de 100644
--- a/test/CodeGen/PowerPC/i32-to-float.ll
+++ b/test/CodeGen/PowerPC/i32-to-float.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -14,6 +15,13 @@ entry:
; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]]
; CHECK: frsp 1, [[REG3]]
; CHECK: blr
+
+; CHECK-A2: @foo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid [[REG2:[0-9]+]], [[REG]]
+; CHECK-A2: frsp 1, [[REG2]]
+; CHECK-A2: blr
}
define double @goo(i32 %a) nounwind {
@@ -27,5 +35,11 @@ entry:
; CHECK: lfd [[REG2:[0-9]+]],
; CHECK: fcfid 1, [[REG2]]
; CHECK: blr
+
+; CHECK-A2: @goo
+; CHECK-A2: stw 3,
+; CHECK-A2: lfiwax [[REG:[0-9]+]],
+; CHECK-A2: fcfid 1, [[REG]]
+; CHECK-A2: blr
}