summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Hal Finkel <hfinkel@anl.gov> 2014-03-29 05:29:01 +0000
committer Hal Finkel <hfinkel@anl.gov> 2014-03-29 05:29:01 +0000
commit 44b2b9dc1a6192fda90990ec9eec922e3f8d2049 (patch)
tree 799b084ff01548b0c8e4e2a051363a6a4ac11fde /lib
parent c06afdcb65acd3f1fb28ce6280fed3a2d0db764c (diff)
download llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.gz
llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.bz2
llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.xz
[PowerPC] Add subregister classes for f64 VSX values
We had stored both f64 values and v2f64, etc. values in the VSX registers. This worked, but was suboptimal because we would always spill 16-byte values even though we almost always had scalar 8-byte values. This resulted in an increase in stack-size use, extra memory bandwidth, etc. To fix this, I've added 64-bit subregisters of the Altivec registers, and combined those with the existing scalar floating-point registers to form a class of VSX scalar floating-point registers. The ABI code has also been enhanced to use this register class and some other necessary improvements have been made.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205075 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp 24
-rw-r--r-- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp 26
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp 15
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp 33
-rw-r--r-- lib/Target/PowerPC/PPCInstrVSX.td 97
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp 17
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.h 3
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.td 36
8 files changed, 192 insertions, 59 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 5f0109a1f3..8bb91cf0b9 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -114,6 +114,25 @@ static unsigned VSRegs[64] = {
PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
};
+static unsigned VSFRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
static unsigned CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -479,6 +498,11 @@ public:
Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
}
+ void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+ }
+
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 904f871b81..c4a7544d49 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -112,6 +112,26 @@ static const unsigned VSRegs[] = {
PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
};
+static const unsigned VSFRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
static const unsigned GPRegs[] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -189,6 +209,12 @@ static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, VSRegs);
}
+static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSFRegs);
+}
+
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 6a43ecc250..527430238c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -573,7 +573,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
- addRegisterClass(MVT::f64, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
@@ -2156,7 +2156,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = &PPC::F8RCRegClass;
+ if (PPCSubTarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
@@ -2559,7 +2562,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+ &PPC::VSFRCRegClass :
+ &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
@@ -8506,8 +8511,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
} else if (Constraint == "wc") { // an individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
} else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf" || Constraint == "ws") {
+ Constraint == "wf") {
return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
}
std::pair<unsigned, const TargetRegisterClass*> R =
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0f79901791..939bbdc6cc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -744,6 +744,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// copies are generated, they are close enough to some use that the
// lower-latency form is preferable.
Opc = PPC::XXLOR;
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::XXLORf;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@@ -815,6 +817,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -906,6 +914,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -1638,7 +1650,7 @@ protected:
// The addend and this instruction must be in the same block.
- if (AddendMI->getParent() != MI->getParent())
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
continue;
// The addend must be a full copy within the same register class.
@@ -1646,9 +1658,18 @@ protected:
if (!AddendMI->isFullCopy())
continue;
- if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
- MRI.getRegClass(AddendMI->getOperand(1).getReg()))
- continue;
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
// In theory, there could be other uses of the addend copy before this
// fma. We could deal with this, but that would require additional
@@ -1678,8 +1699,8 @@ protected:
OtherProdOp = 2;
}
- // If there are no killed product operands, then this transformation is
- // likely not profitable.
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
if (!KilledProdOp)
continue;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 1ece55977a..14f2e4636c 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -18,6 +18,13 @@ def vsrc : RegisterOperand<VSRC> {
let ParserMatchClass = PPCRegVSRCAsmOperand;
}
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+ let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -41,7 +48,7 @@ let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1, canFoldAsLoad = 1 in {
def LXSDX : XForm_1<31, 588,
- (outs vsrc:$XT), (ins memrr:$src),
+ (outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
@@ -62,7 +69,7 @@ let Uses = [RM] in {
// Store indexed instructions
let mayStore = 1 in {
def STXSDX : XX1Form<31, 716,
- (outs), (ins vsrc:$XT, memrr:$dst),
+ (outs), (ins vsfrc:$XT, memrr:$dst),
"stxsdx $XT, $dst", IIC_LdStSTFD,
[(store f64:$XT, xoaddr:$dst)]>;
@@ -79,11 +86,11 @@ let Uses = [RM] in {
// Add/Mul Instructions
let isCommutable = 1 in {
def XSADDDP : XX3Form<60, 32,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsadddp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
def XSMULDP : XX3Form<60, 48,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsmuldp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
@@ -110,7 +117,7 @@ let Uses = [RM] in {
// Subtract Instructions
def XSSUBDP : XX3Form<60, 40,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xssubdp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
@@ -127,14 +134,14 @@ let Uses = [RM] in {
let BaseName = "XSMADDADP" in {
let isCommutable = 1 in
def XSMADDADP : XX3Form<60, 33,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmaddadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
def XSMADDMDP : XX3Form<60, 41,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -143,14 +150,14 @@ let Uses = [RM] in {
let BaseName = "XSMSUBADP" in {
let isCommutable = 1 in
def XSMSUBADP : XX3Form<60, 49,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmsubadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
def XSMSUBMDP : XX3Form<60, 57,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -159,14 +166,14 @@ let Uses = [RM] in {
let BaseName = "XSNMADDADP" in {
let isCommutable = 1 in
def XSNMADDADP : XX3Form<60, 161,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
def XSNMADDMDP : XX3Form<60, 169,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -175,14 +182,14 @@ let Uses = [RM] in {
let BaseName = "XSNMSUBADP" in {
let isCommutable = 1 in
def XSNMSUBADP : XX3Form<60, 177,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
let IsVSXFMAAlt = 1 in
def XSNMSUBMDP : XX3Form<60, 185,
- (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
"xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
@@ -318,28 +325,28 @@ let Uses = [RM] in {
// Division Instructions
def XSDIVDP : XX3Form<60, 56,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsdivdp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
def XSSQRTDP : XX2Form<60, 75,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xssqrtdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fsqrt f64:$XB))]>;
def XSREDP : XX2Form<60, 90,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsredp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfre f64:$XB))]>;
def XSRSQRTEDP : XX2Form<60, 74,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrsqrtedp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
def XSTDIVDP : XX3Form_1<60, 61,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xstdivdp $crD, $XA, $XB", IIC_VecFP, []>;
def XSTSQRTDP : XX2Form_1<60, 106,
- (outs crrc:$crD), (ins vsrc:$XB),
+ (outs crrc:$crD), (ins vsfrc:$XB),
"xstsqrtdp $crD, $XB", IIC_VecFP, []>;
def XVDIVDP : XX3Form<60, 120,
@@ -394,10 +401,10 @@ let Uses = [RM] in {
// Compare Instructions
def XSCMPODP : XX3Form_1<60, 43,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xscmpodp $crD, $XA, $XB", IIC_VecFPCompare, []>;
def XSCMPUDP : XX3Form_1<60, 35,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xscmpudp $crD, $XA, $XB", IIC_VecFPCompare, []>;
defm XVCMPEQDP : XX3Form_Rcr<60, 99,
@@ -421,19 +428,19 @@ let Uses = [RM] in {
// Move Instructions
def XSABSDP : XX2Form<60, 345,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsabsdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fabs f64:$XB))]>;
def XSNABSDP : XX2Form<60, 361,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsnabsdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fneg (fabs f64:$XB)))]>;
def XSNEGDP : XX2Form<60, 377,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsnegdp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fneg f64:$XB))]>;
def XSCPSGNDP : XX3Form<60, 176,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xscpsgndp $XT, $XA, $XB", IIC_VecFP,
[(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
@@ -476,33 +483,33 @@ let Uses = [RM] in {
// Conversion Instructions
def XSCVDPSP : XX2Form<60, 265,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsp $XT, $XB", IIC_VecFP, []>;
def XSCVDPSXDS : XX2Form<60, 344,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctidz f64:$XB))]>;
def XSCVDPSXWS : XX2Form<60, 88,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctiwz f64:$XB))]>;
def XSCVDPUXDS : XX2Form<60, 328,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctiduz f64:$XB))]>;
def XSCVDPUXWS : XX2Form<60, 72,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
def XSCVSPDP : XX2Form<60, 329,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvspdp $XT, $XB", IIC_VecFP, []>;
def XSCVSXDDP : XX2Form<60, 376,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvsxddp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfcfid f64:$XB))]>;
def XSCVUXDDP : XX2Form<60, 360,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvuxddp $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfcfidu f64:$XB))]>;
@@ -568,23 +575,23 @@ let Uses = [RM] in {
// Rounding Instructions
def XSRDPI : XX2Form<60, 73,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
[(set f64:$XT, (frnd f64:$XB))]>;
def XSRDPIC : XX2Form<60, 107,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fnearbyint f64:$XB))]>;
def XSRDPIM : XX2Form<60, 121,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpim $XT, $XB", IIC_VecFP,
[(set f64:$XT, (ffloor f64:$XB))]>;
def XSRDPIP : XX2Form<60, 105,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpip $XT, $XB", IIC_VecFP,
[(set f64:$XT, (fceil f64:$XB))]>;
def XSRDPIZ : XX2Form<60, 89,
- (outs vsrc:$XT), (ins vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpiz $XT, $XB", IIC_VecFP,
[(set f64:$XT, (ftrunc f64:$XB))]>;
@@ -633,10 +640,10 @@ let Uses = [RM] in {
// Max/Min Instructions
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
def XSMINDP : XX3Form<60, 168,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
"xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
def XVMAXDP : XX3Form<60, 224,
@@ -676,6 +683,10 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlor $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XXLORf: XX3Form<60, 146,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
def XXLXOR : XX3Form<60, 154,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlxor $XT, $XA, $XB", IIC_VecGeneral,
@@ -724,12 +735,12 @@ def : InstAlias<"xxswapd $XT, $XB",
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (COPY_TO_REGCLASS $A, VSRC))>;
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
def : Pat<(f64 (vector_extract v2f64:$S, 0)),
- (f64 (COPY_TO_REGCLASS $S, VSRC))>;
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
def : Pat<(f64 (vector_extract v2f64:$S, 1)),
- (f64 (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSRC))>;
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
// Additional fnmsub patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index c68e922355..78c5a124fb 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -229,16 +229,33 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
+ case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
case PPC::VSHRCRegClassID:
return 32 - DefaultSafety;
case PPC::VSRCRegClassID:
+ case PPC::VSFRCRegClassID:
return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
}
+const TargetRegisterClass*
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+ if (Subtarget.hasVSX()) {
+ // With VSX, we can inflate various sub-register classes to the full VSX
+ // register set.
+
+ if (RC == &PPC::F8RCRegClass)
+ return &PPC::VSFRCRegClass;
+ else if (RC == &PPC::VRRCRegClass)
+ return &PPC::VSRCRegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 4871834c26..7a8c2aa475 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -40,6 +40,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index dab222b7bb..e11f7d4a80 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -49,9 +49,19 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
-// VR - One of the 32 128-bit vector registers
-class VR<bits<5> num, string n> : PPCReg<n> {
+// VF - One of the 32 64-bit floating-point subregisters of the vector
+// registers (used by VSX).
+class VF<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
+ let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 0;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
}
// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
@@ -99,9 +109,14 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+ def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
// Vector registers
foreach Index = 0-31 in {
- def V#Index : VR<Index, "v"#Index>,
+ def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
@@ -235,18 +250,27 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
// VSX register classes (the allocation order mirrors that of the corresponding
// subregister classes).
-def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128,
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
(add (sequence "VSL%u", 0, 13),
(sequence "VSL%u", 31, 14))>;
-def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128,
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
(add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
VSH22, VSH21, VSH20)>;
-def VSRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64,v2i64], 128,
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
(add VSLRC, VSHRC)>;
+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC : RegisterClass<"PPC", [f64], 64,
+ (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+ VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+ VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+ VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+ VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,