author    Hal Finkel <hfinkel@anl.gov>  2014-03-13 07:58:58 +0000
committer Hal Finkel <hfinkel@anl.gov>  2014-03-13 07:58:58 +0000
commit    ab849adec4467646aaf25239dc78f47fe5076479
tree      0bb1eb26f4ea30d566593283719045e2075ff7b5
parent    79c15b23c9c67f306d4d4514b46b2d006d2049d4
[PowerPC] Initial support for the VSX instruction set
VSX is an ISA extension supported on the POWER7 and later cores that enhances floating-point vector and scalar capabilities. Among other things, this adds <2 x double> support and generally helps to reduce register pressure.

The interesting part of this ISA feature is the register configuration: there are 64 new 128-bit vector registers, the first 32 of which are super-registers of the existing 32 scalar floating-point registers, and the second 32 of which overlap with the 32 Altivec vector registers. This makes things like vector insertion and extraction tricky: they can be free, but only if we force a restriction to the right register subclass when needed. A new "minipass" PPCVSXCopy takes care of this (although it could do a more-optimal job of it; see the comment about unnecessary copies below).

Please note that, currently, VSX is not enabled by default when targeting anything because it is not yet ready for that. The assembler and disassembler are fully implemented and tested. However:

- CodeGen support causes miscompiles; test-suite runtime failures:
    MultiSource/Benchmarks/FreeBench/distray/distray
    MultiSource/Benchmarks/McCat/08-main/main
    MultiSource/Benchmarks/Olden/voronoi/voronoi
    MultiSource/Benchmarks/mafft/pairlocalalign
    MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4
    SingleSource/Benchmarks/CoyoteBench/almabench
    SingleSource/Benchmarks/Misc/matmul_f64_4x4

- The lowering currently falls back to using Altivec instructions far more than it should. Worse, there are some things that are scalarized through the stack that shouldn't be.

- A lot of unnecessary copies make it past the optimizers, and this needs to be fixed.

- Many more regression tests are needed.

Normally, I'd fix these things prior to committing, but there are some students and other contributors who would like to work on this, and so it makes sense to move this development process upstream where it can be subject to the regular code-review procedures.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203768 91177308-0d34-0410-b5e6-96231b3b80d8
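As a rough, self-contained illustration of the register overlap described above (this helper is made up for the example and is not code from the patch), the aliasing works out as follows:

#include <cstdio>
#include <string>

// VSX register numbers 0-31 are super-registers of the scalar FPRs
// f0-f31 (the scalar value lives in the high 64 bits); numbers 32-63
// alias the Altivec registers v0-v31. This is why a scalar double, a
// <4 x float>, and a <2 x double> can all share one 64-entry register
// file. VSX is not enabled by default; with llc it can be requested
// explicitly, e.g. via -mattr=+vsx.
static std::string aliasedRegister(unsigned VSXReg) {
  return VSXReg < 32 ? "f" + std::to_string(VSXReg)
                     : "v" + std::to_string(VSXReg - 32);
}

int main() {
  std::printf("vs5  overlaps %s\n", aliasedRegister(5).c_str());  // f5
  std::printf("vs40 overlaps %s\n", aliasedRegister(40).c_str()); // v8
  return 0;
}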
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp31
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp26
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp12
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--lib/Target/PowerPC/PPC.h1
-rw-r--r--lib/Target/PowerPC/PPC.td3
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp5
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp38
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp65
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td167
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp199
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td9
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td679
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp4
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td43
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp1
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h1
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp2
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp10
-rw-r--r--test/CodeGen/PowerPC/vsx.ll46
-rw-r--r--test/MC/Disassembler/PowerPC/vsx.txt452
-rw-r--r--test/MC/PowerPC/vsx.s298
22 files changed, 2071 insertions, 23 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 21af0374a0..5f0109a1f3 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -95,6 +95,25 @@ static unsigned VRegs[32] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static unsigned VSRegs[64] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
static unsigned CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -345,6 +364,11 @@ public:
return (unsigned) Imm.Val;
}
+ unsigned getVSReg() const {
+ assert(isVSRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@@ -362,6 +386,7 @@ public:
bool isToken() const { return Kind == Token; }
bool isImm() const { return Kind == Immediate || Kind == Expression; }
+ bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
@@ -382,6 +407,7 @@ public:
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); }
bool isCCRegNumber() const { return (Kind == Expression
&& isUInt<3>(getExprCRVal())) ||
(Kind == Immediate
@@ -448,6 +474,11 @@ public:
Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
}
+ void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
+ }
+
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
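The only parser-side novelty above is the operand predicate: ordinary register numbers validate as 5-bit immediates (isUInt<5>) while VSX register numbers get the 6-bit check (isUInt<6>), and the 64-entry VSRegs table is laid out so indices 0-31 select the VSL (FPR-overlapping) bank and 32-63 the VSH (Altivec-overlapping) bank. A minimal stand-alone model of that predicate pair, with the isUInt<N> templates spelled out by hand:

#include <cstdint>

// Stand-ins for LLVM's isUInt<5>/isUInt<6> as used by isRegNumber and
// the new isVSRegNumber.
static bool isRegNumber(int64_t Imm)   { return Imm >= 0 && Imm < 32; }
static bool isVSRegNumber(int64_t Imm) { return Imm >= 0 && Imm < 64; }

int main() {
  // 34 is a valid VSX register number (it lands in the VSH half of
  // VSRegs) but not a valid GPR/FPR/VR number.
  return (isVSRegNumber(34) && !isRegNumber(34)) ? 0 : 1;
}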
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 23866dc70b..904f871b81 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -92,6 +92,26 @@ static const unsigned VRegs[] = {
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static const unsigned VSRegs[] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+
static const unsigned GPRegs[] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -163,6 +183,12 @@ static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, VRegs);
}
+static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRegs);
+}
+
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index b1c480b4aa..dc54b52a24 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -199,6 +199,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 3 && "Invalid u2imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
@@ -316,7 +323,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
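This printer and PPCAsmPrinter.cpp below share the same prefix-stripping rule; the new wrinkle is that a 'v' followed by 's' now drops two characters, so "vs34" prints as "34" when register prefixes are suppressed. The logic, extracted into a runnable form:

#include <cassert>
#include <cstring>

// Mirrors the patched stripRegisterPrefix: "r3" -> "3", "f1" -> "1",
// "v2" -> "2", "vs34" -> "34", "cr7" -> "7".
static const char *stripRegisterPrefix(const char *RegName) {
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'v':
    if (RegName[1] == 's')
      return RegName + 2; // VSX registers: skip the "vs" prefix
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
  }
  return RegName;
}

int main() {
  assert(std::strcmp(stripRegisterPrefix("vs34"), "34") == 0);
  assert(std::strcmp(stripRegisterPrefix("v2"), "2") == 0);
  assert(std::strcmp(stripRegisterPrefix("cr6"), "6") == 0);
  return 0;
}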
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8a4c03d645..4d1df78ee4 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -43,7 +43,7 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = 0);
-
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f0d5af2446..2fad831241 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -35,6 +35,7 @@ namespace llvm {
FunctionPass *createPPCCTRLoopsVerify();
#endif
FunctionPass *createPPCEarlyReturnPass();
+ FunctionPass *createPPCVSXCopyPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 2a9f65a6b1..5cb7eca4a4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -90,7 +90,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
- "Enable VSX instructions">;
+ "Enable VSX instructions",
+ [FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 2a2ee25c5a..daf90c86ef 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -129,7 +129,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 65ea49a8e2..5b09588e0f 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -572,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
Opc = PPC::FCMPUS;
} else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
- Opc = PPC::FCMPUD;
+ Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
@@ -640,7 +640,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
+static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
+ bool HasVSX) {
switch (CC) {
case ISD::SETEQ:
case ISD::SETUEQ:
@@ -654,7 +655,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
return PPC::VCMPEQUW;
// v4f32 != v4f32 could be translate to unordered not equal
else if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
break;
case ISD::SETLT:
case ISD::SETGT:
@@ -667,7 +670,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
else if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
break;
case ISD::SETULT:
case ISD::SETUGT:
@@ -682,17 +687,23 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
break;
case ISD::SETOEQ:
if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
break;
case ISD::SETOLT:
case ISD::SETOGT:
case ISD::SETOLE:
if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
break;
case ISD::SETOGE:
if (VecVT == MVT::v4f32)
- return PPC::VCMPGEFP;
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
break;
default:
break;
@@ -703,7 +714,7 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
// getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
+static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
switch (VecVT) {
case MVT::v16i8:
return PPC::VCMPEQUB;
@@ -712,13 +723,14 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
case MVT::v4i32:
return PPC::VCMPEQUW;
case MVT::v4f32:
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ case MVT::v2f64:
+ return PPC::XVCMPEQDP;
default:
llvm_unreachable("Invalid integer vector compare condition");
}
}
-
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
@@ -808,7 +820,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC);
+ unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX());
switch (CC) {
case ISD::SETEQ:
@@ -839,7 +851,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
}
@@ -848,7 +860,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETOLE:
case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
}
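The selection changes above extend the existing Altivec compare table rather than replacing it: with VSX available, single-precision vector compares switch to the xvcmp*sp forms, and <2 x double> compares become selectable via xvcmp*dp (the v2f64 rows are only reachable once VSX has made that type legal in the first place). A condensed stand-alone model of the ordered floating-point rows of getVCmpInst, using mnemonic strings in place of opcode enums:

#include <cstdio>

enum VecTy { v4f32, v2f64 };
enum Cond  { SETOEQ, SETOGT, SETOGE };

// Condensed model of getVCmpInst for ordered FP compares: Altivec only
// covers v4f32; VSX upgrades those and adds the v2f64 rows.
static const char *vcmpInst(VecTy VT, Cond CC, bool HasVSX) {
  if (VT == v2f64) { // only reachable when VSX is enabled
    switch (CC) {
    case SETOEQ: return "xvcmpeqdp";
    case SETOGT: return "xvcmpgtdp";
    case SETOGE: return "xvcmpgedp";
    }
  }
  switch (CC) {      // v4f32
  case SETOEQ: return HasVSX ? "xvcmpeqsp" : "vcmpeqfp";
  case SETOGT: return HasVSX ? "xvcmpgtsp" : "vcmpgtfp";
  case SETOGE: return HasVSX ? "xvcmpgesp" : "vcmpgefp";
  }
  return "?";
}

int main() {
  std::printf("%s\n", vcmpInst(v4f32, SETOGT, false)); // vcmpgtfp
  std::printf("%s\n", vcmpInst(v4f32, SETOGT, true));  // xvcmpgtsp
  std::printf("%s\n", vcmpInst(v2f64, SETOGE, true));  // xvcmpgedp
  return 0;
}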
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 680112da9a..a35c83c19d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -505,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath) {
+ if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -532,6 +532,40 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget->hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ addRegisterClass(MVT::f64, &PPC::VSRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+ }
}
if (Subtarget->has64BitSupport()) {
@@ -2094,6 +2128,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
+ case MVT::v2f64:
RC = &PPC::VRRCRegClass;
break;
}
@@ -2340,7 +2375,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
+ ObjectVT==MVT::v2f64) {
if (isVarArg) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT,
@@ -2497,6 +2533,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
@@ -2959,7 +2996,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
+ ArgVT==MVT::v2f64) {
if (!isVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
@@ -4143,6 +4181,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
@@ -6917,7 +6956,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -6979,7 +7019,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -7891,6 +7932,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ // FIXME: Update this for VSX!
(VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
@@ -8314,6 +8356,9 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
}
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -8335,6 +8380,13 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
// Look at the constraint type.
if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
switch (*constraint) {
default:
@@ -8393,6 +8445,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
} else if (Constraint == "wc") { // an individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
}
std::pair<unsigned, const TargetRegisterClass*> R =
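For reference, the new constraint letters follow GCC's PowerPC conventions: "wa" is any VSX register, "wd" and "wf" prefer VSX registers holding v2f64 and v4f32 values, and "ws" is a VSX register holding a scalar double; all four currently map to VSRC. A hypothetical use from C (the vector typedef and the %x operand modifier, which prints the full 6-bit vs-register number, are assumed from the GCC convention; this patch only wires up the constraint-to-register-class mapping):

// Compile for powerpc64 with VSX enabled; illustrative only.
typedef double v2f64 __attribute__((vector_size(16)));

v2f64 vsx_add(v2f64 a, v2f64 b) {
  v2f64 r;
  __asm__("xvadddp %x0, %x1, %x2" : "=wa"(r) : "wa"(a), "wa"(b));
  return r;
}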
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 9c7b17a4c2..7fed2c65da 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -593,6 +593,173 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+// XX*-Form (VSX)
+class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-13} = 0;
+ let Inst{14-15} = D;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = 0;
+ let Inst{22-23} = D;
+ let Inst{24-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = RC;
+ let Inst{22-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-25} = XC{4-0};
+ let Inst{26-27} = xo;
+ let Inst{28} = XC{5};
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
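All of the XX*-forms above share one encoding trick: the low five bits of each 6-bit VSX register number occupy the traditional register fields, and the sixth (high) bit is tucked into the tail of the instruction word. A stand-alone encoder for the XX3Form layout, taken directly from the bit assignments above (PowerPC numbers bits big-endian, so bit 0 is the most significant):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Place Val into PPC big-endian bit positions [First, Last] of a
// 32-bit word (bit 0 is the MSB). Val must fit in Last - First + 1 bits.
static uint32_t field(uint32_t Val, unsigned First, unsigned Last) {
  assert(First <= Last && Last <= 31 && "bad field bounds");
  return Val << (31 - Last);
}

// XX3Form: Inst{0-5}=opcode, {6-10}=XT{4-0}, {11-15}=XA{4-0},
// {16-20}=XB{4-0}, {21-28}=xo, {29}=XA{5}, {30}=XB{5}, {31}=XT{5}.
static uint32_t encodeXX3(uint32_t Opcode, uint32_t XO,
                          uint32_t XT, uint32_t XA, uint32_t XB) {
  return field(Opcode, 0, 5)    |
         field(XT & 31, 6, 10)  |
         field(XA & 31, 11, 15) |
         field(XB & 31, 16, 20) |
         field(XO, 21, 28)      |
         field(XA >> 5, 29, 29) |
         field(XB >> 5, 30, 30) |
         field(XT >> 5, 31, 31);
}

int main() {
  // xsadddp vs34, vs35, vs36 (major opcode 60, xo 32): the high bit of
  // each register number lands in bits 29-31.
  std::printf("0x%08x\n", encodeXX3(60, 32, 34, 35, 36));
  return 0;
}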
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d9907a0baf..408dd06b22 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -165,6 +165,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CR:
case PPC::RESTORE_CRBIT:
case PPC::LVX:
+ case PPC::LXVD2X:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -190,6 +191,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CR:
case PPC::SPILL_CRBIT:
case PPC::STVX:
+ case PPC::STXVD2X:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -655,6 +657,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
+ // We can end up with self copies and similar things as a result of VSX copy
+ // legalization. Promote (or just ignore) them here.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (PPC::F8RCRegClass.contains(DestReg) &&
+ PPC::VSLRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (SrcReg == SuperReg)
+ return;
+
+ DestReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(DestReg) &&
+ PPC::VSHRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (SrcReg == SuperReg)
+ return;
+
+ DestReg = SuperReg;
+ } else if (PPC::F8RCRegClass.contains(SrcReg) &&
+ PPC::VSLRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (DestReg == SuperReg)
+ return;
+
+ SrcReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(SrcReg) &&
+ PPC::VSHRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (DestReg == SuperReg)
+ return;
+
+ SrcReg = SuperReg;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -666,6 +709,14 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::MCRF;
else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::VOR;
+ else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
+ // FIXME: There are really two different ways this can be done, and we
+ // should pick the better one depending on the situation:
+ // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
+ // issue in VSU pipeline 0.
+ // 2. xvmovdp/xvmovsp: This has higher latency (on the P7), 6 cycles, but
+ // can go to either pipeline.
+ Opc = PPC::XXLOR;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@@ -731,6 +782,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -818,6 +875,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -1485,6 +1546,144 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns");
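What the new pass actually does, in schematic form: a full COPY between a VSX virtual register and a non-VSX one is rewritten so the register allocator sees an explicit subregister relationship (sub_64 for scalar FPR values, sub_128 for Altivec values) instead of a cross-bank move, which is what lets such copies become free. The pseudo-MIR in the comments below is illustrative, and the helper merely restates the subclass choice processBlock makes:

#include <cstdio>

// Before: %vsx = COPY %f64val            (cross-class full copy)
// After:  %tmp:VSLRC = SUBREG_TO_REG 1, %f64val, sub_64
//         %vsx = COPY %tmp
// (with VSHRC/sub_128 instead when the other side is an Altivec VR)
enum SubRegIdx { sub_64, sub_128 };

static SubRegIdx vsxCopySubReg(bool OtherSideIsAltivec) {
  return OtherSideIsAltivec ? sub_128 : sub_64;
}

int main() {
  std::printf("FPR <-> VSX copies use subreg %d (sub_64)\n",
              vsxCopySubReg(false));
  std::printf("VR  <-> VSX copies use subreg %d (sub_128)\n",
              vsxCopySubReg(true));
  return 0;
}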
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 14c70a23b0..deb8b4617e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -412,6 +412,14 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -2431,6 +2439,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
new file mode 100644
index 0000000000..cfc526ef80
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -0,0 +1,679 @@
+//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VSX extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>;
+ let Defs = [CR6] in
+ def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT;
+ }
+}
+
+def HasVSX : Predicate<"PPCSubTarget.hasVSX()">;
+let Predicates = [HasVSX] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
+let Uses = [RM] in {
+
+ // Load indexed instructions
+ let mayLoad = 1, canFoldAsLoad = 1 in {
+ def LXSDX : XForm_1<31, 588,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVD2X : XForm_1<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVDSX : XForm_1<31, 332,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+ // TODO: match load + splat to lxvdsx.
+
+ def LXVW4X : XForm_1<31, 780,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvw4x $XT, $src", IIC_LdStLFD,
+ [(set v4f32:$XT, (load xoaddr:$src))]>;
+ }
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def STXSDX : XX1Form<31, 716,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxsdx $XT, $dst", IIC_LdStSTFD,
+ [(store f64:$XT, xoaddr:$dst)]>;
+
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ def STXVW4X : XX1Form<31, 908,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ [(store v4f32:$XT, xoaddr:$dst)]>;
+ }
+
+ // Add/Mul Instructions
+ let isCommutable = 1 in {
+ def XSADDDP : XX3Form<60, 32,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ def XSMULDP : XX3Form<60, 48,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+
+ def XVADDDP : XX3Form<60, 96,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+
+ def XVADDSP : XX3Form<60, 64,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+
+ def XVMULDP : XX3Form<60, 112,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+
+ def XVMULSP : XX3Form<60, 80,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ }
+
+ // Subtract Instructions
+ def XSSUBDP : XX3Form<60, 40,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xssubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+
+ def XVSUBDP : XX3Form<60, 104,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ def XVSUBSP : XX3Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+
+ // FMA Instructions
+ def XSMADDADP : XX3Form<60, 33,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSMADDMDP : XX3Form<60, 41,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ // TODO: Select between these based first on whether one of the operands has
+ // no further uses. We probably want to do this after scheduling but before
+ // register allocation.
+
+ def XSMSUBADP : XX3Form<60, 49,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSMSUBMDP : XX3Form<60, 57,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XSNMADDADP : XX3Form<60, 161,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSNMADDMDP : XX3Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XSNMSUBADP : XX3Form<60, 177,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSNMSUBMDP : XX3Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMADDADP : XX3Form<60, 97,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMADDMDP : XX3Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMADDASP : XX3Form<60, 65,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMADDMSP : XX3Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMSUBADP : XX3Form<60, 113,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMSUBMDP : XX3Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMSUBASP : XX3Form<60, 81,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMSUBMSP : XX3Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMADDADP : XX3Form<60, 225,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMADDMDP : XX3Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMADDASP : XX3Form<60, 193,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMADDMSP : XX3Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMSUBADP : XX3Form<60, 241,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMSUBMDP : XX3Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMSUBASP : XX3Form<60, 209,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMSUBMSP : XX3Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ // Division Instructions
+ def XSDIVDP : XX3Form<60, 56,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsdivdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ def XSSQRTDP : XX2Form<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xssqrtdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsqrt f64:$XB))]>;
+
+ def XSREDP : XX2Form<60, 90,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsredp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfre f64:$XB))]>;
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ def XSTDIVDP : XX3Form_1<60, 61,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xstdivdp $crD, $XA, $XB", IIC_VecFP, []>;
+ def XSTSQRTDP : XX2Form_1<60, 106,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xstsqrtdp $crD, $XB", IIC_VecFP, []>;
+
+ def XVDIVDP : XX3Form<60, 120,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ def XVDIVSP : XX3Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+
+ def XVSQRTDP : XX2Form<60, 203,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ def XVSQRTSP : XX2Form<60, 139,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_VecFP, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_VecFP, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_VecFP, []>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_VecFP, []>;
+
+ def XVREDP : XX2Form<60, 218,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvredp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
+ def XVRESP : XX2Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvresp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
+
+ def XVRSQRTEDP : XX2Form<60, 202,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
+ def XVRSQRTESP : XX2Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
+
+ // Compare Instructions
+ def XSCMPODP : XX3Form_1<60, 43,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xscmpodp $crD, $XA, $XB", IIC_VecFPCompare, []>;
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_VecFPCompare, []>;
+
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPEQSP : XX3Form_Rcr<60, 67,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGEDP : XX3Form_Rcr<60, 115,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGESP : XX3Form_Rcr<60, 83,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTDP : XX3Form_Rcr<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTSP : XX3Form_Rcr<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+
+ // Move Instructions
+ def XSABSDP : XX2Form<60, 345,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fabs f64:$XB))]>;
+ def XSNABSDP : XX2Form<60, 361,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ def XSNEGDP : XX2Form<60, 377,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsnegdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg f64:$XB))]>;
+ def XSCPSGNDP : XX3Form<60, 176,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
+
+ def XVABSDP : XX2Form<60, 473,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fabs v2f64:$XB))]>;
+
+ def XVABSSP : XX2Form<60, 409,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fabs v4f32:$XB))]>;
+
+ def XVCPSGNDP : XX3Form<60, 240,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
+ def XVCPSGNSP : XX3Form<60, 208,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
+
+ def XVNABSDP : XX2Form<60, 489,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
+ def XVNABSSP : XX2Form<60, 425,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
+
+ def XVNEGDP : XX2Form<60, 505,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg v2f64:$XB))]>;
+ def XVNEGSP : XX2Form<60, 441,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg v4f32:$XB))]>;
+
+ // Conversion Instructions
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXDS : XX2Form<60, 344,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXWS : XX2Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPUXDS : XX2Form<60, 328,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPUXWS : XX2Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP, []>;
+ def XSCVSPDP : XX2Form<60, 329,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvspdp $XT, $XB", IIC_VecFP, []>;
+ def XSCVSXDDP : XX2Form<60, 376,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvsxddp $XT, $XB", IIC_VecFP, []>;
+ def XSCVUXDDP : XX2Form<60, 360,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvuxddp $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXDS : XX2Form<60, 472,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXWS : XX2Form<60, 216,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXDS : XX2Form<60, 456,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXWS : XX2Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVSPDP : XX2Form<60, 457,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXDS : XX2Form<60, 408,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXWS : XX2Form<60, 152,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXDS : XX2Form<60, 392,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXWS : XX2Form<60, 136,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDDP : XX2Form<60, 504,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxddp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDSP : XX2Form<60, 440,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWSP : XX2Form<60, 184,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDDP : XX2Form<60, 488,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxddp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDSP : XX2Form<60, 424,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWDP : XX2Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+
+ // Rounding Instructions
+ def XSRDPI : XX2Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpi $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (frnd f64:$XB))]>;
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XSRDPIM : XX2Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpim $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ffloor f64:$XB))]>;
+ def XSRDPIP : XX2Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpip $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fceil f64:$XB))]>;
+ def XSRDPIZ : XX2Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpiz $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ftrunc f64:$XB))]>;
+
+ def XVRDPI : XX2Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpi $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRDPIM : XX2Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpim $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ def XVRDPIP : XX2Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpip $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ def XVRDPIZ : XX2Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpiz $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+
+ def XVRSPI : XX2Form<60, 137,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspi $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ def XVRSPIM : XX2Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspim $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ def XVRSPIP : XX2Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspip $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ def XVRSPIZ : XX2Form<60, 153,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspiz $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+
+ // Max/Min Instructions
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+} // Uses = [RM]
+
+ // Logical Instructions
+ def XXLAND : XX3Form<60, 130,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxland $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLANDC : XX3Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlandc $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLNOR : XX3Form<60, 162,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLOR : XX3Form<60, 146,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLXOR : XX3Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlxor $XT, $XA, $XB", IIC_VecGeneral, []>;
+
+ // Permutation Instructions
+ def XXMRGHW : XX3Form<60, 18,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
+ def XXMRGLW : XX3Form<60, 50,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
+
+ def XXPERMDI : XX3Form_2<60, 10,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ def XXSEL : XX4Form<60, 3,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+ "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
+
+ def XXSLDWI : XX3Form_2<60, 2,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+} // neverHasSideEffects
+} // AddedComplexity
+
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
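The aliases above all denote xxpermdi with a fixed DM field: the high bit of DM selects which doubleword of $XA becomes result doubleword 0, and the low bit selects which doubleword of $XB becomes result doubleword 1, so xxswapd is xxpermdi with DM=2 and the splats use DM=0/3. At the IR level these correspond to two-element shuffles; a hedged sketch of that correspondence (the patch does not yet claim to select XXPERMDI for shuffles, so this only documents the intended mapping):

    define <2 x double> @swapd(<2 x double> %a) {
    entry:
      ; xxswapd: exchange the two doublewords (xxpermdi ..., 2).
      %r = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
      ret <2 x double> %r
    }

    define <2 x double> @spltd0(<2 x double> %a) {
    entry:
      ; xxspltd ..., 0: splat doubleword 0 (xxpermdi ..., 0).
      %r = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
      ret <2 x double> %r
    }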
+
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), $A, sub_64)>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (EXTRACT_SUBREG (v2f64 (COPY_TO_REGCLASS $S, VSLRC)), sub_64)>;
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (EXTRACT_SUBREG (v2f64 (COPY_TO_REGCLASS (XXPERMDI $S, $S, 3),
+ VSLRC)), sub_64)>;
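These two patterns encode the register-configuration point from the commit message: a scalar f64 already lives in doubleword 0 of its VSX super-register, so extracting element 0 is only a subregister copy, while element 1 costs one xxpermdi (a doubleword swap) first. An illustrative IR pair (function names are mine; the expected code reflects these patterns only):

    define double @extract0(<2 x double> %v) {
    entry:
      ; Element 0 is free: EXTRACT_SUBREG of the VSX register.
      %e = extractelement <2 x double> %v, i32 0
      ret double %e
    }

    define double @extract1(<2 x double> %v) {
    entry:
      ; Element 1 needs an xxpermdi (DM=3) before the subregister copy.
      %e = extractelement <2 x double> %v, i32 1
      ret double %e
    }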
+
+// Additional fnmsub patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
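These patterns catch fma calls in which either multiplicand is negated, using the identity -a*c + b == -(a*c - b) so the negated-multiply-subtract instructions apply. A minimal IR sketch of an input the first scalar pattern should match (fneg is spelled as an fsub from -0.0 here; names are illustrative):

    declare double @llvm.fma.f64(double, double, double)

    define double @fnmsub(double %a, double %b, double %c) {
    entry:
      ; fma(-a, c, b) == -(a*c - b); expected selection: xsnmsubadp.
      %na = fsub double -0.000000e+00, %a
      %r = call double @llvm.fma.f64(double %na, double %c, double %b)
      ret double %r
    }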
+
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
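Because VSRC overlaps the Altivec registers, bitcasts between v2f64 and the 128-bit integer vector types need no instruction at all, only a register-class constraint: casts into v2f64 land in VSRC, while casts back to the integer vector types are pinned to VRRC. A short sketch of both directions (illustrative function names):

    define <2 x double> @to_v2f64(<4 x i32> %a) {
    entry:
      ; No code generated: COPY_TO_REGCLASS into VSRC.
      %r = bitcast <4 x i32> %a to <2 x double>
      ret <2 x double> %r
    }

    define <4 x i32> @from_v2f64(<2 x double> %a) {
    entry:
      ; No code generated: COPY_TO_REGCLASS into VRRC.
      %r = bitcast <2 x double> %a to <4 x i32>
      ret <4 x i32> %r
    }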
+
+} // AddedComplexity
+} // HasVSX
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index a20b303d38..c68e922355 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -229,7 +229,11 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSHRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::VSRCRegClassID:
+ return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index f1ecda198f..339d4e4d71 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
}
@@ -52,6 +54,23 @@ class VR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 1;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_128];
+}
+
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -86,6 +105,16 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// VSX registers
+foreach Index = 0-31 in {
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+foreach Index = 0-31 in {
+ def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+ DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">;
def ZERO8 : GP8<ZERO, "0">;
@@ -204,6 +233,20 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
+ (add (sequence "VSL%u", 0, 13),
+ (sequence "VSL%u", 31, 14))>;
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
+ (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+ VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+ VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+ VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+ VSH22, VSH21, VSH20)>;
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
+ (add VSLRC, VSHRC)>;
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 87b7c9f957..b07abe4461 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -88,6 +88,7 @@ void PPCSubtarget::initializeEnvironment() {
UseCRBits = false;
HasAltivec = false;
HasQPX = false;
+ HasVSX = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e9f8310189..87e012ece1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -178,6 +178,7 @@ public:
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
+ bool hasVSX() const { return HasVSX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 36d2100ac5..2449abf2ca 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -160,6 +160,8 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
+ addPass(createPPCVSXCopyPass());
+
return false;
}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 984519c079..2491a9e539 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -139,7 +139,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
- return 32;
+ return ST->hasVSX() ? 64 : 32;
}
unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
@@ -208,6 +208,14 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
+ // Double-precision scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ }
+
// Estimated cost of a load-hit-store delay. This was obtained
// experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be
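The cost-model change mirrors the ISel patterns above: with VSX, extracting (or inserting) element 0 of a double vector is modeled as free, and only the other lane pays the default cost, so the vectorizers see the subregister trick. A hedged IR illustration of the query this affects (the function name is mine):

    define double @cost_probe(<2 x double> %v) {
    entry:
      ; getVectorInstrCost(ExtractElement, <2 x double>, 0) now returns 0
      ; under VSX; index 1 falls through to the target-independent cost.
      %e = extractelement <2 x double> %v, i32 0
      ret double %e
    }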
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
new file mode 100644
index 0000000000..ba53b7968c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test1(double %a, double %b) {
+entry:
+ %v = fmul double %a, %b
+ ret double %v
+
+; CHECK-LABEL: @test1
+; CHECK: xsmuldp 1, 1, 2
+; CHECK: blr
+}
+
+define double @test2(double %a, double %b) {
+entry:
+ %v = fdiv double %a, %b
+ ret double %v
+
+; CHECK-LABEL: @test2
+; CHECK: xsdivdp 1, 1, 2
+; CHECK: blr
+}
+
+define double @test3(double %a, double %b) {
+entry:
+ %v = fadd double %a, %b
+ ret double %v
+
+; CHECK-LABEL: @test3
+; CHECK: xsadddp 1, 1, 2
+; CHECK: blr
+}
+
+define <2 x double> @test4(<2 x double> %a, <2 x double> %b) {
+entry:
+ %v = fadd <2 x double> %a, %b
+ ret <2 x double> %v
+
+; FIXME: Check that the ABI for the return value is correct here!
+
+; CHECK-LABEL: @test4
+; CHECK: xvadddp {{[0-9]+}}, 34, 35
+; CHECK: blr
+}
+
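Given the add test above, an analogous multiply case would exercise xvmuldp the same way. A sketch only: whether xvmuldp is actually selected, and with which register numbers, is an assumption here, since the commit message notes that the vector lowering is still immature.

    define <2 x double> @test5(<2 x double> %a, <2 x double> %b) {
    entry:
      %v = fmul <2 x double> %a, %b
      ret <2 x double> %v

    ; CHECK-LABEL: @test5
    ; CHECK: xvmuldp {{[0-9]+}}, 34, 35
    ; CHECK: blr
    }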
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt
new file mode 100644
index 0000000000..b5e2751225
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -0,0 +1,452 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+
+# CHECK: lxsdx 7, 5, 31
+0x7c 0xe5 0xfc 0x98
+
+# CHECK: lxvd2x 7, 5, 31
+0x7c 0xe5 0xfe 0x98
+
+# CHECK: lxvdsx 7, 5, 31
+0x7c 0xe5 0xfa 0x98
+
+# CHECK: lxvw4x 7, 5, 31
+0x7c 0xe5 0xfe 0x18
+
+# CHECK: stxsdx 8, 5, 31
+0x7d 0x05 0xfd 0x98
+
+# CHECK: stxvd2x 8, 5, 31
+0x7d 0x05 0xff 0x98
+
+# CHECK: stxvw4x 8, 5, 31
+0x7d 0x05 0xff 0x18
+
+# CHECK: xsabsdp 7, 27
+0xf0 0xe0 0xdd 0x64
+
+# CHECK: xsadddp 7, 63, 27
+0xf0 0xff 0xd9 0x04
+
+# CHECK: xscmpodp 6, 63, 27
+0xf3 0x1f 0xd9 0x5c
+
+# CHECK: xscmpudp 6, 63, 27
+0xf3 0x1f 0xd9 0x1c
+
+# CHECK: xscpsgndp 7, 63, 27
+0xf0 0xff 0xdd 0x84
+
+# CHECK: xscvdpsp 7, 27
+0xf0 0xe0 0xdc 0x24
+
+# CHECK: xscvdpsxds 7, 27
+0xf0 0xe0 0xdd 0x60
+
+# CHECK: xscvdpsxws 7, 27
+0xf0 0xe0 0xd9 0x60
+
+# CHECK: xscvdpuxds 7, 27
+0xf0 0xe0 0xdd 0x20
+
+# CHECK: xscvdpuxws 7, 27
+0xf0 0xe0 0xd9 0x20
+
+# CHECK: xscvspdp 7, 27
+0xf0 0xe0 0xdd 0x24
+
+# CHECK: xscvsxddp 7, 27
+0xf0 0xe0 0xdd 0xe0
+
+# CHECK: xscvuxddp 7, 27
+0xf0 0xe0 0xdd 0xa0
+
+# CHECK: xsdivdp 7, 63, 27
+0xf0 0xff 0xd9 0xc4
+
+# CHECK: xsmaddadp 7, 63, 27
+0xf0 0xff 0xd9 0x0c
+
+# CHECK: xsmaddmdp 7, 63, 27
+0xf0 0xff 0xd9 0x4c
+
+# CHECK: xsmaxdp 7, 63, 27
+0xf0 0xff 0xdd 0x04
+
+# CHECK: xsmindp 7, 63, 27
+0xf0 0xff 0xdd 0x44
+
+# CHECK: xsmsubadp 7, 63, 27
+0xf0 0xff 0xd9 0x8c
+
+# CHECK: xsmsubmdp 7, 63, 27
+0xf0 0xff 0xd9 0xcc
+
+# CHECK: xsmuldp 7, 63, 27
+0xf0 0xff 0xd9 0x84
+
+# CHECK: xsnabsdp 7, 27
+0xf0 0xe0 0xdd 0xa4
+
+# CHECK: xsnegdp 7, 27
+0xf0 0xe0 0xdd 0xe4
+
+# CHECK: xsnmaddadp 7, 63, 27
+0xf0 0xff 0xdd 0x0c
+
+# CHECK: xsnmaddmdp 7, 63, 27
+0xf0 0xff 0xdd 0x4c
+
+# CHECK: xsnmsubadp 7, 63, 27
+0xf0 0xff 0xdd 0x8c
+
+# CHECK: xsnmsubmdp 7, 63, 27
+0xf0 0xff 0xdd 0xcc
+
+# CHECK: xsrdpi 7, 27
+0xf0 0xe0 0xd9 0x24
+
+# CHECK: xsrdpic 7, 27
+0xf0 0xe0 0xd9 0xac
+
+# CHECK: xsrdpim 7, 27
+0xf0 0xe0 0xd9 0xe4
+
+# CHECK: xsrdpip 7, 27
+0xf0 0xe0 0xd9 0xa4
+
+# CHECK: xsrdpiz 7, 27
+0xf0 0xe0 0xd9 0x64
+
+# CHECK: xsredp 7, 27
+0xf0 0xe0 0xd9 0x68
+
+# CHECK: xsrsqrtedp 7, 27
+0xf0 0xe0 0xd9 0x28
+
+# CHECK: xssqrtdp 7, 27
+0xf0 0xe0 0xd9 0x2c
+
+# CHECK: xssubdp 7, 63, 27
+0xf0 0xff 0xd9 0x44
+
+# CHECK: xstdivdp 6, 63, 27
+0xf3 0x1f 0xd9 0xec
+
+# CHECK: xstsqrtdp 6, 27
+0xf3 0x00 0xd9 0xa8
+
+# CHECK: xvabsdp 7, 27
+0xf0 0xe0 0xdf 0x64
+
+# CHECK: xvabssp 7, 27
+0xf0 0xe0 0xde 0x64
+
+# CHECK: xvadddp 7, 63, 27
+0xf0 0xff 0xdb 0x04
+
+# CHECK: xvaddsp 7, 63, 27
+0xf0 0xff 0xda 0x04
+
+# CHECK: xvcmpeqdp 7, 63, 27
+0xf0 0xff 0xdb 0x1c
+
+# CHECK: xvcmpeqdp. 7, 63, 27
+0xf0 0xff 0xdf 0x1c
+
+# CHECK: xvcmpeqsp 7, 63, 27
+0xf0 0xff 0xda 0x1c
+
+# CHECK: xvcmpeqsp. 7, 63, 27
+0xf0 0xff 0xde 0x1c
+
+# CHECK: xvcmpgedp 7, 63, 27
+0xf0 0xff 0xdb 0x9c
+
+# CHECK: xvcmpgedp. 7, 63, 27
+0xf0 0xff 0xdf 0x9c
+
+# CHECK: xvcmpgesp 7, 63, 27
+0xf0 0xff 0xda 0x9c
+
+# CHECK: xvcmpgesp. 7, 63, 27
+0xf0 0xff 0xde 0x9c
+
+# CHECK: xvcmpgtdp 7, 63, 27
+0xf0 0xff 0xdb 0x5c
+
+# CHECK: xvcmpgtdp. 7, 63, 27
+0xf0 0xff 0xdf 0x5c
+
+# CHECK: xvcmpgtsp 7, 63, 27
+0xf0 0xff 0xda 0x5c
+
+# CHECK: xvcmpgtsp. 7, 63, 27
+0xf0 0xff 0xde 0x5c
+
+# CHECK: xvcpsgndp 7, 63, 27
+0xf0 0xff 0xdf 0x84
+
+# CHECK: xvcpsgnsp 7, 63, 27
+0xf0 0xff 0xde 0x84
+
+# CHECK: xvcvdpsp 7, 27
+0xf0 0xe0 0xde 0x24
+
+# CHECK: xvcvdpsxds 7, 27
+0xf0 0xe0 0xdf 0x60
+
+# CHECK: xvcvdpsxws 7, 27
+0xf0 0xe0 0xdb 0x60
+
+# CHECK: xvcvdpuxds 7, 27
+0xf0 0xe0 0xdf 0x20
+
+# CHECK: xvcvdpuxws 7, 27
+0xf0 0xe0 0xdb 0x20
+
+# CHECK: xvcvspdp 7, 27
+0xf0 0xe0 0xdf 0x24
+
+# CHECK: xvcvspsxds 7, 27
+0xf0 0xe0 0xde 0x60
+
+# CHECK: xvcvspsxws 7, 27
+0xf0 0xe0 0xda 0x60
+
+# CHECK: xvcvspuxds 7, 27
+0xf0 0xe0 0xde 0x20
+
+# CHECK: xvcvspuxws 7, 27
+0xf0 0xe0 0xda 0x20
+
+# CHECK: xvcvsxddp 7, 27
+0xf0 0xe0 0xdf 0xe0
+
+# CHECK: xvcvsxdsp 7, 27
+0xf0 0xe0 0xde 0xe0
+
+# CHECK: xvcvsxwdp 7, 27
+0xf0 0xe0 0xdb 0xe0
+
+# CHECK: xvcvsxwsp 7, 27
+0xf0 0xe0 0xda 0xe0
+
+# CHECK: xvcvuxddp 7, 27
+0xf0 0xe0 0xdf 0xa0
+
+# CHECK: xvcvuxdsp 7, 27
+0xf0 0xe0 0xde 0xa0
+
+# CHECK: xvcvuxwdp 7, 27
+0xf0 0xe0 0xdb 0xa0
+
+# CHECK: xvcvuxwsp 7, 27
+0xf0 0xe0 0xda 0xa0
+
+# CHECK: xvdivdp 7, 63, 27
+0xf0 0xff 0xdb 0xc4
+
+# CHECK: xvdivsp 7, 63, 27
+0xf0 0xff 0xda 0xc4
+
+# CHECK: xvmaddadp 7, 63, 27
+0xf0 0xff 0xdb 0x0c
+
+# CHECK: xvmaddasp 7, 63, 27
+0xf0 0xff 0xda 0x0c
+
+# CHECK: xvmaddmdp 7, 63, 27
+0xf0 0xff 0xdb 0x4c
+
+# CHECK: xvmaddmsp 7, 63, 27
+0xf0 0xff 0xda 0x4c
+
+# CHECK: xvmaxdp 7, 63, 27
+0xf0 0xff 0xdf 0x04
+
+# CHECK: xvmaxsp 7, 63, 27
+0xf0 0xff 0xde 0x04
+
+# CHECK: xvmindp 7, 63, 27
+0xf0 0xff 0xdf 0x44
+
+# CHECK: xvminsp 7, 63, 27
+0xf0 0xff 0xde 0x44
+
+# FIXME: decode as xvmovdp 7, 63
+# CHECK: xvcpsgndp 7, 63, 63
+0xf0 0xff 0xff 0x86
+
+# FIXME: decode as xvmovsp 7, 63
+# CHECK: xvcpsgnsp 7, 63, 63
+0xf0 0xff 0xfe 0x86
+
+# CHECK: xvmsubadp 7, 63, 27
+0xf0 0xff 0xdb 0x8c
+
+# CHECK: xvmsubasp 7, 63, 27
+0xf0 0xff 0xda 0x8c
+
+# CHECK: xvmsubmdp 7, 63, 27
+0xf0 0xff 0xdb 0xcc
+
+# CHECK: xvmsubmsp 7, 63, 27
+0xf0 0xff 0xda 0xcc
+
+# CHECK: xvmuldp 7, 63, 27
+0xf0 0xff 0xdb 0x84
+
+# CHECK: xvmulsp 7, 63, 27
+0xf0 0xff 0xda 0x84
+
+# CHECK: xvnabsdp 7, 27
+0xf0 0xe0 0xdf 0xa4
+
+# CHECK: xvnabssp 7, 27
+0xf0 0xe0 0xde 0xa4
+
+# CHECK: xvnegdp 7, 27
+0xf0 0xe0 0xdf 0xe4
+
+# CHECK: xvnegsp 7, 27
+0xf0 0xe0 0xde 0xe4
+
+# CHECK: xvnmaddadp 7, 63, 27
+0xf0 0xff 0xdf 0x0c
+
+# CHECK: xvnmaddasp 7, 63, 27
+0xf0 0xff 0xde 0x0c
+
+# CHECK: xvnmaddmdp 7, 63, 27
+0xf0 0xff 0xdf 0x4c
+
+# CHECK: xvnmaddmsp 7, 63, 27
+0xf0 0xff 0xde 0x4c
+
+# CHECK: xvnmsubadp 7, 63, 27
+0xf0 0xff 0xdf 0x8c
+
+# CHECK: xvnmsubasp 7, 63, 27
+0xf0 0xff 0xde 0x8c
+
+# CHECK: xvnmsubmdp 7, 63, 27
+0xf0 0xff 0xdf 0xcc
+
+# CHECK: xvnmsubmsp 7, 63, 27
+0xf0 0xff 0xde 0xcc
+
+# CHECK: xvrdpi 7, 27
+0xf0 0xe0 0xdb 0x24
+
+# CHECK: xvrdpic 7, 27
+0xf0 0xe0 0xdb 0xac
+
+# CHECK: xvrdpim 7, 27
+0xf0 0xe0 0xdb 0xe4
+
+# CHECK: xvrdpip 7, 27
+0xf0 0xe0 0xdb 0xa4
+
+# CHECK: xvrdpiz 7, 27
+0xf0 0xe0 0xdb 0x64
+
+# CHECK: xvredp 7, 27
+0xf0 0xe0 0xdb 0x68
+
+# CHECK: xvresp 7, 27
+0xf0 0xe0 0xda 0x68
+
+# CHECK: xvrspi 7, 27
+0xf0 0xe0 0xda 0x24
+
+# CHECK: xvrspic 7, 27
+0xf0 0xe0 0xda 0xac
+
+# CHECK: xvrspim 7, 27
+0xf0 0xe0 0xda 0xe4
+
+# CHECK: xvrspip 7, 27
+0xf0 0xe0 0xda 0xa4
+
+# CHECK: xvrspiz 7, 27
+0xf0 0xe0 0xda 0x64
+
+# CHECK: xvrsqrtedp 7, 27
+0xf0 0xe0 0xdb 0x28
+
+# CHECK: xvrsqrtesp 7, 27
+0xf0 0xe0 0xda 0x28
+
+# CHECK: xvsqrtdp 7, 27
+0xf0 0xe0 0xdb 0x2c
+
+# CHECK: xvsqrtsp 7, 27
+0xf0 0xe0 0xda 0x2c
+
+# CHECK: xvsubdp 7, 63, 27
+0xf0 0xff 0xdb 0x44
+
+# CHECK: xvsubsp 7, 63, 27
+0xf0 0xff 0xda 0x44
+
+# CHECK: xvtdivdp 6, 63, 27
+0xf3 0x1f 0xdb 0xec
+
+# CHECK: xvtdivsp 6, 63, 27
+0xf3 0x1f 0xda 0xec
+
+# CHECK: xvtsqrtdp 6, 27
+0xf3 0x00 0xdb 0xa8
+
+# CHECK: xvtsqrtsp 6, 27
+0xf3 0x00 0xda 0xa8
+
+# CHECK: xxland 7, 63, 27
+0xf0 0xff 0xdc 0x14
+
+# CHECK: xxlandc 7, 63, 27
+0xf0 0xff 0xdc 0x54
+
+# CHECK: xxlnor 7, 63, 27
+0xf0 0xff 0xdd 0x14
+
+# CHECK: xxlor 7, 63, 27
+0xf0 0xff 0xdc 0x94
+
+# CHECK: xxlxor 7, 63, 27
+0xf0 0xff 0xdc 0xd4
+
+# FIXME: decode as xxmrghd 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 0
+0xf0 0xff 0xd8 0x54
+
+# CHECK: xxmrghw 7, 63, 27
+0xf0 0xff 0xd8 0x94
+
+# FIXME: decode as xxmrgld 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 3
+0xf0 0xff 0xdb 0x54
+
+# CHECK: xxmrglw 7, 63, 27
+0xf0 0xff 0xd9 0x94
+
+# CHECK: xxpermdi 7, 63, 27, 2
+0xf0 0xff 0xda 0x54
+
+# CHECK: xxsel 7, 63, 27, 14
+0xf0 0xff 0xdb 0xb4
+
+# CHECK: xxsldwi 7, 63, 27, 1
+0xf0 0xff 0xd9 0x14
+
+# FIXME: decode as xxspltd 7, 63, 1
+# CHECK: xxpermdi 7, 63, 63, 3
+0xf0 0xff 0xfb 0x56
+
+# CHECK: xxspltw 7, 27, 3
+0xf0 0xe3 0xda 0x90
+
+# FIXME: decode as xxswapd 7, 63
+# CHECK: xxpermdi 7, 63, 63, 2
+0xf0 0xff 0xfa 0x56
+
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s
new file mode 100644
index 0000000000..394d1881ae
--- /dev/null
+++ b/test/MC/PowerPC/vsx.s
@@ -0,0 +1,298 @@
+# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | FileCheck %s
+
+# CHECK: lxsdx 7, 5, 31 # encoding: [0x7c,0xe5,0xfc,0x98]
+ lxsdx 7, 5, 31
+# CHECK: lxvd2x 7, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x98]
+ lxvd2x 7, 5, 31
+# CHECK: lxvdsx 7, 5, 31 # encoding: [0x7c,0xe5,0xfa,0x98]
+ lxvdsx 7, 5, 31
+# CHECK: lxvw4x 7, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x18]
+ lxvw4x 7, 5, 31
+# CHECK: stxsdx 8, 5, 31 # encoding: [0x7d,0x05,0xfd,0x98]
+ stxsdx 8, 5, 31
+# CHECK: stxvd2x 8, 5, 31 # encoding: [0x7d,0x05,0xff,0x98]
+ stxvd2x 8, 5, 31
+# CHECK: stxvw4x 8, 5, 31 # encoding: [0x7d,0x05,0xff,0x18]
+ stxvw4x 8, 5, 31
+# CHECK: xsabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x64]
+ xsabsdp 7, 27
+# CHECK: xsadddp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x04]
+ xsadddp 7, 63, 27
+# CHECK: xscmpodp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0x5c]
+ xscmpodp 6, 63, 27
+# CHECK: xscmpudp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0x1c]
+ xscmpudp 6, 63, 27
+# CHECK: xscpsgndp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x84]
+ xscpsgndp 7, 63, 27
+# CHECK: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24]
+ xscvdpsp 7, 27
+# CHECK: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60]
+ xscvdpsxds 7, 27
+# CHECK: xscvdpsxws 7, 27 # encoding: [0xf0,0xe0,0xd9,0x60]
+ xscvdpsxws 7, 27
+# CHECK: xscvdpuxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x20]
+ xscvdpuxds 7, 27
+# CHECK: xscvdpuxws 7, 27 # encoding: [0xf0,0xe0,0xd9,0x20]
+ xscvdpuxws 7, 27
+# CHECK: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24]
+ xscvspdp 7, 27
+# CHECK: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0]
+ xscvsxddp 7, 27
+# CHECK: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
+ xscvuxddp 7, 27
+# CHECK: xsdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xc4]
+ xsdivdp 7, 63, 27
+# CHECK: xsmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x0c]
+ xsmaddadp 7, 63, 27
+# CHECK: xsmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x4c]
+ xsmaddmdp 7, 63, 27
+# CHECK: xsmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x04]
+ xsmaxdp 7, 63, 27
+# CHECK: xsmindp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x44]
+ xsmindp 7, 63, 27
+# CHECK: xsmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x8c]
+ xsmsubadp 7, 63, 27
+# CHECK: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc]
+ xsmsubmdp 7, 63, 27
+# CHECK: xsmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x84]
+ xsmuldp 7, 63, 27
+# CHECK: xsnabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa4]
+ xsnabsdp 7, 27
+# CHECK: xsnegdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe4]
+ xsnegdp 7, 27
+# CHECK: xsnmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x0c]
+ xsnmaddadp 7, 63, 27
+# CHECK: xsnmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x4c]
+ xsnmaddmdp 7, 63, 27
+# CHECK: xsnmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x8c]
+ xsnmsubadp 7, 63, 27
+# CHECK: xsnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0xcc]
+ xsnmsubmdp 7, 63, 27
+# CHECK: xsrdpi 7, 27 # encoding: [0xf0,0xe0,0xd9,0x24]
+ xsrdpi 7, 27
+# CHECK: xsrdpic 7, 27 # encoding: [0xf0,0xe0,0xd9,0xac]
+ xsrdpic 7, 27
+# CHECK: xsrdpim 7, 27 # encoding: [0xf0,0xe0,0xd9,0xe4]
+ xsrdpim 7, 27
+# CHECK: xsrdpip 7, 27 # encoding: [0xf0,0xe0,0xd9,0xa4]
+ xsrdpip 7, 27
+# CHECK: xsrdpiz 7, 27 # encoding: [0xf0,0xe0,0xd9,0x64]
+ xsrdpiz 7, 27
+# CHECK: xsredp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x68]
+ xsredp 7, 27
+# CHECK: xsrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x28]
+ xsrsqrtedp 7, 27
+# CHECK: xssqrtdp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x2c]
+ xssqrtdp 7, 27
+# CHECK: xssubdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x44]
+ xssubdp 7, 63, 27
+# CHECK: xstdivdp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0xec]
+ xstdivdp 6, 63, 27
+# CHECK: xstsqrtdp 6, 27 # encoding: [0xf3,0x00,0xd9,0xa8]
+ xstsqrtdp 6, 27
+# CHECK: xvabsdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0x64]
+ xvabsdp 7, 27
+# CHECK: xvabssp 7, 27 # encoding: [0xf0,0xe0,0xde,0x64]
+ xvabssp 7, 27
+# CHECK: xvadddp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x04]
+ xvadddp 7, 63, 27
+# CHECK: xvaddsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x04]
+ xvaddsp 7, 63, 27
+# CHECK: xvcmpeqdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x1c]
+ xvcmpeqdp 7, 63, 27
+# CHECK: xvcmpeqdp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x1c]
+ xvcmpeqdp. 7, 63, 27
+# CHECK: xvcmpeqsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x1c]
+ xvcmpeqsp 7, 63, 27
+# CHECK: xvcmpeqsp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x1c]
+ xvcmpeqsp. 7, 63, 27
+# CHECK: xvcmpgedp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x9c]
+ xvcmpgedp 7, 63, 27
+# CHECK: xvcmpgedp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x9c]
+ xvcmpgedp. 7, 63, 27
+# CHECK: xvcmpgesp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x9c]
+ xvcmpgesp 7, 63, 27
+# CHECK: xvcmpgesp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x9c]
+ xvcmpgesp. 7, 63, 27
+# CHECK: xvcmpgtdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x5c]
+ xvcmpgtdp 7, 63, 27
+# CHECK: xvcmpgtdp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x5c]
+ xvcmpgtdp. 7, 63, 27
+# CHECK: xvcmpgtsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x5c]
+ xvcmpgtsp 7, 63, 27
+# CHECK: xvcmpgtsp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x5c]
+ xvcmpgtsp. 7, 63, 27
+# CHECK: xvcpsgndp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x84]
+ xvcpsgndp 7, 63, 27
+# CHECK: xvcpsgnsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x84]
+ xvcpsgnsp 7, 63, 27
+# CHECK: xvcvdpsp 7, 27 # encoding: [0xf0,0xe0,0xde,0x24]
+ xvcvdpsp 7, 27
+# CHECK: xvcvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdf,0x60]
+ xvcvdpsxds 7, 27
+# CHECK: xvcvdpsxws 7, 27 # encoding: [0xf0,0xe0,0xdb,0x60]
+ xvcvdpsxws 7, 27
+# CHECK: xvcvdpuxds 7, 27 # encoding: [0xf0,0xe0,0xdf,0x20]
+ xvcvdpuxds 7, 27
+# CHECK: xvcvdpuxws 7, 27 # encoding: [0xf0,0xe0,0xdb,0x20]
+ xvcvdpuxws 7, 27
+# CHECK: xvcvspdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0x24]
+ xvcvspdp 7, 27
+# CHECK: xvcvspsxds 7, 27 # encoding: [0xf0,0xe0,0xde,0x60]
+ xvcvspsxds 7, 27
+# CHECK: xvcvspsxws 7, 27 # encoding: [0xf0,0xe0,0xda,0x60]
+ xvcvspsxws 7, 27
+# CHECK: xvcvspuxds 7, 27 # encoding: [0xf0,0xe0,0xde,0x20]
+ xvcvspuxds 7, 27
+# CHECK: xvcvspuxws 7, 27 # encoding: [0xf0,0xe0,0xda,0x20]
+ xvcvspuxws 7, 27
+# CHECK: xvcvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xe0]
+ xvcvsxddp 7, 27
+# CHECK: xvcvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xe0]
+ xvcvsxdsp 7, 27
+# CHECK: xvcvsxwdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0xe0]
+ xvcvsxwdp 7, 27
+# CHECK: xvcvsxwsp 7, 27 # encoding: [0xf0,0xe0,0xda,0xe0]
+ xvcvsxwsp 7, 27
+# CHECK: xvcvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xa0]
+ xvcvuxddp 7, 27
+# CHECK: xvcvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xa0]
+ xvcvuxdsp 7, 27
+# CHECK: xvcvuxwdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0xa0]
+ xvcvuxwdp 7, 27
+# CHECK: xvcvuxwsp 7, 27 # encoding: [0xf0,0xe0,0xda,0xa0]
+ xvcvuxwsp 7, 27
+# CHECK: xvdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0xc4]
+ xvdivdp 7, 63, 27
+# CHECK: xvdivsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0xc4]
+ xvdivsp 7, 63, 27
+# CHECK: xvmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x0c]
+ xvmaddadp 7, 63, 27
+# CHECK: xvmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x0c]
+ xvmaddasp 7, 63, 27
+# CHECK: xvmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x4c]
+ xvmaddmdp 7, 63, 27
+# CHECK: xvmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x4c]
+ xvmaddmsp 7, 63, 27
+# CHECK: xvmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x04]
+ xvmaxdp 7, 63, 27
+# CHECK: xvmaxsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x04]
+ xvmaxsp 7, 63, 27
+# CHECK: xvmindp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x44]
+ xvmindp 7, 63, 27
+# CHECK: xvminsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x44]
+ xvminsp 7, 63, 27
+# CHECK: xvcpsgndp 7, 63, 63 # encoding: [0xf0,0xff,0xff,0x86]
+ xvmovdp 7, 63
+# CHECK: xvcpsgnsp 7, 63, 63 # encoding: [0xf0,0xff,0xfe,0x86]
+ xvmovsp 7, 63
+# CHECK: xvmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x8c]
+ xvmsubadp 7, 63, 27
+# CHECK: xvmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x8c]
+ xvmsubasp 7, 63, 27
+# CHECK: xvmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0xcc]
+ xvmsubmdp 7, 63, 27
+# CHECK: xvmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0xcc]
+ xvmsubmsp 7, 63, 27
+# CHECK: xvmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x84]
+ xvmuldp 7, 63, 27
+# CHECK: xvmulsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x84]
+ xvmulsp 7, 63, 27
+# CHECK: xvnabsdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xa4]
+ xvnabsdp 7, 27
+# CHECK: xvnabssp 7, 27 # encoding: [0xf0,0xe0,0xde,0xa4]
+ xvnabssp 7, 27
+# CHECK: xvnegdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xe4]
+ xvnegdp 7, 27
+# CHECK: xvnegsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xe4]
+ xvnegsp 7, 27
+# CHECK: xvnmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x0c]
+ xvnmaddadp 7, 63, 27
+# CHECK: xvnmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x0c]
+ xvnmaddasp 7, 63, 27
+# CHECK: xvnmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x4c]
+ xvnmaddmdp 7, 63, 27
+# CHECK: xvnmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x4c]
+ xvnmaddmsp 7, 63, 27
+# CHECK: xvnmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x8c]
+ xvnmsubadp 7, 63, 27
+# CHECK: xvnmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x8c]
+ xvnmsubasp 7, 63, 27
+# CHECK: xvnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0xcc]
+ xvnmsubmdp 7, 63, 27
+# CHECK: xvnmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0xcc]
+ xvnmsubmsp 7, 63, 27
+# CHECK: xvrdpi 7, 27 # encoding: [0xf0,0xe0,0xdb,0x24]
+ xvrdpi 7, 27
+# CHECK: xvrdpic 7, 27 # encoding: [0xf0,0xe0,0xdb,0xac]
+ xvrdpic 7, 27
+# CHECK: xvrdpim 7, 27 # encoding: [0xf0,0xe0,0xdb,0xe4]
+ xvrdpim 7, 27
+# CHECK: xvrdpip 7, 27 # encoding: [0xf0,0xe0,0xdb,0xa4]
+ xvrdpip 7, 27
+# CHECK: xvrdpiz 7, 27 # encoding: [0xf0,0xe0,0xdb,0x64]
+ xvrdpiz 7, 27
+# CHECK: xvredp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x68]
+ xvredp 7, 27
+# CHECK: xvresp 7, 27 # encoding: [0xf0,0xe0,0xda,0x68]
+ xvresp 7, 27
+# CHECK: xvrspi 7, 27 # encoding: [0xf0,0xe0,0xda,0x24]
+ xvrspi 7, 27
+# CHECK: xvrspic 7, 27 # encoding: [0xf0,0xe0,0xda,0xac]
+ xvrspic 7, 27
+# CHECK: xvrspim 7, 27 # encoding: [0xf0,0xe0,0xda,0xe4]
+ xvrspim 7, 27
+# CHECK: xvrspip 7, 27 # encoding: [0xf0,0xe0,0xda,0xa4]
+ xvrspip 7, 27
+# CHECK: xvrspiz 7, 27 # encoding: [0xf0,0xe0,0xda,0x64]
+ xvrspiz 7, 27
+# CHECK: xvrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x28]
+ xvrsqrtedp 7, 27
+# CHECK: xvrsqrtesp 7, 27 # encoding: [0xf0,0xe0,0xda,0x28]
+ xvrsqrtesp 7, 27
+# CHECK: xvsqrtdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x2c]
+ xvsqrtdp 7, 27
+# CHECK: xvsqrtsp 7, 27 # encoding: [0xf0,0xe0,0xda,0x2c]
+ xvsqrtsp 7, 27
+# CHECK: xvsubdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x44]
+ xvsubdp 7, 63, 27
+# CHECK: xvsubsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x44]
+ xvsubsp 7, 63, 27
+# CHECK: xvtdivdp 6, 63, 27 # encoding: [0xf3,0x1f,0xdb,0xec]
+ xvtdivdp 6, 63, 27
+# CHECK: xvtdivsp 6, 63, 27 # encoding: [0xf3,0x1f,0xda,0xec]
+ xvtdivsp 6, 63, 27
+# CHECK: xvtsqrtdp 6, 27 # encoding: [0xf3,0x00,0xdb,0xa8]
+ xvtsqrtdp 6, 27
+# CHECK: xvtsqrtsp 6, 27 # encoding: [0xf3,0x00,0xda,0xa8]
+ xvtsqrtsp 6, 27
+# CHECK: xxland 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x14]
+ xxland 7, 63, 27
+# CHECK: xxlandc 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x54]
+ xxlandc 7, 63, 27
+# CHECK: xxlnor 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x14]
+ xxlnor 7, 63, 27
+# CHECK: xxlor 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x94]
+ xxlor 7, 63, 27
+# CHECK: xxlxor 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0xd4]
+ xxlxor 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 0 # encoding: [0xf0,0xff,0xd8,0x54]
+ xxmrghd 7, 63, 27
+# CHECK: xxmrghw 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x94]
+ xxmrghw 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 3 # encoding: [0xf0,0xff,0xdb,0x54]
+ xxmrgld 7, 63, 27
+# CHECK: xxmrglw 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x94]
+ xxmrglw 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 2 # encoding: [0xf0,0xff,0xda,0x54]
+ xxpermdi 7, 63, 27, 2
+# CHECK: xxsel 7, 63, 27, 14 # encoding: [0xf0,0xff,0xdb,0xb4]
+ xxsel 7, 63, 27, 14
+# CHECK: xxsldwi 7, 63, 27, 1 # encoding: [0xf0,0xff,0xd9,0x14]
+ xxsldwi 7, 63, 27, 1
+# CHECK: xxpermdi 7, 63, 63, 3 # encoding: [0xf0,0xff,0xfb,0x56]
+ xxspltd 7, 63, 1
+# CHECK: xxspltw 7, 27, 3 # encoding: [0xf0,0xe3,0xda,0x90]
+ xxspltw 7, 27, 3
+# CHECK: xxpermdi 7, 63, 63, 2 # encoding: [0xf0,0xff,0xfa,0x56]
+ xxswapd 7, 63