summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp 31
-rw-r--r-- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp 26
-rw-r--r-- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp 12
-rw-r--r-- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h 2
-rw-r--r-- lib/Target/PowerPC/PPC.h 1
-rw-r--r-- lib/Target/PowerPC/PPC.td 3
-rw-r--r-- lib/Target/PowerPC/PPCAsmPrinter.cpp 5
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp 38
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp 65
-rw-r--r-- lib/Target/PowerPC/PPCInstrFormats.td 167
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp 199
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td 9
-rw-r--r-- lib/Target/PowerPC/PPCInstrVSX.td 679
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp 4
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.td 43
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.cpp 1
-rw-r--r-- lib/Target/PowerPC/PPCSubtarget.h 1
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp 2
-rw-r--r-- lib/Target/PowerPC/PPCTargetTransformInfo.cpp 10
-rw-r--r-- test/CodeGen/PowerPC/vsx.ll 46
-rw-r--r-- test/MC/Disassembler/PowerPC/vsx.txt 452
-rw-r--r-- test/MC/PowerPC/vsx.s 298
22 files changed, 2071 insertions, 23 deletions
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 21af0374a0..5f0109a1f3 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -95,6 +95,25 @@ static unsigned VRegs[32] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static unsigned VSRegs[64] = { // indexed by VSX register number: 0-31 -> VSL0-VSL31, 32-63 -> VSH0-VSH31
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
static unsigned CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -345,6 +364,11 @@ public:
return (unsigned) Imm.Val;
}
+ unsigned getVSReg() const { // returns the immediate as a VSX register number
+ assert(isVSRegNumber() && "Invalid access!"); // isVSRegNumber() limits the value to 6 unsigned bits
+ return (unsigned) Imm.Val;
+ }
+
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
@@ -362,6 +386,7 @@ public:
bool isToken() const { return Kind == Token; }
bool isImm() const { return Kind == Immediate || Kind == Expression; }
+ bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } // plain immediate (not an expression) passing isUInt<2>
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
@@ -382,6 +407,7 @@ public:
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); } // 6 bits, vs. 5 for isRegNumber(); matches the 64-entry VSRegs table
bool isCCRegNumber() const { return (Kind == Expression
&& isUInt<3>(getExprCRVal())) ||
(Kind == Immediate
@@ -448,6 +474,11 @@ public:
Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
}
+ void addRegVSRCOperands(MCInst &Inst, unsigned N) const { // appends one VSX register operand to Inst
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()])); // translate the parsed number through the VSRegs table
+ }
+
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 23866dc70b..904f871b81 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -92,6 +92,26 @@ static const unsigned VRegs[] = {
PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+static const unsigned VSRegs[] = { // 64 entries: indices 0-31 are VSL0-VSL31, indices 32-63 are VSH0-VSH31
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+
static const unsigned GPRegs[] = {
PPC::R0, PPC::R1, PPC::R2, PPC::R3,
PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -163,6 +183,12 @@ static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, VRegs);
}
+static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) { // Address and Decoder are not used in this body
+ return decodeRegisterClass(Inst, RegNo, VSRegs); // delegates to the shared helper with the VSX table
+}
+
static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index b1c480b4aa..dc54b52a24 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -199,6 +199,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) { // writes the immediate of operand OpNo to O
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 3 && "Invalid u2imm argument!"); // a u2imm must fit in two bits
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
int Value = MI->getOperand(OpNo).getImm();
@@ -316,7 +323,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8a4c03d645..4d1df78ee4 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -43,7 +43,7 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = 0);
-
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index f0d5af2446..2fad831241 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -35,6 +35,7 @@ namespace llvm {
FunctionPass *createPPCCTRLoopsVerify();
#endif
FunctionPass *createPPCEarlyReturnPass();
+ FunctionPass *createPPCVSXCopyPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 2a9f65a6b1..5cb7eca4a4 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -90,7 +90,8 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
- "Enable VSX instructions">;
+ "Enable VSX instructions",
+ [FeatureAltivec]>;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 2a2ee25c5a..daf90c86ef 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -129,7 +129,10 @@ static const char *stripRegisterPrefix(const char *RegName) {
switch (RegName[0]) {
case 'r':
case 'f':
- case 'v': return RegName + 1;
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 65ea49a8e2..5b09588e0f 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -572,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
Opc = PPC::FCMPUS;
} else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
- Opc = PPC::FCMPUD;
+ Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
@@ -640,7 +640,8 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
+static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
+ bool HasVSX) {
switch (CC) {
case ISD::SETEQ:
case ISD::SETUEQ:
@@ -654,7 +655,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
return PPC::VCMPEQUW;
// v4f32 != v4f32 could be translate to unordered not equal
else if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
break;
case ISD::SETLT:
case ISD::SETGT:
@@ -667,7 +670,9 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
else if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
break;
case ISD::SETULT:
case ISD::SETUGT:
@@ -682,17 +687,23 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
break;
case ISD::SETOEQ:
if (VecVT == MVT::v4f32)
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
break;
case ISD::SETOLT:
case ISD::SETOGT:
case ISD::SETOLE:
if (VecVT == MVT::v4f32)
- return PPC::VCMPGTFP;
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
break;
case ISD::SETOGE:
if (VecVT == MVT::v4f32)
- return PPC::VCMPGEFP;
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
break;
default:
break;
@@ -703,7 +714,7 @@ static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
// getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
+static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
switch (VecVT) {
case MVT::v16i8:
return PPC::VCMPEQUB;
@@ -712,13 +723,14 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
case MVT::v4i32:
return PPC::VCMPEQUW;
case MVT::v4f32:
- return PPC::VCMPEQFP;
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ case MVT::v2f64:
+ return PPC::XVCMPEQDP;
default:
llvm_unreachable("Invalid integer vector compare condition");
}
}
-
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
@@ -808,7 +820,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC);
+ unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX());
switch (CC) {
case ISD::SETEQ:
@@ -839,7 +851,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
}
@@ -848,7 +860,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETOLE:
case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT);
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 680112da9a..a35c83c19d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -505,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath) {
+ if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -532,6 +532,40 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget->hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ addRegisterClass(MVT::f64, &PPC::VSRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+ }
}
if (Subtarget->has64BitSupport()) {
@@ -2094,6 +2128,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
+ case MVT::v2f64:
RC = &PPC::VRRCRegClass;
break;
}
@@ -2340,7 +2375,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
+ ObjectVT==MVT::v2f64) {
if (isVarArg) {
MinReservedArea = ((MinReservedArea+15)/16)*16;
MinReservedArea += CalculateStackSlotSize(ObjectVT,
@@ -2497,6 +2533,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
@@ -2959,7 +2996,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
+ ArgVT==MVT::v2f64) {
if (!isVarArg && !isPPC64) {
// Non-varargs Altivec parameters go after all the non-Altivec
// parameters; handle those later so we know how much padding we need.
@@ -4143,6 +4181,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
if (isVarArg) {
// These go aligned on the stack, or in the corresponding R registers
// when within range. The Darwin PPC ABI doc claims they also go in
@@ -6917,7 +6956,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -6979,7 +7019,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
(VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -7891,6 +7932,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ // FIXME: Update this for VSX!
(VT == MVT::v16i8 || VT == MVT::v8i16 ||
VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
@@ -8314,6 +8356,9 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
}
} else if (Constraint == "wc") { // individual CR bits.
return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -8335,6 +8380,13 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
// Look at the constraint type.
if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
switch (*constraint) {
default:
@@ -8393,6 +8445,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
} else if (Constraint == "wc") { // an individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
}
std::pair<unsigned, const TargetRegisterClass*> R =
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 9c7b17a4c2..7fed2c65da 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -593,6 +593,173 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+// XX*-Form (VSX)
+class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT; // 6-bit field, encoded in two pieces below
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0}; // low five bits of XT
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo; // 10-bit xo parameter
+ let Inst{31} = XT{5}; // high bit of XT goes in the last instruction bit
+}
+
+class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT; // 6-bit field: low five bits at 6-10, high bit at 31
+ bits<6> XB; // 6-bit field: low five bits at 16-20, high bit at 30
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = 0; // bits 11-15 carry no operand in this form
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo; // 9-bit xo parameter
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR; // 3-bit field placed where XX2Form puts the low bits of XT
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-15} = 0; // fixed to zero in this form
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = 0; // no XT in this form, so bit 31 is fixed to zero
+}
+
+class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+ bits<2> D; // extra 2-bit field compared with XX2Form
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-13} = 0; // fixed to zero
+ let Inst{14-15} = D; // D occupies the low two bits of the slot XX2Form leaves zero
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = XT{5}; // high bit of XT
+}
+
+class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT; // each 6-bit field is split: low five bits in its slot, high bit in 29-31
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo; // 8-bit xo parameter
+ let Inst{29} = XA{5}; // high bit of XA
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = XT{5}; // high bit of XT
+}
+
+class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR; // 3-bit field placed where XX3Form puts the low bits of XT
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-10} = 0; // fixed to zero
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5}; // high bit of XA
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = 0; // no XT in this form, so bit 31 is fixed to zero
+}
+
+class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<2> D; // 2-bit field taken out of the space XX3Form gives to xo
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = 0; // fixed to zero
+ let Inst{22-23} = D;
+ let Inst{24-28} = xo; // only 5 bits of xo in this form
+ let Inst{29} = XA{5}; // high bit of XA
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = XT{5}; // high bit of XT
+}
+
+class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = RC; // RC takes the top bit of what is the xo slot in XX3Form
+ let Inst{22-28} = xo; // 7-bit xo parameter
+ let Inst{29} = XA{5}; // high bit of XA
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = XT{5}; // high bit of XT
+}
+
+class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT; // four 6-bit fields, each split into a 5-bit slot plus one high bit
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-25} = XC{4-0};
+ let Inst{26-27} = xo; // only 2 bits of xo remain in this form
+ let Inst{28} = XC{5}; // high bit of XC
+ let Inst{29} = XA{5}; // high bit of XA
+ let Inst{30} = XB{5}; // high bit of XB
+ let Inst{31} = XT{5}; // high bit of XT
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d9907a0baf..408dd06b22 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -165,6 +165,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case PPC::RESTORE_CR:
case PPC::RESTORE_CRBIT:
case PPC::LVX:
+ case PPC::LXVD2X:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -190,6 +191,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case PPC::SPILL_CR:
case PPC::SPILL_CRBIT:
case PPC::STVX:
+ case PPC::STXVD2X:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
@@ -655,6 +657,47 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
+ // We can end up with self copies and similar things as a result of VSX copy
+ // legalization. Promote (or just ignore) them here.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (PPC::F8RCRegClass.contains(DestReg) &&
+ PPC::VSLRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (SrcReg == SuperReg)
+ return;
+
+ DestReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(DestReg) &&
+ PPC::VSHRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (SrcReg == SuperReg)
+ return;
+
+ DestReg = SuperReg;
+ } else if (PPC::F8RCRegClass.contains(SrcReg) &&
+ PPC::VSLRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (DestReg == SuperReg)
+ return;
+
+ SrcReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(SrcReg) &&
+ PPC::VSHRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (DestReg == SuperReg)
+ return;
+
+ SrcReg = SuperReg;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -666,6 +709,14 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::MCRF;
else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::VOR;
+ else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
+ // FIXME: There are really two different ways this can be done, and we
+ // should pick the better one depending on the situation:
+ // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
+ // issue in VSU pipeline 0.
+ // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
+ // can go to either pipeline.
+ Opc = PPC::XXLOR;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
@@ -731,6 +782,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -818,6 +875,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
FrameIdx));
NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ FrameIdx));
+ NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(TM.getSubtargetImpl()->isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
@@ -1485,6 +1546,144 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&); // forward declaration; called from the PPCVSXCopy constructor
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM; // assigned at the start of runOnMachineFunction
+ const PPCInstrInfo *TII; // assigned at the start of runOnMachineFunction
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) { // handles both virtual and physical registers
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg)); // virtual: compare Reg's register class against RC
+ } else if (RC->contains(Reg)) { // physical: direct membership test
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI); // VSX register class
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI); // Altivec vector register class
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI); // scalar floating-point register class
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) { // rewrites VSX <-> non-VSX copies in MBB; returns true if any were changed
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy()) // only instructions accepted by isFullCopy() are considered
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass; // VR source -> VSHRC; otherwise VSLRC (the assert below expects F8)
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64); // subregister index paired with the class chosen for SrcRC
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass; // VR destination -> VSHRC; otherwise VSLRC (the assert below expects F8)
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) { // runs processBlock on every block; returns true if any block changed
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { // I is advanced inside the body
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU); // nothing added beyond the base-class defaults
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE, // DEBUG_TYPE is "ppc-vsx-copy" at this point in the file
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); } // factory declared in PPC.h
+
#undef DEBUG_TYPE
#define DEBUG_TYPE "ppc-early-ret"
STATISTIC(NumBCLR, "Number of early conditional returns");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 14c70a23b0..deb8b4617e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -412,6 +412,14 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+// Assembly-parser operand class "U2Imm": an operand matches when the parser's
+// isU2Imm predicate accepts it, and it is rendered into the MCInst with
+// addImmOperands. Presumably an unsigned 2-bit immediate, going by the name.
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+// i32 immediate operand printed by printU2ImmOperand and parsed through
+// PPCU2ImmAsmOperand. The VSX permutation instructions (xxpermdi, xxsldwi,
+// xxspltw) use it for their $DM / $SHW / $UIM selector fields.
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+}
def PPCS5ImmAsmOperand : AsmOperandClass {
let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
let RenderMethod = "addImmOperands";
@@ -2431,6 +2439,7 @@ def : Pat<(fcopysign f32:$frB, f64:$frA),
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
new file mode 100644
index 0000000000..cfc526ef80
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -0,0 +1,679 @@
+//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VSX extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+// Assembly-parser operand class for VSX registers: an operand matches when
+// the parser's isVSRegNumber predicate accepts it.
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+// Register operand over the full VSRC class (all 64 VSX registers), parsed
+// through PPCRegVSRCAsmOperand. Every VSX instruction below uses it for its
+// vector-scalar register operands.
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+// XX3Form_Rcr - Defines a pair of XX3Form_Rc instructions that share the
+// BaseName `asmbase`:
+//   * NAME  - the plain form, printed as "<asmbase> <asmstr>" and carrying
+//             the caller-supplied selection pattern;
+//   * NAMEo - the record form, printed as "<asmbase>. <asmstr>", marked
+//             isDOT, declared to define CR6, and given no selection pattern.
+// Both variants use the same opcode, extended opcode and operand lists.
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>;
+ let Defs = [CR6] in
+ def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT;
+ }
+}
+
+def HasVSX : Predicate<"PPCSubTarget.hasVSX()">;
+let Predicates = [HasVSX] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
+let Uses = [RM] in {
+
+ // Load indexed instructions
+ // These use XX1Form, exactly like the VSX stores below: $XT is a vsrc
+ // operand whose hardware encoding is six bits wide (the VSH registers set
+ // bit 5), so the instruction format must encode the extra TX bit. A generic
+ // 5-bit X-form target field would silently map vs32-vs63 onto vs0-vs31.
+ let mayLoad = 1, canFoldAsLoad = 1 in {
+ // Load a scalar double-precision value (selected for f64 loads).
+ def LXSDX : XX1Form<31, 588,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ // Load a vector of two doublewords (selected for v2f64 loads).
+ def LXVD2X : XX1Form<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
+
+ // Load a doubleword and splat it; assembler/disassembler only for now.
+ def LXVDSX : XX1Form<31, 332,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+ // TODO: match load + splat to lxvdsx.
+
+ // Load a vector of four words (selected for v4f32 loads).
+ def LXVW4X : XX1Form<31, 780,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvw4x $XT, $src", IIC_LdStLFD,
+ [(set v4f32:$XT, (load xoaddr:$src))]>;
+ }
+
+ // Store indexed instructions
+ // All three stores take the value in $XT and an indexed (reg+reg) address
+ // in $dst, produce no outputs, and are selected for plain stores of the
+ // matching value type.
+ let mayStore = 1 in {
+ // Store scalar double-precision (f64).
+ def STXSDX : XX1Form<31, 716,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxsdx $XT, $dst", IIC_LdStSTFD,
+ [(store f64:$XT, xoaddr:$dst)]>;
+
+ // Store vector of two doublewords (v2f64).
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ // Store vector of four words (v4f32).
+ def STXVW4X : XX1Form<31, 908,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ [(store v4f32:$XT, xoaddr:$dst)]>;
+ }
+
+ // Add/Mul Instructions
+ let isCommutable = 1 in {
+ def XSADDDP : XX3Form<60, 32,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ def XSMULDP : XX3Form<60, 48,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+
+ def XVADDDP : XX3Form<60, 96,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+
+ def XVADDSP : XX3Form<60, 64,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+
+ def XVMULDP : XX3Form<60, 112,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+
+ def XVMULSP : XX3Form<60, 80,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ }
+
+ // Subtract Instructions
+ def XSSUBDP : XX3Form<60, 40,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xssubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+
+ def XVSUBDP : XX3Form<60, 104,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ def XVSUBSP : XX3Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+
+ // FMA Instructions
+ // Scalar double-precision multiply-add, "A" form: $XT = $XA * $XB + $XTi.
+ // The addend $XTi is tied to the destination (RegConstraint) and is not
+ // encoded separately (NoEncode), which is why the assembly string lists
+ // only $XT, $XA and $XB. All FMA definitions below follow this convention.
+ def XSMADDADP : XX3Form<60, 33,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ // "M" form of the scalar multiply-add. It has the same tied-operand layout
+ // as the "A" form but no selection pattern, so instruction selection never
+ // produces it; it is available to the assembler and disassembler only.
+ // NOTE(review): per the Power ISA the M form overwrites a multiplicand
+ // rather than the addend ($XT = $XA * $XT + $XB) - confirm before adding a
+ // pattern.
+ def XSMADDMDP : XX3Form<60, 41,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ // TODO: Select between these based first on whether one of the operands has
+ // no further uses. We probably want to do this after scheduling but before
+ // register allocation.
+
+ def XSMSUBADP : XX3Form<60, 49,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSMSUBMDP : XX3Form<60, 57,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XSNMADDADP : XX3Form<60, 161,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSNMADDMDP : XX3Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XSNMSUBADP : XX3Form<60, 177,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XSNMSUBMDP : XX3Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMADDADP : XX3Form<60, 97,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMADDMDP : XX3Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMADDASP : XX3Form<60, 65,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMADDMSP : XX3Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMSUBADP : XX3Form<60, 113,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMSUBMDP : XX3Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVMSUBASP : XX3Form<60, 81,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVMSUBMSP : XX3Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMADDADP : XX3Form<60, 225,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMADDMDP : XX3Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMADDASP : XX3Form<60, 193,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMADDMSP : XX3Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMSUBADP : XX3Form<60, 241,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMSUBMDP : XX3Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ def XVNMSUBASP : XX3Form<60, 209,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+ def XVNMSUBMSP : XX3Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+ // Division Instructions
+ def XSDIVDP : XX3Form<60, 56,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsdivdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ def XSSQRTDP : XX2Form<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xssqrtdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsqrt f64:$XB))]>;
+
+ def XSREDP : XX2Form<60, 90,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsredp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfre f64:$XB))]>;
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ def XSTDIVDP : XX3Form_1<60, 61,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xstdivdp $crD, $XA, $XB", IIC_VecFP, []>;
+ def XSTSQRTDP : XX2Form_1<60, 106,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xstsqrtdp $crD, $XB", IIC_VecFP, []>;
+
+ def XVDIVDP : XX3Form<60, 120,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ def XVDIVSP : XX3Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+
+ def XVSQRTDP : XX2Form<60, 203,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ def XVSQRTSP : XX2Form<60, 139,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_VecFP, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_VecFP, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_VecFP, []>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_VecFP, []>;
+
+ def XVREDP : XX2Form<60, 218,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvredp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
+ def XVRESP : XX2Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvresp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
+
+ def XVRSQRTEDP : XX2Form<60, 202,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
+ def XVRSQRTESP : XX2Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
+
+ // Compare Instructions
+ def XSCMPODP : XX3Form_1<60, 43,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xscmpodp $crD, $XA, $XB", IIC_VecFPCompare, []>;
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_VecFPCompare, []>;
+
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPEQSP : XX3Form_Rcr<60, 67,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGEDP : XX3Form_Rcr<60, 115,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGESP : XX3Form_Rcr<60, 83,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTDP : XX3Form_Rcr<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTSP : XX3Form_Rcr<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+
+ // Move Instructions
+ def XSABSDP : XX2Form<60, 345,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fabs f64:$XB))]>;
+ def XSNABSDP : XX2Form<60, 361,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ def XSNEGDP : XX2Form<60, 377,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsnegdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg f64:$XB))]>;
+ def XSCPSGNDP : XX3Form<60, 176,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
+
+ def XVABSDP : XX2Form<60, 473,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fabs v2f64:$XB))]>;
+
+ def XVABSSP : XX2Form<60, 409,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fabs v4f32:$XB))]>;
+
+ def XVCPSGNDP : XX3Form<60, 240,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
+ def XVCPSGNSP : XX3Form<60, 208,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
+
+ def XVNABSDP : XX2Form<60, 489,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
+ def XVNABSSP : XX2Form<60, 425,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
+
+ def XVNEGDP : XX2Form<60, 505,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg v2f64:$XB))]>;
+ def XVNEGSP : XX2Form<60, 441,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg v4f32:$XB))]>;
+
+ // Conversion Instructions
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXDS : XX2Form<60, 344,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXWS : XX2Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPUXDS : XX2Form<60, 328,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPUXWS : XX2Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP, []>;
+ def XSCVSPDP : XX2Form<60, 329,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvspdp $XT, $XB", IIC_VecFP, []>;
+ def XSCVSXDDP : XX2Form<60, 376,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvsxddp $XT, $XB", IIC_VecFP, []>;
+ def XSCVUXDDP : XX2Form<60, 360,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xscvuxddp $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXDS : XX2Form<60, 472,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXWS : XX2Form<60, 216,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXDS : XX2Form<60, 456,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXWS : XX2Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVSPDP : XX2Form<60, 457,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXDS : XX2Form<60, 408,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXWS : XX2Form<60, 152,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXDS : XX2Form<60, 392,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXWS : XX2Form<60, 136,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDDP : XX2Form<60, 504,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxddp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDSP : XX2Form<60, 440,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWSP : XX2Form<60, 184,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDDP : XX2Form<60, 488,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxddp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDSP : XX2Form<60, 424,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWDP : XX2Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+
+ // Rounding Instructions
+ def XSRDPI : XX2Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpi $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (frnd f64:$XB))]>;
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XSRDPIM : XX2Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpim $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ffloor f64:$XB))]>;
+ def XSRDPIP : XX2Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpip $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fceil f64:$XB))]>;
+ def XSRDPIZ : XX2Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xsrdpiz $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ftrunc f64:$XB))]>;
+
+ def XVRDPI : XX2Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpi $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRDPIM : XX2Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpim $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ def XVRDPIP : XX2Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpip $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ def XVRDPIZ : XX2Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpiz $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+
+ def XVRSPI : XX2Form<60, 137,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspi $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ def XVRSPIM : XX2Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspim $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ def XVRSPIP : XX2Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspip $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ def XVRSPIZ : XX2Form<60, 153,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspiz $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+
+ // Max/Min Instructions
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+} // Uses = [RM]
+
+ // Logical Instructions
+ def XXLAND : XX3Form<60, 130,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxland $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLANDC : XX3Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlandc $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLNOR : XX3Form<60, 162,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLOR : XX3Form<60, 146,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLXOR : XX3Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlxor $XT, $XA, $XB", IIC_VecGeneral, []>;
+
+ // Permutation Instructions
+ def XXMRGHW : XX3Form<60, 18,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
+ def XXMRGLW : XX3Form<60, 50,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
+
+ def XXPERMDI : XX3Form_2<60, 10,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ def XXSEL : XX4Form<60, 3,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+ "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
+
+ def XXSLDWI : XX3Form_2<60, 2,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+} // neverHasSideEffects
+} // AddedComplexity
+
+// Vector register moves: copying the sign of $XB onto $XB itself leaves the
+// value unchanged, so xvcpsgn{dp,sp} with both sources equal acts as a move.
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+// Doubleword permutation mnemonics, all expressed as xxpermdi with a fixed
+// $DM selector:
+//   xxspltd ..., 0 -> both sources $XB, DM = 0
+//   xxspltd ..., 1 -> both sources $XB, DM = 3
+//   xxmrghd        -> sources $XA/$XB,  DM = 0
+//   xxmrgld        -> sources $XA/$XB,  DM = 3
+//   xxswapd        -> both sources $XB, DM = 2
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+// A scalar f64 becomes a v2f64 by inserting it as the sub_64 subregister of
+// an otherwise undefined vector; no instruction is needed.
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), $A, sub_64)>;
+
+// Element 0 is read back as the sub_64 subregister. The vector is first
+// constrained to VSLRC because only those registers (the ones overlapping
+// the scalar floating-point registers) have a sub_64 subregister.
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (EXTRACT_SUBREG (v2f64 (COPY_TO_REGCLASS $S, VSLRC)), sub_64)>;
+// Element 1 is first moved into position with xxpermdi $S, $S, 3 (the same
+// operation the "xxspltd ..., 1" alias names) and then extracted as above.
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (EXTRACT_SUBREG (v2f64 (COPY_TO_REGCLASS (XXPERMDI $S, $S, 3),
+ VSLRC)), sub_64)>;
+
+// Additional fnmsub patterns: -a*c + b == -(a*c - b)
+//
+// The x{s,v}nmsuba* instructions compute -(XA * XB - XTi). Passing $B as the
+// tied addend (XTi) and $C, $A as the multiplicands therefore yields
+// -(C*A - B) = B - A*C, which is exactly an fma with one negated
+// multiplicand. Both placements of the fneg are covered for each type.
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+
+// Bitcasts from the integer vector types to v2f64 are free: the value is
+// only re-constrained to the VSX register class.
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+// The reverse direction re-constrains the v2f64 value to the VRRC vector
+// register class, which holds the integer vector types.
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+} // AddedComplexity
+} // HasVSX
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index a20b303d38..c68e922355 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -229,7 +229,11 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSHRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::VSRCRegClassID:
+ return 64 - DefaultSafety;
case PPC::CRRCRegClassID:
return 8 - DefaultSafety;
}
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index f1ecda198f..339d4e4d71 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -16,6 +16,8 @@ def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
}
@@ -52,6 +54,23 @@ class VR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+ // Same hardware encoding as the floating-point register it contains.
+ let HWEncoding = SubReg.HWEncoding;
+ // The FPR is exposed as the sub_64 subregister.
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+ // Low five bits come from the contained vector register; bit 5 is always
+ // set, so these registers occupy encodings 32-63 (named vs32-vs63).
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 1;
+ // The vector register is exposed as the sub_128 subregister.
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_128];
+}
+
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -86,6 +105,16 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// VSX registers
+// VSL0-VSL31 are printed as vs0-vs31; each wraps F<Index> and reuses that
+// register's DWARF number.
+foreach Index = 0-31 in {
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+// VSH0-VSH31 are printed as vs32-vs63; each wraps V<Index> and reuses that
+// register's DWARF number.
+foreach Index = 0-31 in {
+ def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+ DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
// The representation of r0 when treated as the constant 0.
def ZERO : GPR<0, "0">;
def ZERO8 : GP8<ZERO, "0">;
@@ -204,6 +233,20 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+// VSLRC: the 32 VSX registers that overlap the scalar floating-point
+// registers (VSL0-VSL13 first, then VSL31 down to VSL14).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
+ (add (sequence "VSL%u", 0, 13),
+ (sequence "VSL%u", 31, 14))>;
+// VSHRC: the 32 VSX registers that overlap the vector registers.
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
+ (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+ VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+ VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+ VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+ VSH22, VSH21, VSH20)>;
+// VSRC: all 64 VSX registers, with the VSLRC members allocated before the
+// VSHRC members.
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,f64,v2f64], 128,
+ (add VSLRC, VSHRC)>;
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 87b7c9f957..b07abe4461 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -88,6 +88,7 @@ void PPCSubtarget::initializeEnvironment() {
UseCRBits = false;
HasAltivec = false;
HasQPX = false;
+ HasVSX = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e9f8310189..87e012ece1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -178,6 +178,7 @@ public:
bool hasFPCVT() const { return HasFPCVT; }
bool hasAltivec() const { return HasAltivec; }
bool hasQPX() const { return HasQPX; }
+ bool hasVSX() const { return HasVSX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 36d2100ac5..2449abf2ca 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -160,6 +160,8 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
+ addPass(createPPCVSXCopyPass());
+
return false;
}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 984519c079..2491a9e539 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -139,7 +139,7 @@ void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
- return 32;
+ return ST->hasVSX() ? 64 : 32;
}
unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
@@ -208,6 +208,14 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
+ // Double-precision scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ }
+
// Estimated cost of a load-hit-store delay. This was obtained
// experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
new file mode 100644
index 0000000000..ba53b7968c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test1(double %a, double %b) {
+entry:
+ %v = fmul double %a, %b
+ ret double %v
+
+; CHECK-LABEL: @test1
+; CHECK: xsmuldp 1, 1, 2
+; CHECK: blr
+}
+
+define double @test2(double %a, double %b) {
+entry:
+ %v = fdiv double %a, %b
+ ret double %v
+
+; CHECK-LABEL: @test2
+; CHECK: xsdivdp 1, 1, 2
+; CHECK: blr
+}
+
+define double @test3(double %a, double %b) {
+entry:
+ %v = fadd double %a, %b
+ ret double %v
+
+; CHECK-LABEL: @test3
+; CHECK: xsadddp 1, 1, 2
+; CHECK: blr
+}
+
+define <2 x double> @test4(<2 x double> %a, <2 x double> %b) {
+entry:
+ %v = fadd <2 x double> %a, %b
+ ret <2 x double> %v
+
+; FIXME: Check that the ABI for the return value is correct here!
+
+; CHECK-LABEL: @test4
+; CHECK: xvadddp {{[0-9]+}}, 34, 35
+; CHECK: blr
+}
+
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt
new file mode 100644
index 0000000000..b5e2751225
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -0,0 +1,452 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+
+# CHECK: lxsdx 7, 5, 31
+0x7c 0xe5 0xfc 0x98
+
+# CHECK: lxvd2x 7, 5, 31
+0x7c 0xe5 0xfe 0x98
+
+# CHECK: lxvdsx 7, 5, 31
+0x7c 0xe5 0xfa 0x98
+
+# CHECK: lxvw4x 7, 5, 31
+0x7c 0xe5 0xfe 0x18
+
+# CHECK: stxsdx 8, 5, 31
+0x7d 0x05 0xfd 0x98
+
+# CHECK: stxvd2x 8, 5, 31
+0x7d 0x05 0xff 0x98
+
+# CHECK: stxvw4x 8, 5, 31
+0x7d 0x05 0xff 0x18
+
+# CHECK: xsabsdp 7, 27
+0xf0 0xe0 0xdd 0x64
+
+# CHECK: xsadddp 7, 63, 27
+0xf0 0xff 0xd9 0x04
+
+# CHECK: xscmpodp 6, 63, 27
+0xf3 0x1f 0xd9 0x5c
+
+# CHECK: xscmpudp 6, 63, 27
+0xf3 0x1f 0xd9 0x1c
+
+# CHECK: xscpsgndp 7, 63, 27
+0xf0 0xff 0xdd 0x84
+
+# CHECK: xscvdpsp 7, 27
+0xf0 0xe0 0xdc 0x24
+
+# CHECK: xscvdpsxds 7, 27
+0xf0 0xe0 0xdd 0x60
+
+# CHECK: xscvdpsxws 7, 27
+0xf0 0xe0 0xd9 0x60
+
+# CHECK: xscvdpuxds 7, 27
+0xf0 0xe0 0xdd 0x20
+
+# CHECK: xscvdpuxws 7, 27
+0xf0 0xe0 0xd9 0x20
+
+# CHECK: xscvspdp 7, 27
+0xf0 0xe0 0xdd 0x24
+
+# CHECK: xscvsxddp 7, 27
+0xf0 0xe0 0xdd 0xe0
+
+# CHECK: xscvuxddp 7, 27
+0xf0 0xe0 0xdd 0xa0
+
+# CHECK: xsdivdp 7, 63, 27
+0xf0 0xff 0xd9 0xc4
+
+# CHECK: xsmaddadp 7, 63, 27
+0xf0 0xff 0xd9 0x0c
+
+# CHECK: xsmaddmdp 7, 63, 27
+0xf0 0xff 0xd9 0x4c
+
+# CHECK: xsmaxdp 7, 63, 27
+0xf0 0xff 0xdd 0x04
+
+# CHECK: xsmindp 7, 63, 27
+0xf0 0xff 0xdd 0x44
+
+# CHECK: xsmsubadp 7, 63, 27
+0xf0 0xff 0xd9 0x8c
+
+# CHECK: xsmsubmdp 7, 63, 27
+0xf0 0xff 0xd9 0xcc
+
+# CHECK: xsmuldp 7, 63, 27
+0xf0 0xff 0xd9 0x84
+
+# CHECK: xsnabsdp 7, 27
+0xf0 0xe0 0xdd 0xa4
+
+# CHECK: xsnegdp 7, 27
+0xf0 0xe0 0xdd 0xe4
+
+# CHECK: xsnmaddadp 7, 63, 27
+0xf0 0xff 0xdd 0x0c
+
+# CHECK: xsnmaddmdp 7, 63, 27
+0xf0 0xff 0xdd 0x4c
+
+# CHECK: xsnmsubadp 7, 63, 27
+0xf0 0xff 0xdd 0x8c
+
+# CHECK: xsnmsubmdp 7, 63, 27
+0xf0 0xff 0xdd 0xcc
+
+# CHECK: xsrdpi 7, 27
+0xf0 0xe0 0xd9 0x24
+
+# CHECK: xsrdpic 7, 27
+0xf0 0xe0 0xd9 0xac
+
+# CHECK: xsrdpim 7, 27
+0xf0 0xe0 0xd9 0xe4
+
+# CHECK: xsrdpip 7, 27
+0xf0 0xe0 0xd9 0xa4
+
+# CHECK: xsrdpiz 7, 27
+0xf0 0xe0 0xd9 0x64
+
+# CHECK: xsredp 7, 27
+0xf0 0xe0 0xd9 0x68
+
+# CHECK: xsrsqrtedp 7, 27
+0xf0 0xe0 0xd9 0x28
+
+# CHECK: xssqrtdp 7, 27
+0xf0 0xe0 0xd9 0x2c
+
+# CHECK: xssubdp 7, 63, 27
+0xf0 0xff 0xd9 0x44
+
+# CHECK: xstdivdp 6, 63, 27
+0xf3 0x1f 0xd9 0xec
+
+# CHECK: xstsqrtdp 6, 27
+0xf3 0x00 0xd9 0xa8
+
+# CHECK: xvabsdp 7, 27
+0xf0 0xe0 0xdf 0x64
+
+# CHECK: xvabssp 7, 27
+0xf0 0xe0 0xde 0x64
+
+# CHECK: xvadddp 7, 63, 27
+0xf0 0xff 0xdb 0x04
+
+# CHECK: xvaddsp 7, 63, 27
+0xf0 0xff 0xda 0x04
+
+# CHECK: xvcmpeqdp 7, 63, 27
+0xf0 0xff 0xdb 0x1c
+
+# CHECK: xvcmpeqdp. 7, 63, 27
+0xf0 0xff 0xdf 0x1c
+
+# CHECK: xvcmpeqsp 7, 63, 27
+0xf0 0xff 0xda 0x1c
+
+# CHECK: xvcmpeqsp. 7, 63, 27
+0xf0 0xff 0xde 0x1c
+
+# CHECK: xvcmpgedp 7, 63, 27
+0xf0 0xff 0xdb 0x9c
+
+# CHECK: xvcmpgedp. 7, 63, 27
+0xf0 0xff 0xdf 0x9c
+
+# CHECK: xvcmpgesp 7, 63, 27
+0xf0 0xff 0xda 0x9c
+
+# CHECK: xvcmpgesp. 7, 63, 27
+0xf0 0xff 0xde 0x9c
+
+# CHECK: xvcmpgtdp 7, 63, 27
+0xf0 0xff 0xdb 0x5c
+
+# CHECK: xvcmpgtdp. 7, 63, 27
+0xf0 0xff 0xdf 0x5c
+
+# CHECK: xvcmpgtsp 7, 63, 27
+0xf0 0xff 0xda 0x5c
+
+# CHECK: xvcmpgtsp. 7, 63, 27
+0xf0 0xff 0xde 0x5c
+
+# CHECK: xvcpsgndp 7, 63, 27
+0xf0 0xff 0xdf 0x84
+
+# CHECK: xvcpsgnsp 7, 63, 27
+0xf0 0xff 0xde 0x84
+
+# CHECK: xvcvdpsp 7, 27
+0xf0 0xe0 0xde 0x24
+
+# CHECK: xvcvdpsxds 7, 27
+0xf0 0xe0 0xdf 0x60
+
+# CHECK: xvcvdpsxws 7, 27
+0xf0 0xe0 0xdb 0x60
+
+# CHECK: xvcvdpuxds 7, 27
+0xf0 0xe0 0xdf 0x20
+
+# CHECK: xvcvdpuxws 7, 27
+0xf0 0xe0 0xdb 0x20
+
+# CHECK: xvcvspdp 7, 27
+0xf0 0xe0 0xdf 0x24
+
+# CHECK: xvcvspsxds 7, 27
+0xf0 0xe0 0xde 0x60
+
+# CHECK: xvcvspsxws 7, 27
+0xf0 0xe0 0xda 0x60
+
+# CHECK: xvcvspuxds 7, 27
+0xf0 0xe0 0xde 0x20
+
+# CHECK: xvcvspuxws 7, 27
+0xf0 0xe0 0xda 0x20
+
+# CHECK: xvcvsxddp 7, 27
+0xf0 0xe0 0xdf 0xe0
+
+# CHECK: xvcvsxdsp 7, 27
+0xf0 0xe0 0xde 0xe0
+
+# CHECK: xvcvsxwdp 7, 27
+0xf0 0xe0 0xdb 0xe0
+
+# CHECK: xvcvsxwsp 7, 27
+0xf0 0xe0 0xda 0xe0
+
+# CHECK: xvcvuxddp 7, 27
+0xf0 0xe0 0xdf 0xa0
+
+# CHECK: xvcvuxdsp 7, 27
+0xf0 0xe0 0xde 0xa0
+
+# CHECK: xvcvuxwdp 7, 27
+0xf0 0xe0 0xdb 0xa0
+
+# CHECK: xvcvuxwsp 7, 27
+0xf0 0xe0 0xda 0xa0
+
+# CHECK: xvdivdp 7, 63, 27
+0xf0 0xff 0xdb 0xc4
+
+# CHECK: xvdivsp 7, 63, 27
+0xf0 0xff 0xda 0xc4
+
+# CHECK: xvmaddadp 7, 63, 27
+0xf0 0xff 0xdb 0x0c
+
+# CHECK: xvmaddasp 7, 63, 27
+0xf0 0xff 0xda 0x0c
+
+# CHECK: xvmaddmdp 7, 63, 27
+0xf0 0xff 0xdb 0x4c
+
+# CHECK: xvmaddmsp 7, 63, 27
+0xf0 0xff 0xda 0x4c
+
+# CHECK: xvmaxdp 7, 63, 27
+0xf0 0xff 0xdf 0x04
+
+# CHECK: xvmaxsp 7, 63, 27
+0xf0 0xff 0xde 0x04
+
+# CHECK: xvmindp 7, 63, 27
+0xf0 0xff 0xdf 0x44
+
+# CHECK: xvminsp 7, 63, 27
+0xf0 0xff 0xde 0x44
+
+# FIXME: decode as xvmovdp 7, 63
+# CHECK: xvcpsgndp 7, 63, 63
+0xf0 0xff 0xff 0x86
+
+# FIXME: decode as xvmovsp 7, 63
+# CHECK: xvcpsgnsp 7, 63, 63
+0xf0 0xff 0xfe 0x86
+
+# CHECK: xvmsubadp 7, 63, 27
+0xf0 0xff 0xdb 0x8c
+
+# CHECK: xvmsubasp 7, 63, 27
+0xf0 0xff 0xda 0x8c
+
+# CHECK: xvmsubmdp 7, 63, 27
+0xf0 0xff 0xdb 0xcc
+
+# CHECK: xvmsubmsp 7, 63, 27
+0xf0 0xff 0xda 0xcc
+
+# CHECK: xvmuldp 7, 63, 27
+0xf0 0xff 0xdb 0x84
+
+# CHECK: xvmulsp 7, 63, 27
+0xf0 0xff 0xda 0x84
+
+# CHECK: xvnabsdp 7, 27
+0xf0 0xe0 0xdf 0xa4
+
+# CHECK: xvnabssp 7, 27
+0xf0 0xe0 0xde 0xa4
+
+# CHECK: xvnegdp 7, 27
+0xf0 0xe0 0xdf 0xe4
+
+# CHECK: xvnegsp 7, 27
+0xf0 0xe0 0xde 0xe4
+
+# CHECK: xvnmaddadp 7, 63, 27
+0xf0 0xff 0xdf 0x0c
+
+# CHECK: xvnmaddasp 7, 63, 27
+0xf0 0xff 0xde 0x0c
+
+# CHECK: xvnmaddmdp 7, 63, 27
+0xf0 0xff 0xdf 0x4c
+
+# CHECK: xvnmaddmsp 7, 63, 27
+0xf0 0xff 0xde 0x4c
+
+# CHECK: xvnmsubadp 7, 63, 27
+0xf0 0xff 0xdf 0x8c
+
+# CHECK: xvnmsubasp 7, 63, 27
+0xf0 0xff 0xde 0x8c
+
+# CHECK: xvnmsubmdp 7, 63, 27
+0xf0 0xff 0xdf 0xcc
+
+# CHECK: xvnmsubmsp 7, 63, 27
+0xf0 0xff 0xde 0xcc
+
+# CHECK: xvrdpi 7, 27
+0xf0 0xe0 0xdb 0x24
+
+# CHECK: xvrdpic 7, 27
+0xf0 0xe0 0xdb 0xac
+
+# CHECK: xvrdpim 7, 27
+0xf0 0xe0 0xdb 0xe4
+
+# CHECK: xvrdpip 7, 27
+0xf0 0xe0 0xdb 0xa4
+
+# CHECK: xvrdpiz 7, 27
+0xf0 0xe0 0xdb 0x64
+
+# CHECK: xvredp 7, 27
+0xf0 0xe0 0xdb 0x68
+
+# CHECK: xvresp 7, 27
+0xf0 0xe0 0xda 0x68
+
+# CHECK: xvrspi 7, 27
+0xf0 0xe0 0xda 0x24
+
+# CHECK: xvrspic 7, 27
+0xf0 0xe0 0xda 0xac
+
+# CHECK: xvrspim 7, 27
+0xf0 0xe0 0xda 0xe4
+
+# CHECK: xvrspip 7, 27
+0xf0 0xe0 0xda 0xa4
+
+# CHECK: xvrspiz 7, 27
+0xf0 0xe0 0xda 0x64
+
+# CHECK: xvrsqrtedp 7, 27
+0xf0 0xe0 0xdb 0x28
+
+# CHECK: xvrsqrtesp 7, 27
+0xf0 0xe0 0xda 0x28
+
+# CHECK: xvsqrtdp 7, 27
+0xf0 0xe0 0xdb 0x2c
+
+# CHECK: xvsqrtsp 7, 27
+0xf0 0xe0 0xda 0x2c
+
+# CHECK: xvsubdp 7, 63, 27
+0xf0 0xff 0xdb 0x44
+
+# CHECK: xvsubsp 7, 63, 27
+0xf0 0xff 0xda 0x44
+
+# CHECK: xvtdivdp 6, 63, 27
+0xf3 0x1f 0xdb 0xec
+
+# CHECK: xvtdivsp 6, 63, 27
+0xf3 0x1f 0xda 0xec
+
+# CHECK: xvtsqrtdp 6, 27
+0xf3 0x00 0xdb 0xa8
+
+# CHECK: xvtsqrtsp 6, 27
+0xf3 0x00 0xda 0xa8
+
+# CHECK: xxland 7, 63, 27
+0xf0 0xff 0xdc 0x14
+
+# CHECK: xxlandc 7, 63, 27
+0xf0 0xff 0xdc 0x54
+
+# CHECK: xxlnor 7, 63, 27
+0xf0 0xff 0xdd 0x14
+
+# CHECK: xxlor 7, 63, 27
+0xf0 0xff 0xdc 0x94
+
+# CHECK: xxlxor 7, 63, 27
+0xf0 0xff 0xdc 0xd4
+
+# FIXME: decode as xxmrghd 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 0
+0xf0 0xff 0xd8 0x54
+
+# CHECK: xxmrghw 7, 63, 27
+0xf0 0xff 0xd8 0x94
+
+# FIXME: decode as xxmrgld 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 3
+0xf0 0xff 0xdb 0x54
+
+# CHECK: xxmrglw 7, 63, 27
+0xf0 0xff 0xd9 0x94
+
+# CHECK: xxpermdi 7, 63, 27, 2
+0xf0 0xff 0xda 0x54
+
+# CHECK: xxsel 7, 63, 27, 14
+0xf0 0xff 0xdb 0xb4
+
+# CHECK: xxsldwi 7, 63, 27, 1
+0xf0 0xff 0xd9 0x14
+
+# FIXME: decode as xxspltd 7, 63, 1
+# CHECK: xxpermdi 7, 63, 63, 3
+0xf0 0xff 0xfb 0x56
+
+# CHECK: xxspltw 7, 27, 3
+0xf0 0xe3 0xda 0x90
+
+# FIXME: decode as xxswapd 7, 63
+# CHECK: xxpermdi 7, 63, 63, 2
+0xf0 0xff 0xfa 0x56
+
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s
new file mode 100644
index 0000000000..394d1881ae
--- /dev/null
+++ b/test/MC/PowerPC/vsx.s
@@ -0,0 +1,298 @@
+# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | FileCheck %s
+
+# CHECK: lxsdx 7, 5, 31 # encoding: [0x7c,0xe5,0xfc,0x98]
+ lxsdx 7, 5, 31
+# CHECK: lxvd2x 7, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x98]
+ lxvd2x 7, 5, 31
+# CHECK: lxvdsx 7, 5, 31 # encoding: [0x7c,0xe5,0xfa,0x98]
+ lxvdsx 7, 5, 31
+# CHECK: lxvw4x 7, 5, 31 # encoding: [0x7c,0xe5,0xfe,0x18]
+ lxvw4x 7, 5, 31
+# CHECK: stxsdx 8, 5, 31 # encoding: [0x7d,0x05,0xfd,0x98]
+ stxsdx 8, 5, 31
+# CHECK: stxvd2x 8, 5, 31 # encoding: [0x7d,0x05,0xff,0x98]
+ stxvd2x 8, 5, 31
+# CHECK: stxvw4x 8, 5, 31 # encoding: [0x7d,0x05,0xff,0x18]
+ stxvw4x 8, 5, 31
+# CHECK: xsabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x64]
+ xsabsdp 7, 27
+# CHECK: xsadddp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x04]
+ xsadddp 7, 63, 27
+# CHECK: xscmpodp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0x5c]
+ xscmpodp 6, 63, 27
+# CHECK: xscmpudp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0x1c]
+ xscmpudp 6, 63, 27
+# CHECK: xscpsgndp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x84]
+ xscpsgndp 7, 63, 27
+# CHECK: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24]
+ xscvdpsp 7, 27
+# CHECK: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60]
+ xscvdpsxds 7, 27
+# CHECK: xscvdpsxws 7, 27 # encoding: [0xf0,0xe0,0xd9,0x60]
+ xscvdpsxws 7, 27
+# CHECK: xscvdpuxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x20]
+ xscvdpuxds 7, 27
+# CHECK: xscvdpuxws 7, 27 # encoding: [0xf0,0xe0,0xd9,0x20]
+ xscvdpuxws 7, 27
+# CHECK: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24]
+ xscvspdp 7, 27
+# CHECK: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0]
+ xscvsxddp 7, 27
+# CHECK: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
+ xscvuxddp 7, 27
+# CHECK: xsdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xc4]
+ xsdivdp 7, 63, 27
+# CHECK: xsmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x0c]
+ xsmaddadp 7, 63, 27
+# CHECK: xsmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x4c]
+ xsmaddmdp 7, 63, 27
+# CHECK: xsmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x04]
+ xsmaxdp 7, 63, 27
+# CHECK: xsmindp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x44]
+ xsmindp 7, 63, 27
+# CHECK: xsmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x8c]
+ xsmsubadp 7, 63, 27
+# CHECK: xsmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0xcc]
+ xsmsubmdp 7, 63, 27
+# CHECK: xsmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x84]
+ xsmuldp 7, 63, 27
+# CHECK: xsnabsdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa4]
+ xsnabsdp 7, 27
+# CHECK: xsnegdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe4]
+ xsnegdp 7, 27
+# CHECK: xsnmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x0c]
+ xsnmaddadp 7, 63, 27
+# CHECK: xsnmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x4c]
+ xsnmaddmdp 7, 63, 27
+# CHECK: xsnmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x8c]
+ xsnmsubadp 7, 63, 27
+# CHECK: xsnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0xcc]
+ xsnmsubmdp 7, 63, 27
+# CHECK: xsrdpi 7, 27 # encoding: [0xf0,0xe0,0xd9,0x24]
+ xsrdpi 7, 27
+# CHECK: xsrdpic 7, 27 # encoding: [0xf0,0xe0,0xd9,0xac]
+ xsrdpic 7, 27
+# CHECK: xsrdpim 7, 27 # encoding: [0xf0,0xe0,0xd9,0xe4]
+ xsrdpim 7, 27
+# CHECK: xsrdpip 7, 27 # encoding: [0xf0,0xe0,0xd9,0xa4]
+ xsrdpip 7, 27
+# CHECK: xsrdpiz 7, 27 # encoding: [0xf0,0xe0,0xd9,0x64]
+ xsrdpiz 7, 27
+# CHECK: xsredp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x68]
+ xsredp 7, 27
+# CHECK: xsrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x28]
+ xsrsqrtedp 7, 27
+# CHECK: xssqrtdp 7, 27 # encoding: [0xf0,0xe0,0xd9,0x2c]
+ xssqrtdp 7, 27
+# CHECK: xssubdp 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x44]
+ xssubdp 7, 63, 27
+# CHECK: xstdivdp 6, 63, 27 # encoding: [0xf3,0x1f,0xd9,0xec]
+ xstdivdp 6, 63, 27
+# CHECK: xstsqrtdp 6, 27 # encoding: [0xf3,0x00,0xd9,0xa8]
+ xstsqrtdp 6, 27
+# CHECK: xvabsdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0x64]
+ xvabsdp 7, 27
+# CHECK: xvabssp 7, 27 # encoding: [0xf0,0xe0,0xde,0x64]
+ xvabssp 7, 27
+# CHECK: xvadddp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x04]
+ xvadddp 7, 63, 27
+# CHECK: xvaddsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x04]
+ xvaddsp 7, 63, 27
+# CHECK: xvcmpeqdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x1c]
+ xvcmpeqdp 7, 63, 27
+# CHECK: xvcmpeqdp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x1c]
+ xvcmpeqdp. 7, 63, 27
+# CHECK: xvcmpeqsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x1c]
+ xvcmpeqsp 7, 63, 27
+# CHECK: xvcmpeqsp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x1c]
+ xvcmpeqsp. 7, 63, 27
+# CHECK: xvcmpgedp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x9c]
+ xvcmpgedp 7, 63, 27
+# CHECK: xvcmpgedp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x9c]
+ xvcmpgedp. 7, 63, 27
+# CHECK: xvcmpgesp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x9c]
+ xvcmpgesp 7, 63, 27
+# CHECK: xvcmpgesp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x9c]
+ xvcmpgesp. 7, 63, 27
+# CHECK: xvcmpgtdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x5c]
+ xvcmpgtdp 7, 63, 27
+# CHECK: xvcmpgtdp. 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x5c]
+ xvcmpgtdp. 7, 63, 27
+# CHECK: xvcmpgtsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x5c]
+ xvcmpgtsp 7, 63, 27
+# CHECK: xvcmpgtsp. 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x5c]
+ xvcmpgtsp. 7, 63, 27
+# CHECK: xvcpsgndp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x84]
+ xvcpsgndp 7, 63, 27
+# CHECK: xvcpsgnsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x84]
+ xvcpsgnsp 7, 63, 27
+# CHECK: xvcvdpsp 7, 27 # encoding: [0xf0,0xe0,0xde,0x24]
+ xvcvdpsp 7, 27
+# CHECK: xvcvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdf,0x60]
+ xvcvdpsxds 7, 27
+# CHECK: xvcvdpsxws 7, 27 # encoding: [0xf0,0xe0,0xdb,0x60]
+ xvcvdpsxws 7, 27
+# CHECK: xvcvdpuxds 7, 27 # encoding: [0xf0,0xe0,0xdf,0x20]
+ xvcvdpuxds 7, 27
+# CHECK: xvcvdpuxws 7, 27 # encoding: [0xf0,0xe0,0xdb,0x20]
+ xvcvdpuxws 7, 27
+# CHECK: xvcvspdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0x24]
+ xvcvspdp 7, 27
+# CHECK: xvcvspsxds 7, 27 # encoding: [0xf0,0xe0,0xde,0x60]
+ xvcvspsxds 7, 27
+# CHECK: xvcvspsxws 7, 27 # encoding: [0xf0,0xe0,0xda,0x60]
+ xvcvspsxws 7, 27
+# CHECK: xvcvspuxds 7, 27 # encoding: [0xf0,0xe0,0xde,0x20]
+ xvcvspuxds 7, 27
+# CHECK: xvcvspuxws 7, 27 # encoding: [0xf0,0xe0,0xda,0x20]
+ xvcvspuxws 7, 27
+# CHECK: xvcvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xe0]
+ xvcvsxddp 7, 27
+# CHECK: xvcvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xe0]
+ xvcvsxdsp 7, 27
+# CHECK: xvcvsxwdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0xe0]
+ xvcvsxwdp 7, 27
+# CHECK: xvcvsxwsp 7, 27 # encoding: [0xf0,0xe0,0xda,0xe0]
+ xvcvsxwsp 7, 27
+# CHECK: xvcvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xa0]
+ xvcvuxddp 7, 27
+# CHECK: xvcvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xa0]
+ xvcvuxdsp 7, 27
+# CHECK: xvcvuxwdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0xa0]
+ xvcvuxwdp 7, 27
+# CHECK: xvcvuxwsp 7, 27 # encoding: [0xf0,0xe0,0xda,0xa0]
+ xvcvuxwsp 7, 27
+# CHECK: xvdivdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0xc4]
+ xvdivdp 7, 63, 27
+# CHECK: xvdivsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0xc4]
+ xvdivsp 7, 63, 27
+# CHECK: xvmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x0c]
+ xvmaddadp 7, 63, 27
+# CHECK: xvmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x0c]
+ xvmaddasp 7, 63, 27
+# CHECK: xvmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x4c]
+ xvmaddmdp 7, 63, 27
+# CHECK: xvmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x4c]
+ xvmaddmsp 7, 63, 27
+# CHECK: xvmaxdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x04]
+ xvmaxdp 7, 63, 27
+# CHECK: xvmaxsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x04]
+ xvmaxsp 7, 63, 27
+# CHECK: xvmindp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x44]
+ xvmindp 7, 63, 27
+# CHECK: xvminsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x44]
+ xvminsp 7, 63, 27
+# CHECK: xvcpsgndp 7, 63, 63 # encoding: [0xf0,0xff,0xff,0x86]
+ xvmovdp 7, 63
+# CHECK: xvcpsgnsp 7, 63, 63 # encoding: [0xf0,0xff,0xfe,0x86]
+ xvmovsp 7, 63
+# CHECK: xvmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x8c]
+ xvmsubadp 7, 63, 27
+# CHECK: xvmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x8c]
+ xvmsubasp 7, 63, 27
+# CHECK: xvmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0xcc]
+ xvmsubmdp 7, 63, 27
+# CHECK: xvmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0xcc]
+ xvmsubmsp 7, 63, 27
+# CHECK: xvmuldp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x84]
+ xvmuldp 7, 63, 27
+# CHECK: xvmulsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x84]
+ xvmulsp 7, 63, 27
+# CHECK: xvnabsdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xa4]
+ xvnabsdp 7, 27
+# CHECK: xvnabssp 7, 27 # encoding: [0xf0,0xe0,0xde,0xa4]
+ xvnabssp 7, 27
+# CHECK: xvnegdp 7, 27 # encoding: [0xf0,0xe0,0xdf,0xe4]
+ xvnegdp 7, 27
+# CHECK: xvnegsp 7, 27 # encoding: [0xf0,0xe0,0xde,0xe4]
+ xvnegsp 7, 27
+# CHECK: xvnmaddadp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x0c]
+ xvnmaddadp 7, 63, 27
+# CHECK: xvnmaddasp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x0c]
+ xvnmaddasp 7, 63, 27
+# CHECK: xvnmaddmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x4c]
+ xvnmaddmdp 7, 63, 27
+# CHECK: xvnmaddmsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x4c]
+ xvnmaddmsp 7, 63, 27
+# CHECK: xvnmsubadp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0x8c]
+ xvnmsubadp 7, 63, 27
+# CHECK: xvnmsubasp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0x8c]
+ xvnmsubasp 7, 63, 27
+# CHECK: xvnmsubmdp 7, 63, 27 # encoding: [0xf0,0xff,0xdf,0xcc]
+ xvnmsubmdp 7, 63, 27
+# CHECK: xvnmsubmsp 7, 63, 27 # encoding: [0xf0,0xff,0xde,0xcc]
+ xvnmsubmsp 7, 63, 27
+# CHECK: xvrdpi 7, 27 # encoding: [0xf0,0xe0,0xdb,0x24]
+ xvrdpi 7, 27
+# CHECK: xvrdpic 7, 27 # encoding: [0xf0,0xe0,0xdb,0xac]
+ xvrdpic 7, 27
+# CHECK: xvrdpim 7, 27 # encoding: [0xf0,0xe0,0xdb,0xe4]
+ xvrdpim 7, 27
+# CHECK: xvrdpip 7, 27 # encoding: [0xf0,0xe0,0xdb,0xa4]
+ xvrdpip 7, 27
+# CHECK: xvrdpiz 7, 27 # encoding: [0xf0,0xe0,0xdb,0x64]
+ xvrdpiz 7, 27
+# CHECK: xvredp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x68]
+ xvredp 7, 27
+# CHECK: xvresp 7, 27 # encoding: [0xf0,0xe0,0xda,0x68]
+ xvresp 7, 27
+# CHECK: xvrspi 7, 27 # encoding: [0xf0,0xe0,0xda,0x24]
+ xvrspi 7, 27
+# CHECK: xvrspic 7, 27 # encoding: [0xf0,0xe0,0xda,0xac]
+ xvrspic 7, 27
+# CHECK: xvrspim 7, 27 # encoding: [0xf0,0xe0,0xda,0xe4]
+ xvrspim 7, 27
+# CHECK: xvrspip 7, 27 # encoding: [0xf0,0xe0,0xda,0xa4]
+ xvrspip 7, 27
+# CHECK: xvrspiz 7, 27 # encoding: [0xf0,0xe0,0xda,0x64]
+ xvrspiz 7, 27
+# CHECK: xvrsqrtedp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x28]
+ xvrsqrtedp 7, 27
+# CHECK: xvrsqrtesp 7, 27 # encoding: [0xf0,0xe0,0xda,0x28]
+ xvrsqrtesp 7, 27
+# CHECK: xvsqrtdp 7, 27 # encoding: [0xf0,0xe0,0xdb,0x2c]
+ xvsqrtdp 7, 27
+# CHECK: xvsqrtsp 7, 27 # encoding: [0xf0,0xe0,0xda,0x2c]
+ xvsqrtsp 7, 27
+# CHECK: xvsubdp 7, 63, 27 # encoding: [0xf0,0xff,0xdb,0x44]
+ xvsubdp 7, 63, 27
+# CHECK: xvsubsp 7, 63, 27 # encoding: [0xf0,0xff,0xda,0x44]
+ xvsubsp 7, 63, 27
+# CHECK: xvtdivdp 6, 63, 27 # encoding: [0xf3,0x1f,0xdb,0xec]
+ xvtdivdp 6, 63, 27
+# CHECK: xvtdivsp 6, 63, 27 # encoding: [0xf3,0x1f,0xda,0xec]
+ xvtdivsp 6, 63, 27
+# CHECK: xvtsqrtdp 6, 27 # encoding: [0xf3,0x00,0xdb,0xa8]
+ xvtsqrtdp 6, 27
+# CHECK: xvtsqrtsp 6, 27 # encoding: [0xf3,0x00,0xda,0xa8]
+ xvtsqrtsp 6, 27
+# CHECK: xxland 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x14]
+ xxland 7, 63, 27
+# CHECK: xxlandc 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x54]
+ xxlandc 7, 63, 27
+# CHECK: xxlnor 7, 63, 27 # encoding: [0xf0,0xff,0xdd,0x14]
+ xxlnor 7, 63, 27
+# CHECK: xxlor 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0x94]
+ xxlor 7, 63, 27
+# CHECK: xxlxor 7, 63, 27 # encoding: [0xf0,0xff,0xdc,0xd4]
+ xxlxor 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 0 # encoding: [0xf0,0xff,0xd8,0x54]
+ xxmrghd 7, 63, 27
+# CHECK: xxmrghw 7, 63, 27 # encoding: [0xf0,0xff,0xd8,0x94]
+ xxmrghw 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 3 # encoding: [0xf0,0xff,0xdb,0x54]
+ xxmrgld 7, 63, 27
+# CHECK: xxmrglw 7, 63, 27 # encoding: [0xf0,0xff,0xd9,0x94]
+ xxmrglw 7, 63, 27
+# CHECK: xxpermdi 7, 63, 27, 2 # encoding: [0xf0,0xff,0xda,0x54]
+ xxpermdi 7, 63, 27, 2
+# CHECK: xxsel 7, 63, 27, 14 # encoding: [0xf0,0xff,0xdb,0xb4]
+ xxsel 7, 63, 27, 14
+# CHECK: xxsldwi 7, 63, 27, 1 # encoding: [0xf0,0xff,0xd9,0x14]
+ xxsldwi 7, 63, 27, 1
+# CHECK: xxpermdi 7, 63, 63, 3 # encoding: [0xf0,0xff,0xfb,0x56]
+ xxspltd 7, 63, 1
+# CHECK: xxspltw 7, 27, 3 # encoding: [0xf0,0xe3,0xda,0x90]
+ xxspltw 7, 27, 3
+# CHECK: xxpermdi 7, 63, 63, 2 # encoding: [0xf0,0xff,0xfa,0x56]
+ xxswapd 7, 63