author    Scott Michel <scottm@aero.org>    2008-12-27 04:51:36 +0000
committer Scott Michel <scottm@aero.org>    2008-12-27 04:51:36 +0000
commit    f0569be4a948c7ed816bfa2b8774a5a18458ee23 (patch)
tree      541905fcbd5e64ef95599b1ca3c4182adc972688 /lib
parent    1323e8bf6a7bec163c5d43006f5b3b78042cef61 (diff)
- Remove Tilmann's custom truncate lowering: it completely hosed over
  DAGcombine's ability to find reasons to remove truncates when they were
  not needed. Consequently, the CellSPU backend would produce correct, but
  _really slow and horrible_, code. Replaced with instruction sequences
  that do the equivalent truncation in SPUInstrInfo.td.

- Re-examine how unaligned loads and stores work. Generated unaligned load
  code has been tested on the CellSPU hardware; see the i32operations.c
  and i64operations.c in CodeGen/CellSPU/useful-harnesses. (While they may
  be toy test code, it does prove that some real world code does compile
  correctly.)

- Fix truncating stores in bug 3193 (note: unpack_df.ll will still make
  llc fault because i64 ult is not yet implemented.)

- Added i64 eq and neq for setcc and select/setcc; started new instruction
  information file for them in SPU64InstrInfo.td. Additional i64 operations
  should be added to this file and not to SPUInstrInfo.td.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
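
The unaligned-load rework re-emits each scalar load as a 16-byte v16i8
load followed by a rotate into the type's preferred slot (see LowerLOAD
in SPUISelLowering.cpp below). A minimal C++ sketch of that strategy,
assuming the value does not straddle a 16-byte line; the function name
and the plain byte rotation are illustrative, not the backend's API:

    #include <cstdint>
    #include <cstring>

    uint32_t load_unaligned_i32(const uint8_t *addr) {
      // lqd can only load an aligned 16-byte line:
      const uint8_t *line = reinterpret_cast<const uint8_t *>(
          reinterpret_cast<uintptr_t>(addr) & ~uintptr_t(0xf));
      uint8_t chunk[16];
      std::memcpy(chunk, line, 16);
      // rotqby: rotate bytes left so the value lands in its preferred
      // slot (bytes 0-3 for an i32 on the big-endian SPU):
      unsigned rotamt = reinterpret_cast<uintptr_t>(addr) & 0xf;
      uint8_t rotated[16];
      for (unsigned i = 0; i != 16; ++i)
        rotated[i] = chunk[(i + rotamt) & 0xf];
      uint32_t result;
      std::memcpy(&result, rotated, 4); // extract the preferred slot
      return result;
    }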
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp    8
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td              77
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp            66
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp           797
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h               6
-rw-r--r--  lib/Target/CellSPU/SPUInstrFormats.td              5
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp               55
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td               726
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td                    14
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td                 10
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp             5
-rw-r--r--  lib/Target/CellSPU/SPUTargetAsmInfo.cpp            7
12 files changed, 1148 insertions, 628 deletions
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 589a260005..98aa084d50 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -117,7 +117,7 @@ namespace {
}
void
- printMemRegImmS7(const MachineInstr *MI, unsigned OpNo)
+ printShufAddr(const MachineInstr *MI, unsigned OpNo)
{
char value = MI->getOperand(OpNo).getImm();
O << (int) value;
@@ -183,16 +183,16 @@ namespace {
}
void
- printMemRegImmS10(const MachineInstr *MI, unsigned OpNo)
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo)
{
const MachineOperand &MO = MI->getOperand(OpNo);
assert(MO.isImm() &&
- "printMemRegImmS10 first operand is not immedate");
+ "printDFormAddr first operand is not immedate");
int64_t value = int64_t(MI->getOperand(OpNo).getImm());
int16_t value16 = int16_t(value);
assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
&& "Invalid dform s10 offset argument");
- O << value16 << "(";
+ O << (value16 & ~0xf) << "(";
printOperand(MI, OpNo+1);
O << ")";
}
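
For context on the new 'value16 & ~0xf' masking: d-form (lqd/stqd)
offsets encode a signed 10-bit field scaled by 16 bytes, which is why
the assert bounds the offset to +/-2^13 and why only the line-aligned
part of the offset is printable. A sketch of that invariant, as an
illustrative helper that is not part of the patch:

    #include <cassert>
    #include <cstdint>

    int printableDFormOffset(int16_t value16) {
      // s10 field scaled by 16 => representable range is +/-2^13:
      assert(value16 >= -(1 << 13) && value16 <= (1 << 13) - 1);
      return value16 & ~0xf; // drop the within-line byte offset
    }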
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 0000000000..6d679bac72
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,77 @@
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector vice scalar differ by a
+// constant.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask
+// (TODO)
+//
+// M00$E Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
+
+class I64SELECTNegCond<PatFrag cond, CodeFrag cmpare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 cmpare.Fragment))>;
+
+class I64SETCCNegCond<PatFrag cond, CodeFrag cmpare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 cmpare.Fragment, -1)>;
+
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB))),
+ 0x0000000c)>;
+
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
+ 0x0000000f)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+ def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64EQv2i64.Fragment>;
+
+def I64Select:
+ Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
+ (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+
+def : I64SETCCNegCond<setne, I64EQr64>;
+
+def : I64SELECTNegCond<setne, I64EQr64>; \ No newline at end of file
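
The two NegCond patterns above rely on the ZeroOrNegativeOneBooleanContent
setting introduced in SPUISelLowering.cpp below: setcc produces 0 or ~0,
so setne falls out of the seteq fragment with a single XORI against -1,
and a select mask can be fanned out from the i32 result with FSM. A tiny
C++ sketch of that identity (illustrative, not TableGen):

    #include <cstdint>

    // seteq yields an all-ones/all-zeros i32 boolean...
    int32_t seteq_i64(uint64_t a, uint64_t b) { return a == b ? -1 : 0; }

    // ...so I64SETCCNegCond<setne, I64EQr64> is one XORI with -1:
    int32_t setne_i64(uint64_t a, uint64_t b) { return seteq_i64(a, b) ^ -1; }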
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9ac0e2e256..f51aba2fda 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -165,24 +165,23 @@ namespace {
MVT VT;
unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined)
bool ldresult_imm; /// LDRESULT instruction requires immediate?
- int prefslot_byte; /// Byte offset of the "preferred" slot
+ unsigned lrinst; /// LR instruction
};
const valtype_map_s valtype_map[] = {
- { MVT::i1, 0, false, 3 },
- { MVT::i8, SPU::ORBIr8, true, 3 },
- { MVT::i16, SPU::ORHIr16, true, 2 },
- { MVT::i32, SPU::ORIr32, true, 0 },
- { MVT::i64, SPU::ORr64, false, 0 },
- { MVT::f32, SPU::ORf32, false, 0 },
- { MVT::f64, SPU::ORf64, false, 0 },
+ { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
+ { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
+ { MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
+ { MVT::i64, SPU::ORr64, false, SPU::LRr64 },
+ { MVT::f32, SPU::ORf32, false, SPU::LRf32 },
+ { MVT::f64, SPU::ORf64, false, SPU::LRf64 },
// vector types... (sigh!)
- { MVT::v16i8, 0, false, 0 },
- { MVT::v8i16, 0, false, 0 },
- { MVT::v4i32, 0, false, 0 },
- { MVT::v2i64, 0, false, 0 },
- { MVT::v4f32, 0, false, 0 },
- { MVT::v2f64, 0, false, 0 }
+ { MVT::v16i8, 0, false, SPU::LRv16i8 },
+ { MVT::v8i16, 0, false, SPU::LRv8i16 },
+ { MVT::v4i32, 0, false, SPU::LRv4i32 },
+ { MVT::v2i64, 0, false, SPU::LRv2i64 },
+ { MVT::v4f32, 0, false, SPU::LRv4f32 },
+ { MVT::v2f64, 0, false, SPU::LRv2f64 }
};
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
@@ -686,31 +685,32 @@ SPUDAGToDAGISel::Select(SDValue Op) {
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
}
- Chain = SDValue(Result, 1);
-
return Result;
} else if (Opc == SPUISD::IndirectAddr) {
- SDValue Op0 = Op.getOperand(0);
- if (Op0.getOpcode() == SPUISD::LDRESULT) {
- /* || Op0.getOpcode() == SPUISD::AFormAddr) */
- // (IndirectAddr (LDRESULT, imm))
- SDValue Op1 = Op.getOperand(1);
- MVT VT = Op.getValueType();
-
- DEBUG(cerr << "CellSPU: IndirectAddr(LDRESULT, imm):\nOp0 = ");
- DEBUG(Op.getOperand(0).getNode()->dump(CurDAG));
- DEBUG(cerr << "\nOp1 = ");
- DEBUG(Op.getOperand(1).getNode()->dump(CurDAG));
- DEBUG(cerr << "\n");
-
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ MVT VT = Op.getValueType();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- Op1 = CurDAG->getTargetConstant(CN->getZExtValue(), VT);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32);
- Ops[0] = Op0;
- Ops[1] = Op1;
- n_ops = 2;
}
+ Ops[0] = Op0;
+ Ops[1] = Op1;
+ n_ops = 2;
}
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index c3c31e0f47..e975d0d039 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -77,37 +78,6 @@ namespace {
return retval;
}
-
- //! Predicate that returns true if operand is a memory target
- /*!
- \arg Op Operand to test
- \return true if the operand is a memory target (i.e., global
- address, external symbol, constant pool) or an A-form
- address.
- */
- bool isMemoryOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::GlobalAddress
- || Opc == ISD::GlobalTLSAddress
- || Opc == ISD::JumpTable
- || Opc == ISD::ConstantPool
- || Opc == ISD::ExternalSymbol
- || Opc == ISD::TargetGlobalAddress
- || Opc == ISD::TargetGlobalTLSAddress
- || Opc == ISD::TargetJumpTable
- || Opc == ISD::TargetConstantPool
- || Opc == ISD::TargetExternalSymbol
- || Opc == SPUISD::AFormAddr);
- }
-
- //! Predicate that returns true if the operand is an indirect target
- bool isIndirectOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::Register
- || Opc == SPUISD::LDRESULT);
- }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -135,20 +105,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setTruncStoreAction(MVT::i8, MVT::i8, Custom);
- setTruncStoreAction(MVT::i16, MVT::i8, Custom);
- setTruncStoreAction(MVT::i32, MVT::i8, Custom);
- setTruncStoreAction(MVT::i64, MVT::i8, Custom);
- setTruncStoreAction(MVT::i128, MVT::i8, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
// SPU constant load actions are custom lowered:
setOperationAction(ISD::Constant, MVT::i64, Custom);
@@ -160,11 +118,33 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
}
- // Custom lower BRCOND for i8 to "promote" the result to i16
+ // Custom lower BRCOND for i8 to "promote" the result to whatever the result
+ // operand happens to be:
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
// Expand the jumptable branches
@@ -176,14 +156,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-#if 0
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-#endif
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- // PowerPC has no SREM/UREM instructions
+ // SPU has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
@@ -232,14 +210,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
- // SMUL_LOHI, UMUL_LOHI
-#if 0
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-#endif
-
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
@@ -265,12 +235,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
// Zero extension and sign extension for i64 have to be
// custom legalized
@@ -278,10 +248,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
- // Custom lower truncates
- setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
+ // Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
// SPU has a legal FP -> signed INT instruction
@@ -292,7 +259,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f32, Custom);
- //setOperationAction(ISD::FDIV, MVT::f64, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
// SPU has [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
@@ -402,7 +369,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setShiftAmountType(MVT::i32);
- setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -435,7 +402,7 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
@@ -471,9 +438,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
return ((i != node_names.end()) ? i->second : 0);
}
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
MVT VT = Op.getValueType();
- return (VT.isInteger() ? VT : MVT(MVT::i32));
+ // i8, i16 and i32 are valid SETCC result types
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
@@ -486,105 +458,6 @@ MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
// LowerOperation implementation
//===----------------------------------------------------------------------===//
-/// Aligned load common code for CellSPU
-/*!
- \param[in] Op The SelectionDAG load or store operand
- \param[in] DAG The selection DAG
- \param[in] ST CellSPU subtarget information structure
- \param[in,out] alignment Caller initializes this to the load or store node's
- value from getAlignment(), may be updated while generating the aligned load
- \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
- offset (divisible by 16, modulo 16 == 0)
- \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
- offset of the preferred slot (modulo 16 != 0)
- \param[in,out] VT Caller initializes this value type to the the load or store
- node's loaded or stored value type; may be updated if an i1-extended load or
- store.
- \param[out] was16aligned true if the base pointer had 16-byte alignment,
- otherwise false. Can help to determine if the chunk needs to be rotated.
-
- Both load and store lowering load a block of data aligned on a 16-byte
- boundary. This is the common aligned load code shared between both.
- */
-static SDValue
-AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
- LSBaseSDNode *LSN,
- unsigned &alignment, int &alignOffs, int &prefSlotOffs,
- MVT &VT, bool &was16aligned)
-{
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
- SDValue basePtr = LSN->getBasePtr();
- SDValue chain = LSN->getChain();
-
- if (basePtr.getOpcode() == ISD::ADD) {
- SDValue Op1 = basePtr.getNode()->getOperand(1);
-
- if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
-
- alignOffs = (int) CN->getZExtValue();
- prefSlotOffs = (int) (alignOffs & 0xf);
-
- // Adjust the rotation amount to ensure that the final result ends up in
- // the preferred slot:
- prefSlotOffs -= vtm->prefslot_byte;
- basePtr = basePtr.getOperand(0);
-
- // Loading from memory, can we adjust alignment?
- if (basePtr.getOpcode() == SPUISD::AFormAddr) {
- SDValue APtr = basePtr.getOperand(0);
- if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
- alignment = GSDN->getGlobal()->getAlignment();
- }
- }
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
- } else if (basePtr.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
- alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
- prefSlotOffs = (int) (alignOffs & 0xf);
- prefSlotOffs -= vtm->prefslot_byte;
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
-
- if (alignment == 16) {
- // Realign the base pointer as a D-Form address:
- if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
- basePtr = DAG.getNode(ISD::ADD, PtrVT,
- basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- }
-
- // Emit the vector load:
- was16aligned = true;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
- }
-
- // Unaligned load or we're using the "large memory" model, which means that
- // we have to be very pessimistic:
- if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Add the offset
- basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- was16aligned = false;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
-}
-
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
@@ -605,43 +478,110 @@ static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
MVT InVT = LN->getMemoryVT();
MVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- SDValue Ops[8];
+ const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int offset, rotamt;
- bool was16aligned;
- SDValue result =
- AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
- was16aligned);
-
- if (result.getNode() == 0)
- return result;
-
- the_chain = result.getValue(1);
- // Rotate the chunk if necessary
- if (rotamt < 0)
- rotamt += 16;
- if (rotamt != 0 || !was16aligned) {
- SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
-
- Ops[0] = result;
- if (was16aligned) {
- Ops[1] = DAG.getConstant(rotamt, MVT::i16);
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
} else {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- LoadSDNode *LN1 = cast<LoadSDNode>(result);
- Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
DAG.getConstant(rotamt, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
+ DAG.getConstant(-vtm->prefslot_byte, PtrVT));
}
+ // Re-emit as a v16i8 vector load
+ result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = result.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
+ result.getValue(0), rotate);
+
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
@@ -704,23 +644,86 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int chunk_offset, slot_offset;
- bool was16aligned;
-
// The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
- SDValue alignLoadVec =
- AlignedLoad(Op, DAG, ST, SN, alignment,
- chunk_offset, slot_offset, VT, was16aligned);
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+ SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->isVolatile(), 16);
- if (alignLoadVec.getNode() == 0)
- return alignLoadVec;
+ // Update the chain
+ the_chain = alignLoadVec.getValue(1);
LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
- SDValue basePtr = LN->getBasePtr();
- SDValue the_chain = alignLoadVec.getValue(1);
SDValue theValue = SN->getValue();
SDValue result;
@@ -732,29 +735,20 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
theValue = theValue.getOperand(0);
}
- chunk_offset &= 0xf;
-
- SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
- SDValue insertEltPtr;
-
// If the base pointer is already a D-form address, then just create
// a new D-form address with a slot offset and the original base pointer.
// Otherwise generate a D-form address with the slot offset relative
// to the stack pointer, which is always aligned.
- DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
- DEBUG(basePtr.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
-
- if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
- (basePtr.getOpcode() == ISD::ADD
- && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
- insertEltPtr = basePtr;
- } else {
- insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
- }
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
+ DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
SDValue vectorizeOp =
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
@@ -919,22 +913,31 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
-{
+LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
SDValue Cond = Op.getOperand(1);
MVT CondVT = Cond.getValueType();
- MVT CondNVT;
+ unsigned CondOpc;
if (CondVT == MVT::i8) {
- CondNVT = MVT::i16;
+ SDValue CondOp0 = Cond.getOperand(0);
+ if (Cond.getOpcode() == ISD::TRUNCATE) {
+ // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine
+ // will then remove the truncate)
+ CondVT = CondOp0.getValueType();
+ CondOpc = ISD::ANY_EXTEND;
+ } else {
+ CondVT = MVT::i32; // default to something reasonable
+ CondOpc = ISD::ZERO_EXTEND;
+ }
+
+ Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1));
+
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
- Op.getOperand(0),
- DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
- Op.getOperand(2));
- } else
- return SDValue(); // Unchanged
+ Op.getOperand(0), Cond, Op.getOperand(2));
+ }
+
+ return SDValue(); // Unchanged
}
static SDValue
@@ -1896,7 +1899,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
case MVT::i64:
case MVT::f32:
case MVT::f64:
- return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
}
}
@@ -2274,9 +2277,11 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
return result;
}
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
{
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ MVT ShiftVT = TLI.getShiftAmountTy();
assert(Op.getValueType() == MVT::i8);
switch (Opc) {
@@ -2290,11 +2295,11 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
SDValue N1 = Op.getOperand(1);
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
MVT::i16));
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
MVT::i16));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
@@ -2307,13 +2312,13 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i32)
+ N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i32, N1)
+ ? DAG.getNode(N1Opc, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i32));
+ TLI.getShiftAmountTy()));
SDValue ExpandArg =
DAG.getNode(ISD::OR, MVT::i16, N0,
DAG.getNode(ISD::SHL, MVT::i16,
@@ -2328,14 +2333,13 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16)
+ MVT::i32));
+ N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
+ ? DAG.getNode(N1Opc, ShiftVT, N1)
+ : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
@@ -2344,15 +2348,15 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
unsigned N1Opc;
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16)
+ N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::SIGN_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
+ ? DAG.getNode(N1Opc, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
+ ShiftVT));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
@@ -2366,7 +2370,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
MVT::i16));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
@@ -2397,7 +2401,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
SDValue PromoteScalar =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+ DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
if (Opc != ISD::SIGN_EXTEND) {
// Use a shuffle to zero extend the i32 to i64 directly:
@@ -2438,9 +2442,9 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
// Turn operands into vectors to satisfy type checking (shufb works on
// vectors)
SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
SmallVector<SDValue, 16> ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
@@ -2467,9 +2471,9 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
// Turn operands into vectors to satisfy type checking (shufb works on
// vectors)
SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
SmallVector<SDValue, 16> ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
@@ -2495,7 +2499,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
case ISD::SHL: {
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
- SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
+ SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0);
SDValue MaskLower =
DAG.getNode(SPUISD::SELB, VecVT,
Op0Vec,
@@ -2540,7 +2544,7 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
case ISD::SRA: {
// Promote Op0 to vector
SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftVT = ShiftAmt.getValueType();
@@ -2669,7 +2673,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
SDValue N = Op.getOperand(0);
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
@@ -2686,7 +2690,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
SDValue Shift1 = DAG.getConstant(8, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
@@ -2720,7 +2724,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
SDValue Shift1 = DAG.getConstant(16, MVT::i32);
SDValue Shift2 = DAG.getConstant(8, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
@@ -2760,6 +2764,32 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+//! Lower ISD::SETCC
+/*!
+ Lower i64 condition code handling.
+ */
+
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue condition = Op.getOperand(2);
+
+ if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
+ // Expand the i64 comparisons to what Cell can actually support,
+ // which is eq, ugt and sgt:
+#if 0
+ CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDNode>(condition);
+
+ switch (ccvalue->get()) {
+ case
+ }
+#endif
+ }
+
+ return SDValue();
+}
+
//! Lower ISD::SELECT_CC
/*!
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@@ -2772,7 +2802,8 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
assumption, given the simplistic uses so far.
*/
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
MVT VT = Op.getValueType();
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
@@ -2780,12 +2811,20 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SDValue falseval = Op.getOperand(3);
SDValue condition = Op.getOperand(4);
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+ // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+ // where bits in $mask are 1. CCond will be inverted, having 1s where the
+ // condition was true and 0s where the condition was false. Hence, the
+ // arguments to SELB get reversed.
+
// Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
// legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
// with another "cannot select select_cc" assert:
- SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
- return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
+ SDValue compare = DAG.getNode(ISD::SETCC, TLI.getSetCCResultType(Op),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
}
//! Custom lower ISD::TRUNCATE
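
A short C++ sketch of the SELB semantics the note above describes: bits
come from the first operand where the mask is 0 and from the second
where it is 1, which is why falseval and trueval are passed in reversed
order once the compare mask holds 1s where the condition was true:

    #include <cstdint>

    uint32_t selb(uint32_t rA, uint32_t rB, uint32_t mask) {
      return (rA & ~mask) | (rB & mask);
    }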
@@ -2799,89 +2838,29 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
MVT Op0VT = Op0.getValueType();
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
- SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+ // Create shuffle mask
+ if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ // least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
- unsigned maskLow;
- unsigned maskHigh;
- // Create shuffle mask
- switch (Op0VT.getSimpleVT()) {
- case MVT::i128:
- switch (simpleVT) {
- case MVT::i64:
- // least significant doubleword of quadword
- maskHigh = 0x08090a0b;
- maskLow = 0x0c0d0e0f;
- break;
- case MVT::i32:
- // least significant word of quadword
- maskHigh = maskLow = 0x0c0d0e0f;
- break;
- case MVT::i16:
- // least significant halfword of quadword
- maskHigh = maskLow = 0x0e0f0e0f;
- break;
- case MVT::i8:
- // least significant byte of quadword
- maskHigh = maskLow = 0x0f0f0f0f;
- break;
- default:
- cerr << "Truncation to illegal type!";
- abort();
- }
- break;
- case MVT::i64:
- switch (simpleVT) {
- case MVT::i32:
- // least significant word of doubleword
- maskHigh = maskLow = 0x04050607;
- break;
- case MVT::i16:
- // least significant halfword of doubleword
- maskHigh = maskLow = 0x06070607;
- break;
- case MVT::i8:
- // least significant byte of doubleword
- maskHigh = maskLow = 0x07070707;
- break;
- default:
- cerr << "Truncation to illegal type!";
- abort();
- }
- break;
- case MVT::i32:
- case MVT::i16:
- switch (simpleVT) {
- case MVT::i16:
- // least significant halfword of word
- maskHigh = maskLow = 0x02030203;
- break;
- case MVT::i8:
- // least significant byte of word/halfword
- maskHigh = maskLow = 0x03030303;
- break;
- default:
- cerr << "Truncation to illegal type!";
- abort();
- }
- break;
- default:
- cerr << "Trying to lower truncation from illegal type!";
- abort();
- }
+ SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
- // Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32),
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32));
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
+ PromoteScalar, PromoteScalar, shufMask);
- SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
- PromoteScalar, PromoteScalar, shufMask);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
+ }
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
+ return SDValue(); // Leave the truncate unmolested
}
//! Custom (target-specific) lowering entry point
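
For the i128 -> i64 case retained above, the shuffle mask words
0x08090a0b and 0x0c0d0e0f select bytes 8..15 of the quadword, i.e. the
least-significant doubleword on the big-endian SPU. A byte-level sketch
of the shufb-based truncate (illustrative function, not the DAG code):

    #include <cstdint>

    void trunc_i128_to_i64(const uint8_t src[16], uint8_t dst[16]) {
      // mask[i] names the source byte that lands in result byte i:
      static const uint8_t mask[16] = {
          0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  // maskHigh, maskLow
          0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}; // repeated
      for (unsigned i = 0; i != 16; ++i)
        dst[i] = src[mask[i]];
      // dst bytes 0..7 now hold the i64 in its preferred slot.
    }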
@@ -2921,7 +2900,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::ConstantFP:
return LowerConstantFP(Op, DAG);
case ISD::BRCOND:
- return LowerBRCOND(Op, DAG);
+ return LowerBRCOND(Op, DAG, *this);
case ISD::FORMAL_ARGUMENTS:
return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
case ISD::CALL:
@@ -2942,7 +2921,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SHL:
case ISD::SRA: {
if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc);
+ return LowerI8Math(Op, DAG, Opc, *this);
else if (VT == MVT::i64)
return LowerI64Math(Op, DAG, Opc);
break;
@@ -2971,7 +2950,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
if (VT.isVector())
return LowerVectorMUL(Op, DAG);
else if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc);
+ return LowerI8Math(Op, DAG, Opc, *this);
else
return LowerMUL(Op, DAG, VT, Opc);
@@ -2990,10 +2969,13 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
return LowerCTPOP(Op, DAG);
case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG);
+ return LowerSELECT_CC(Op, DAG, *this);
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
}
return SDValue();
@@ -3036,7 +3018,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
SelectionDAG &DAG = DCI.DAG;
SDValue Op0 = N->getOperand(0); // everything has at least one operand
MVT NodeVT = N->getValueType(0); // The node's value type
- MVT Op0VT = Op0.getValueType(); // The first operand's result
+ MVT Op0VT = Op0.getValueType(); // The first operand's result
SDValue Result; // Initially, empty result
switch (N->getOpcode()) {
@@ -3044,49 +3026,53 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case ISD::ADD: {
SDValue Op1 = N->getOperand(1);
- if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
- SDValue Op01 = Op0.getOperand(1);
- if (Op01.getOpcode() == ISD::Constant
- || Op01.getOpcode() == ISD::TargetConstant) {
- // (add <const>, (SPUindirect <arg>, <const>)) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
- ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
- SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
cerr << "\n"
- << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
<< "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
- }
+ << combinedConst << ")\n";
+ }
#endif
- return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
- Op0.getOperand(0), combinedConst);
- }
- } else if (isa<ConstantSDNode>(Op0)
- && Op1.getOpcode() == SPUISD::IndirectAddr) {
- SDValue Op11 = Op1.getOperand(1);
- if (Op11.getOpcode() == ISD::Constant
- || Op11.getOpcode() == ISD::TargetConstant) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
- ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
- SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
-
- DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
- DEBUG(cerr << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-
- return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
- Op1.getOperand(0), combinedConst);
+ return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
+ IndirectArg, combinedValue);
+ }
}
}
break;
@@ -3127,6 +3113,25 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
return Op0;
}
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
}
break;
}
@@ -3136,19 +3141,19 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case SPUISD::VEC_SRL:
case SPUISD::VEC_SRA:
case SPUISD::ROTQUAD_RZ_BYTES:
- case SPUISD::ROTQUAD_RZ_BITS: {
+ case SPUISD::ROTQUAD_RZ_BITS:
+ case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
- if (isa<ConstantSDNode>(Op1)) {
- // Kill degenerate vector shifts:
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- if (CN->getZExtValue() == 0) {
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
Result = Op0;
}
}
break;
}
- case SPUISD::PROMOTE_SCALAR: {
+ case SPUISD::PREFSLOT2VEC: {
switch (Op0.getOpcode()) {
default:
break;
@@ -3263,7 +3268,7 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case CNTB:
#endif
- case SPUISD::PROMOTE_SCALAR: {
+ case SPUISD::PREFSLOT2VEC: {
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType();
unsigned Op0VTBits = Op0VT.getSizeInBits();
@@ -3306,7 +3311,25 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
#endif
}
}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+ case ISD::SETCC: {
+ MVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
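
The ComputeNumSignBitsForTargetNode override added above reflects the
same 0/~0 boolean convention: a SETCC result is either all-zeros or
all-ones, so every bit replicates the sign bit and the node carries a
full VT.getSizeInBits() of known sign bits. A sketch of the invariant
(illustrative check, not LLVM API):

    #include <cstdint>

    bool isAllSignBits(int32_t setccResult) {
      return setccResult == 0 || setccResult == -1;
    }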
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index dd1f97f8d3..8d2e994545 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -39,7 +39,7 @@ namespace llvm {
SHUFB, ///< Vector shuffle (permute)
SHUFFLE_MASK, ///< Shuffle mask
CNTB, ///< Count leading ones in bytes
- PROMOTE_SCALAR, ///< Promote scalar->vector
+ PREFSLOT2VEC, ///< Promote scalar->vector
VEC2PREFSLOT, ///< Extract element 0
MPY, ///< 16-bit Multiply (low parts of a 32-bit)
MPYU, ///< Multiply Unsigned
@@ -58,6 +58,7 @@ namespace llvm {
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ GATHER_BITS, ///< Gather bits from bytes/words/halfwords
ADD_EXTENDED, ///< Add extended, with carry
CARRY_GENERATE, ///< Carry generate for ADD_EXTENDED
SUB_EXTENDED, ///< Subtract extended, with borrow
@@ -120,6 +121,9 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth = 0) const;
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
index f423dfa342..710196467b 100644
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -120,9 +120,8 @@ class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
}
let RA = 0 in {
- class BICondForm<bits<11> opcode, string asmstr, list<dag> pattern>
- : RRForm<opcode, (outs), (ins R32C:$rA, R32C:$func), asmstr,
- BranchResolv, pattern>
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
{ }
let RT = 0 in {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 442d49141b..37a5870579 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -34,10 +34,14 @@ namespace {
inline bool isCondBranch(const MachineInstr *I) {
unsigned opc = I->getOpcode();
- return (opc == SPU::BRNZ
- || opc == SPU::BRZ
- || opc == SPU::BRHNZ
- || opc == SPU::BRHZ);
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
}
}
@@ -103,6 +107,19 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
return true;
}
break;
+ case SPU::LRr8:
+ case SPU::LRr16:
+ case SPU::LRr32:
+ case SPU::LRf32:
+ case SPU::LRr64:
+ case SPU::LRf64:
+ case SPU::LRr128:
+ case SPU::LRv16i8:
+ case SPU::LRv8i16:
+ case SPU::LRv4i32:
+ case SPU::LRv4f32:
+ case SPU::LRv2i64:
+ case SPU::LRv2f64:
case SPU::ORv16i8_i8:
case SPU::ORv8i16_i16:
case SPU::ORv4i32_i32:
@@ -114,7 +131,18 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
case SPU::ORi32_v4i32:
case SPU::ORi64_v2i64:
case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64:
+ case SPU::ORf64_v2f64: {
+ assert(MI.getNumOperands() == 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid SPU OR<type>_<vec> instruction!");
+ if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ sourceReg = MI.getOperand(0).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ }
case SPU::ORv16i8:
case SPU::ORv8i16:
case SPU::ORv4i32:
@@ -198,18 +226,14 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case SPU::STQDr8: {
const MachineOperand MOp1 = MI->getOperand(1);
const MachineOperand MOp2 = MI->getOperand(2);
- if (MOp1.isImm()
- && (MOp2.isFI()
- || (MOp2.isReg() && MOp2.getReg() == SPU::R1))) {
- if (MOp2.isFI())
- FrameIndex = MOp2.getIndex();
- else
- FrameIndex = MOp1.getImm() / SPUFrameInfo::stackSlotSize();
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
return MI->getOperand(0).getReg();
}
break;
}
- case SPU::STQXv16i8:
+#if 0
+ case SPU::STQXv16i8:
case SPU::STQXv8i16:
case SPU::STQXv4i32:
case SPU::STQXv4f32:
@@ -226,6 +250,7 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
+#endif
}
return 0;
}
@@ -292,6 +317,8 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
} else if (RC == SPU::R8CRegisterClass) {
opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
} else {
assert(0 && "Unknown regclass!");
abort();
@@ -366,6 +393,8 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
} else if (RC == SPU::R8CRegisterClass) {
opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
} else {
assert(0 && "Unknown regclass in loadRegFromStackSlot!");
abort();
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 2338a0318b..08d767684a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1,10 +1,10 @@
//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
// Cell SPU Instructions:
//===----------------------------------------------------------------------===//
@@ -49,14 +49,14 @@ def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$fi
let canFoldAsLoad = 1 in {
class LoadDFormVec<ValueType vectype>
- : RI10Form<0b00101100, (outs VECREG:$rT), (ins memri10:$src),
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
"lqd\t$rT, $src",
LoadStore,
[(set (vectype VECREG:$rT), (load dform_addr:$src))]>
{ }
class LoadDForm<RegisterClass rclass>
- : RI10Form<0b00101100, (outs rclass:$rT), (ins memri10:$src),
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
"lqd\t$rT, $src",
LoadStore,
[(set rclass:$rT, (load dform_addr:$src))]>
@@ -161,14 +161,14 @@ let canFoldAsLoad = 1 in {
// Stores:
//===----------------------------------------------------------------------===//
class StoreDFormVec<ValueType vectype>
- : RI10Form<0b00100100, (outs), (ins VECREG:$rT, memri10:$src),
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
"stqd\t$rT, $src",
LoadStore,
[(store (vectype VECREG:$rT), dform_addr:$src)]>
{ }
class StoreDForm<RegisterClass rclass>
- : RI10Form<0b00100100, (outs), (ins rclass:$rT, memri10:$src),
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
"stqd\t$rT, $src",
LoadStore,
[(store rclass:$rT, dform_addr:$src)]>
@@ -269,7 +269,7 @@ def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
// Generate Controls for Insertion:
//===----------------------------------------------------------------------===//
-def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
"cbd\t$rT, $src", ShuffleOp,
[(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
@@ -277,7 +277,7 @@ def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
"cbx\t$rT, $src", ShuffleOp,
[(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
"chd\t$rT, $src", ShuffleOp,
[(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
@@ -285,7 +285,7 @@ def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
"chx\t$rT, $src", ShuffleOp,
[(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
"cwd\t$rT, $src", ShuffleOp,
[(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
@@ -293,7 +293,7 @@ def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
"cwx\t$rT, $src", ShuffleOp,
[(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
"cwd\t$rT, $src", ShuffleOp,
[(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
@@ -301,7 +301,7 @@ def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
"cwx\t$rT, $src", ShuffleOp,
[(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
"cdd\t$rT, $src", ShuffleOp,
[(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
@@ -309,7 +309,7 @@ def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
"cdx\t$rT, $src", ShuffleOp,
[(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
-def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins memri7:$src),
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
"cdd\t$rT, $src", ShuffleOp,
[(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
@@ -421,6 +421,7 @@ multiclass ImmLoadAddress
def f32: ILARegInst<R32FP, f18imm, fpimm18>;
def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
def lo: ILARegInst<R32C, symbolLo, imm18>;
def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
@@ -481,37 +482,77 @@ multiclass FormSelectMaskBytesImm
defm FSMBI : FormSelectMaskBytesImm;
// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits
-def FSMB:
- RRForm_1<0b01101101100, (outs VECREG:$rT), (ins R16C:$rA),
- "fsmb\t$rT, $rA", SelectOp,
- [(set (v16i8 VECREG:$rT), (SPUselmask R16C:$rA))]>;
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
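For reference, fsmb expands each of the 16 bits in $rA's low halfword, MSB
first, into a full byte of the result mask. A rough C model of the semantics
(fsmb_model is an invented name for illustration, not code from this patch):

    #include <stdint.h>

    /* fsmb: bit i (counting from the MSB of the low 16 bits of rA)
       becomes byte i of the 16-byte mask: 0xFF for 1, 0x00 for 0. */
    void fsmb_model(uint16_t rA, uint8_t rT[16]) {
      for (int i = 0; i < 16; ++i)
        rT[i] = ((rA >> (15 - i)) & 1) ? 0xFF : 0x00;
    }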
// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is
// only 8-bits wide (even though it's input as 16-bits here)
-def FSMH:
- RRForm_1<0b10101101100, (outs VECREG:$rT), (ins R16C:$rA),
- "fsmh\t$rT, $rA", SelectOp,
- [(set (v8i16 VECREG:$rT), (SPUselmask R16C:$rA))]>;
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
// fsm: Form select mask for words. Like the other fsm* instructions,
// only the lower 4 bits of $rA are significant.
-class FSMInst<ValueType vectype, RegisterClass rclass>:
- RRForm_1<0b00101101100, (outs VECREG:$rT), (ins rclass:$rA),
- "fsm\t$rT, $rA",
- SelectOp,
- [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
multiclass FormSelectMaskWord {
- def r32 : FSMInst<v4i32, R32C>;
- def r16 : FSMInst<v4i32, R16C>;
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
}
defm FSM : FormSelectMaskWord;
// Special case when used for i64 math operations
multiclass FormSelectMaskWord64 {
- def r32 : FSMInst<v2i64, R32C>;
- def r16 : FSMInst<v2i64, R16C>;
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
}
defm FSM64 : FormSelectMaskWord64;
@@ -736,7 +777,7 @@ defm BG : BorrowGenerate;
// BGX: Borrow generate, extended.
def BGXvec:
RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
- VECREG:$rCarry),
+ VECREG:$rCarry),
"bgx\t$rT, $rA, $rB", IntegerOp,
[]>,
RegConstraint<"$rCarry = $rT">,
@@ -898,20 +939,31 @@ def MPYHHAUr32:
[]>;
// clz: Count leading zeroes
-def CLZv4i32:
- RRForm_1<0b10100101010, (outs VECREG:$rT), (ins VECREG:$rA),
- "clz\t$rT, $rA", IntegerOp,
- [/* intrinsic */]>;
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
-def CLZr32:
- RRForm_1<0b10100101010, (outs R32C:$rT), (ins R32C:$rA),
- "clz\t$rT, $rA", IntegerOp,
- [(set R32C:$rT, (ctlz R32C:$rA))]>;
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
// cntb: Count ones in bytes (aka "population count")
+//
// NOTE: This instruction is really a vector instruction, but the custom
// lowering code uses it in unorthodox ways to support CTPOP for other
// data types!
+
def CNTBv16i8:
RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
"cntb\t$rT, $rA", IntegerOp,
@@ -927,26 +979,88 @@ def CNTBv4i32 :
"cntb\t$rT, $rA", IntegerOp,
[(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
-// gbb: Gather all low order bits from each byte in $rA into a single 16-bit
-// quantity stored into $rT
-def GBB:
- RRForm_1<0b01001101100, (outs R16C:$rT), (ins VECREG:$rA),
- "gbb\t$rT, $rA", GatherOp,
- []>;
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0; the upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUgatherbits (vectype VECREG:$rA)))]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
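As the note above says, gbb pairs with fsmb as its inverse: it gathers one
bit per byte, left to right, into a 16-bit mask. A rough C model (gbb_model
is an invented name; assumes the MSB-first slot order of the SPU ISA):

    #include <stdint.h>

    /* gbb: the low bit of byte 0 becomes the most significant of the 16
       gathered bits; the upper 16 bits of the result remain zero. */
    uint32_t gbb_model(const uint8_t rA[16]) {
      uint32_t mask = 0;
      for (int i = 0; i < 16; ++i)
        mask = (mask << 1) | (rA[i] & 1);
      return mask;
    }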
// gbh: Gather all low order bits from each halfword in $rA into a single
-// 8-bit quantity stored in $rT
-def GBH:
- RRForm_1<0b10001101100, (outs R16C:$rT), (ins VECREG:$rA),
- "gbh\t$rT, $rA", GatherOp,
- []>;
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUgatherbits (vectype VECREG:$rA)))]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
// gb: Gather all low order bits from each word in $rA into a single
-// 4-bit quantity stored in $rT
-def GB:
- RRForm_1<0b00001101100, (outs R16C:$rT), (ins VECREG:$rA),
- "gb\t$rT, $rA", GatherOp,
- []>;
+// 4-bit quantity stored in $rT's slot 0; the upper bits of $rT are set
+// to 0, as are slots 1-3.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [(set rclass:$rT, (SPUgatherbits (vectype VECREG:$rA)))]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUgatherbits (vectype VECREG:$rA)))]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
// avgb: average bytes
def AVGB:
@@ -976,30 +1090,26 @@ class XSBHVecInst<ValueType vectype>:
XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
[(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
-class XSBHRegInst<RegisterClass rclass>:
+class XSBHInRegInst<RegisterClass rclass>:
XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
[(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;
multiclass ExtendByteHalfword {
def v16i8: XSBHVecInst<v8i16>;
- def r16: XSBHRegInst<R16C>;
+ def r16: XSBHInRegInst<R16C>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
// 32-bit form for XSBH: used when sign extending 8-bit quantities to 32-bit
// quantities via a 32-bit register (see the sext 8->32
// pattern below). Intentionally doesn't match a pattern because we want the
// sext 8->32 pattern to do the work for us, namely because we need the extra
// XSHWr32.
- def r32: XSBHRegInst<R32C>;
+ def r32: XSBHInRegInst<R32C>;
}
defm XSBH : ExtendByteHalfword;
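The in-register forms implement sext_inreg, i.e. they re-extend the low byte
of a wider register in place. A one-line C model of the r16 case (xsbh_model
is an invented name; assumes two's-complement narrowing):

    #include <stdint.h>

    /* xsbh (preferred slot): sign-extend the low 8 bits to 16 bits. */
    int16_t xsbh_model(int16_t x) { return (int16_t)(int8_t)x; }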
-// Sign-extend, but take an 8-bit register to a 16-bit register (not done as
-// sext_inreg)
-def XSBHr8:
- XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
- [(set R16C:$rDst, (sext R8C:$rSrc))]>;
-
// Sign extend halfwords to words:
def XSHWvec:
RRForm_1<0b01101101010, (outs VECREG:$rDest), (ins VECREG:$rSrc),
@@ -1208,13 +1318,44 @@ class ORRegInst<RegisterClass rclass>:
ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
[(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+// ORCvtForm: OR conversion form
+//
+// This is used to "convert" the preferred slot to its vector equivalent, as
+// well as convert a vector back to its preferred slot.
+//
+// These are effectively no-ops, but need to exist for proper type conversion
+// and type coercion.
+
+class ORCvtForm<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/* no pattern */];
+
+ let Inst{0-10} = 0b10000010000;
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
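Since the encoding feeds RA into both source fields, each of these forms
assembles to a plain self-OR. With invented register numbers, ORr32_i128
would print roughly as:

    or   $3, $5, $5        # retype $5 (GPRC) as $3 (R32C); bits unchanged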
+
class ORPromoteScalar<RegisterClass rclass>:
- ORInst<(outs VECREG:$rT), (ins rclass:$rA, rclass:$rB),
- [/* no pattern */]>;
+ ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
class ORExtractElt<RegisterClass rclass>:
- ORInst<(outs rclass:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern */]>;
+ ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
+
+class ORCvtRegGPRC<RegisterClass rclass>:
+ ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>;
+
+class ORCvtVecGPRC:
+ ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+
+class ORCvtGPRCReg<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>;
+
+class ORCvtGPRCVec:
+ ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
multiclass BitwiseOr
{
@@ -1229,7 +1370,7 @@ multiclass BitwiseOr
(v4i32 VECREG:$rB)))))]>;
def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (v2f64 VECREG:$rT),
+ [(set (v2f64 VECREG:$rT),
(v2f64 (bitconvert (or (v2i64 VECREG:$rA),
(v2i64 VECREG:$rB)))))]>;
@@ -1260,48 +1401,115 @@ multiclass BitwiseOr
def i64_v2i64: ORExtractElt<R64C>;
def f32_v4f32: ORExtractElt<R32FP>;
def f64_v2f64: ORExtractElt<R64FP>;
+
+ // Conversion from register to GPRC
+ def i128_r64: ORCvtRegGPRC<R64C>;
+ def i128_f64: ORCvtRegGPRC<R64FP>;
+ def i128_r32: ORCvtRegGPRC<R32C>;
+ def i128_f32: ORCvtRegGPRC<R32FP>;
+ def i128_r16: ORCvtRegGPRC<R16C>;
+ def i128_r8: ORCvtRegGPRC<R8C>;
+
+ // Conversion from vector to GPRC
+ def i128_vec: ORCvtVecGPRC;
+
+ // Conversion from GPRC to register
+ def r64_i128: ORCvtGPRCReg<R64C>;
+ def f64_i128: ORCvtGPRCReg<R64FP>;
+ def r32_i128: ORCvtGPRCReg<R32C>;
+ def f32_i128: ORCvtGPRCReg<R32FP>;
+ def r16_i128: ORCvtGPRCReg<R16C>;
+ def r8_i128: ORCvtGPRCReg<R8C>;
+
+ // Conversion from GPRC to vector
+ def vec_i128: ORCvtGPRCVec;
}
defm OR : BitwiseOr;
-// scalar->vector promotion patterns:
-def : Pat<(v16i8 (SPUpromote_scalar R8C:$rA)),
- (ORv16i8_i8 R8C:$rA, R8C:$rA)>;
+// scalar->vector promotion patterns (preferred slot to vector):
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (ORv16i8_i8 R8C:$rA)>;
-def : Pat<(v8i16 (SPUpromote_scalar R16C:$rA)),
- (ORv8i16_i16 R16C:$rA, R16C:$rA)>;
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (ORv8i16_i16 R16C:$rA)>;
-def : Pat<(v4i32 (SPUpromote_scalar R32C:$rA)),
- (ORv4i32_i32 R32C:$rA, R32C:$rA)>;
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (ORv4i32_i32 R32C:$rA)>;
-def : Pat<(v2i64 (SPUpromote_scalar R64C:$rA)),
- (ORv2i64_i64 R64C:$rA, R64C:$rA)>;
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (ORv2i64_i64 R64C:$rA)>;
-def : Pat<(v4f32 (SPUpromote_scalar R32FP:$rA)),
- (ORv4f32_f32 R32FP:$rA, R32FP:$rA)>;
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (ORv4f32_f32 R32FP:$rA)>;
-def : Pat<(v2f64 (SPUpromote_scalar R64FP:$rA)),
- (ORv2f64_f64 R64FP:$rA, R64FP:$rA)>;
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (ORv2f64_f64 R64FP:$rA)>;
-// ORi*_v*: Used to extract vector element 0 (the preferred slot)
+// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
+// known as converting the vector back to its preferred slot
def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
- (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
+ (ORi8_v16i8 VECREG:$rA)>;
def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
- (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
+ (ORi16_v8i16 VECREG:$rA)>;
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
- (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
+ (ORi32_v4i32 VECREG:$rA)>;
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
- (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
+ (ORi64_v2i64 VECREG:$rA)>;
def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
- (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
+ (ORf32_v4f32 VECREG:$rA)>;
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
- (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
+ (ORf64_v2f64 VECREG:$rA)>;
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
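As an illustration (register numbers invented), the two spellings below
encode identically; the lr form simply reads as a register move:

    lr   $3, $4            # load register: rT <- rA
    or   $3, $4, $4        # same encoding, spelled as a self-OR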
// ORC: Bitwise "or" with complement (c = a | ~b)
@@ -1585,12 +1793,24 @@ class SELBVecInst<ValueType vectype>:
(and (vnot (vectype VECREG:$rC)),
(vectype VECREG:$rA))))]>;
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
class SELBRegInst<RegisterClass rclass>:
SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
[(set rclass:$rT,
(or (and rclass:$rA, rclass:$rC),
(and rclass:$rB, (not rclass:$rC))))]>;
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
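Per the SPU ISA, selb takes each result bit from $rB where the control bit
is 1 and from $rA where it is 0, which is why the select patterns above bind
the true value to $rB. A rough C model (selb_model is an invented name):

    #include <stdint.h>

    /* selb: bitwise select; rC chooses rB (1-bits) or rA (0-bits). */
    uint32_t selb_model(uint32_t rA, uint32_t rB, uint32_t rC) {
      return (rB & rC) | (rA & ~rC);
    }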
multiclass SelectBits
{
def v16i8: SELBVecInst<v16i8>;
@@ -1603,6 +1823,16 @@ multiclass SelectBits
def r32: SELBRegInst<R32C>;
def r16: SELBRegInst<R16C>;
def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+ // SELBr64_cond is defined further down; look for the i64 comparisons
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
}
defm SELB : SelectBits;
@@ -1625,14 +1855,6 @@ def : SPUselbPatReg<R16C, SELBr16>;
def : SPUselbPatReg<R32C, SELBr32>;
def : SPUselbPatReg<R64C, SELBr64>;
-class SelectConditional<RegisterClass rclass, SPUInstr inst>:
- Pat<(select rclass:$rCond, rclass:$rTrue, rclass:$rFalse),
- (inst rclass:$rFalse, rclass:$rTrue, rclass:$rCond)>;
-
-def : SelectConditional<R32C, SELBr32>;
-def : SelectConditional<R16C, SELBr16>;
-def : SelectConditional<R8C, SELBr8>;
-
// EQV: Equivalence (1 for each same bit, otherwise 0)
//
// Note: There are a lot of ways to match this bit operator and these patterns
@@ -1753,6 +1975,10 @@ class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
(resultvec VECREG:$rB),
(maskvec VECREG:$rC)))]>;
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
multiclass ShuffleBytes
{
def v16i8 : SHUFBVecInst<v16i8, v16i8>;
@@ -1769,6 +1995,8 @@ multiclass ShuffleBytes
def v2f64 : SHUFBVecInst<v2f64, v16i8>;
def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
}
defm SHUFB : ShuffleBytes;
@@ -2027,7 +2255,7 @@ defm ROTHI: RotateLeftHalfwordImm;
def : Pat<(SPUvec_rotl VECREG:$rA, (i32 uimm7:$val)),
(ROTHIv8i16 VECREG:$rA, imm:$val)>;
-
+
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate word:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -2207,7 +2435,7 @@ multiclass RotateQuadByBitCount
}
defm ROTQBI: RotateQuadByBitCount;
-
+
class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
RotateShift, pattern>;
@@ -2298,7 +2526,7 @@ def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
(ROTHMIv8i16 VECREG:$rA, imm:$val)>;
-
+
def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
(ROTHMIv8i16 VECREG:$rA, imm:$val)>;
@@ -2359,7 +2587,7 @@ def ROTMIv4i32:
def : Pat<(SPUvec_srl VECREG:$rA, (i16 uimm7:$val)),
(ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
-
+
def : Pat<(SPUvec_srl VECREG:$rA, (i8 uimm7:$val)),
(ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
@@ -2682,7 +2910,7 @@ let isTerminator = 1, isBarrier = 1 in {
"hgt\t$rA, $rB", BranchResolv,
[/* no pattern to match */]>;
- def HGTIr32:
+ def HGTIr32:
RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
"hgti\t$rA, $val", BranchResolv,
[/* no pattern to match */]>;
@@ -2698,9 +2926,9 @@ let isTerminator = 1, isBarrier = 1 in {
[/* no pattern to match */]>;
}
-//------------------------------------------------------------------------
-// Comparison operators:
-//------------------------------------------------------------------------
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
@@ -2990,8 +3218,14 @@ defm CLGTI : CmpLGtrWordImm;
// define a pattern to generate the right code, as a binary operator
// (in a manner of speaking.)
//
-// N.B.: This only matches the setcc set of conditionals. Special pattern
-// matching is used for select conditionals.
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes are almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
@@ -3004,10 +3238,10 @@ class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
Pat<(cond rclass:$rA, (inttype immpred:$imm)),
(xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
-def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
-def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
@@ -3128,8 +3362,8 @@ class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
SPUInstr cmpOp2>:
Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
- rclass:$rFalse, rclass:$rTrue),
- (selinstr rclass:$rTrue, rclass:$rFalse,
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
(binop (cmpOp1 rclass:$rA, rclass:$rB),
(cmpOp2 rclass:$rA, rclass:$rB)))>;
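With the corrected operand order, the true value lands in selb's "select on
ones" slot. A setge select, for instance, should expand roughly as follows
(register names invented):

    cgt   $t1, $rA, $rB               # greater-than half of the >= mask
    ceq   $t2, $rA, $rB               # equality half of the >= mask
    or    $t1, $t1, $t2               # combined >= mask
    selb  $rT, $rFalse, $rTrue, $t1   # all-ones mask selects $rTrue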
@@ -3226,54 +3460,129 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
// Various branches:
- def BRNZ:
- RI16Form<0b010000100, (outs), (ins R32C:$rCond, brtarget:$dest),
- "brnz\t$rCond,$dest",
- BranchResolv,
- [(brcond R32C:$rCond, bb:$dest)]>;
-
- def BRZ:
- RI16Form<0b000000100, (outs), (ins R32C:$rT, brtarget:$dest),
- "brz\t$rT,$dest",
- BranchResolv,
- [/* no pattern */]>;
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
- def BRHNZ:
- RI16Form<0b011000100, (outs), (ins R16C:$rCond, brtarget:$dest),
- "brhnz\t$rCond,$dest",
- BranchResolv,
- [(brcond R16C:$rCond, bb:$dest)]>;
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
- def BRHZ:
- RI16Form<0b001000100, (outs), (ins R16C:$rT, brtarget:$dest),
- "brhz\t$rT,$dest",
- BranchResolv,
- [/* no pattern */]>;
-
-/*
- def BINZ:
- BICondForm<0b10010100100, "binz\t$rA, $func",
- [(SPUbinz R32C:$rA, R32C:$func)]>;
-
- def BIZ:
- BICondForm<0b00010100100, "biz\t$rA, $func",
- [(SPUbiz R32C:$rA, R32C:$func)]>;
-*/
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+ // Note: LLVM doesn't emit conditional indirect branches. Otherwise these
+ // would be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
}
//===----------------------------------------------------------------------===//
// setcc and brcond patterns:
//===----------------------------------------------------------------------===//
-def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
- (BRHZ R16C:$rA, bb:$dest)>;
-def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
- (BRHNZ R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
-def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
- (BRZ R32C:$rA, bb:$dest)>;
-def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
- (BRNZ R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
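These zero-compare patterns map straight onto the conditional branches; for
an i32 value in an invented register $3, "if (x != 0) goto dest" becomes:

    brnz  $3, dest         # branch when $3 is non-zero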
multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
{
@@ -3290,8 +3599,8 @@ multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
(brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
}
-defm BRCONDeq : BranchCondEQ<seteq, BRHZ, BRZ>;
-defm BRCONDne : BranchCondEQ<setne, BRHNZ, BRNZ>;
+defm BRCONDeq : BranchCondEQ<seteq, BRHZr16, BRZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHNZr16, BRNZr32>;
multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
{
@@ -3308,8 +3617,8 @@ multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
(brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
}
-defm BRCONDugt : BranchCondLGT<setugt, BRHNZ, BRNZ>;
-defm BRCONDule : BranchCondLGT<setule, BRHZ, BRZ>;
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
SPUInstr orinst32, SPUInstr brinst32>
@@ -3335,8 +3644,8 @@ multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
bb:$dest)>;
}
-defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZ, ORr32, BRNZ>;
-defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZ, ORr32, BRZ>;
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
{
@@ -3353,8 +3662,8 @@ multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
(brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
}
-defm BRCONDgt : BranchCondGT<setgt, BRHNZ, BRNZ>;
-defm BRCONDle : BranchCondGT<setle, BRHZ, BRZ>;
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
SPUInstr orinst32, SPUInstr brinst32>
@@ -3380,8 +3689,8 @@ multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
bb:$dest)>;
}
-defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZ, ORr32, BRNZ>;
-defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZ, ORr32, BRZ>;
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
let isTerminator = 1, isBarrier = 1 in {
let isReturn = 1 in {
@@ -3397,10 +3706,12 @@ let isTerminator = 1, isBarrier = 1 in {
class FAInst<dag OOL, dag IOL, list<dag> pattern>:
RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
SPrecFP, pattern>;
+
class FAVecInst<ValueType vectype>:
FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
(fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
multiclass SFPAdd
{
def v4f32: FAVecInst<v4f32>;
@@ -3548,7 +3859,7 @@ def FSCRRf32 :
// floating reciprocal absolute square root estimate (frsqest)
// The following are probably just intrinsics
-// status and control register write
+// status and control register write
// status and control register read
//--------------------------------------
@@ -3603,7 +3914,7 @@ def FMSf32 :
// = c - a * b
// NOTE: subtraction order
// fsub a b = a - b
-// fs a b = b - a?
+// fs a b = b - a?
def FNMSf32 :
RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
"fnms\t$rT, $rA, $rB, $rC", SPrecFP,
@@ -3612,9 +3923,9 @@ def FNMSf32 :
def FNMSv4f32 :
RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
"fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
(v4f32 VECREG:$rB))))]>;
//--------------------------------------
@@ -3625,7 +3936,7 @@ def CSiFv4f32:
"csflt\t$rT, $rA, 0", SPrecFP,
[(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
-// Convert signed integer to floating point
+// Convert signed integer to floating point
def CSiFf32 :
CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
"csflt\t$rT, $rA, 0", SPrecFP,
@@ -3642,7 +3953,7 @@ def CUiFf32 :
"cuflt\t$rT, $rA, 0", SPrecFP,
[(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
-// Convert float to unsigned int
+// Convert float to unsigned int
// Assume that scale = 0
def CFUiv4f32 :
@@ -3655,7 +3966,7 @@ def CFUif32 :
"cfltu\t$rT, $rA, 0", SPrecFP,
[(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
-// Convert float to signed int
+// Convert float to signed int
// Assume that scale = 0
def CFSiv4f32 :
@@ -3788,9 +4099,9 @@ def FNMSv2f64 :
RRForm<0b01111010110, (outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
"dfnms\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
(v2f64 VECREG:$rB))))]>,
RegConstraint<"$rC = $rT">,
NoEncode<"$rC">;
@@ -3813,9 +4124,9 @@ def FNMAv2f64 :
RRForm<0b11111010110, (outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
"dfnma\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fneg (fadd (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
(v2f64 VECREG:$rB)))))]>,
RegConstraint<"$rC = $rT">,
NoEncode<"$rC">;
@@ -3825,7 +4136,7 @@ def FNMAv2f64 :
//===----------------------------------------------------------------------==//
def : Pat<(fneg (v4f32 VECREG:$rA)),
- (XORfnegvec (v4f32 VECREG:$rA),
+ (XORfnegvec (v4f32 VECREG:$rA),
(v4f32 (ILHUv4i32 0x8000)))>;
def : Pat<(fneg R32FP:$rA),
@@ -3944,7 +4255,7 @@ def : Pat<(f32 fpimm:$imm),
def : Pat<(v4i32 v4i32Imm:$imm),
(IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
(LO16_vec v4i32Imm:$imm))>;
-
+
// 8-bit constants
def : Pat<(i8 imm:$imm),
(ILHr8 imm:$imm)>;
@@ -4001,6 +4312,69 @@ def : Pat<(i32 (anyext R16C:$rSrc)),
(ORIi16i32 R16C:$rSrc, 0)>;
//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (ORi8_v16i8
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (ORi8_v16i8
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (ORi8_v16i8
+ (SHUFBv4i32_m32
+ (ORv4i32_i32 R32C:$src),
+ (ORv4i32_i32 R32C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (ORi8_v16i8
+ (SHUFBv4i32_m32
+ (ORv8i16_i16 R16C:$src),
+ (ORv8i16_i16 R16C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (ORi16_v8i16
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (ORi16_v8i16
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (ORi16_v8i16
+ (SHUFBv4i32_m32
+ (ORv4i32_i32 R32C:$src),
+ (ORv4i32_i32 R32C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (ORi32_v4i32
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (ORi32_v4i32
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+
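All of these patterns lean on shufb's byte-select behavior: the ILHU/IOHL
pair materializes a control word, and each control byte picks one byte out
of the 32-byte concatenation of the two (identical) source registers, so a
control of 0x04050607 extracts the low word of a big-endian i64 into the
preferred slot. A rough C model (shufb_model is an invented name; the
special control encodings that produce constant 0x00/0xFF/0x80 bytes are
omitted):

    #include <stdint.h>

    /* shufb: control byte i selects byte (c[i] & 0x1F) from a||b. */
    void shufb_model(const uint8_t a[16], const uint8_t b[16],
                     const uint8_t c[16], uint8_t t[16]) {
      for (int i = 0; i < 16; ++i) {
        unsigned sel = c[i] & 0x1F;
        t[i] = (sel < 16) ? a[sel] : b[sel - 16];
      }
    }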
+//===----------------------------------------------------------------------===//
// Address generation: SPU, like PPC, has to split addresses into high and
// low parts in order to load them into a register.
//===----------------------------------------------------------------------===//
@@ -4047,3 +4421,5 @@ def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
// Intrinsics:
include "CellSDKIntrinsics.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 1ed1e3ba51..b22c6b5d9f 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -66,6 +66,13 @@ def SPUselb_type: SDTypeProfile<1, 3, [
def SPUvecshift_type: SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+// SPU gather bits:
+// This instruction looks at each vector (word|halfword|byte) slot's low bit
+// and forms a mask in the low order bits of the first word's preferred slot.
+def SPUgatherbits_type: SDTypeProfile<1, 1, [
+ /* no type constraints defined */
+]>;
+
//===----------------------------------------------------------------------===//
// Synthetic/pseudo-instructions
//===----------------------------------------------------------------------===//
@@ -137,14 +144,17 @@ def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
// SPU select bits instruction
def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+// SPU gather bits instruction:
+def SPUgatherbits: SDNode<"SPUISD::GATHER_BITS", SPUgatherbits_type, []>;
+
// SPU floating point interpolate
def SPUinterpolate : SDNode<"SPUISD::FPInterp", SDTFPBinOp, []>;
// SPU floating point reciprocal estimate (used for fdiv)
def SPUreciprocalEst: SDNode<"SPUISD::FPRecipEst", SDTFPUnaryOp, []>;
-def SDTpromote_scalar: SDTypeProfile<1, 1, []>;
-def SPUpromote_scalar: SDNode<"SPUISD::PROMOTE_SCALAR", SDTpromote_scalar, []>;
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index d788f837fc..802628f899 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -609,15 +609,15 @@ def symbolLSA: Operand<i32> {
let PrintMethod = "printSymbolLSA";
}
-// memory s7imm(reg) operaand
-def memri7 : Operand<iPTR> {
- let PrintMethod = "printMemRegImmS7";
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
}
// memory s10imm(reg) operand
-def memri10 : Operand<iPTR> {
- let PrintMethod = "printMemRegImmS10";
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
}
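A d-form address pairs a signed 10-bit displacement with a base register;
since lqd/stqd address 16-byte quadwords, effective offsets are multiples
of 16. For example (offset and registers invented):

    lqd   $3, 48($1)       # load the quadword 48 bytes above the stack pointer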
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index beea0dfb02..cf4089fa29 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -403,11 +403,6 @@ SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS)
const {
-#if 0
- // Save and clear the LR state.
- SPUFunctionInfo *FI = MF.getInfo<SPUFunctionInfo>();
- FI->setUsesLR(MF.getRegInfo().isPhysRegUsed(LR));
-#endif
// Mark LR and SP unused, since the prolog spills them to stack and
// we don't want anyone else to spill them for us.
//
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
index 2bc0ffdb7e..72752555e4 100644
--- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
@@ -26,6 +26,13 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
PrivateGlobalPrefix = ".L";
// This corresponds to what the gcc SPU compiler emits, for consistency.
CStringSection = ".rodata.str";
+
+ // BSS section needs to be emitted as ".section"
+ BSSSection = "\t.section\t.bss";
+ BSSSection_ = getUnnamedSection("\t.section\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ true);
+
}
/// PreferredEHDataFormat - This hook allows the target to select data