Diffstat (limited to 'lib/Target/CellSPU/SPUISelLowering.cpp')
-rw-r--r-- lib/Target/CellSPU/SPUISelLowering.cpp | 3267
1 file changed, 0 insertions(+), 3267 deletions(-)
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
deleted file mode 100644
index 31b87331a9..0000000000
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ /dev/null
@@ -1,3267 +0,0 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SPUTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SPUISelLowering.h"
-#include "SPUTargetMachine.h"
-#include "SPUFrameLowering.h"
-#include "SPUMachineFunction.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- // Byte offset of the preferred slot (counted from the MSB)
- int prefslotOffset(EVT VT) {
- int retval=0;
- if (VT==MVT::i1) retval=3;
- if (VT==MVT::i8) retval=3;
- if (VT==MVT::i16) retval=2;
-
- return retval;
- }
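-
- // Editor's illustration (not part of the original source): the SPU treats a
- // 16-byte register as bytes 0..15 counted from the MSB, and the "preferred
- // slot" for a scalar is the first word, with narrower scalars
- // right-justified within it:
- //   i1/i8 -> byte 3       (offset 3)
- //   i16   -> bytes 2..3   (offset 2)
- //   i32+  -> bytes 0..3   (offset 0)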
-
- //! Expand a library call into an actual call DAG node
- /*!
- \note
- This code is taken from SelectionDAGLegalize, since it is not exposed as
- part of the LLVM SelectionDAG API.
- */
-
- SDValue
- ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
- bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op.getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op.getOperand(i);
- Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy());
-
- // Splice the libcall in wherever FindInputOutputChains tells us to.
- Type *RetTy =
- Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
- TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned,
- false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- Callee, Args, DAG, Op.getDebugLoc());
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
- }
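-
- // Editor's note: a hypothetical call site (not taken from this file), to
- // show how the helper above is meant to be used:
- //   SDValue Dummy;
- //   SDValue Conv = ExpandLibCall(RTLIB::FPTOSINT_F64_I32, Op, DAG,
- //                                /*isSigned=*/true, Dummy, TLI);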
-}
-
-SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()),
- SPUTM(TM) {
-
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
- // Set RTLIB libcall names as used by SPU:
- setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
-
- // Set up the SPU's register classes:
- addRegisterClass(MVT::i8, &SPU::R8CRegClass);
- addRegisterClass(MVT::i16, &SPU::R16CRegClass);
- addRegisterClass(MVT::i32, &SPU::R32CRegClass);
- addRegisterClass(MVT::i64, &SPU::R64CRegClass);
- addRegisterClass(MVT::f32, &SPU::R32FPRegClass);
- addRegisterClass(MVT::f64, &SPU::R64FPRegClass);
- addRegisterClass(MVT::i128, &SPU::GPRCRegClass);
-
- // SPU has no sign or zero extended loads for i1, i8, i16:
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
- setTruncStoreAction(MVT::i128, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i16, Expand);
- setTruncStoreAction(MVT::i128, MVT::i8, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // SPU constant load actions are custom lowered:
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
-
- // SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
- MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
- setTruncStoreAction(VT, StoreVT, Expand);
- }
- }
-
- // Expand the jumptable branches
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // Custom lower SELECT_CC for most cases, but expand by default
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- // SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
-
- // SPU has no division/remainder instructions
- setOperationAction(ISD::SREM, MVT::i8, Expand);
- setOperationAction(ISD::UREM, MVT::i8, Expand);
- setOperationAction(ISD::SDIV, MVT::i8, Expand);
- setOperationAction(ISD::UDIV, MVT::i8, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i128, Expand);
- setOperationAction(ISD::UREM, MVT::i128, Expand);
- setOperationAction(ISD::SDIV, MVT::i128, Expand);
- setOperationAction(ISD::UDIV, MVT::i128, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
-
- // We don't support sin/cos/sqrt/fmod
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
-
- // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
- // for f32!)
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
- // SPU can do rotate right and left, so legalize it... but customize for i8
- // because instructions don't exist.
-
- // FIXME: Change from "expand" to appropriate type once ROTR is supported in
- // .td files.
- setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
- setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
-
- setOperationAction(ISD::ROTL, MVT::i32, Legal);
- setOperationAction(ISD::ROTL, MVT::i16, Legal);
- setOperationAction(ISD::ROTL, MVT::i8, Custom);
-
- // SPU has no native version of shift left/right for i8
- setOperationAction(ISD::SHL, MVT::i8, Custom);
- setOperationAction(ISD::SRL, MVT::i8, Custom);
- setOperationAction(ISD::SRA, MVT::i8, Custom);
-
- // Make these operations legal and handle them during instruction selection:
- setOperationAction(ISD::SHL, MVT::i64, Legal);
- setOperationAction(ISD::SRL, MVT::i64, Legal);
- setOperationAction(ISD::SRA, MVT::i64, Legal);
-
- // Custom lower i8 multiplications; i32 and i64 multiplications are legal:
- setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
-
- // Expand double-width multiplication
- // FIXME: It would probably be reasonable to support some of these operations
- setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
- setOperationAction(ISD::MULHU, MVT::i8, Expand);
- setOperationAction(ISD::MULHS, MVT::i8, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
- setOperationAction(ISD::MULHU, MVT::i16, Expand);
- setOperationAction(ISD::MULHS, MVT::i16, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
-
- // Need to custom handle (some) common i8, i64 math ops
- setOperationAction(ISD::ADD, MVT::i8, Custom);
- setOperationAction(ISD::ADD, MVT::i64, Legal);
- setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Legal);
-
- // SPU does not have BSWAP. It does have i32 support for CTLZ.
- // CTPOP has to be custom lowered.
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i8, Custom);
- setOperationAction(ISD::CTPOP, MVT::i16, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
- setOperationAction(ISD::CTPOP, MVT::i128, Expand);
-
- setOperationAction(ISD::CTTZ , MVT::i8, Expand);
- setOperationAction(ISD::CTTZ , MVT::i16, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i128, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
-
- setOperationAction(ISD::CTLZ , MVT::i8, Promote);
- setOperationAction(ISD::CTLZ , MVT::i16, Promote);
- setOperationAction(ISD::CTLZ , MVT::i32, Legal);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i128, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
-
- // SPU has a version of select that implements (a&~c)|(b&c), just like
- // select ought to work:
- setOperationAction(ISD::SELECT, MVT::i8, Legal);
- setOperationAction(ISD::SELECT, MVT::i16, Legal);
- setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Legal);
-
- setOperationAction(ISD::SETCC, MVT::i8, Legal);
- setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
-
- // Custom lower i128 -> i64 truncates
- setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
-
- // Custom lower i32/i64 -> i128 sign extend
- setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
- // to expand to a libcall, hence the custom lowering:
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
-
- // f64 FDIV on SPU has no hardware support; it is expanded to a libcall
- // (the __fast_divdf3 name registered above):
- setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
-
- // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
-
- setOperationAction(ISD::BITCAST, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::f32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i64, Legal);
- setOperationAction(ISD::BITCAST, MVT::f64, Legal);
-
- // We cannot sextinreg(i1). Expand to shifts.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool nodes into the
- // appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
- ++sctype) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
-
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
- }
-
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
-
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
-
- // Cell SPU has instructions for converting between i64 and fp.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
-
- // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // Register the vector types, then selectively set operation actions for
- // those that can be effectively codegen'd.
- addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass);
- addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass);
-
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
- // Set operation actions to legal types only.
- if (!isTypeLegal(VT)) continue;
-
- // add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD, VT, Legal);
- setOperationAction(ISD::SUB, VT, Legal);
- // mul is legal for the supported vector types as well.
- setOperationAction(ISD::MUL, VT, Legal);
-
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Custom);
-
- // These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
-
- // Expand all trunc stores
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
- setTruncStoreAction(VT, TargetVT, Expand);
- }
-
- // Custom lower build_vector, constant pool spills, insert and
- // extract vector elements:
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- }
-
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
-
- setOperationAction(ISD::AND, MVT::v16i8, Custom);
- setOperationAction(ISD::OR, MVT::v16i8, Custom);
- setOperationAction(ISD::XOR, MVT::v16i8, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-
- setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
-
- setBooleanContents(ZeroOrNegativeOneBooleanContent);
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
-
- setStackPointerRegisterToSaveRestore(SPU::R1);
-
- // We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::ANY_EXTEND);
-
- setMinFunctionAlignment(3);
-
- computeRegisterProperties();
-
- // Set pre-RA register scheduler default to BURR, which produces slightly
- // better code than the default (could also be TDRR, but TargetLowering.h
- // needs a mod to support that model):
- setSchedulingPreference(Sched::RegPressure);
-}
-
-const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
- case SPUISD::Hi: return "SPUISD::Hi";
- case SPUISD::Lo: return "SPUISD::Lo";
- case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
- case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
- case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
- case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
- case SPUISD::CALL: return "SPUISD::CALL";
- case SPUISD::SHUFB: return "SPUISD::SHUFB";
- case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
- case SPUISD::CNTB: return "SPUISD::CNTB";
- case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
- case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
- case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
- case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
- case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
- case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
- case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
- case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
- case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
- case SPUISD::SELB: return "SPUISD::SELB";
- case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
- case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
- case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Return the Cell SPU's SETCC result type
-//===----------------------------------------------------------------------===//
-
-EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
- // i8, i16 and i32 are valid SETCC result types
- MVT::SimpleValueType retval;
-
- switch(VT.getSimpleVT().SimpleTy){
- case MVT::i1:
- case MVT::i8:
- retval = MVT::i8; break;
- case MVT::i16:
- retval = MVT::i16; break;
- case MVT::i32:
- default:
- retval = MVT::i32;
- }
- return retval;
-}
-
-//===----------------------------------------------------------------------===//
-// Calling convention code:
-//===----------------------------------------------------------------------===//
-
-#include "SPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// LowerOperation implementation
-//===----------------------------------------------------------------------===//
-
-/// Custom lower loads for CellSPU
-/*!
- All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to rotate to extract the requested element.
-
- For extending loads, we also want to ensure that the following sequence is
- emitted, e.g. for MVT::f32 extending load to MVT::f64:
-
-\verbatim
-%1 v16i8,ch = load
-%2 v16i8,ch = rotate %1
-%3 v4f32,ch = bitconvert %2
-%4 f32 = vec2prefslot %3
-%5 f64 = fp_extend %4
-\endverbatim
-*/
-static SDValue
-LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- LoadSDNode *LN = cast<LoadSDNode>(Op);
- SDValue the_chain = LN->getChain();
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- EVT InVT = LN->getMemoryVT();
- EVT OutVT = Op.getValueType();
- ISD::LoadExtType ExtType = LN->getExtensionType();
- unsigned alignment = LN->getAlignment();
- int pso = prefslotOffset(InVT);
- DebugLoc dl = Op.getDebugLoc();
- EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
- (128 / InVT.getSizeInBits()));
-
- // two sanity checks
- assert(LN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED addresses");
- // cleanly aligned loads can be selected as-is
- if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- // Get pointerinfos to the memory chunk(s) that contain the data to load
- uint64_t mpi_offset = LN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
-
- SDValue result;
- SDValue basePtr = LN->getBasePtr();
- SDValue rotate;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
-
- // Special cases for a known aligned load to simplify the base pointer
- // and the rotation amount:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
- int64_t rotamt = int64_t((offset & 0xf) - pso);
-
- if (rotamt < 0)
- rotamt += 16;
-
- rotate = DAG.getConstant(rotamt, MVT::i16);
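-
- // Editor's worked example (not in the original source): for an i32
- // (pso == 0) at offset 6, rotamt = (6 & 0xf) - 0 = 6, i.e. rotate the
- // quadword left 6 bytes so memory bytes 6..9 land in the preferred slot.
- // For an i16 (pso == 2) at offset 1, rotamt = 1 - 2 = -1, which wraps to 15.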
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
- || (basePtr.getOpcode() == SPUISD::IndirectAddr
- && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
- && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
- // Plain aligned a-form address: rotate into preferred slot
- // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getConstant(rotamt, MVT::i16);
- } else {
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- int64_t rotamt = -pso;
- if (rotamt < 0)
- rotamt += 16;
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(rotamt, PtrVT));
- }
- } else {
- // Unaligned load: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Offset the rotate amount by the basePtr and the preferred slot
- // byte offset
- rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(-pso, PtrVT));
- }
-
- // Do the load as a i128 to allow possible shifting
- SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false, 16);
-
- // When the size is no greater than the alignment, we get all the data with
- // just one load
- if (alignment >= InVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- // Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
- low.getValue(0), rotate);
-
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- InVT, (128 / InVT.getSizeInBits()));
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BITCAST, dl, vecVT, result));
- }
- // When the alignment is less than the size, we might need (known only at
- // run-time) two loads.
- // TODO: if the memory address is composed only from constants, we have
- // extra knowledge, and might avoid the second load
- else {
- // storage position offset from the lower 16-byte-aligned memory chunk
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
- // get a register full of ones (this implementation is a workaround: LLVM
- // cannot handle 128-bit signed int constants)
- SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(16, PtrVT)),
- highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), false,
- 16);
-
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- high.getValue(1));
-
- // Shift the (possible) high part right to compensate for the misalignment.
- // If there is no high part (i.e. the value is i64 and the offset is 4),
- // this will zero out the high value.
- high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
- DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset
- ));
-
- // Shift the low part similarly
- low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset);
-
- // Merge the two parts
- result = DAG.getNode(ISD::BITCAST, dl, vecVT,
- DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
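-
- // Editor's check of the merge (not in the original source): for an i64 at
- // offset 12, "low" shifted left 12 bytes moves its bytes 12..15 into
- // positions 0..3, while "high" shifted right 16-12 = 4 bytes places its
- // bytes 0..3 (memory bytes 16..19) at positions 4..7; OR-ing the two yields
- // the full 8-byte value, which VEC2PREFSLOT then extracts.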
-
- if (!InVT.isVector()) {
- result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
- }
-
- }
- // Handle extending loads by extending the scalar result:
- if (ExtType == ISD::SEXTLOAD) {
- result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::ZEXTLOAD) {
- result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
- } else if (ExtType == ISD::EXTLOAD) {
- unsigned NewOpc = ISD::ANY_EXTEND;
-
- if (OutVT.isFloatingPoint())
- NewOpc = ISD::FP_EXTEND;
-
- result = DAG.getNode(NewOpc, dl, OutVT, result);
- }
-
- SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
- SDValue retops[2] = {
- result,
- the_chain
- };
-
- result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
- retops, sizeof(retops) / sizeof(retops[0]));
- return result;
-}
-
-/// Custom lower stores for CellSPU
-/*!
- All CellSPU stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to generate a shuffle to insert the
- requested element into its place, then store the resulting block.
- */
-static SDValue
-LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- StoreSDNode *SN = cast<StoreSDNode>(Op);
- SDValue Value = SN->getValue();
- EVT VT = Value.getValueType();
- EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- DebugLoc dl = Op.getDebugLoc();
- unsigned alignment = SN->getAlignment();
- SDValue result;
- EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
- (128 / StVT.getSizeInBits()));
- // Get pointer infos to the memory chunk(s) that will be stored into
- uint64_t mpi_offset = SN->getPointerInfo().Offset;
- mpi_offset -= mpi_offset%16;
- MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
- MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
-
-
- // two sanity checks
- assert(SN->getAddressingMode() == ISD::UNINDEXED
- && "we should get only UNINDEXED addresses");
- // cleanly aligned stores can be selected as-is
- if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
- return SDValue();
-
- SDValue alignLoadVec;
- SDValue basePtr = SN->getBasePtr();
- SDValue the_chain = SN->getChain();
- SDValue insertEltOffs;
-
- if ((alignment%16) == 0) {
- ConstantSDNode *CN;
- // Special cases for a known aligned store to simplify the base pointer
- // and insertion byte:
- if (basePtr.getOpcode() == ISD::ADD
- && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
- // Known offset into basePtr
- int64_t offset = CN->getSExtValue();
-
- // Simplify the base pointer for this case:
- basePtr = basePtr.getOperand(0);
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & 0xf), PtrVT));
-
- if ((offset & ~0xf) > 0) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant((offset & ~0xf), PtrVT));
- }
- } else {
- // Otherwise, assume it's at byte 0 of basePtr
- insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
- } else {
- // Unaligned store: must be more pessimistic about addressing modes:
- if (basePtr.getOpcode() == ISD::ADD) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- SDValue Flag;
-
- SDValue Op0 = basePtr.getOperand(0);
- SDValue Op1 = basePtr.getOperand(1);
-
- if (isa<ConstantSDNode>(Op1)) {
- // Convert the (add <ptr>, <const>) to an indirect address contained
- // in a register. Note that this is done because we need to avoid
- // creating a 0(reg) d-form address due to the SPU's block loads.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
- basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
- } else {
- // Convert the (add <arg1>, <arg2>) to an indirect address, which
- // will likely be lowered as a reg(reg) x-form address.
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
- }
- } else {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Insertion point is solely determined by basePtr's contents
- insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
- basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Load the lower part of the memory to which to store.
- SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
-
- // If we don't need to store across the 16-byte boundary, one store suffices
- if (alignment >= StVT.getSizeInBits()/8) {
- // Update the chain
- the_chain = low.getValue(1);
-
- LoadSDNode *LN = cast<LoadSDNode>(low);
- SDValue theValue = SN->getValue();
-
- if (StVT != VT
- && (theValue.getOpcode() == ISD::AssertZext
- || theValue.getOpcode() == ISD::AssertSext)) {
- // Drill down and get the value for zero- and sign-extended
- // quantities
- theValue = theValue.getOperand(0);
- }
-
- // If the base pointer is already a D-form address, then just create
- // a new D-form address with a slot offset and the original base pointer.
- // Otherwise generate a D-form address with the slot offset relative
- // to the stack pointer, which is always aligned.
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "CellSPU LowerSTORE: basePtr = ";
- basePtr.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
- insertEltOffs);
- SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
- theValue);
-
- result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
- vectorizeOp, low,
- DAG.getNode(ISD::BITCAST, dl,
- MVT::v4i32, insertEltOp));
-
- result = DAG.getStore(the_chain, dl, result, basePtr,
- lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(),
- 16);
-
- }
- // Do the store when it might cross the 16-byte memory access boundary.
- else {
- // TODO: issue a warning if SN->isVolatile() == true? This is likely not
- // what the user wanted.
-
- // address offset from the nearest lower 16-byte aligned address
- SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
- SN->getBasePtr(),
- DAG.getConstant(0xf, MVT::i32));
- // 16 - offset
- SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- offset);
- // 16 - sizeof(Value)
- SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant( 16, MVT::i32),
- DAG.getConstant( VT.getSizeInBits()/8,
- MVT::i32));
- // get a register full of ones
- SDValue ones = DAG.getConstant(-1, MVT::v4i32);
- ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
-
- // Create the 128-bit masks that have ones where the data to store is
- // located.
- SDValue lowmask, himask;
- // If the value to store doesn't fill an entire 128 bits, zero out the
- // last bits of the mask so that only the value we want to store is
- // masked. This is the case e.g. for a store of i32 with align 2.
- if (!VT.isVector()){
- Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
- lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- surplus);
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
-
- }
- else {
- lowmask = ones;
- Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
- }
- // This will be zero if no data goes to the high quad.
- himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
- offset_compl);
- lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
- offset);
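-
- // Editor's worked example (not in the original source): storing an i32
- // (4 bytes) at offset 14 gives surplus = 12, so lowmask starts with ones in
- // byte positions 0..3. Shifted right by offset it covers positions 14..15
- // of the low quad, and himask = lowmask shifted left by (16-offset) bytes
- // covers positions 0..1 of the high quad.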
-
- // Load in the old data and zero out the parts that will be overwritten with
- // the new data to store.
- SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(),
- false, 16);
- the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
- hi.getValue(1));
-
- low = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
- hi = DAG.getNode(ISD::AND, dl, MVT::i128,
- DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
-
- // Shift the Value to store into place. rlow contains the parts that go to
- // the lower memory chunk, rhi has the parts that go to the upper one.
- SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
- rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
- SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
- offset_compl);
-
- // Merge the old data and the new data and store the results.
- // Vectors need converting to integers here, as 'OR'ing floats asserts.
- rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
- rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
- DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
-
- low = DAG.getStore(the_chain, dl, rlow, basePtr,
- lowMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- hi = DAG.getStore(the_chain, dl, rhi,
- DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
- DAG.getConstant( 16, PtrVT)),
- highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
- result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
- hi.getValue(0));
- }
-
- return result;
-}
-
-//! Generate the address of a constant pool entry.
-static SDValue
-LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CP->getConstVal();
- SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- SDValue Zero = DAG.getConstant(0, PtrVT);
- const TargetMachine &TM = DAG.getTarget();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- // Just return the SDValue with the constant pool address in it.
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- }
-
- llvm_unreachable("LowerConstantPool: Relocation model other than static"
- " not supported.");
-}
-
-//! Alternate entry point for generating the address of a constant pool entry
-SDValue
-SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
- return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
-}
-
-static SDValue
-LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- SDValue Zero = DAG.getConstant(0, PtrVT);
- const TargetMachine &TM = DAG.getTarget();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- }
-
- llvm_unreachable("LowerJumpTable: Relocation model other than static"
- " not supported.");
-}
-
-static SDValue
-LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- EVT PtrVT = Op.getValueType();
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
- PtrVT, GSDN->getOffset());
- const TargetMachine &TM = DAG.getTarget();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (TM.getRelocationModel() == Reloc::Static) {
- if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
- } else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
- }
- } else {
- report_fatal_error("LowerGlobalAddress: Relocation model other than "
- "static not supported.");
- /*NOTREACHED*/
- }
-}
-
-//! Custom lower double precision floating point constants
-static SDValue
-LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- // FIXME there is no actual debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- if (VT == MVT::f64) {
- ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
- assert((FP != 0) &&
- "LowerConstantFP: Node is not ConstantFPSDNode");
-
- uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
- SDValue T = DAG.getConstant(dbits, MVT::i64);
- SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
- }
-
- return SDValue();
-}
-
-SDValue
-SPUTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
-
- unsigned ArgOffset = SPUFrameLowering::minStackSize();
- unsigned ArgRegIdx = 0;
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
-
- // Add DAG nodes to load the arguments or copy them out of registers.
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
- SDValue ArgVal;
- CCValAssign &VA = ArgLocs[ArgNo];
-
- if (VA.isRegLoc()) {
- const TargetRegisterClass *ArgRegClass;
-
- switch (ObjectVT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerFormalArguments Unhandled argument type: " +
- Twine(ObjectVT.getEVTString()));
- case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
- case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
- case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
- case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
- case MVT::i128:
- ArgRegClass = &SPU::GPRCRegClass;
- break;
- case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
- case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
- }
-
- unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++ArgRegIdx;
- } else {
- // We need to load the argument to a virtual register if we determined
- // above that we ran out of physical registers of the appropriate type,
- // or we're forced to use the stack for varargs.
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, false, 0);
- ArgOffset += StackSlotSize;
- }
-
- InVals.push_back(ArgVal);
- // Update the chain
- Chain = ArgVal.getOperand(0);
- }
-
- // vararg handling:
- if (isVarArg) {
- // FIXME: we should be able to query the argument registers from
- // tablegen generated code.
- static const uint16_t ArgRegs[] = {
- SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
- SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
- SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
- SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
- SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
- SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
- SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
- SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
- SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
- SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
- SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
- };
- // size of ArgRegs array
- const unsigned NumArgRegs = 77;
-
- // We will spill (79-3)+1 registers to the stack
- SmallVector<SDValue, 79-3+1> MemOps;
-
- // Create the frame slot
- for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
- SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
- SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
- false, false, 0);
- Chain = Store.getOperand(0);
- MemOps.push_back(Store);
-
- // Increment address by stack slot size for the next stored argument
- ArgOffset += StackSlotSize;
- }
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
- }
-
- return Chain;
-}
-
-/// isLSAAddress - Return the immediate to use if the specified
-/// value is representable as an LSA address.
-static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
-
- int Addr = C->getZExtValue();
- if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 14 >> 14) != Addr)
- return 0; // Top 14 bits have to be sext of immediate.
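-
- // Editor's note (not in the original source): with Addr held in a 32-bit
- // int, (Addr << 14 >> 14) sign-extends from bit 17, so the test above
- // accepts exactly the addresses that fit an 18-bit signed immediate once
- // the two implicitly-zero low bits are dropped.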
-
- return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
-}
-
-SDValue
-SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
-
- // CellSPU target does not yet support tail call optimization.
- isTailCall = false;
-
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- unsigned NumOps = Outs.size();
- unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- // FIXME: allow for other calling conventions
- CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
-
- const unsigned NumArgRegs = ArgLocs.size();
-
-
- // Handy pointer type
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // Set up a copy of the stack pointer for use loading and storing any
- // arguments that may not fit in the registers available for argument
- // passing.
- SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
-
- // Figure out which arguments are going to go in registers, and which in
- // memory.
- unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
- unsigned ArgRegIdx = 0;
-
- // Keep track of registers passing arguments
- std::vector<std::pair<unsigned, SDValue> > RegsToPass;
- // And the arguments passed on the stack
- SmallVector<SDValue, 8> MemOpChains;
-
- for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
- SDValue Arg = OutVals[ArgRegIdx];
- CCValAssign &VA = ArgLocs[ArgRegIdx];
-
- // PtrOff will be used to store the current argument to the stack if a
- // register cannot be found for it.
- SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
-
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2i64:
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
- }
- }
-
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area and the parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP].
- unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
-
- // Insert a call sequence start
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
- true));
-
- if (!MemOpChains.empty()) {
- // Adjust the stack pointer for the stack arguments.
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = SPUISD::CALL;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
-
- if (!ST->usingLargeMem()) {
- // Turn calls to targets that are defined (i.e., have bodies) into BRSL
- // style calls; otherwise, external symbols become BRASL calls. This
- // assumes that declared/defined symbols are in the same compilation
- // unit and can be reached through PC-relative jumps.
- //
- // NOTE:
- // This may be an unsafe assumption for JIT and really large compilation
- // units.
- if (GV->isDeclaration()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
- }
- } else {
- // "Large memory" mode: Turn all calls into indirect calls with a X-form
- // address pairs:
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
- }
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- EVT CalleeVT = Callee.getValueType();
- SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
- Callee.getValueType());
-
- if (!ST->usingLargeMem()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
- } else {
- Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
- }
- } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
- // If this is an absolute destination address that appears to be a legal
- // local store address, use the munged value.
- Callee = SDValue(Dest, 0);
- }
-
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
- // Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
- &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
-
- // If the function returns void, just return the chain.
- if (Ins.empty())
- return Chain;
-
- // Now handle the return value(s)
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
-
-
- // If the call has results, copy the values out of the ret val registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign VA = RVLocs[i];
-
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
- InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-SDValue
-SPUTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode())
- return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else
- return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Vector related lowering:
-//===----------------------------------------------------------------------===//
-
-static ConstantSDNode *
-getVecImm(SDNode *N) {
- SDValue OpVal(0, 0);
-
- // Check to see if this buildvec has a single non-undef value in its elements.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
- OpVal = N->getOperand(i);
- else if (OpVal != N->getOperand(i))
- return 0;
- }
-
- if (OpVal.getNode() != 0) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
- return CN;
- }
- }
-
- return 0;
-}
-
- /// get_vec_u18imm - Test if this vector is a vector filled with the same value
- /// and the value fits into an unsigned 18-bit constant, and if so, return the
- /// constant
-SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- uint64_t Value = CN->getZExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (Value <= 0x3ffff)
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_vec_i16imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 16-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int64_t Value = CN->getSExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (isInt<16>(Value)) {
- return DAG.getTargetConstant(Value, ValueType);
- }
- }
-
- return SDValue();
-}
-
-/// get_vec_i10imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 10-bit constant, and if so, return the
-/// constant
-SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int64_t Value = CN->getSExtValue();
- if (ValueType == MVT::i64) {
- uint64_t UValue = CN->getZExtValue();
- uint32_t upper = uint32_t(UValue >> 32);
- uint32_t lower = uint32_t(UValue);
- if (upper != lower)
- return SDValue();
- Value = Value >> 32;
- }
- if (isInt<10>(Value))
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_vec_i8imm - Test if this vector is a vector filled with the same value
-/// and the value fits into a signed 8-bit constant, and if so, return the
-/// constant.
-///
-/// @note: The incoming vector is v16i8 because that's the only way we can load
-/// constant vectors. Thus, we test to see if the upper and lower bytes are the
-/// same value.
-SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- int Value = (int) CN->getZExtValue();
- if (ValueType == MVT::i16
- && Value <= 0xffff /* truncated from uint64_t */
- && ((short) Value >> 8) == ((short) Value & 0xff))
- return DAG.getTargetConstant(Value & 0xff, ValueType);
- else if (ValueType == MVT::i8
- && (Value & 0xff) == Value)
- return DAG.getTargetConstant(Value, ValueType);
- }
-
- return SDValue();
-}
-
- /// get_ILHUvec_imm - Test if this vector is a vector filled with the same
- /// value, where that value has only its upper 16 bits set (the form ILHU
- /// loads), and if so, return the upper halfword as the constant
-SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- EVT ValueType) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- uint64_t Value = CN->getZExtValue();
- if ((ValueType == MVT::i32
- && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
- || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
- return DAG.getTargetConstant(Value >> 16, ValueType);
- }
-
- return SDValue();
-}
-
-/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
-SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
- }
-
- return SDValue();
-}
-
- /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
-SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
- if (ConstantSDNode *CN = getVecImm(N)) {
- return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
- }
-
- return SDValue();
-}
-
- //! Lower a BUILD_VECTOR node: detect constant splats and emit the cheapest
- //! equivalent splat sequence the target can match:
-static SDValue
-LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT EltVT = VT.getVectorElementType();
- DebugLoc dl = Op.getDebugLoc();
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
- unsigned minSplatBits = EltVT.getSizeInBits();
-
- if (minSplatBits < 16)
- minSplatBits = 16;
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
-
- if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- || minSplatBits < SplatBitSize)
- return SDValue(); // Wasn't a constant vector or splat exceeded min
-
- uint64_t SplatBits = APSplatBits.getZExtValue();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
- Twine(VT.getEVTString()));
- /*NOTREACHED*/
- case MVT::v4f32: {
- uint32_t Value32 = uint32_t(SplatBits);
- assert(SplatBitSize == 32
- && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
- // NOTE: pretend the constant is an integer. LLVM won't load FP constants
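- // (e.g., a splat of 1.0f has Value32 == 0x3f800000, so this becomes a
- // v4i32 BUILD_VECTOR of that bit pattern, bitcast to v4f32)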
- SDValue T = DAG.getConstant(Value32, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
- }
- case MVT::v2f64: {
- uint64_t f64val = uint64_t(SplatBits);
- assert(SplatBitSize == 64
- && "LowerBUILD_VECTOR: Unexpected splat size for v2f64 element.");
- // NOTE: pretend the constant is an integer. LLVM won't load FP constants
- SDValue T = DAG.getConstant(f64val, MVT::i64);
- return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
- }
- case MVT::v16i8: {
- // 8-bit constants have to be expanded to 16 bits: isConstantSplat was asked
- // for a splat of at least 16 bits, so SplatBits already holds the byte
- // replicated into both halves of the halfword.
- unsigned short Value16 = SplatBits;
- SmallVector<SDValue, 8> Ops;
-
- Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
- return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
- }
- case MVT::v8i16: {
- unsigned short Value16 = SplatBits;
- SDValue T = DAG.getConstant(Value16, EltVT);
- SmallVector<SDValue, 8> Ops;
-
- Ops.assign(8, T);
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
- }
- case MVT::v4i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
- }
- case MVT::v2i64: {
- return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
- }
- }
-}
-
-/*!
- */
-SDValue
-SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
- DebugLoc dl) {
- uint32_t upper = uint32_t(SplatVal >> 32);
- uint32_t lower = uint32_t(SplatVal);
-
- if (upper == lower) {
- // Magic constant that can be matched by IL, ILA, et al.
- SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
- return DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Val, Val, Val, Val));
- } else {
- bool upper_special, lower_special;
-
- // NOTE: This code creates common-case shuffle masks that can be easily
- // detected as common expressions. It is not attempting to create highly
- // specialized masks to replace any and all 0's, 0xff's and 0x80's.
-
- // Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
- lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
-
- // Both upper and lower are special, lower to a constant pool load:
- if (lower_special && upper_special) {
- SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
- SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- UpperVal, LowerVal, UpperVal, LowerVal);
- return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
- }
-
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
-
- // Create lower vector if not a special pattern
- if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- LO32C, LO32C, LO32C, LO32C));
- }
-
- // Create upper vector if not a special pattern
- if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- HI32C, HI32C, HI32C, HI32C));
- }
-
- // If either upper or lower are special, then the two input operands are
- // the same (basically, one of them is a "don't care")
- if (lower_special)
- LO32 = HI32;
- if (upper_special)
- HI32 = LO32;
-
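- // A sketch of what the loop below builds: for SplatVal ==
- // 0x0000000012345678 the upper word is special (zero) and HI32 aliases
- // LO32, so the four control words become { 0x80808080, 0x14151617,
- // 0x80808080, 0x1c1d1e1f }. Bytes 0x10-0x1f select from the second SHUFB
- // operand, while the special control byte 0x80 makes shufb emit 0x00
- // (0xc0 emits 0xff, 0xe0 emits 0x80), yielding the v2i64 splat of
- // 0x0000000012345678.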
- for (int i = 0; i < 4; ++i) {
- uint64_t val = 0;
- for (int j = 0; j < 4; ++j) {
- SDValue V;
- bool process_upper, process_lower;
- val <<= 8;
- process_upper = (upper_special && (i & 1) == 0);
- process_lower = (lower_special && (i & 1) == 1);
-
- if (process_upper || process_lower) {
- if ((process_upper && upper == 0)
- || (process_lower && lower == 0))
- val |= 0x80;
- else if ((process_upper && upper == 0xffffffff)
- || (process_lower && lower == 0xffffffff))
- val |= 0xc0;
- else if ((process_upper && upper == 0x80000000)
- || (process_lower && lower == 0x80000000))
- val |= (j == 0 ? 0xe0 : 0x80);
- } else
- val |= i * 4 + j + ((i & 1) * 16);
- }
-
- ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
- }
-
- return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
- }
-}
-
-/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
-/// which the Cell can operate. The code inspects V3 to ascertain whether the
-/// permutation vector, V3, is monotonically increasing with one "exception"
-/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
-/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
-/// In either case, the net result is going to eventually invoke SHUFB to
-/// permute/shuffle the bytes from V1 and V2.
-/// \note
- /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
- /// generate the control word for byte/halfword/word insertion. This takes
- /// care of a single element move from V2 into V1.
-/// \note
-/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
-
- // If we have a single element being moved from V1 to V2, this can be handled
- // using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element, and the source
- // slot of the element to move must be the same as the destination.
- EVT VecVT = V1.getValueType();
- EVT EltVT = VecVT.getVectorElementType();
- unsigned EltsFromV2 = 0;
- unsigned V2EltOffset = 0;
- unsigned V2EltIdx0 = 0;
- unsigned CurrElt = 0;
- unsigned MaxElts = VecVT.getVectorNumElements();
- unsigned PrevElt = 0;
- bool monotonic = true;
- bool rotate = true;
- int rotamt = 0;
- EVT maskVT; // which of the c?d instructions to use
-
- if (EltVT == MVT::i8) {
- V2EltIdx0 = 16;
- maskVT = MVT::v16i8;
- } else if (EltVT == MVT::i16) {
- V2EltIdx0 = 8;
- maskVT = MVT::v8i16;
- } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
- V2EltIdx0 = 4;
- maskVT = MVT::v4i32;
- } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
- V2EltIdx0 = 2;
- maskVT = MVT::v2i64;
- } else
- llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
-
- for (unsigned i = 0; i != MaxElts; ++i) {
- if (SVN->getMaskElt(i) < 0)
- continue;
-
- unsigned SrcElt = SVN->getMaskElt(i);
-
- if (monotonic) {
- if (SrcElt >= V2EltIdx0) {
- // TODO: optimize for the monotonic case when several consecutive
- // elements are taken from V2. Do we ever get such a case?
- if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
- V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
- else
- monotonic = false;
- ++EltsFromV2;
- } else if (CurrElt != SrcElt) {
- monotonic = false;
- }
-
- ++CurrElt;
- }
-
- if (rotate) {
- if (PrevElt > 0 && SrcElt < MaxElts) {
- if ((PrevElt == SrcElt - 1)
- || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
- PrevElt = SrcElt;
- } else {
- rotate = false;
- }
- } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
- // First time or after a "wrap around"
- rotamt = SrcElt - i;
- PrevElt = SrcElt;
- } else {
- // This isn't a rotation; it takes elements from V2.
- rotate = false;
- }
- }
- }
-
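- // A sketch, assuming v4i32: mask <0, 5, 2, 3> pulls a single element from
- // V2 into its matching slot, so monotonic survives with EltsFromV2 == 1
- // and the C*D path below is taken; mask <1, 2, 3, 0> is a left rotation
- // by one element, so rotate survives with rotamt == 1.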
- if (EltsFromV2 == 1 && monotonic) {
- // Compute mask and shuffle
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
- // R1 ($sp) is used here only because it is guaranteed to have its low bits zero
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2EltOffset, MVT::i32));
- SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
- maskVT, Pointer);
-
- // Use shuffle mask in SHUFB synthetic instruction:
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
- ShufMaskOp);
- } else if (rotate) {
- if (rotamt < 0)
- rotamt += MaxElts;
- rotamt *= EltVT.getSizeInBits() / 8;
- return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
- V1, DAG.getConstant(rotamt, MVT::i16));
- } else {
- // Convert the SHUFFLE_VECTOR mask's input element units to the
- // actual bytes.
- unsigned BytesPerElement = EltVT.getSizeInBits()/8;
-
- SmallVector<SDValue, 16> ResultMask;
- for (unsigned i = 0, e = MaxElts; i != e; ++i) {
- unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
-
- for (unsigned j = 0; j < BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
- }
- SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
- }
-}
-
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
- DebugLoc dl = Op.getDebugLoc();
-
- if (Op0.getNode()->getOpcode() == ISD::Constant) {
- // For a constant, build the appropriate constant vector, which will
- // eventually simplify to a vector register load.
-
- ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
- SmallVector<SDValue, 16> ConstVecValues;
- EVT VT;
- size_t n_copies;
-
- // Create a constant vector:
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected constant value type in "
- "LowerSCALAR_TO_VECTOR");
- case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
- case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
- case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
- case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
- case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
- case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
- }
-
- SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
- for (size_t j = 0; j < n_copies; ++j)
- ConstVecValues.push_back(CValue);
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
- &ConstVecValues[0], ConstVecValues.size());
- } else {
- // Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
- }
- }
-}
-
-static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- SDValue N = Op.getOperand(0);
- SDValue Elt = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
- SDValue retval;
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- // Constant argument:
- int EltNo = (int) C->getZExtValue();
-
- // sanity checks:
- if (VT == MVT::i8 && EltNo >= 16)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == MVT::i16 && EltNo >= 8)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == MVT::i32 && EltNo >= 4)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
- else if (VT == MVT::i64 && EltNo >= 2)
- llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
-
- if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
- // i32 and i64: Element 0 is the preferred slot
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
- }
-
- // Need to generate shuffle mask and extract:
- int prefslot_begin = -1, prefslot_end = -1;
- int elt_byte = EltNo * VT.getSizeInBits() / 8;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- prefslot_begin = prefslot_end = 3;
- break;
- }
- case MVT::i16: {
- prefslot_begin = 2; prefslot_end = 3;
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- prefslot_begin = 0; prefslot_end = 3;
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- prefslot_begin = 0; prefslot_end = 7;
- break;
- }
- }
-
- assert(prefslot_begin != -1 && prefslot_end != -1 &&
- "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
-
- unsigned int ShufBytes[16] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
- for (int i = 0; i < 16; ++i) {
- // zero fill upper part of preferred slot, don't care about the
- // other slots:
- unsigned int mask_val;
- if (i <= prefslot_end) {
- mask_val =
- ((i < prefslot_begin)
- ? 0x80
- : elt_byte + (i - prefslot_begin));
-
- ShufBytes[i] = mask_val;
- } else
- ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
- }
-
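- // A worked example: extracting element 5 of a v8i16 gives elt_byte == 10
- // and preferred slot bytes 2-3, so ShufBytes becomes the repeating pattern
- // { 0x80, 0x80, 0x0a, 0x0b, ... }: zero-fill the upper halfword of each
- // word and route the element's two bytes into the preferred halfword.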
- SDValue ShufMask[4];
- for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
- unsigned bidx = i * 4;
- unsigned int bits = ((ShufBytes[bidx] << 24) |
- (ShufBytes[bidx+1] << 16) |
- (ShufBytes[bidx+2] << 8) |
- ShufBytes[bidx+3]);
- ShufMask[i] = DAG.getConstant(bits, MVT::i32);
- }
-
- SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
- N, N, ShufMaskVec));
- } else {
- // Variable index: Rotate the requested element into slot 0, then replicate
- // slot 0 across the vector
- EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector()) {
- report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
- "vector type!");
- }
-
- // Make life easier by making sure the index is zero-extended to i32
- if (Elt.getValueType() != MVT::i32)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
-
- // Scale the index to a bit/byte shift quantity
- APInt scaleFactor =
- APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
- unsigned scaleShift = scaleFactor.logBase2();
- SDValue vecShift;
-
- if (scaleShift > 0) {
- // Scale the shift factor:
- Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
- DAG.getConstant(scaleShift, MVT::i32));
- }
-
- vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
-
- // Replicate the bytes starting at byte 0 across the entire vector (for
- // consistency with the notion of a unified register set)
- SDValue replicate;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
- "type");
- /*NOTREACHED*/
- case MVT::i8: {
- SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i16: {
- SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- factor, factor, factor, factor);
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
- SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- loFactor, hiFactor, loFactor, hiFactor);
- break;
- }
- }
-
- retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
- DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- vecShift, vecShift, replicate));
- }
-
- return retval;
-}
-
-static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- SDValue VecOp = Op.getOperand(0);
- SDValue ValOp = Op.getOperand(1);
- SDValue IdxOp = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- EVT eltVT = ValOp.getValueType();
-
- // Use a byte offset of 0 when the lane to insert into is 'undef'.
- int64_t Offset = 0;
- if (IdxOp.getOpcode() != ISD::UNDEF) {
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
- }
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Use $sp ($1) because it's always 16-byte aligned and it's available:
- SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
- DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Offset, PtrVT));
- // widen the mask when dealing with half vectors
- EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
- 128/ VT.getVectorElementType().getSizeInBits());
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
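- // A sketch of the effect: inserting into lane 2 of a v4i32 gives
- // Offset == 8, and the C*D-generated control word keeps VecOp's bytes
- // everywhere except bytes 8-11, where it routes in the scalar's
- // preferred-slot word from the first SHUFB operand.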
-
- SDValue result =
- DAG.getNode(SPUISD::SHUFB, dl, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
- VecOp,
- DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
-
- return result;
-}
-
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
- const TargetLowering &TLI)
-{
- SDValue N0 = Op.getOperand(0); // Everything has at least one operand
- DebugLoc dl = Op.getDebugLoc();
- EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
-
- assert(Op.getValueType() == MVT::i8);
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled i8 math operator");
- case ISD::ADD: {
- // 8-bit addition: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
-
- }
-
- case ISD::SUB: {
- // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
- // the result:
- SDValue N1 = Op.getOperand(1);
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::ROTR:
- case ISD::ROTL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- // Replicate lower 8-bits into upper 8:
- SDValue ExpandArg =
- DAG.getNode(ISD::OR, dl, MVT::i16, N0,
- DAG.getNode(ISD::SHL, dl, MVT::i16,
- N0, DAG.getConstant(8, MVT::i32)));
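- // A worked example: rotl i8 0xab by 3 first widens to 0xabab; a 16-bit
- // rotate by 3 gives 0x5d5d, and truncation returns 0x5d, exactly
- // rotl8(0xab, 3).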
-
- // Truncate back down to i8
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
- }
- case ISD::SRL:
- case ISD::SHL: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::ZERO_EXTEND;
-
- if (N1.getValueType().bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
-
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::SRA: {
- SDValue N1 = Op.getOperand(1);
- EVT N1VT = N1.getValueType();
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- if (!N1VT.bitsEq(ShiftVT)) {
- unsigned N1Opc = ISD::SIGN_EXTEND;
-
- if (N1VT.bitsGT(ShiftVT))
- N1Opc = ISD::TRUNCATE;
- N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
- }
-
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- case ISD::MUL: {
- SDValue N1 = Op.getOperand(1);
-
- N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
- N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
- DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- }
- }
-}
-
-//! Lower byte immediate operations for v16i8 vectors:
-static SDValue
-LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
- SDValue ConstVec;
- SDValue Arg;
- EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
-
- ConstVec = Op.getOperand(0);
- Arg = Op.getOperand(1);
- if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- } else {
- ConstVec = Op.getOperand(1);
- Arg = Op.getOperand(0);
- if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
- ConstVec = ConstVec.getOperand(0);
- }
- }
- }
-
- if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
- BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
- assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
-
- APInt APSplatBits, APSplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
-
- if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, minSplatBits)
- && minSplatBits <= SplatBitSize) {
- uint64_t SplatBits = APSplatBits.getZExtValue();
- SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
-
- SmallVector<SDValue, 16> tcVec;
- tcVec.assign(16, tc);
- return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
- }
- }
-
- // These operations (AND, OR, XOR) are legal, they just couldn't be custom
- // lowered. Return the operation, rather than a null SDValue.
- return Op;
-}
-
-//! Custom lowering for CTPOP (count population)
-/*!
- Custom lowering code that counts the number of ones in the input
- operand. SPU has such an instruction, but it counts the number of
- ones per byte, so the per-byte counts have to be accumulated.
-*/
-static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid value type!");
- case MVT::i8: {
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
- }
-
- case MVT::i16: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i16);
- SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
- SDValue Shift1 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which the virtual register
- // CNTB_reg becomes associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
-
- return DAG.getNode(ISD::AND, dl, MVT::i16,
- DAG.getNode(ISD::ADD, dl, MVT::i16,
- DAG.getNode(ISD::SRL, dl, MVT::i16,
- Tmp1, Shift1),
- Tmp1),
- Mask0);
- }
-
- case MVT::i32: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-
- SDValue N = Op.getOperand(0);
- SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
- SDValue Shift1 = DAG.getConstant(16, MVT::i32);
- SDValue Shift2 = DAG.getConstant(8, MVT::i32);
-
- SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
-
- // CNTB_result becomes the chain to which all of the virtual registers
- // CNTB_reg, SUM1_reg become associated:
- SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
-
- SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
-
- SDValue Comp1 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
- Shift1);
-
- SDValue Sum1 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
- DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
-
- SDValue Sum1_rescopy =
- DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
-
- SDValue Comp2 =
- DAG.getNode(ISD::SRL, dl, MVT::i32,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
- Shift2);
- SDValue Sum2 =
- DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
- DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
-
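- // A worked example of the accumulation: for N == 0xf00f0001, CNTB yields
- // the per-byte counts 0x04040001; adding the word shifted right by 16
- // gives 0x04040405, adding that shifted right by 8 gives 0x04080809, and
- // masking with 0xff leaves 9, the population count.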
- return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
- }
-
- case MVT::i64:
- break;
- }
-
- return SDValue();
-}
-
-//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
-/*!
- f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
- All conversions to i64 are expanded to a libcall.
- */
-static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
- || OpVT == MVT::i64) {
- // Convert f32 / f64 to i32 / i64 via libcall.
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::FP_TO_SINT)
- ? RTLIB::getFPTOSINT(Op0VT, OpVT)
- : RTLIB::getFPTOUINT(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
-/*!
- i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
- All conversions from i64 are expanded to a libcall.
- */
-static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- const SPUTargetLowering &TLI) {
- EVT OpVT = Op.getValueType();
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
- || Op0VT == MVT::i64) {
- // Convert i32, i64 to f64 via libcall:
- RTLIB::Libcall LC =
- (Op.getOpcode() == ISD::SINT_TO_FP)
- ? RTLIB::getSINTTOFP(Op0VT, OpVT)
- : RTLIB::getUINTTOFP(Op0VT, OpVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
- SDValue Dummy;
- return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
- }
-
- return Op;
-}
-
-//! Lower ISD::SETCC
-/*!
- This handles MVT::f64 (double floating point) condition lowering
- */
-static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
- DebugLoc dl = Op.getDebugLoc();
- assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
-
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- EVT lhsVT = lhs.getValueType();
- assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
-
- EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
- EVT IntVT(MVT::i64);
-
- // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
- // selected to a NOP:
- SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
- SDValue lhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64lhs, DAG.getConstant(32, MVT::i32)));
- SDValue lhsHi32abs =
- DAG.getNode(ISD::AND, dl, MVT::i32,
- lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue lhsLo32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
-
- // SETO and SETUO only use the lhs operand:
- if (CC->get() == ISD::SETO) {
- // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
- // SETUO
- APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
- return DAG.getNode(ISD::XOR, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, lhsVT),
- ISD::SETUO),
- DAG.getConstant(ccResultAllOnes, ccResultVT));
- } else if (CC->get() == ISD::SETUO) {
- // Evaluates to true if Op0 is [SQ]NaN
- return DAG.getNode(ISD::AND, dl, ccResultVT,
- DAG.getSetCC(dl, ccResultVT,
- lhsHi32abs,
- DAG.getConstant(0x7ff00000, MVT::i32),
- ISD::SETGE),
- DAG.getSetCC(dl, ccResultVT,
- lhsLo32,
- DAG.getConstant(0, MVT::i32),
- ISD::SETGT));
- }
-
- SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
- SDValue rhsHi32 =
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
- DAG.getNode(ISD::SRL, dl, IntVT,
- i64rhs, DAG.getConstant(32, MVT::i32)));
-
- // If a value is negative, subtract from the sign magnitude constant:
- SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
-
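- // Why this works (a sketch): a non-negative double's raw bits already
- // order correctly as a signed i64. A negative double's raw bits are
- // 0x8000000000000000 | magnitude, and subtracting them from
- // 0x8000000000000000 yields -magnitude, so an ordinary signed integer
- // compare then ranks negative values by decreasing magnitude, matching
- // IEEE 754 ordering.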
- // Convert the sign-magnitude representation into 2's complement:
- SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- lhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
- SDValue lhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- lhsSelectMask, lhsSignMag2TC, i64lhs);
-
- SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
- rhsHi32, DAG.getConstant(31, MVT::i32));
- SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
- SDValue rhsSelect =
- DAG.getNode(ISD::SELECT, dl, IntVT,
- rhsSelectMask, rhsSignMag2TC, i64rhs);
-
- unsigned compareOp;
-
- switch (CC->get()) {
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- compareOp = ISD::SETEQ; break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- compareOp = ISD::SETGT; break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- compareOp = ISD::SETGE; break;
- case ISD::SETOLT:
- case ISD::SETULT:
- compareOp = ISD::SETLT; break;
- case ISD::SETOLE:
- case ISD::SETULE:
- compareOp = ISD::SETLE; break;
- case ISD::SETUNE:
- case ISD::SETONE:
- compareOp = ISD::SETNE; break;
- default:
- report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
- }
-
- SDValue result =
- DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
- (ISD::CondCode) compareOp);
-
- if ((CC->get() & 0x8) == 0) {
- // Ordered comparison:
- SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
- lhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
- rhs, DAG.getConstantFP(0.0, MVT::f64),
- ISD::SETO);
- SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
-
- result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
- }
-
- return result;
-}
-
-//! Lower ISD::SELECT_CC
-/*!
- ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
- SELB instruction.
-
- \note Need to revisit this in the future: if the code path through the true
- and false value computations is longer than the latency of a branch (6
- cycles), then it would be more advantageous to branch and insert a new basic
- block and branch on the condition. However, this code does not make that
- assumption, given the simplistic uses so far.
- */
-
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = Op.getValueType();
- SDValue lhs = Op.getOperand(0);
- SDValue rhs = Op.getOperand(1);
- SDValue trueval = Op.getOperand(2);
- SDValue falseval = Op.getOperand(3);
- SDValue condition = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
-
- // NOTE: SELB's arguments: $rA, $rB, $mask
- //
- // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
- // where bits in $mask are 1. CCond will be inverted, having 1s where the
- // condition was true and 0s where the condition was false. Hence, the
- // arguments to SELB get reversed.
-
- // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
- // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
- // with another "cannot select select_cc" assert:
-
- SDValue compare = DAG.getNode(ISD::SETCC, dl,
- TLI.getSetCCResultType(Op.getValueType()),
- lhs, rhs, condition);
- return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
-}
-
-//! Custom lower ISD::TRUNCATE
-static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
-{
- // Type to truncate to
- EVT VT = Op.getValueType();
- MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
- VT, (128 / VT.getSizeInBits()));
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to truncate from
- SDValue Op0 = Op.getOperand(0);
- EVT Op0VT = Op0.getValueType();
-
- if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
- // Create shuffle mask, least significant doubleword of quadword
- unsigned maskHigh = 0x08090a0b;
- unsigned maskLow = 0x0c0d0e0f;
- // Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32),
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32));
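- // Control bytes 0x08-0x0f name the least-significant doubleword of the
- // big-endian i128; it is replicated into both halves so VEC2PREFSLOT can
- // then read the i64 result out of the preferred slot (bytes 0-7).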
-
- SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
- Op0, Op0, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
- }
-
- return SDValue(); // Leave the truncate unmolested
-}
-
-/*!
- * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
- * algorithm is to duplicate the sign bit using rotmai to generate at
- * least one byte full of sign bits. Then propagate the "sign-byte" into
- * the leftmost words and the i64/i32 into the rightmost words using shufb.
- *
- * @param Op The sext operand
- * @param DAG The current DAG
- * @return The SDValue with the entire instruction sequence
- */
-static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
-{
- DebugLoc dl = Op.getDebugLoc();
-
- // Type to extend to
- MVT OpVT = Op.getValueType().getSimpleVT();
-
- // Type to extend from
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType().getSimpleVT();
-
- // extend i8 & i16 via i32
- if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
- Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
- Op0VT = MVT::i32;
- }
-
- // The type to extend to needs to be a i128 and
- // the type to extend from needs to be i64 or i32.
- assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
- "LowerSIGN_EXTEND: input and/or output operand have wrong size");
- (void)OpVT;
-
- // Create shuffle mask
- unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
- unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
- unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask1, MVT::i32),
- DAG.getConstant(mask2, MVT::i32),
- DAG.getConstant(mask3, MVT::i32));
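- // To illustrate for an i64 source: the 0x10101010 words pull the sign
- // byte from the second SHUFB operand (sraVal) into the upper eight bytes,
- // while 0x00010203/0x04050607 copy the value's eight bytes from the first
- // operand into the low doubleword of the i128 result.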
-
- // Word wise arithmetic right shift to generate at least one byte
- // that contains sign bits.
- MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
- SDValue sraVal = DAG.getNode(ISD::SRA,
- dl,
- mvt,
- DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
- DAG.getConstant(31, MVT::i32));
-
- // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
- SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- dl, Op0VT, Op0,
- DAG.getTargetConstant(
- SPU::GPRCRegClass.getID(),
- MVT::i32)), 0);
- // Shuffle bytes - Copy the sign bits into the upper 64 bits
- // and the input value into the lower 64 bits.
- SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- extended, sraVal, shufMask);
- return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
-}
-
-//! Custom (target-specific) lowering entry point
-/*!
- This is where LLVM's DAG selection process calls to do target-specific
- lowering of nodes.
- */
-SDValue
-SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
- unsigned Opc = (unsigned) Op.getOpcode();
- EVT VT = Op.getValueType();
-
- switch (Opc) {
- default: {
-#ifndef NDEBUG
- errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- Op.getNode()->dump();
-#endif
- llvm_unreachable(0);
- }
- case ISD::LOAD:
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD:
- case ISD::ZEXTLOAD:
- return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::STORE:
- return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantPool:
- return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::ConstantFP:
- return LowerConstantFP(Op, DAG);
-
- // i8, i64 math ops:
- case ISD::ADD:
- case ISD::SUB:
- case ISD::ROTR:
- case ISD::ROTL:
- case ISD::SRL:
- case ISD::SHL:
- case ISD::SRA: {
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break;
- }
-
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- return LowerFP_TO_INT(Op, DAG, *this);
-
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- return LowerINT_TO_FP(Op, DAG, *this);
-
- // Vector-related lowering.
- case ISD::BUILD_VECTOR:
- return LowerBUILD_VECTOR(Op, DAG);
- case ISD::SCALAR_TO_VECTOR:
- return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::VECTOR_SHUFFLE:
- return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT:
- return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::INSERT_VECTOR_ELT:
- return LowerINSERT_VECTOR_ELT(Op, DAG);
-
- // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- return LowerByteImmed(Op, DAG);
-
- // Vector and i8 multiply:
- case ISD::MUL:
- if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc, *this);
- break;
-
- case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
-
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG, *this);
-
- case ISD::SETCC:
- return LowerSETCC(Op, DAG, *this);
-
- case ISD::TRUNCATE:
- return LowerTRUNCATE(Op, DAG);
-
- case ISD::SIGN_EXTEND:
- return LowerSIGN_EXTEND(Op, DAG);
- }
-
- return SDValue();
-}
-
-void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const
-{
-#if 0
- unsigned Opc = (unsigned) N->getOpcode();
- EVT OpVT = N->getValueType(0);
-
- switch (Opc) {
- default: {
- errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- errs() << "Op.getOpcode() = " << Opc << "\n";
- errs() << "*Op.getNode():\n";
- N->dump();
- abort();
- /*NOTREACHED*/
- }
- }
-#endif
-
- /* Otherwise, return unchanged */
-}
-
-//===----------------------------------------------------------------------===//
-// Target Optimization Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
-{
-#if 0
- TargetMachine &TM = getTargetMachine();
-#endif
- const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0); // everything has at least one operand
- EVT NodeVT = N->getValueType(0); // The node's value type
- EVT Op0VT = Op0.getValueType(); // The first operand's result
- SDValue Result; // Initially, empty result
- DebugLoc dl = N->getDebugLoc();
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::ADD: {
- SDValue Op1 = N->getOperand(1);
-
- if (Op0.getOpcode() == SPUISD::IndirectAddr
- || Op1.getOpcode() == SPUISD::IndirectAddr) {
- // Normalize the operands to reduce repeated code
- SDValue IndirectArg = Op0, AddArg = Op1;
-
- if (Op1.getOpcode() == SPUISD::IndirectAddr) {
- IndirectArg = Op1;
- AddArg = Op0;
- }
-
- if (isa<ConstantSDNode>(AddArg)) {
- ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
- SDValue IndOp1 = IndirectArg.getOperand(1);
-
- if (CN0->isNullValue()) {
- // (add (SPUindirect <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return IndirectArg;
- } else if (isa<ConstantSDNode>(IndOp1)) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
- int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
- SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
- << "), " << CN0->getSExtValue() << ")\n"
- << "With: (SPUindirect <arg>, "
- << combinedConst << ")\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- IndirectArg, combinedValue);
- }
- }
- }
- break;
- }
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND: {
- if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
- // (any_extend (SPUextract_elt0 <arg>)) ->
- // (SPUextract_elt0 <arg>)
- // Types must match, however...
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\nReplace: ";
- N->dump(&DAG);
- errs() << "\nWith: ";
- Op0.getNode()->dump(&DAG);
- errs() << "\n";
- }
-#endif
-
- return Op0;
- }
- break;
- }
- case SPUISD::IndirectAddr: {
- if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->isNullValue()) {
- // (SPUindirect (SPUaform <addr>, 0), 0) ->
- // (SPUaform <addr>, 0)
-
- DEBUG(errs() << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
-
- return Op0;
- }
- } else if (Op0.getOpcode() == ISD::ADD) {
- SDValue Op1 = N->getOperand(1);
- if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
- // (SPUindirect (add <arg>, <arg>), 0) ->
- // (SPUindirect <arg>, <arg>)
- if (CN1->isNullValue()) {
-
-#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- errs() << "\n"
- << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
- << "With: (SPUindirect <arg>, <arg>)\n";
- }
-#endif
-
- return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
- Op0.getOperand(0), Op0.getOperand(1));
- }
- }
- }
- break;
- }
- case SPUISD::SHL_BITS:
- case SPUISD::SHL_BYTES:
- case SPUISD::ROTBYTES_LEFT: {
- SDValue Op1 = N->getOperand(1);
-
- // Kill degenerate vector shifts:
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
- if (CN->isNullValue()) {
- Result = Op0;
- }
- }
- break;
- }
- case SPUISD::PREFSLOT2VEC: {
- switch (Op0.getOpcode()) {
- default:
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND: {
- // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
- // <arg>
- // but only if the SPUprefslot2vec and <arg> types match.
- SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
- SDValue Op000 = Op00.getOperand(0);
- if (Op000.getValueType() == NodeVT) {
- Result = Op000;
- }
- }
- break;
- }
- case SPUISD::VEC2PREFSLOT: {
- // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
- // <arg>
- Result = Op0.getOperand(0);
- break;
- }
- }
- break;
- }
- }
-
- // Otherwise, return unchanged.
-#ifndef NDEBUG
- if (Result.getNode()) {
- DEBUG(errs() << "\nReplace.SPU: ");
- DEBUG(N->dump(&DAG));
- DEBUG(errs() << "\nWith: ");
- DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(errs() << "\n");
- }
-#endif
-
- return Result;
-}
-
-//===----------------------------------------------------------------------===//
-// Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-SPUTargetLowering::ConstraintType
-SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
- if (ConstraintLetter.size() == 1) {
- switch (ConstraintLetter[0]) {
- default: break;
- case 'b':
- case 'r':
- case 'f':
- case 'v':
- case 'y':
- return C_RegisterClass;
- }
- }
- return TargetLowering::getConstraintType(ConstraintLetter);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-SPUTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
- //FIXME: Seems like the supported constraint letters were just copied
- // from PPC, as the following doesn't correspond to the GCC docs.
- // I'm leaving it so until someone adds the corresponding lowering support.
- case 'b':
- case 'r':
- case 'f':
- case 'd':
- case 'v':
- case 'y':
- weight = CW_Register;
- break;
- }
- return weight;
-}
-
-std::pair<unsigned, const TargetRegisterClass*>
-SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
-{
- if (Constraint.size() == 1) {
- // GCC RS6000 constraint letters, carried over from the PPC port (see the
- // FIXME in getSingleConstraintMatchWeight above)
- switch (Constraint[0]) {
- case 'b': // R1-R31
- case 'r': // R0-R31
- if (VT == MVT::i64)
- return std::make_pair(0U, &SPU::R64CRegClass);
- return std::make_pair(0U, &SPU::R32CRegClass);
- case 'f':
- if (VT == MVT::f32)
- return std::make_pair(0U, &SPU::R32FPRegClass);
- if (VT == MVT::f64)
- return std::make_pair(0U, &SPU::R64FPRegClass);
- break;
- case 'v':
- return std::make_pair(0U, &SPU::GPRCRegClass);
- }
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-//! Compute used/known bits for a SPU operand
-void
-SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
-#if 0
- const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
-
- switch (Op.getOpcode()) {
- default:
- // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
- break;
- case CALL:
- case SHUFB:
- case SHUFFLE_MASK:
- case CNTB:
- case SPUISD::PREFSLOT2VEC:
- case SPUISD::LDRESULT:
- case SPUISD::VEC2PREFSLOT:
- case SPUISD::SHLQUAD_L_BITS:
- case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_ROTL:
- case SPUISD::VEC_ROTR:
- case SPUISD::ROTBYTES_LEFT:
- case SPUISD::SELECT_MASK:
- case SPUISD::SELB:
- }
-#endif
-}
-
-unsigned
-SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth) const {
- switch (Op.getOpcode()) {
- default:
- return 1;
-
- case ISD::SETCC: {
- EVT VT = Op.getValueType();
-
- if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
- VT = MVT::i32;
- }
- return VT.getSizeInBits();
- }
- }
-}
-
-// LowerAsmOperandForConstraint
-void
-SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
- // Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
-}
-
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode.
-bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
- Type *Ty) const {
- // SPU's local store is 256K, so addresses fit in an 18-bit signed immediate:
- return (V > -(1 << 18) && V < (1 << 18) - 1);
-}
-
-bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
- return false;
-}
-
-bool
-SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The SPU target isn't yet aware of offsets.
- return false;
-}
-
- // Can we compare to Imm without materializing it in a register?
- bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
- // ceqi, cgti, etc. all take a signed 10-bit (s10) operand
- return isInt<10>(Imm);
-}
-
-bool
- SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *) const {
-
- // A-form: 18-bit absolute address.
- if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
- return true;
-
- // D-form: reg + 14-bit offset
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
- return true;
-
- // X-form: reg+reg
- if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
- return true;
-
- return false;
-}