summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp630
1 files changed, 460 insertions, 170 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b4163c5450..7e6f2ad595 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <utility>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -7245,8 +7246,430 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
-MachineBasicBlock *ARMTargetLowering::
-EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+namespace {
+// This class is a helper for lowering the COPY_STRUCT_BYVAL_I32 instruction.
+// It declares the operations needed to lower the byval copy. We use a helper
+// class because the opcodes and machine instructions are different for each
+// subtarget, but the overall algorithm for the lowering is the same. Each
+// operation is implemented per subtarget by subclassing this base class (the
+// ARM and Thumb2 subclasses follow; thumb1 presumably comes later). See
+// ARMTargetLowering::EmitStructByval() for how these operations are used.
+class TargetStructByvalEmitter {
+public:
+ TargetStructByvalEmitter(const TargetInstrInfo *TII_,
+ MachineRegisterInfo &MRI_,
+ const TargetRegisterClass *TRC_)
+ : TII(TII_), MRI(MRI_), TRC(TRC_) {}
+
+ // Emit a post-increment load of "unit" size. The unit size is based on the
+ // alignment of the struct being copied (4, 2, or 1 bytes). Alignments higher
+ // than 4 are handled separately by using NEON instructions.
+ //
+ // \param baseReg the register holding the address to load from.
+ // \param baseOut the register to receive the incremented address.
+ // \returns the register holding the loaded value.
+ virtual unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned baseReg,
+ unsigned baseOut) = 0;
+
+ // Emit a post-increment store of "unit" size. The unit size is based on the
+ // alignment of the struct being copied (4, 2, or 1 bytes). Alignments higher
+ // than 4 are handled separately by using NEON instructions.
+ //
+ // \param baseReg the register holding the address to store to.
+ // \param storeReg the register holding the value to store.
+ // \param baseOut the register to receive the incremented address.
+ virtual void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned baseReg, unsigned storeReg,
+ unsigned baseOut) = 0;
+
+ // Emit a post-increment load of one byte.
+ //
+ // \param baseReg the register holding the address to load from.
+ // \param baseOut the register to receive the incremented address.
+ // \returns the register holding the loaded value.
+ virtual unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned baseReg,
+ unsigned baseOut) = 0;
+
+ // Emit a post-increment store of one byte.
+ //
+ // \param baseReg the register holding the address to store to.
+ // \param storeReg the register holding the value to store.
+ // \param baseOut the register to receive the incremented address.
+ virtual void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned baseReg, unsigned storeReg,
+ unsigned baseOut) = 0;
+
+ // Emit a load of a constant value.
+ //
+ // \param Constant the constant value to materialize in a register.
+ // \returns the register holding the loaded value.
+ virtual unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned Constant,
+ const DataLayout *DL) = 0;
+
+ // Emit a subtract of a register minus immediate, with the immediate equal to
+ // the "unit" size. The unit size is based on the alignment of the struct
+ // being copied (16, 8, 4, 2, or 1 bytes).
+ //
+ // \param InReg the register holding the initial value.
+ // \param OutReg the register to receive the subtracted value.
+ virtual void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned InReg, unsigned OutReg) = 0;
+
+ // Emit a branch based on a condition code of not equal.
+ //
+ // \param TargetBB the destination of the branch.
+ virtual void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, MachineBasicBlock *TargetBB) = 0;
+
+ // Find the constant pool index for the given constant. This method is
+ // implemented in the base class because it is the same for all subtargets.
+ //
+ // \param LoopSize the constant value for which the index should be returned.
+ // \returns the constant pool index for the constant.
+ unsigned getConstantPoolIndex(MachineFunction *MF, const DataLayout *DL,
+ unsigned LoopSize) {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // i32 constant
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = DL->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0) // no preferred alignment: fall back to the alloc size
+ Align = DL->getTypeAllocSize(C->getType());
+ return ConstantPool->getConstantPoolIndex(C, Align);
+ }
+
+ // Return the register class used by the subtarget.
+ //
+ // \returns the target register class.
+ const TargetRegisterClass *getTRC() const { return TRC; }
+
+ virtual ~TargetStructByvalEmitter() {}; // virtual: held via a base pointer
+
+protected:
+ const TargetInstrInfo *TII; // supplies instruction descriptions to BuildMI
+ MachineRegisterInfo &MRI; // used to create virtual registers
+ const TargetRegisterClass *TRC; // class of the scratch/address registers
+};
+
+class ARMStructByvalEmitter : public TargetStructByvalEmitter { // ARM mode
+public:
+ ARMStructByvalEmitter(const TargetInstrInfo *TII, MachineRegisterInfo &MRI,
+ unsigned LoadStoreSize) // LoadStoreSize: the copy unit in bytes
+ : TargetStructByvalEmitter(
+ TII, MRI, (const TargetRegisterClass *)&ARM::GPRRegClass),
+ UnitSize(LoadStoreSize),
+ UnitLdOpc(LoadStoreSize == 4 // word / halfword / byte post-indexed load
+ ? ARM::LDR_POST_IMM
+ : LoadStoreSize == 2
+ ? ARM::LDRH_POST
+ : LoadStoreSize == 1 ? ARM::LDRB_POST_IMM : 0), // 0: any other size
+ UnitStOpc(LoadStoreSize == 4 // matching post-indexed store
+ ? ARM::STR_POST_IMM
+ : LoadStoreSize == 2
+ ? ARM::STRH_POST
+ : LoadStoreSize == 1 ? ARM::STRB_POST_IMM : 0) {} // 0: any other size
+
+ unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned baseOut) {
+ unsigned scratch = MRI.createVirtualRegister(TRC); // receives the value
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch).addReg(
+ baseOut, RegState::Define).addReg(baseReg).addReg(0).addImm(UnitSize));
+ return scratch;
+ }
+
+ void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned storeReg, unsigned baseOut) {
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), baseOut).addReg(
+ storeReg).addReg(baseReg).addReg(0).addImm(UnitSize)); // step: UnitSize
+ }
+
+ unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned baseOut) {
+ unsigned scratch = MRI.createVirtualRegister(TRC); // receives the byte
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRB_POST_IMM), scratch)
+ .addReg(baseOut, RegState::Define).addReg(baseReg)
+ .addReg(0).addImm(1)); // advance the address by one byte
+ return scratch;
+ }
+
+ void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned storeReg, unsigned baseOut) {
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::STRB_POST_IMM), baseOut)
+ .addReg(storeReg).addReg(baseReg).addReg(0).addImm(1)); // step: 1 byte
+ }
+
+ unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned Constant,
+ const DataLayout *DL) {
+ unsigned constReg = MRI.createVirtualRegister(TRC);
+ unsigned Idx = getConstantPoolIndex(BB->getParent(), DL, Constant);
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( // load it
+ constReg, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
+ return constReg;
+ }
+
+ void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned InReg, unsigned OutReg) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, dl, TII->get(ARM::SUBri), OutReg);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(InReg).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR); // presumably the operand AddDefaultCC
+ MIB->getOperand(5).setIsDef(true); // added; it now defines CPSR
+ }
+
+ void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ MachineBasicBlock *TargetBB) {
+ BuildMI(*BB, MI, dl, TII->get(ARM::Bcc)).addMBB(TargetBB).addImm(ARMCC::NE)
+ .addReg(ARM::CPSR); // the branch condition reads CPSR
+ }
+
+private:
+ const unsigned UnitSize; // bytes moved per unit load/store
+ const unsigned UnitLdOpc; // post-indexed load opcode for UnitSize, or 0
+ const unsigned UnitStOpc; // post-indexed store opcode for UnitSize, or 0
+};
+
+class Thumb2StructByvalEmitter : public TargetStructByvalEmitter { // Thumb2
+public:
+ Thumb2StructByvalEmitter(const TargetInstrInfo *TII, MachineRegisterInfo &MRI,
+ unsigned LoadStoreSize) // LoadStoreSize: the copy unit in bytes
+ : TargetStructByvalEmitter(
+ TII, MRI, (const TargetRegisterClass *)&ARM::tGPRRegClass),
+ UnitSize(LoadStoreSize),
+ UnitLdOpc(LoadStoreSize == 4 // word / halfword / byte post-indexed load
+ ? ARM::t2LDR_POST
+ : LoadStoreSize == 2
+ ? ARM::t2LDRH_POST
+ : LoadStoreSize == 1 ? ARM::t2LDRB_POST : 0), // 0: any other size
+ UnitStOpc(LoadStoreSize == 4 // matching post-indexed store
+ ? ARM::t2STR_POST
+ : LoadStoreSize == 2
+ ? ARM::t2STRH_POST
+ : LoadStoreSize == 1 ? ARM::t2STRB_POST : 0) {} // 0: any other size
+
+ unsigned emitUnitLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned baseOut) {
+ unsigned scratch = MRI.createVirtualRegister(TRC); // receives the value
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitLdOpc), scratch).addReg(
+ baseOut, RegState::Define).addReg(baseReg).addImm(UnitSize));
+ return scratch;
+ }
+
+ void emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned storeReg, unsigned baseOut) {
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(UnitStOpc), baseOut)
+ .addReg(storeReg).addReg(baseReg).addImm(UnitSize)); // step: UnitSize
+ }
+
+ unsigned emitByteLoad(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned baseOut) {
+ unsigned scratch = MRI.createVirtualRegister(TRC); // receives the byte
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::t2LDRB_POST), scratch)
+ .addReg(baseOut, RegState::Define).addReg(baseReg)
+ .addImm(1)); // advance the address by one byte
+ return scratch;
+ }
+
+ void emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned storeReg, unsigned baseOut) {
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::t2STRB_POST), baseOut)
+ .addReg(storeReg).addReg(baseReg).addImm(1)); // step: 1 byte
+ }
+
+ unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned Constant,
+ const DataLayout *DL) { // MI and DL are not used by this override
+ unsigned VConst = MRI.createVirtualRegister(TRC); // final result register
+ unsigned Vtmp = VConst; // the first move writes the result directly...
+ if ((Constant & 0xFFFF0000) != 0) // ...unless the high half is nonzero
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) // low half
+ .addImm(Constant & 0xFFFF)); // NOTE(review): no insertion point given
+
+ if ((Constant & 0xFFFF0000) != 0) // second move supplies the high half
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), VConst)
+ .addReg(Vtmp).addImm(Constant >> 16));
+ return VConst;
+ }
+
+ void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned InReg, unsigned OutReg) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, dl, TII->get(ARM::t2SUBri), OutReg);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(InReg).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR); // presumably the operand AddDefaultCC
+ MIB->getOperand(5).setIsDef(true); // added; it now defines CPSR
+ }
+
+ void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ MachineBasicBlock *TargetBB) { // NOTE(review): MI is unused here
+ BuildMI(BB, dl, TII->get(ARM::t2Bcc)).addMBB(TargetBB).addImm(ARMCC::NE)
+ .addReg(ARM::CPSR); // the branch condition reads CPSR
+ }
+
+private:
+ const unsigned UnitSize; // bytes moved per unit load/store
+ const unsigned UnitLdOpc; // post-indexed load opcode for UnitSize, or 0
+ const unsigned UnitStOpc; // post-indexed store opcode for UnitSize, or 0
+};
+
+// This class is a thin wrapper that delegates most of the work to the correct
+// TargetStructByvalEmitter implementation. It also handles the lowering for
+// targets that support NEON because the NEON implementation is the same for all
+// targets that support it.
+class StructByvalEmitter {
+public:
+ StructByvalEmitter(unsigned LoadStoreSize, const ARMSubtarget *Subtarget,
+ const TargetInstrInfo *TII_, MachineRegisterInfo &MRI_,
+ const DataLayout *DL_)
+ : UnitSize(LoadStoreSize),
+ TargetEmitter(
+ Subtarget->isThumb2() // choose the subtarget-specific emitter
+ ? static_cast<TargetStructByvalEmitter *>(
+ new Thumb2StructByvalEmitter(TII_, MRI_,
+ LoadStoreSize))
+ : static_cast<TargetStructByvalEmitter *>(
+ new ARMStructByvalEmitter(TII_, MRI_,
+ LoadStoreSize))),
+ TII(TII_), MRI(MRI_), DL(DL_),
+ VecTRC(UnitSize == 16 // vector register class for 16- or 8-byte units
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0), // null for unit sizes other than 16 and 8
+ VecLdOpc(UnitSize == 16 ? ARM::VLD1q32wb_fixed
+ : UnitSize == 8 ? ARM::VLD1d32wb_fixed : 0),
+ VecStOpc(UnitSize == 16 ? ARM::VST1q32wb_fixed
+ : UnitSize == 8 ? ARM::VST1d32wb_fixed : 0) {}
+
+ // Emit a post-increment load of "unit" size. The unit size is based on the
+ // alignment of the struct being copied (16, 8, 4, 2, or 1 bytes). Loads of 16
+ // or 8 bytes use NEON instructions to load the value.
+ //
+ // \param baseReg the register holding the address to load from.
+ // \param baseOut the register to receive the incremented address. If baseOut
+ // is 0 then a new register is created to hold the incremented address.
+ // \returns a pair of registers holding the loaded value and the updated
+ // address.
+ std::pair<unsigned, unsigned> emitUnitLoad(MachineBasicBlock *BB,
+ MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg,
+ unsigned baseOut = 0) {
+ unsigned scratch = 0;
+ if (baseOut == 0)
+ baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
+ if (UnitSize >= 8) { // NEON: handled here, not by the subtarget emitter
+ scratch = MRI.createVirtualRegister(VecTRC);
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(VecLdOpc), scratch).addReg(
+ baseOut, RegState::Define).addReg(baseReg).addImm(0));
+ } else {
+ scratch = TargetEmitter->emitUnitLoad(BB, MI, dl, baseReg, baseOut);
+ }
+ return std::make_pair(scratch, baseOut);
+ }
+
+ // Emit a post-increment store of "unit" size. The unit size is based on the
+ // alignment of the struct being copied (16, 8, 4, 2, or 1 bytes). Stores of
+ // 16 or 8 bytes use NEON instructions to store the value.
+ //
+ // \param baseReg the register holding the address to store to.
+ // \param storeReg the register holding the value to store.
+ // \param baseOut the register to receive the incremented address. If baseOut
+ // is 0 then a new register is created to hold the incremented address.
+ // \returns the register holding the updated address.
+ unsigned emitUnitStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned storeReg,
+ unsigned baseOut = 0) {
+ if (baseOut == 0)
+ baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
+ if (UnitSize >= 8) { // NEON: handled here, not by the subtarget emitter
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(VecStOpc), baseOut)
+ .addReg(baseReg).addImm(0).addReg(storeReg));
+ } else {
+ TargetEmitter->emitUnitStore(BB, MI, dl, baseReg, storeReg, baseOut);
+ }
+ return baseOut;
+ }
+
+ // Emit a post-increment load of one byte.
+ //
+ // \param baseReg the register holding the address to load from.
+ // \returns a pair of registers holding the loaded value and the updated
+ // address.
+ std::pair<unsigned, unsigned> emitByteLoad(MachineBasicBlock *BB,
+ MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg) {
+ unsigned baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
+ unsigned scratch =
+ TargetEmitter->emitByteLoad(BB, MI, dl, baseReg, baseOut);
+ return std::make_pair(scratch, baseOut);
+ }
+
+ // Emit a post-increment store of one byte.
+ //
+ // \param baseReg the register holding the address to store to.
+ // \param storeReg the register holding the value to store.
+ // \returns the register holding the updated address.
+ unsigned emitByteStore(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned baseReg, unsigned storeReg) {
+ unsigned baseOut = MRI.createVirtualRegister(TargetEmitter->getTRC());
+ TargetEmitter->emitByteStore(BB, MI, dl, baseReg, storeReg, baseOut);
+ return baseOut;
+ }
+
+ // Emit a load of the constant LoopSize.
+ //
+ // \param LoopSize the constant to load.
+ // \returns the register holding the loaded constant.
+ unsigned emitConstantLoad(MachineBasicBlock *BB, MachineInstr *MI,
+ DebugLoc &dl, unsigned LoopSize) {
+ return TargetEmitter->emitConstantLoad(BB, MI, dl, LoopSize, DL);
+ }
+
+ // Emit a subtract of a register minus immediate, with the immediate equal to
+ // the "unit" size. The unit size is based on the alignment of the struct
+ // being copied (16, 8, 4, 2, or 1 bytes).
+ //
+ // \param InReg the register holding the initial value.
+ // \param OutReg the register to receive the subtracted value.
+ void emitSubImm(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ unsigned InReg, unsigned OutReg) {
+ TargetEmitter->emitSubImm(BB, MI, dl, InReg, OutReg);
+ }
+
+ // Emit a branch based on a condition code of not equal.
+ //
+ // \param TargetBB the destination of the branch.
+ void emitBranchNE(MachineBasicBlock *BB, MachineInstr *MI, DebugLoc &dl,
+ MachineBasicBlock *TargetBB) {
+ TargetEmitter->emitBranchNE(BB, MI, dl, TargetBB);
+ }
+
+ // Return the register class used by the subtarget.
+ //
+ // \returns the target register class.
+ const TargetRegisterClass *getTRC() const { return TargetEmitter->getTRC(); }
+
+private:
+ const unsigned UnitSize; // copy unit in bytes; 8 and 16 select the NEON path
+ OwningPtr<TargetStructByvalEmitter> TargetEmitter; // owned subtarget emitter
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo &MRI;
+ const DataLayout *DL; // forwarded to the subtarget's emitConstantLoad
+
+ const TargetRegisterClass *VecTRC; // vector register class, or null
+ const unsigned VecLdOpc; // vector load opcode, or 0 when UnitSize < 8
+ const unsigned VecStOpc; // vector store opcode, or 0 when UnitSize < 8
+};
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
@@ -7261,23 +7684,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize = 0;
-
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- const TargetRegisterClass *TRC_Vec = 0;
+ unsigned UnitSize = 0;
if (Align & 1) {
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
- ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
- strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
@@ -7285,27 +7698,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16) {
- ldrOpc = ARM::VLD1q32wb_fixed;
- strOpc = ARM::VST1q32wb_fixed;
+ if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
- }
- else if ((Align % 8 == 0) && SizeVal >= 8) {
- ldrOpc = ARM::VLD1d32wb_fixed;
- strOpc = ARM::VST1d32wb_fixed;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
- }
}
// Can't use NEON instructions.
- if (UnitSize == 0) {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ if (UnitSize == 0)
UnitSize = 4;
- }
}
+ StructByvalEmitter ByvalEmitter(UnitSize, Subtarget, TII, MRI,
+ getDataLayout());
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7316,67 +7720,22 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(destIn).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(UnitSize));
- }
- srcIn = srcOut;
- destIn = destOut;
+ std::pair<unsigned, unsigned> res =
+ ByvalEmitter.emitUnitLoad(BB, MI, dl, srcIn);
+ unsigned scratch = res.first;
+ srcIn = res.second;
+ destIn = ByvalEmitter.emitUnitStore(BB, MI, dl, destIn, scratch);
}
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn)
- .addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
- srcIn = srcOut;
- destIn = destOut;
+ std::pair<unsigned, unsigned> res =
+ ByvalEmitter.emitByteLoad(BB, MI, dl, srcIn);
+ unsigned scratch = res.first;
+ srcIn = res.second;
+ destIn = ByvalEmitter.emitByteStore(BB, MI, dl, destIn, scratch);
}
MI->eraseFromParent(); // The instruction is gone now.
return BB;
@@ -7414,34 +7773,7 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Load an immediate to varEnd.
- unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
- if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
-
- if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
- } else {
- MachineConstantPool *ConstantPool = MF->getConstantPool();
- Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
- const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
-
- // MachineConstantPool wants an explicit alignment.
- unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
- if (Align == 0)
- Align = getDataLayout()->getTypeAllocSize(C->getType());
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
- }
+ unsigned varEnd = ByvalEmitter.emitConstantLoad(BB, MI, dl, LoopSize);
BB->addSuccessor(loopMBB);
// Generate the loop body:
@@ -7450,12 +7782,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// destPhi = PHI(destLoop, dst)
MachineBasicBlock *entryBB = BB;
BB = loopMBB;
- unsigned varLoop = MRI.createVirtualRegister(TRC);
- unsigned varPhi = MRI.createVirtualRegister(TRC);
- unsigned srcLoop = MRI.createVirtualRegister(TRC);
- unsigned srcPhi = MRI.createVirtualRegister(TRC);
- unsigned destLoop = MRI.createVirtualRegister(TRC);
- unsigned destPhi = MRI.createVirtualRegister(TRC);
+ unsigned varLoop = MRI.createVirtualRegister(ByvalEmitter.getTRC());
+ unsigned varPhi = MRI.createVirtualRegister(ByvalEmitter.getTRC());
+ unsigned srcLoop = MRI.createVirtualRegister(ByvalEmitter.getTRC());
+ unsigned srcPhi = MRI.createVirtualRegister(ByvalEmitter.getTRC());
+ unsigned destLoop = MRI.createVirtualRegister(ByvalEmitter.getTRC());
+ unsigned destPhi = MRI.createVirtualRegister(ByvalEmitter.getTRC());
BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
.addReg(varLoop).addMBB(loopMBB)
@@ -7469,39 +7801,16 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSiz)
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(destPhi).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(UnitSize));
+ {
+ std::pair<unsigned, unsigned> res =
+ ByvalEmitter.emitUnitLoad(BB, BB->end(), dl, srcPhi, srcLoop);
+ unsigned scratch = res.first;
+ ByvalEmitter.emitUnitStore(BB, BB->end(), dl, destPhi, scratch, destLoop);
}
// Decrement loop variable by UnitSize.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ ByvalEmitter.emitSubImm(BB, BB->end(), dl, varPhi, varLoop);
+ ByvalEmitter.emitBranchNE(BB, BB->end(), dl, loopMBB);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
@@ -7510,36 +7819,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
- unsigned srcOut = MRI.createVirtualRegister(TRC);
- unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
- srcIn = srcOut;
- destIn = destOut;
+ std::pair<unsigned, unsigned> res =
+ ByvalEmitter.emitByteLoad(BB, StartOfExit, dl, srcIn);
+ unsigned scratch = res.first;
+ srcIn = res.second;
+ destIn = ByvalEmitter.emitByteStore(BB, StartOfExit, dl, destIn, scratch);
}
MI->eraseFromParent(); // The instruction is gone now.