summaryrefslogtreecommitdiff
path: root/lib/Target/SystemZ/SystemZISelLowering.cpp
diff options
context:
space:
mode:
authorRichard Sandiford <rsandifo@linux.vnet.ibm.com>2013-08-27 09:54:29 +0000
committerRichard Sandiford <rsandifo@linux.vnet.ibm.com>2013-08-27 09:54:29 +0000
commit842a1be06c53757e7498c9894abc1431b633a92f (patch)
treed784058c7258583429a11c277dd14f4cf3aff4e6 /lib/Target/SystemZ/SystemZISelLowering.cpp
parentc9617b9a9dfcb550adcf06f83a58a5e522414cc1 (diff)
downloadllvm-842a1be06c53757e7498c9894abc1431b633a92f.tar.gz
llvm-842a1be06c53757e7498c9894abc1431b633a92f.tar.bz2
llvm-842a1be06c53757e7498c9894abc1431b633a92f.tar.xz
[SystemZ] Extend memcpy and memset support to all constant lengths
Lengths up to a certain threshold (currently 6 * 256) use a series of MVCs. Lengths above that threshold use a loop to handle X*256 bytes followed by a single MVC to handle the excess (if any). This loop will also be needed in future when support for variable lengths is added. Because the same tablegen classes are used to define MVC and CLC, the patch also has the side-effect of defining a pseudo loop instruction for CLC. That instruction isn't used yet (and wouldn't be handled correctly if it were). I'm planning to use it soon though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189331 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/SystemZ/SystemZISelLowering.cpp')
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp168
1 files changed, 147 insertions, 21 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 20afab782a..216ca3450c 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1917,7 +1917,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
OPCODE(MVC);
+ OPCODE(MVC_LOOP);
OPCODE(CLC);
+ OPCODE(CLC_LOOP);
OPCODE(STRCMP);
OPCODE(STPCPY);
OPCODE(SEARCH_STRING);
@@ -1952,18 +1954,31 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
return NewMBB;
}
-// Split MBB after MI and return the new block (the one that contains
-// instructions after MI).
-static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+// Split MBB before MI and return the new block (the one that contains MI).
+static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
- NewMBB->splice(NewMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- MBB->end());
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
return NewMBB;
}
+// Force base value Base into a register before MI. Return the register.
+static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+ const SystemZInstrInfo *TII) {
+ if (Base.isReg())
+ return Base.getReg();
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
+ .addOperand(Base).addImm(0).addReg(0);
+ return Reg;
+}
+
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
@@ -1978,7 +1993,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1999,7 +2014,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
// ...
MBB = JoinMBB;
- BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg)
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
.addReg(TrueReg).addMBB(StartMBB)
.addReg(FalseReg).addMBB(FalseMBB);
@@ -2046,7 +2061,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -2122,7 +2137,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -2244,7 +2259,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
@@ -2351,7 +2366,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
@@ -2465,17 +2480,126 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode) const {
const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
- MachineOperand DestBase = MI->getOperand(0);
+ MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
uint64_t DestDisp = MI->getOperand(1).getImm();
- MachineOperand SrcBase = MI->getOperand(2);
+ MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
uint64_t SrcDisp = MI->getOperand(3).getImm();
uint64_t Length = MI->getOperand(4).getImm();
- BuildMI(*MBB, MI, DL, TII->get(Opcode))
- .addOperand(DestBase).addImm(DestDisp).addImm(Length)
- .addOperand(SrcBase).addImm(SrcDisp);
+ // Check for the loop form, in which operand 5 is the trip count.
+ if (MI->getNumExplicitOperands() > 5) {
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+
+ uint64_t StartCountReg = MI->getOperand(5).getReg();
+ uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
+ uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
+ forceReg(MI, DestBase, TII));
+
+ const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
+ uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ MRI.createVirtualRegister(RC));
+ uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+ MRI.createVirtualRegister(RC));
+
+ RC = &SystemZ::GR64BitRegClass;
+ uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
+ uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // # fall through to LoopMMB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
+ // [ %NextDestReg, LoopMBB ]
+ // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
+ // [ %NextSrcReg, LoopMBB ]
+ // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
+ // [ %NextCountReg, LoopMBB ]
+ // PFD 2, 768+DestDisp(%ThisDestReg)
+ // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
+ // %NextDestReg = LA 256(%ThisDestReg)
+ // %NextSrcReg = LA 256(%ThisSrcReg)
+ // %NextCountReg = AGHI %ThisCountReg, -1
+ // CGHI %NextCountReg, 0
+ // JLH LoopMBB
+ // # fall through to DoneMMB
+ //
+ // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
+ .addReg(StartDestReg).addMBB(StartMBB)
+ .addReg(NextDestReg).addMBB(LoopMBB);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
+ .addReg(StartSrcReg).addMBB(StartMBB)
+ .addReg(NextSrcReg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
+ .addReg(StartCountReg).addMBB(StartMBB)
+ .addReg(NextCountReg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PFD))
+ .addImm(SystemZ::PFD_WRITE)
+ .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
+ .addReg(ThisSrcReg).addImm(SrcDisp);
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
+ .addReg(ThisDestReg).addImm(256).addReg(0);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
+ .addReg(ThisSrcReg).addImm(256).addReg(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
+ .addReg(ThisCountReg).addImm(-1);
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(NextCountReg).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DestBase = MachineOperand::CreateReg(NextDestReg, false);
+ SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
+ Length &= 255;
+ MBB = DoneMBB;
+ }
+ // Handle any remaining bytes with straight-line code.
+ while (Length > 0) {
+ uint64_t ThisLength = std::min(Length, uint64_t(256));
+ // The previous iteration might have created out-of-range displacements.
+ // Apply them using LAY if so.
+ if (!isUInt<12>(DestDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(DestBase).addImm(DestDisp).addReg(0);
+ DestBase = MachineOperand::CreateReg(Reg, false);
+ DestDisp = 0;
+ }
+ if (!isUInt<12>(SrcDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+ SrcBase = MachineOperand::CreateReg(Reg, false);
+ SrcDisp = 0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(Opcode))
+ .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
+ .addOperand(SrcBase).addImm(SrcDisp);
+ DestDisp += ThisLength;
+ SrcDisp += ThisLength;
+ Length -= ThisLength;
+ }
MI->eraseFromParent();
return MBB;
@@ -2503,7 +2627,7 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -2765,9 +2889,11 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
- case SystemZ::MVCWrapper:
+ case SystemZ::MVCSequence:
+ case SystemZ::MVCLoop:
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
- case SystemZ::CLCWrapper:
+ case SystemZ::CLCSequence:
+ case SystemZ::CLCLoop:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);