diff options
author | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-28 09:01:51 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@linux.vnet.ibm.com> | 2013-08-28 09:01:51 +0000 |
commit | 47e70960945ecb33a361987a9745e3dc80a1c78c (patch) | |
tree | 60e839e72b6139cc77be4a33330870733fe523d9 /lib | |
parent | ead2d5a4be3012699252e015ce774733590bf9b0 (diff) | |
download | llvm-47e70960945ecb33a361987a9745e3dc80a1c78c.tar.gz llvm-47e70960945ecb33a361987a9745e3dc80a1c78c.tar.bz2 llvm-47e70960945ecb33a361987a9745e3dc80a1c78c.tar.xz |
[SystemZ] Extend memcmp support to all constant lengths
This uses the infrastructure added for memcpy and memmove in r189331.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189458 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/SystemZ/SystemZISelLowering.cpp | 83 | ||||
-rw-r--r-- | lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 35 |
2 files changed, 92 insertions, 26 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 216ca3450c..dd230c62a5 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1954,6 +1954,18 @@ static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { return NewMBB; } +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + // Split MBB before MI and return the new block (the one that contains MI). static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, MachineBasicBlock *MBB) { @@ -2490,6 +2502,11 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, uint64_t SrcDisp = MI->getOperand(3).getImm(); uint64_t Length = MI->getOperand(4).getImm(); + // When generating more than one CLC, all but the last will need to + // branch to the end when a difference is found. + MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? + splitBlockAfter(MI, MBB) : 0); + // Check for the loop form, in which operand 5 is the trip count. if (MI->getNumExplicitOperands() > 5) { bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); @@ -2514,6 +2531,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB); // StartMBB: // # fall through to LoopMMB @@ -2521,39 +2539,54 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, // LoopMBB: // %ThisDestReg = phi [ %StartDestReg, StartMBB ], - // [ %NextDestReg, LoopMBB ] + // [ %NextDestReg, NextMBB ] // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], - // [ %NextSrcReg, LoopMBB ] + // [ %NextSrcReg, NextMBB ] // %ThisCountReg = phi [ %StartCountReg, StartMBB ], - // [ %NextCountReg, LoopMBB ] - // PFD 2, 768+DestDisp(%ThisDestReg) + // [ %NextCountReg, NextMBB ] + // ( PFD 2, 768+DestDisp(%ThisDestReg) ) // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) - // %NextDestReg = LA 256(%ThisDestReg) - // %NextSrcReg = LA 256(%ThisSrcReg) - // %NextCountReg = AGHI %ThisCountReg, -1 - // CGHI %NextCountReg, 0 - // JLH LoopMBB - // # fall through to DoneMMB + // ( JLH EndMBB ) // - // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + // The prefetch is used only for MVC. The JLH is used only for CLC. MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) .addReg(StartDestReg).addMBB(StartMBB) - .addReg(NextDestReg).addMBB(LoopMBB); + .addReg(NextDestReg).addMBB(NextMBB); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) .addReg(StartSrcReg).addMBB(StartMBB) - .addReg(NextSrcReg).addMBB(LoopMBB); + .addReg(NextSrcReg).addMBB(NextMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) .addReg(StartCountReg).addMBB(StartMBB) - .addReg(NextCountReg).addMBB(LoopMBB); - BuildMI(MBB, DL, TII->get(SystemZ::PFD)) - .addImm(SystemZ::PFD_WRITE) - .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); + .addReg(NextCountReg).addMBB(NextMBB); + if (Opcode == SystemZ::MVC) + BuildMI(MBB, DL, TII->get(SystemZ::PFD)) + .addImm(SystemZ::PFD_WRITE) + .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(ThisDestReg).addImm(DestDisp).addImm(256) .addReg(ThisSrcReg).addImm(SrcDisp); + if (EndMBB) { + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + } + + // NextMBB: + // %NextDestReg = LA 256(%ThisDestReg) + // %NextSrcReg = LA 256(%ThisSrcReg) + // %NextCountReg = AGHI %ThisCountReg, -1 + // CGHI %NextCountReg, 0 + // JLH LoopMBB + // # fall through to DoneMMB + // + // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. + MBB = NextMBB; + BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) .addReg(ThisDestReg).addImm(256).addReg(0); if (!HaveSingleBase) @@ -2599,6 +2632,22 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; + // If there's another CLC to go, branch to the end if a difference + // was found. + if (EndMBB && Length > 0) { + MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) + .addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + MBB->addSuccessor(NextMBB); + MBB = NextMBB; + } + } + if (EndMBB) { + MBB->addSuccessor(EndMBB); + MBB = EndMBB; + MBB->addLiveIn(SystemZ::CC); } MI->eraseFromParent(); diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 6026b1f6f0..dc2f225acb 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -141,6 +141,28 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain, return SDValue(); } +// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), +// deciding whether to use a loop or straight-line code. +static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain, + SDValue Src1, SDValue Src2, uint64_t Size) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + EVT PtrVT = Src1.getValueType(); + // A two-CLC sequence is a clear win over a loop, not least because it + // needs only one branch. A three-CLC sequence needs the same number + // of branches as a loop (i.e. 2), but is shorter. That brings us to + // lengths greater than 768 bytes. It seems relatively likely that + // a difference will be found within the first 768 bytes, so we just + // optimize for the smallest number of branch instructions, in order + // to avoid polluting the prediction buffer too much. A loop only ever + // needs 2 branches, whereas a straight-line sequence would need 3 or more. + if (Size > 3 * 256) + return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, PtrVT), + DAG.getConstant(Size / 256, PtrVT)); + return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, + DAG.getConstant(Size, PtrVT)); +} + // Convert the current CC value into an integer that is 0 if CC == 0, // less than zero if CC == 1 and greater than zero if CC >= 2. // The sequence starts with IPM, which puts CC into bits 29 and 28 @@ -159,17 +181,12 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, MachinePointerInfo Op2PtrInfo) const { - EVT PtrVT = Src1.getValueType(); if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) { uint64_t Bytes = CSize->getZExtValue(); - if (Bytes >= 1 && Bytes <= 0x100) { - // A single CLC. - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, - Src1, Src2, Size, DAG.getConstant(0, PtrVT)); - SDValue Glue = Chain.getValue(1); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); - } + assert(Bytes > 0 && "Caller should have handled 0-size case"); + Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); + SDValue Glue = Chain.getValue(1); + return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); } return std::make_pair(SDValue(), SDValue()); } |