author     Yi Jiang <yjiang@apple.com>  2014-04-21 19:34:27 +0000
committer  Yi Jiang <yjiang@apple.com>  2014-04-21 19:34:27 +0000
commit     5d473a08317ce9f150701c1a3cdb2e3df10922fc (patch)
tree       e85ee6ff6b17d4d9997f994d4982e3dfb72d4df7 /lib
parent     59b626b938925200c7df6c534e4736949490bd77 (diff)
ARM64: Combine shifts and uses from different basic blocks into a bit-extract instruction
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206774 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp          192
-rw-r--r--  lib/Target/ARM64/ARM64ISelDAGToDAG.cpp   40
-rw-r--r--  lib/Target/ARM64/ARM64ISelLowering.cpp    2
3 files changed, 231 insertions, 3 deletions
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 009e153e0e..27fdd5187b 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -628,6 +628,187 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
return MadeChange;
}
+/// isExtractBitsCandidateUse - Check if the candidate use could be
+/// combined with the shift instruction. The candidates are:
+/// 1. A truncate instruction
+/// 2. An 'and' instruction whose immediate is a mask of the low bits:
+///    imm & (imm+1) == 0
+static bool isExtractBitsCandidateUse(Instruction *User) {
+ if (!isa<TruncInst>(User)) {
+ if (User->getOpcode() != Instruction::And ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return false;
+
+    uint64_t Cimm = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+
+ if (Cimm & (Cimm + 1))
+ return false;
+ }
+ return true;
+}
+
+/// SinkShiftAndTruncate - Sink both the shift and truncate instructions
+/// into the basic blocks of the truncate's users.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+ DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+ const TargetLowering &TLI) {
+ BasicBlock *UserBB = User->getParent();
+ DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+  TruncInst *TruncI = cast<TruncInst>(User);
+ bool MadeChange = false;
+
+ for (Value::user_iterator TruncUI = TruncI->user_begin(),
+ TruncE = TruncI->user_end();
+ TruncUI != TruncE;) {
+
+    Use &TruncTheUse = TruncUI.getUse();
+    Instruction *TruncUser = cast<Instruction>(*TruncUI);
+
+    // Preincrement the use iterator so we don't invalidate it.
+    ++TruncUI;
+
+ int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+ if (!ISDOpcode)
+ continue;
+
+ // If the use is actually a legal node, there will not be an implicit
+ // truncate.
+ if (TLI.isOperationLegalOrCustom(ISDOpcode,
+ EVT::getEVT(TruncUser->getType())))
+ continue;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(TruncUser))
+ continue;
+
+ BasicBlock *TruncUserBB = TruncUser->getParent();
+
+ if (UserBB == TruncUserBB)
+ continue;
+
+ BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+ CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+ if (!InsertedShift && !InsertedTrunc) {
+ BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+ // Sink the shift
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift =
+ BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ else
+ InsertedShift =
+ BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+ // Sink the trunc
+ BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+ TruncInsertPt++;
+
+ InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+ TruncI->getType(), "", TruncInsertPt);
+
+ MadeChange = true;
+
+ TruncTheUse = InsertedTrunc;
+ }
+ }
+ return MadeChange;
+}
+
+/// OptimizeExtractBits - Sink the shift *right* instruction into user blocks
+/// if the uses could potentially be combined with this shift instruction and
+/// generate a BitExtract instruction. It will only be applied if the
+/// architecture supports a BitExtract instruction. Here is an example:
+/// BB1:
+///   %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+///   %x.extract.shift.1 = lshr i64 %arg1, 32
+///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
+/// instruction.
+/// Return true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+ const TargetLowering &TLI) {
+ BasicBlock *DefBB = ShiftI->getParent();
+
+  // Only insert instructions in each block once.
+ DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+  bool ShiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ if (!isExtractBitsCandidateUse(User))
+ continue;
+
+ BasicBlock *UserBB = User->getParent();
+
+ if (UserBB == DefBB) {
+      // If the shift and truncate instructions are in the same BB, the use of
+      // the truncate (TruncUse) may still introduce another implicit truncate
+      // if its type is not legal. In this case we would like to sink both the
+      // shift and the truncate instruction to the BB of TruncUse.
+      // For example:
+      // BB1:
+      //   i64 shift.result = lshr i64 opnd, imm
+      //   trunc.result = trunc shift.result to i16
+      //
+      // BB2:
+      //   ----> We will have an implicit truncate here if the architecture
+      //   does not have an i16 compare.
+      //   cmp i16 trunc.result, opnd2
+      //
+      if (isa<TruncInst>(User) && ShiftIsLegal
+          // If the type of the truncate is legal, no truncate will be
+          // introduced in other basic blocks.
+          && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+        MadeChange =
+            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+
+ continue;
+ }
+ // If we have already inserted a shift into this block, use it.
+ BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+ if (!InsertedShift) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift =
+ BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ else
+ InsertedShift =
+ BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+ MadeChange = true;
+ }
+
+ // Replace a use of the shift with a use of the new shift.
+ TheUse = InsertedShift;
+ }
+
+ // If we removed all uses, nuke the shift.
+ if (ShiftI->use_empty())
+ ShiftI->eraseFromParent();
+
+ return MadeChange;
+}
+
namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
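
A note on the Cimm & (Cimm + 1) test in isExtractBitsCandidateUse above: it accepts exactly the constants of the form 2^k - 1, i.e. masks covering only the low bits. A minimal standalone sketch of the idiom (the helper name isLowBitMask is ours, not part of the patch):

#include <cassert>
#include <cstdint>

// Adding 1 to a run of set bits starting at bit 0 carries past all of
// them, so ANDing the result with the original value yields 0.
static bool isLowBitMask(uint64_t Imm) { return (Imm & (Imm + 1)) == 0; }

int main() {
  assert(isLowBitMask(0x0));    // degenerate mask: extracts no bits
  assert(isLowBitMask(0xFF));   // low 8 bits
  assert(isLowBitMask(0xFFFF)); // low 16 bits
  assert(!isLowBitMask(0xF0));  // run does not start at bit 0
  assert(!isLowBitMask(0x101)); // set bits are not contiguous
  return 0;
}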
@@ -3225,6 +3406,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
return false;
}
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+ if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+ BinOp->getOpcode() == Instruction::LShr)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (TLI && CI && TLI->hasExtractBitsInsn())
+ return OptimizeExtractBits(BinOp, CI, *TLI);
+
+ return false;
+ }
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
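
Stepping back from this file: both candidate uses (a truncate, or an 'and' with a low-bit mask) reduce to a single bit-field extract, which is why OptimizeInst gates the whole transform on TLI->hasExtractBitsInsn(). A quick equivalence check in plain C++ (ubfx below is our reference model of the ARM64 instruction, not code from the patch):

#include <cassert>
#include <cstdint>

// Reference model of UBFX: extract Width bits starting at bit Lsb.
static uint64_t ubfx(uint64_t X, unsigned Lsb, unsigned Width) {
  return (X >> Lsb) & ((1ULL << Width) - 1);
}

int main() {
  uint64_t X = 0xDEADBEEFCAFEF00DULL;
  // Case 1: 'and' with a low-bit mask, (x >> 12) & 0xFF.
  assert(((X >> 12) & 0xFF) == ubfx(X, 12, 8));
  // Case 2: truncate, (uint16_t)(x >> 32).
  assert(static_cast<uint16_t>(X >> 32) == ubfx(X, 32, 16));
  return 0;
}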
diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
index 8ef13e781b..f96249937e 100644
--- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
+++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
@@ -1183,6 +1183,14 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
// Make sure to clamp the MSB so that we preserve the semantics of the
// original operations.
ClampMSB = true;
+ } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
+ isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ // If the shift result was truncated, we can still combine them.
+ Opd0 = Op0->getOperand(0).getOperand(0);
+
+    // Use the type of the SRL node.
+ VT = Opd0->getValueType(0);
} else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
Opd0 = Op0->getOperand(0);
} else if (BiggerPattern) {
@@ -1277,8 +1285,19 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
// we're looking for a shift of a shift
uint64_t Shl_imm = 0;
+ uint64_t Trunc_bits = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
Opd0 = N->getOperand(0).getOperand(0);
+ } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
+ N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
+    // We are looking for a shift of a truncate. A truncate from i64 to i32
+    // can be considered as setting the high 32 bits to zero. Our strategy
+    // here is to always generate a 64-bit UBFM; this consistency will help
+    // the CSE pass later find more redundancy.
+ Opd0 = N->getOperand(0).getOperand(0);
+ Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
+ VT = Opd0->getValueType(0);
+ assert(VT == MVT::i64 && "the promoted type should be i64");
} else if (BiggerPattern) {
// Let's pretend a 0 shift left has been performed.
// FIXME: Currently we limit this to the bigger pattern case,
@@ -1295,7 +1314,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
"bad amount in shift node!");
// Note: The width operand is encoded as width-1.
- unsigned Width = VT.getSizeInBits() - Srl_imm - 1;
+ unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
int sLSB = Srl_imm - Shl_imm;
if (sLSB < 0)
return false;
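
As a sanity check on the adjusted width arithmetic: with the type promoted to i64 and Trunc_bits = 32, the selected UBFM extracts 64 - Trunc_bits - Srl_imm bits starting at bit Srl_imm, which matches shifting the truncated value. A sketch under those assumptions (helper and variable names are ours):

#include <cassert>
#include <cstdint>

// Reference model of the 64-bit unsigned bit-field extract.
static uint64_t ubfx64(uint64_t X, unsigned Lsb, unsigned Width) {
  return (X >> Lsb) & ((1ULL << Width) - 1);
}

int main() {
  uint64_t X = 0x1122334455667788ULL;
  unsigned SrlImm = 8, TruncBits = 32;
  // Width as computed above (the machine operand encodes Width - 1).
  unsigned Width = 64 - TruncBits - SrlImm;
  // What the original IR computes: (trunc x to i32) >> SrlImm.
  uint32_t Ref = static_cast<uint32_t>(X) >> SrlImm;
  assert(ubfx64(X, SrlImm, Width) == Ref);
  return 0;
}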
@@ -1354,8 +1373,23 @@ SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
return NULL;
EVT VT = N->getValueType(0);
- SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT) };
+
+  // If the bit extract operation is 64-bit but the original type is 32-bit,
+  // we need to add an EXTRACT_SUBREG.
+ if ((Opc == ARM64::SBFMXri || Opc == ARM64::UBFMXri) && VT == MVT::i32) {
+ SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
+ CurDAG->getTargetConstant(MSB, MVT::i64)};
+
+ SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
+ SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
+ MachineSDNode *Node =
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
+ SDValue(BFM, 0), SubReg);
+ return Node;
+ }
+
+ SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
+ CurDAG->getTargetConstant(MSB, VT)};
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3);
}
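
EXTRACT_SUBREG with sub_32 reads the low 32 bits of the 64-bit UBFM result, so the original i32 value is preserved. A small check of that round trip (again our reference model, not the selector itself):

#include <cassert>
#include <cstdint>

static uint64_t ubfx64(uint64_t X, unsigned Lsb, unsigned Width) {
  return (X >> Lsb) & ((1ULL << Width) - 1);
}

int main() {
  uint64_t X = 0x0123456789ABCDEFULL;
  // i32 node ((uint32_t)x) >> 4, selected as a 64-bit UBFM with
  // LSB 4 and width 28, then EXTRACT_SUBREG sub_32 takes the low half.
  uint32_t Expected = static_cast<uint32_t>(X) >> 4;
  uint32_t Low32 = static_cast<uint32_t>(ubfx64(X, 4, 28));
  assert(Low32 == Expected);
  return 0;
}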
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
index 7fee5646d1..ee498e65c7 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -438,6 +438,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
setDivIsWellDefined(true);
RequireStrictAlign = StrictAlign;
+
+ setHasExtractBitsInsn(true);
}
void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
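
For context on this last hunk: setHasExtractBitsInsn(true) is what makes the new CodeGenPrepare path fire for ARM64, via the TLI->hasExtractBitsInsn() check added to OptimizeInst. A minimal mock of the opt-in pattern (these classes are simplified stand-ins, not LLVM's real TargetLowering):

#include <cassert>

// Simplified stand-in for the TargetLowering flag added by this patch.
class MockTargetLowering {
  bool HasExtractBitsInsn = false;

protected:
  void setHasExtractBitsInsn(bool V) { HasExtractBitsInsn = V; }

public:
  bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
};

// Targets with UBFX/SBFX-style instructions opt in from their constructor,
// exactly as ARM64TargetLowering does above.
class MockARM64TargetLowering : public MockTargetLowering {
public:
  MockARM64TargetLowering() { setHasExtractBitsInsn(true); }
};

int main() {
  MockARM64TargetLowering TLI;
  assert(TLI.hasExtractBitsInsn()); // the gate in OptimizeInst passes
  return 0;
}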