summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Gerbarg <lgg@apple.com>2014-05-29 20:29:47 +0000
committerLouis Gerbarg <lgg@apple.com>2014-05-29 20:29:47 +0000
commit78874456fc58ca993635e31ef5ab5517769efb93 (patch)
tree6db8242763d3f6ab8c7eae30d9e347fd01611deb
parent960fc3503fed972763cbd21ffd03b85ef2e58c82 (diff)
downloadllvm-78874456fc58ca993635e31ef5ab5517769efb93.tar.gz
llvm-78874456fc58ca993635e31ef5ab5517769efb93.tar.bz2
llvm-78874456fc58ca993635e31ef5ab5517769efb93.tar.xz
Add support for combining GEPs across PHI nodes
Currently LLVM will generally merge GEPs. This allows backends to use more complex addressing modes. In some cases this is not happening because there is PHI inbetween the two GEPs: GEP1--\ |-->PHI1-->GEP3 GEP2--/ This patch checks to see if GEP1 and GEP2 are similiar enough that they can be cloned (GEP12) in GEP3's BB, allowing GEP->GEP merging (GEP123): GEP1--\ --\ --\ |-->PHI1-->GEP3 ==> |-->PHI2->GEP12->GEP3 == > |-->PHI2->GEP123 GEP2--/ --/ --/ This also breaks certain use chains that are preventing GEP->GEP merges that the the existing instcombine would merge otherwise. Tests included. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209843 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp79
-rw-r--r--test/Transforms/InstCombine/gepphigep.ll56
2 files changed, 135 insertions, 0 deletions
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4c36887f62..c72d099d9f 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1220,6 +1220,85 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (MadeChange) return &GEP;
}
+ // Check to see if the inputs to the PHI node are getelementptr instructions.
+ if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
+ GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
+ if (!Op1)
+ return nullptr;
+
+ signed DI = -1;
+
+ for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
+ GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I);
+ if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
+ return nullptr;
+
+ for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
+ if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
+ return nullptr;
+
+ if (Op1->getOperand(J) != Op2->getOperand(J)) {
+ if (DI == -1) {
+ // We have not seen any differences yet in the GEPs feeding the
+ // PHI yet, so we record this one if it is allowed to be a
+ // variable.
+
+ // The first two arguments can vary for any GEP, the rest have to be
+ // static for struct slots
+ if (J > 1) {
+ SmallVector<Value*, 8> Idxs(GEP.idx_begin(), GEP.idx_begin()+J-1);
+ Type *Ty =
+ GetElementPtrInst::getIndexedType(Op1->getOperand(0)->getType(),
+ Idxs);
+ if (Ty->isStructTy())
+ return nullptr;
+ }
+
+ DI = J;
+ } else {
+ // The GEP is different by more than one input. While this could be
+ // extended to support GEPs that vary by more than one variable it
+ // doesn't make sense since it greatly increases the complexity and
+ // would result in an R+R+R addressing mode which no backend
+ // directly supports and would need to be broken into several
+ // simpler instructions anyway.
+ return nullptr;
+ }
+ }
+ }
+ }
+
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+
+ if (DI == -1) {
+ // All the GEPs feeding the PHI are identical. Clone one down into our
+ // BB so that it can be merged with the current GEP.
+ GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
+ NewGEP);
+ } else {
+ // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
+ // into the current block so it can be merged, and create a new PHI to
+ // set that index.
+ Instruction *InsertPt = Builder->GetInsertPoint();
+ Builder->SetInsertPoint(PN);
+ PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
+ PN->getNumOperands());
+ Builder->SetInsertPoint(InsertPt);
+
+ for (auto &I : PN->operands())
+ NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
+ PN->getIncomingBlock(I));
+
+ NewGEP->setOperand(DI, NewPN);
+ GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
+ NewGEP);
+ NewGEP->setOperand(DI, NewPN);
+ }
+
+ GEP.setOperand(0, NewGEP);
+ PtrOp = NewGEP;
+ }
+
// Combine Indices - If the source pointer to this getelementptr instruction
// is a getelementptr instruction, combine the indices of the two
// getelementptr instructions into a single instruction.
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll
new file mode 100644
index 0000000000..9aab609901
--- /dev/null
+++ b/test/Transforms/InstCombine/gepphigep.ll
@@ -0,0 +1,56 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct1 = type { %struct2*, i32, i32, i32 }
+%struct2 = type { i32, i32 }
+
+define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+ %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0
+ %tmp1 = load %struct2** %tmp, align 8
+ br i1 %tmp4, label %bb1, label %bb2
+
+bb1:
+ %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9
+ %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ br label %bb3
+
+bb2:
+ %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19
+ %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0
+ store i32 0, i32* %tmp21, align 4
+ br label %bb3
+
+bb3:
+ %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ]
+ %tmp24 = getelementptr inbounds %struct2* %phi, i64 0, i32 1
+ %tmp25 = load i32* %tmp24, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test1(
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: %[[PHI:[0-9A-Za-z]+]] = phi i64 [ %tmp9, %bb1 ], [ %tmp19, %bb2 ]
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %[[PHI]], i32 1
+
+}
+
+define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
+bb:
+ %tmp = getelementptr inbounds %struct1* %dm, i64 0, i32 0
+ %tmp1 = load %struct2** %tmp, align 8
+ %tmp10 = getelementptr inbounds %struct2* %tmp1, i64 %tmp9
+ %tmp11 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 0
+ store i32 0, i32* %tmp11, align 4
+ %tmp20 = getelementptr inbounds %struct2* %tmp1, i64 %tmp19
+ %tmp21 = getelementptr inbounds %struct2* %tmp20, i64 0, i32 0
+ store i32 0, i32* %tmp21, align 4
+ %tmp24 = getelementptr inbounds %struct2* %tmp10, i64 0, i32 1
+ %tmp25 = load i32* %tmp24, align 4
+ ret i32 %tmp25
+
+; CHECK-LABEL: @test2(
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp19, i32 0
+; CHECK: getelementptr inbounds %struct2* %tmp1, i64 %tmp9, i32 1
+}