summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/MachineCSE.cpp69
-rw-r--r--test/CodeGen/ARM/machine-cse-cmp.ll31
-rw-r--r--test/CodeGen/Thumb2/thumb2-cbnz.ll1
-rw-r--r--test/CodeGen/X86/machine-cse.ll24
4 files changed, 107 insertions, 18 deletions
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 8c02cd7ddb..5dd51a7944 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -26,13 +26,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
@@ -82,9 +83,11 @@ namespace {
MachineBasicBlock::const_iterator E) const ;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -189,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// instruction does not uses a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const{
MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -207,6 +211,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
PhysRefs.insert(Reg);
+ if (MO.isDef())
+ PhysDefs.push_back(Reg);
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
PhysRefs.insert(*Alias);
}
@@ -215,20 +221,39 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
- // not in the same basic block as the given instruction.
- MachineBasicBlock *MBB = MI->getParent();
- if (CSMI->getParent() != MBB)
- return false;
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() == 1 && *MBB->pred_begin() == CSMBB)
+ CrossMBB = true;
+ else
+ return false;
+ }
MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I->isDebugValue())
+ while (I != E && I != EE && I->isDebugValue())
++I;
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
if (I == E)
return true;
@@ -393,16 +418,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
SmallSet<unsigned,8> PhysRefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+ SmallVector<unsigned, 2> PhysDefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
- // ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between and the physical register uses
- // were not clobbered.
+ // ... Unless the CS is local or is in the sole predecessor block
+ // and it also defines the physical register which is not clobbered
+ // in between and the physical register uses were not clobbered.
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, CrossMBBPhysDef))
FoundCSE = true;
}
@@ -457,6 +484,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
MRI->clearKillFlags(CSEPairs[i].second);
}
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
MI->eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f3bc..f566974c23 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
;rdar://8003725
@G1 = external global i32
@@ -6,6 +6,7 @@
define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
entry:
+; CHECK: f1:
; CHECK: cmp
; CHECK: moveq
; CHECK-NOT: cmp
@@ -16,3 +17,31 @@ entry:
%tmp4 = add i32 %tmp2, %tmp3
ret i32 %tmp4
}
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+ %0 = load i32* @foo, align 4
+ %cmp28 = icmp sgt i32 %0, 0
+ br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph: ; preds = %entry
+ %1 = icmp sgt i32 %0, 1
+ %smax = select i1 %1, i32 %0, i32 1
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+ unreachable
+
+for.cond1.preheader: ; preds = %entry
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 7993bbf0ed..893bd0fdae 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -24,7 +24,6 @@ bb7: ; preds = %bb3
bb9: ; preds = %bb7
; CHECK: cmp r0, #0
-; CHECK: cmp r0, #0
; CHECK-NEXT: cbnz
%0 = tail call double @foo(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d819fc8f6e..a757cde6ab 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
; rdar://7610418
%ptr = type { i8* }
@@ -77,3 +77,25 @@ bb.nph743.us: ; preds = %for.body53.us, %if.
sw.bb307: ; preds = %sw.bb, %entry
ret void
}
+
+; CSE physical register defining instruction across MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+ %cmp = icmp ugt i32 %a, %b
+ br i1 %cmp, label %return, label %if.end
+
+if.end: ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+ %cmp1 = icmp ult i32 %a, %b
+ %. = sext i1 %cmp1 to i32
+ br label %return
+
+return: ; preds = %if.end, %entry
+ %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+ ret i32 %retval.0
+}