 include/llvm/CodeGen/LiveIntervalAnalysis.h | 20 ++++++++
 lib/CodeGen/LiveIntervalAnalysis.cpp        | 67 ++++++++++++++++
 lib/CodeGen/RegisterCoalescer.cpp           | 55 +++++++++++++-
 test/CodeGen/ARM/coalesce-subregs.ll        | 50 ++++++++++++-
 4 files changed, 189 insertions(+), 3 deletions(-)
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index bf7469093a..1e8dde1255 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -165,6 +165,26 @@ namespace llvm {
bool shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead = 0);
+ /// extendToIndices - Extend the live range of LI to reach all points in
+ /// Indices. The points in the Indices array must be jointly dominated by
+ /// existing defs in LI. PHI-defs are added as needed to maintain SSA form.
+ ///
+ /// If a SlotIndex in Indices is the end index of a basic block, LI will be
+ /// extended to be live out of the basic block.
+ ///
+ /// See also LiveRangeCalc::extend().
+ void extendToIndices(LiveInterval *LI, ArrayRef<SlotIndex> Indices);
+
+ /// pruneValue - If an LI value is live at Kill, prune its live range by
+ /// removing any liveness reachable from Kill. Add live range end points to
+ /// EndPoints such that extendToIndices(LI, EndPoints) will reconstruct the
+ /// value's live range.
+ ///
+ /// Calling pruneValue() and extendToIndices() can be used to reconstruct
+ /// SSA form after adding defs to a virtual register.
+ void pruneValue(LiveInterval *LI, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints);
+
SlotIndexes *getSlotIndexes() const {
return Indexes;
}
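
The two declarations above are meant to be used as a pair, as the pruneValue() comment notes. A minimal sketch of that pattern, assuming a pass that has just decided to insert an extra def of a virtual register; the helper name and its parameters are illustrative, not part of this patch:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    using namespace llvm;

    // Hypothetical helper: repair liveness after adding a def of LI's
    // register at index Def.
    static void repairAfterNewDef(LiveIntervals *LIS, LiveInterval *LI,
                                  SlotIndex Def) {
      // Remove all liveness of the old value reachable from Def, recording
      // where each removed segment used to end.
      SmallVector<SlotIndex, 8> EndPoints;
      LIS->pruneValue(LI, Def, &EndPoints);
      // ... the caller materializes the new def and its VNInfo here ...
      // Grow the live range back out to the recorded end points; PHI-defs
      // are inserted as needed to keep the interval in SSA form.
      LIS->extendToIndices(LI, EndPoints);
    }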
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index f7d601e23e..6c26b79f11 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -731,6 +731,73 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
return CanSeparate;
}
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+ ArrayRef<SlotIndex> Indices) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveRangeQuery LRQ(*LI, Kill);
+ assert(!LRQ.valueDefined() && "Can't prune value at the defining instr");
+
+ // Also can't prune a value that isn't there.
+ VNInfo *VNI = LRQ.valueOut();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBStart, MBBEnd;
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LI->removeRange(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from MBB without leaving VNI's live
+ // range.
+ for (df_iterator<MachineBasicBlock*>
+ I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) {
+ MachineBasicBlock *MBB = *I;
+ // KillMBB itself was already handled.
+ if (MBB == KillMBB) {
+ ++I;
+ continue;
+ }
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+}
//===----------------------------------------------------------------------===//
// Register allocator hooks.
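
The depth-first traversal in pruneValue() classifies each block reachable from KillMBB into one of three cases: not live-in (prune the DFS), killed inside the block, or live-through. An illustrative helper mirroring those checks, assuming LiveRangeQuery from llvm/CodeGen/LiveInterval.h; it is not part of the patch:

    // Illustrative only: classify how VNI behaves across a block whose
    // range is [MBBStart, MBBEnd), matching the cases in pruneValue().
    static const char *classifyBlock(const LiveInterval &LI,
                                     SlotIndex MBBStart, SlotIndex MBBEnd,
                                     const VNInfo *VNI) {
      LiveRangeQuery LRQ(LI, MBBStart);
      if (LRQ.valueIn() != VNI)
        return "not live-in";     // skipChildren(): stop searching below here
      if (LRQ.endPoint() < MBBEnd)
        return "killed in block"; // remove [MBBStart, endPoint), then prune
      return "live-through";      // remove the whole block, keep searching
    }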
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 85a62b2645..67cad14afd 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1277,6 +1277,11 @@ public:
/// Returns false if any conflicts were impossible to resolve.
bool resolveConflicts(JoinVals &Other);
+ /// Prune the live range of values in Other.LI where they would conflict with
+ /// CR_Replace values in LI. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
/// Erase any machine instructions that have been coalesced away.
/// Add erased instructions to ErasedInstrs.
/// Add foreign virtual registers to ShrinkRegs if their live range ended at
@@ -1421,6 +1426,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We have overlapping values, or possibly a kill of Other.
// Recursively compute assignments up the dominator tree.
Other.computeAssignment(V.OtherVNI->id, *this);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
// Don't attempt resolving PHI values for now.
if (VNI->isPHIDef())
@@ -1435,7 +1441,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
if (CP.isCoalescable(DefMI)) {
// Some of the lanes copied from OtherVNI may be undef, making them undef
// here too.
- V.ValidLanes &= ~V.WriteLanes | Other.Vals[V.OtherVNI->id].ValidLanes;
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
return CR_Erase;
}
@@ -1453,7 +1459,23 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
stripCopies(VNI) == stripCopies(V.OtherVNI))
return CR_Erase;
- // FIXME: Identify CR_Replace opportunities.
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
+ // 4 BAZ %dst<kill>
+ // 5 QUUX %src<kill>
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ return CR_Replace;
+
+ // FIXME: Identify CR_Replace opportunities where the clobbered lanes are
+ // dead.
return CR_Impossible;
}
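
The scenario in the comment can be made concrete with hypothetical lane masks; the values below are illustrative, since real masks come from the target's sub-register definitions:

    // OtherVNI (FOO) defined only ssub0 (mask 0x1); the copy on line 3 of
    // the example writes only ssub1 (mask 0x2). The masks are disjoint, so
    // the copy clobbers nothing OtherVNI defined and analyzeValue() can
    // safely return CR_Replace.
    static_assert((0x2u /*WriteLanes*/ & 0x1u /*ValidLanes*/) == 0u,
                  "disjoint lane masks allow CR_Replace");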
@@ -1514,6 +1536,18 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
return true;
}
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ if (Vals[i].Resolution != CR_Replace)
+ continue;
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+ << ": " << Other.LI << '\n');
+ }
+}
+
void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
@@ -1557,6 +1591,14 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// All clear, the live ranges can be merged.
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints);
+ RHSVals.pruneValues(LHSVals, EndPoints);
+
// Erase COPY and IMPLICIT_DEF instructions. This may cause some external
// registers to require trimming.
SmallVector<unsigned, 8> ShrinkRegs;
@@ -1574,6 +1616,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// ranges. They are reinserted after register allocation.
MRI->clearKillFlags(LHS.reg);
MRI->clearKillFlags(RHS.reg);
+
+ if (EndPoints.empty())
+ return true;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices(&LHS, EndPoints);
return true;
}
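
Taken together, the join path now follows a prune/join/extend sequence. A condensed view using the names from this patch, with the erase and join steps elided:

    SmallVector<SlotIndex, 8> EndPoints;
    LHSVals.pruneValues(RHSVals, EndPoints); // drop RHS ranges overlapping
    RHSVals.pruneValues(LHSVals, EndPoints); //   CR_Replace values, both ways
    // ... eraseInstrs(), then LHS.join(RHS, ...) merges the intervals ...
    if (!EndPoints.empty())
      LIS->extendToIndices(&LHS, EndPoints); // regrow the pruned liveness,
                                             // adding PHI-defs where needed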
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index fb0f4c67c9..dfb5b17306 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a9 -new-coalescer | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios0.0.0"
@@ -66,3 +66,51 @@ do.end: ; preds = %do.body
declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+
+; CHECK: f3
+; This function has lane insertions that span basic blocks.
+; The trivial REG_SEQUENCE lowering can't handle that, but the coalescer can.
+;
+; void f3(float *p, float *q) {
+; float32x2_t x;
+; x[1] = p[3];
+; if (q)
+; x[0] = q[0] + q[1];
+; else
+; x[0] = p[2];
+; vst1_f32(p+4, x);
+; }
+;
+; CHECK-NOT: vmov
+; CHECK-NOT: vorr
+define void @f3(float* %p, float* %q) nounwind ssp {
+entry:
+ %arrayidx = getelementptr inbounds float* %p, i32 3
+ %0 = load float* %arrayidx, align 4
+ %vecins = insertelement <2 x float> undef, float %0, i32 1
+ %tobool = icmp eq float* %q, null
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %1 = load float* %q, align 4
+ %arrayidx2 = getelementptr inbounds float* %q, i32 1
+ %2 = load float* %arrayidx2, align 4
+ %add = fadd float %1, %2
+ %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0
+ br label %if.end
+
+if.else: ; preds = %entry
+ %arrayidx4 = getelementptr inbounds float* %p, i32 2
+ %3 = load float* %arrayidx4, align 4
+ %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ]
+ %add.ptr = getelementptr inbounds float* %p, i32 4
+ %4 = bitcast float* %add.ptr to i8*
+ tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind