summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/CodeGen/ExecutionDepsFix.cpp 43
-rw-r--r-- test/CodeGen/X86/sse-domains.ll 45
2 files changed, 80 insertions, 8 deletions
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index c25f7db26c..fc0b612464 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -160,7 +160,7 @@ private:
void collapse(DomainValue *dv, unsigned domain);
bool merge(DomainValue *A, DomainValue *B);
- void enterBasicBlock(MachineBasicBlock*);
+ bool enterBasicBlock(MachineBasicBlock*);
void leaveBasicBlock(MachineBasicBlock*);
void visitInstr(MachineInstr*);
void visitGenericInstr(MachineInstr*);
@@ -317,7 +317,13 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
return true;
}
-void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+// Return true if some predecessor hasn't been processed yet (like on a loop
+// back-edge).
+bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ bool seenBackEdge = false;
+
// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
@@ -326,7 +332,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
pe = MBB->pred_end(); pi != pe; ++pi) {
LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) continue;
+ if (fi == LiveOuts.end()) {
+ seenBackEdge = true;
+ continue;
+ }
+ if (!fi->second)
+ continue;
DomainValue *pdv = resolve(fi->second[rx]);
if (!pdv) continue;
if (!LiveRegs || !LiveRegs[rx]) {
@@ -350,12 +361,19 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
force(rx, pdv->getFirstDomain());
}
}
+ return seenBackEdge;
}
void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
// Save live registers at end of MBB - used by enterBasicBlock().
- if (LiveRegs)
- LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ // Also use LiveOuts as a visited set to detect back-edges.
+ if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i]);
+ delete[] LiveRegs;
+ }
LiveRegs = 0;
}
@@ -545,23 +563,32 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *Entry = MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
MachineBasicBlock *MBB = *MBBI;
- enterBasicBlock(MBB);
+ if (enterBasicBlock(MBB))
+ Loops.push_back(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I)
visitInstr(I);
leaveBasicBlock(MBB);
}
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Loops[i];
+ enterBasicBlock(MBB);
+ leaveBasicBlock(MBB);
+ }
+
// Clear the LiveOuts vectors and collapse any remaining DomainValues.
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
- if (FI == LiveOuts.end())
+ if (FI == LiveOuts.end() || !FI->second)
continue;
- assert(FI->second && "Null entry");
for (unsigned i = 0, e = NumRegs; i != e; ++i)
if (FI->second[i])
release(FI->second[i]);
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
new file mode 100644
index 0000000000..d26d32287e
--- /dev/null
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+; CHECK: f
+;
+; This function contains load, store, and 'and' operations that can all execute
+; in any domain. The only domain-specific operation is %add = shl..., a
+; <4 x i32> shift that is selected as the integer-only paddd instruction.
+;
+; The paddd instruction can only influence the other operations through the loop
+; back-edge. Check that everything is still moved into the integer domain.
+
+define void @f(<4 x i32>* nocapture %p, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %while.body
+
+; Materialize a zeroinitializer and a constant-pool load in the integer domain.
+; The order is not important.
+; CHECK: pxor
+; CHECK: movdqa
+
+; The instructions in the loop must all be integer domain as well.
+; CHECK: while.body
+; CHECK: pand
+; CHECK: movdqa
+; CHECK: movdqa
+; Finally, the controlling integer-only instruction.
+; CHECK: paddd
+while.body:
+ %p.addr.04 = phi <4 x i32>* [ %incdec.ptr, %while.body ], [ %p, %entry ]
+ %n.addr.03 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+ %x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ]
+ %dec = add nsw i32 %n.addr.03, -1
+ %and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
+ %incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1
+ store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
+ %0 = load <4 x i32>* %incdec.ptr, align 16
+ %add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+ ret void
+}