diff options
-rw-r--r-- | lib/CodeGen/ScheduleDAGInstrs.cpp | 235 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/s000-alias-misched.ll | 101 |
2 files changed, 244 insertions, 92 deletions
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ae7697242c..ef33b12367 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -86,56 +86,77 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } while (1); } -/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject +/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. -static const Value *getUnderlyingObject(const Value *V) { - // First just call Value::getUnderlyingObject to let it do what it does. +static void getUnderlyingObjects(const Value *V, + SmallVectorImpl<Value *> &Objects) { + SmallPtrSet<const Value*, 16> Visited; + SmallVector<const Value *, 4> Working(1, V); do { - V = GetUnderlyingObject(V); - // If it found an inttoptr, use special code to continue climing. - if (Operator::getOpcode(V) != Instruction::IntToPtr) - break; - const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); - // If that succeeded in finding a pointer, continue the search. - if (!O->getType()->isPointerTy()) - break; - V = O; - } while (1); - return V; + V = Working.pop_back_val(); + + SmallVector<Value *, 4> Objs; + GetUnderlyingObjects(const_cast<Value *>(V), Objs); + + for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { + V = *I; + if (!Visited.insert(V)) + continue; + if (Operator::getOpcode(V) == Instruction::IntToPtr) { + const Value *O = + getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); + if (O->getType()->isPointerTy()) { + Working.push_back(O); + continue; + } + } + Objects.push_back(const_cast<Value *>(V)); + } + } while (!Working.empty()); } -/// getUnderlyingObjectForInstr - If this machine instr has memory reference +/// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known -/// object, return the Value for that object. Otherwise return null. -static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - bool &MayAlias) { - MayAlias = true; +/// object, return the Value for that object. +static void getUnderlyingObjectsForInstr(const MachineInstr *MI, + const MachineFrameInfo *MFI, + SmallVectorImpl<std::pair<const Value *, bool> > &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) - return 0; + return; const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) - return 0; - - V = getUnderlyingObject(V); - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - if (PSV->isAliased(MFI)) - return 0; - - MayAlias = PSV->mayAlias(MFI); - return V; - } + return; + + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(V, Objs); + + for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { + bool MayAlias = true; + V = *I; + + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + + if (PSV->isAliased(MFI)) { + Objects.clear(); + return; + } - if (isIdentifiedObject(V)) - return V; + MayAlias = PSV->mayAlias(MFI); + } else if (!isIdentifiedObject(V)) { + Objects.clear(); + return; + } - return 0; + Objects.push_back(std::make_pair(V, MayAlias)); + } } void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { @@ -453,23 +474,29 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, if ((*MI->memoperands_begin())->isVolatile() || MI->hasUnmodeledSideEffects()) return true; - const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return true; - V = getUnderlyingObject(V); - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - // Similarly to getUnderlyingObjectForInstr: - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - if (PSV->isAliased(MFI)) + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(V, Objs); + for (SmallVector<Value *, 4>::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { + V = *I; + + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // Similarly to getUnderlyingObjectForInstr: + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (PSV->isAliased(MFI)) + return true; + } + + // Does this pointer refer to a distinct and identifiable object? + if (!isIdentifiedObject(V)) return true; } - // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(V)) - return true; return false; } @@ -821,59 +848,70 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - bool MayAlias = true; - if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { + SmallVector<std::pair<const Value *, bool>, 4> Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs); + + if (Objs.empty()) { + // Treat all other stores conservatively. + goto new_alias_chain; + } + + bool MayAlias = false; + for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { + const Value *V = K->first; + bool ThisMayAlias = K->second; + if (ThisMayAlias) + MayAlias = true; + // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is // an existing def. MapVector<const Value *, SUnit *>::iterator I = - ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); MapVector<const Value *, SUnit *>::iterator IE = - ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); I->second = SU; } else { - if (MayAlias) + if (ThisMayAlias) AliasMemDefs[V] = SU; else NonAliasMemDefs[V] = SU; } // Handle the uses in MemUses, if there are any. MapVector<const Value *, std::vector<SUnit *> >::iterator J = - ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); MapVector<const Value *, std::vector<SUnit *> >::iterator JE = - ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } - if (MayAlias) { - // Add dependencies from all the PendingLoads, i.e. loads - // with no underlying object. - for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, - TrueMemOrderLatency); - // Add dependence on alias chain, if needed. - if (AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); - // But we also should check dependent instructions for the - // SU in question. - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, - TrueMemOrderLatency); - } - // Add dependence on barrier chain, if needed. - // There is no point to check aliasing on barrier event. Even if - // SU and barrier _could_ be reordered, they should not. In addition, - // we have lost all RejectMemNodes below barrier. - if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Barrier)); - } else { - // Treat all other stores conservatively. - goto new_alias_chain; } + if (MayAlias) { + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); + // Add dependence on alias chain, if needed. + if (AliasChain) + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + // But we also should check dependent instructions for the + // SU in question. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + } + // Add dependence on barrier chain, if needed. + // There is no point to check aliasing on barrier event. Even if + // SU and barrier _could_ be reordered, they should not. In addition, + // we have lost all RejectMemNodes below barrier. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); if (!ExitSU.isPred(SU)) // Push store's up a bit to avoid them getting in between cmp @@ -884,20 +922,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - if (const Value *V = - getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { - // A load from a specific PseudoSourceValue. Add precise dependencies. - MapVector<const Value *, SUnit *>::iterator I = - ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<const Value *, SUnit *>::iterator IE = - ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); - if (I != IE) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); - if (MayAlias) - AliasMemUses[V].push_back(SU); - else - NonAliasMemUses[V].push_back(SU); - } else { + SmallVector<std::pair<const Value *, bool>, 4> Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs); + + if (Objs.empty()) { // A load with no underlying object. Depend on all // potentially aliasing stores. for (MapVector<const Value *, SUnit *>::iterator I = @@ -906,6 +934,29 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.push_back(SU); MayAlias = true; + } else { + MayAlias = false; + } + + for (SmallVector<std::pair<const Value *, bool>, 4>::iterator + J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { + const Value *V = J->first; + bool ThisMayAlias = J->second; + + if (ThisMayAlias) + MayAlias = true; + + // A load from a specific PseudoSourceValue. Add precise dependencies. + MapVector<const Value *, SUnit *>::iterator I = + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector<const Value *, SUnit *>::iterator IE = + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + if (ThisMayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); } if (MayAlias) adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll new file mode 100644 index 0000000000..d03ee8738e --- /dev/null +++ b/test/CodeGen/PowerPC/s000-alias-misched.ll @@ -0,0 +1,101 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s +; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s + +@aa = external global [256 x [256 x double]], align 32 +@bb = external global [256 x [256 x double]], align 32 +@cc = external global [256 x [256 x double]], align 32 +@.str1 = external hidden unnamed_addr constant [6 x i8], align 1 +@X = external global [16000 x double], align 32 +@Y = external global [16000 x double], align 32 +@Z = external global [16000 x double], align 32 +@U = external global [16000 x double], align 32 +@V = external global [16000 x double], align 32 +@.str137 = external hidden unnamed_addr constant [14 x i8], align 1 + +declare void @check(i32 signext) nounwind + +declare signext i32 @printf(i8* nocapture, ...) nounwind + +declare signext i32 @init(i8*) nounwind + +define signext i32 @s000() nounwind { +entry: + %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) + %call1 = tail call i64 @clock() nounwind + br label %for.cond2.preheader + +; CHECK: @s000 + +for.cond2.preheader: ; preds = %for.end, %entry + %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ] + br label %for.body4 + +for.body4: ; preds = %for.body4, %for.cond2.preheader + %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ] + %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv + %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv + %0 = bitcast double* %arrayidx to <1 x double>* + %1 = load <1 x double>* %0, align 32, !tbaa !0 + %add = fadd <1 x double> %1, <double 1.000000e+00> + %2 = bitcast double* %arrayidx6 to <1 x double>* + store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0 + %indvars.iv.next.322 = or i64 %indvars.iv, 4 + %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322 + %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322 + %3 = bitcast double* %arrayidx.4 to <1 x double>* + %4 = load <1 x double>* %3, align 32, !tbaa !0 + %add.4 = fadd <1 x double> %4, <double 1.000000e+00> + %5 = bitcast double* %arrayidx6.4 to <1 x double>* + store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0 + %indvars.iv.next.726 = or i64 %indvars.iv, 8 + %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726 + %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726 + %6 = bitcast double* %arrayidx.8 to <1 x double>* + %7 = load <1 x double>* %6, align 32, !tbaa !0 + %add.8 = fadd <1 x double> %7, <double 1.000000e+00> + %8 = bitcast double* %arrayidx6.8 to <1 x double>* + store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0 + %indvars.iv.next.1130 = or i64 %indvars.iv, 12 + %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130 + %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130 + %9 = bitcast double* %arrayidx.12 to <1 x double>* + %10 = load <1 x double>* %9, align 32, !tbaa !0 + %add.12 = fadd <1 x double> %10, <double 1.000000e+00> + %11 = bitcast double* %arrayidx6.12 to <1 x double>* + store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0 + %indvars.iv.next.15 = add i64 %indvars.iv, 16 + %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32 + %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000 + br i1 %exitcond.15, label %for.end, label %for.body4 + +; All of the loads should come before all of the stores. +; CHECK: mtctr +; CHECK: stfd +; CHECK-NOT: lfd +; CHECK: bdnz + +for.end: ; preds = %for.body4 + %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind + %inc9 = add nsw i32 %nl.018, 1 + %exitcond = icmp eq i32 %inc9, 400000 + br i1 %exitcond, label %for.end10, label %for.cond2.preheader + +for.end10: ; preds = %for.end + %call11 = tail call i64 @clock() nounwind + %sub = sub nsw i64 %call11, %call1 + %conv = sitofp i64 %sub to double + %div = fdiv double %conv, 1.000000e+06 + %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind + tail call void @check(i32 signext 1) + ret i32 0 +} + +declare i64 @clock() nounwind + +declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double) + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} |