diff options
-rw-r--r-- | lib/Transforms/Scalar/InstructionCombining.cpp | 15 | ||||
-rw-r--r-- | test/Transforms/InstCombine/phi-merge-gep.ll | 101 |
2 files changed, 115 insertions, 1 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 47a02bd842..006d67b603 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -326,7 +326,7 @@ namespace { // instruction. Instead, visit methods should return the value returned by // this function. Instruction *EraseInstFromFunction(Instruction &I) { - DEBUG(errs() << "IC: erase " << I); + DEBUG(errs() << "IC: erase " << I << '\n'); assert(I.use_empty() && "Cannot erase instruction that is used!"); // Make sure that we reprocess all operands now that we reduced their @@ -10401,6 +10401,10 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // This is true if all GEP bases are allocas and if all indices into them are // constants. bool AllBasePointersAreAllocas = true; + + // We don't want to replace this phi if the replacement would require + // more than one phi. + bool NeededPhi = false; // Scan to see if all operands are the same opcode, all have one use, and all // kill their operands (i.e. the operands have one use). @@ -10432,7 +10436,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) return 0; + + // If we already needed a PHI for an earlier operand, and another operand + // also requires a PHI, we'd be introducing more PHIs than we're + // eliminating, which increases register pressure on entry to the PHI's + // block. + if (NeededPhi) + return 0; + FixedOperands[op] = 0; // Needs a PHI. + NeededPhi = true; } } diff --git a/test/Transforms/InstCombine/phi-merge-gep.ll b/test/Transforms/InstCombine/phi-merge-gep.ll new file mode 100644 index 0000000000..73c770e08c --- /dev/null +++ b/test/Transforms/InstCombine/phi-merge-gep.ll @@ -0,0 +1,101 @@ +; RUN: opt < %s -S -instcombine > %t +; RUN: grep {= getelementptr} %t | count 20 +; RUN: grep {= phi} %t | count 13 + +; Don't push the geps through these phis; they have multiple uses! + +define void @foo(float* %Ar, float* %Ai, i64 %As, float* %Cr, float* %Ci, i64 %Cs, i64 %n) nounwind { +entry: + %0 = getelementptr inbounds float* %Ar, i64 0 ; <float*> [#uses=1] + %1 = getelementptr inbounds float* %Ai, i64 0 ; <float*> [#uses=1] + %2 = mul i64 %n, %As ; <i64> [#uses=1] + %3 = getelementptr inbounds float* %Ar, i64 %2 ; <float*> [#uses=1] + %4 = mul i64 %n, %As ; <i64> [#uses=1] + %5 = getelementptr inbounds float* %Ai, i64 %4 ; <float*> [#uses=1] + %6 = mul i64 %n, 2 ; <i64> [#uses=1] + %7 = mul i64 %6, %As ; <i64> [#uses=1] + %8 = getelementptr inbounds float* %Ar, i64 %7 ; <float*> [#uses=1] + %9 = mul i64 %n, 2 ; <i64> [#uses=1] + %10 = mul i64 %9, %As ; <i64> [#uses=1] + %11 = getelementptr inbounds float* %Ai, i64 %10 ; <float*> [#uses=1] + %12 = getelementptr inbounds float* %Cr, i64 0 ; <float*> [#uses=1] + %13 = getelementptr inbounds float* %Ci, i64 0 ; <float*> [#uses=1] + %14 = mul i64 %n, %Cs ; <i64> [#uses=1] + %15 = getelementptr inbounds float* %Cr, i64 %14 ; <float*> [#uses=1] + %16 = mul i64 %n, %Cs ; <i64> [#uses=1] + %17 = getelementptr inbounds float* %Ci, i64 %16 ; <float*> [#uses=1] + %18 = mul i64 %n, 2 ; <i64> [#uses=1] + %19 = mul i64 %18, %Cs ; <i64> [#uses=1] + %20 = getelementptr inbounds float* %Cr, i64 %19 ; <float*> [#uses=1] + %21 = mul i64 %n, 2 ; <i64> [#uses=1] + %22 = mul i64 %21, %Cs ; <i64> [#uses=1] + %23 = getelementptr inbounds float* %Ci, i64 %22 ; <float*> [#uses=1] + br label %bb13 + +bb: ; preds = %bb13 + %24 = load float* %A0r.0, align 4 ; <float> [#uses=1] + %25 = load float* %A0i.0, align 4 ; <float> [#uses=1] + %26 = load float* %A1r.0, align 4 ; <float> [#uses=2] + %27 = load float* %A1i.0, align 4 ; <float> [#uses=2] + %28 = load float* %A2r.0, align 4 ; <float> [#uses=2] + %29 = load float* %A2i.0, align 4 ; <float> [#uses=2] + %30 = fadd float %26, %28 ; <float> [#uses=2] + %31 = fadd float %27, %29 ; <float> [#uses=2] + %32 = fsub float %26, %28 ; <float> [#uses=1] + %33 = fsub float %27, %29 ; <float> [#uses=1] + %34 = fadd float %24, %30 ; <float> [#uses=2] + %35 = fadd float %25, %31 ; <float> [#uses=2] + %36 = fmul float %30, -1.500000e+00 ; <float> [#uses=1] + %37 = fmul float %31, -1.500000e+00 ; <float> [#uses=1] + %38 = fadd float %34, %36 ; <float> [#uses=2] + %39 = fadd float %35, %37 ; <float> [#uses=2] + %40 = fmul float %32, 0x3FEBB67AE0000000 ; <float> [#uses=2] + %41 = fmul float %33, 0x3FEBB67AE0000000 ; <float> [#uses=2] + %42 = fadd float %38, %41 ; <float> [#uses=1] + %43 = fsub float %39, %40 ; <float> [#uses=1] + %44 = fsub float %38, %41 ; <float> [#uses=1] + %45 = fadd float %39, %40 ; <float> [#uses=1] + store float %34, float* %C0r.0, align 4 + store float %35, float* %C0i.0, align 4 + store float %42, float* %C1r.0, align 4 + store float %43, float* %C1i.0, align 4 + store float %44, float* %C2r.0, align 4 + store float %45, float* %C2i.0, align 4 + %46 = getelementptr inbounds float* %A0r.0, i64 %As ; <float*> [#uses=1] + %47 = getelementptr inbounds float* %A0i.0, i64 %As ; <float*> [#uses=1] + %48 = getelementptr inbounds float* %A1r.0, i64 %As ; <float*> [#uses=1] + %49 = getelementptr inbounds float* %A1i.0, i64 %As ; <float*> [#uses=1] + %50 = getelementptr inbounds float* %A2r.0, i64 %As ; <float*> [#uses=1] + %51 = getelementptr inbounds float* %A2i.0, i64 %As ; <float*> [#uses=1] + %52 = getelementptr inbounds float* %C0r.0, i64 %Cs ; <float*> [#uses=1] + %53 = getelementptr inbounds float* %C0i.0, i64 %Cs ; <float*> [#uses=1] + %54 = getelementptr inbounds float* %C1r.0, i64 %Cs ; <float*> [#uses=1] + %55 = getelementptr inbounds float* %C1i.0, i64 %Cs ; <float*> [#uses=1] + %56 = getelementptr inbounds float* %C2r.0, i64 %Cs ; <float*> [#uses=1] + %57 = getelementptr inbounds float* %C2i.0, i64 %Cs ; <float*> [#uses=1] + %58 = add nsw i64 %i.0, 1 ; <i64> [#uses=1] + br label %bb13 + +bb13: ; preds = %bb, %entry + %i.0 = phi i64 [ 0, %entry ], [ %58, %bb ] ; <i64> [#uses=2] + %C2i.0 = phi float* [ %23, %entry ], [ %57, %bb ] ; <float*> [#uses=2] + %C2r.0 = phi float* [ %20, %entry ], [ %56, %bb ] ; <float*> [#uses=2] + %C1i.0 = phi float* [ %17, %entry ], [ %55, %bb ] ; <float*> [#uses=2] + %C1r.0 = phi float* [ %15, %entry ], [ %54, %bb ] ; <float*> [#uses=2] + %C0i.0 = phi float* [ %13, %entry ], [ %53, %bb ] ; <float*> [#uses=2] + %C0r.0 = phi float* [ %12, %entry ], [ %52, %bb ] ; <float*> [#uses=2] + %A2i.0 = phi float* [ %11, %entry ], [ %51, %bb ] ; <float*> [#uses=2] + %A2r.0 = phi float* [ %8, %entry ], [ %50, %bb ] ; <float*> [#uses=2] + %A1i.0 = phi float* [ %5, %entry ], [ %49, %bb ] ; <float*> [#uses=2] + %A1r.0 = phi float* [ %3, %entry ], [ %48, %bb ] ; <float*> [#uses=2] + %A0i.0 = phi float* [ %1, %entry ], [ %47, %bb ] ; <float*> [#uses=2] + %A0r.0 = phi float* [ %0, %entry ], [ %46, %bb ] ; <float*> [#uses=2] + %59 = icmp slt i64 %i.0, %n ; <i1> [#uses=1] + br i1 %59, label %bb, label %bb14 + +bb14: ; preds = %bb13 + br label %return + +return: ; preds = %bb14 + ret void +} |