author    Hal Finkel <hfinkel@anl.gov>  2013-05-16 19:58:38 +0000
committer Hal Finkel <hfinkel@anl.gov>  2013-05-16 19:58:38 +0000
commit    c482454e3cc2a33a2cf2d1cf0881c7c5e2641c80 (patch)
tree      6880887689abe3c97fd79a896ab470b8da4090b7
parent    02e168003f45cf8e0a277c6b8c85c1a3032b1dec (diff)
Create a new preheader in PPCCTRLoops to avoid counter register clobbers
Some IR-level instructions (such as FP <-> i64 conversions) are not chained w.r.t. the mtctr intrinsic and yet may become function calls that clobber the counter register. At the selection-DAG level, these might be reordered with the mtctr intrinsic, causing miscompiles. To avoid this situation, if an existing preheader has instructions that might use the counter register, create a new preheader for the mtctr intrinsic. This extra block will be re-merged with the old preheader at the MI level, but it will prevent unwanted reordering at the selection-DAG level.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182045 91177308-0d34-0410-b5e6-96231b3b80d8
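For illustration (a reduced sketch, not part of this commit; @init_value, @sink, and @example are hypothetical names mirroring the new test below), this is the kind of IR shape the new check guards against on 32-bit PowerPC: an fptosi in the loop preheader that lowers to a __fixdfdi libcall, clobbering CTR, while remaining unordered with respect to the mtctr intrinsic placed in the same block.

@init_value = global double 1.000000e+00, align 8
@sink = global i64 0, align 8

define void @example() {
entry:                                       ; the loop preheader
  %v = load double* @init_value, align 8
  %conv = fptosi double %v to i64            ; on 32-bit PPC this lowers to a call to
                                             ; __fixdfdi, which clobbers CTR and is not
                                             ; chained w.r.t. the mtctr intrinsic
  store i64 %conv, i64* @sink, align 8
  br label %loop

loop:                                        ; candidate for a CTR (bdnz) loop
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %i.next = add i32 %i, 1
  %exitcond = icmp eq i32 %i.next, 8000
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

With this change, mightUseCTR() flags the conversion in the would-be preheader, so InsertPreheaderForLoop() splits off a fresh block to hold the mtctr; the two blocks are merged back together at the MI level, as described above.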
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp    | 318
-rw-r--r--  test/CodeGen/PowerPC/ctrloop-fp64.ll  |  32
2 files changed, 197 insertions, 153 deletions
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 6bbd571894..5d001fc072 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -96,6 +96,7 @@ namespace {
SmallVectorImpl<BasicBlock*> &SplitPreds,
Loop *L);
+ bool mightUseCTR(const Triple &TT, BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
private:
PPCTargetMachine *TM;
@@ -143,6 +144,161 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
return MadeChange;
}
+bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+ }
+
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ Opcode = ISD::FTRUNC; break;
+ }
+
+ MVT VTy =
+ TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ if (VTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, VTy))
+ continue;
+ else if (VTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
+ continue;
+
+ return true;
+ }
+ }
+
+ return true;
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ (TT.isArch32Bit() &&
+ (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
+ CI->getDestTy()->getScalarType()->isIntegerTy(64))
+ ))
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (TLI->supportJumpTables() &&
+ SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
@@ -173,158 +329,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// We don't want to spill/restore the counter register, and so we don't
// want to use the counter register if the loop contains calls.
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
- J != JE; ++J) {
- if (CallInst *CI = dyn_cast<CallInst>(J)) {
- if (!TM)
- return MadeChange;
- const TargetLowering *TLI = TM->getTargetLowering();
-
- if (Function *F = CI->getCalledFunction()) {
- // Most intrinsics don't become function calls, but some might.
- // sin, cos, exp and log are always calls.
- unsigned Opcode;
- if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
- switch (F->getIntrinsicID()) {
- default: continue;
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::longjmp:
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- case Intrinsic::powi:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::sin:
- case Intrinsic::cos:
- return MadeChange;
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- }
- }
-
- // PowerPC does not use [US]DIVREM or other library calls for
- // operations on regular types which are not otherwise library calls
- // (i.e. soft float or atomics). If adapting for targets that do,
- // additional care is required here.
-
- LibFunc::Func Func;
- if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
- LibInfo->getLibFunc(F->getName(), Func) &&
- LibInfo->hasOptimizedCodeGen(Func)) {
- // Non-read-only functions are never treated as intrinsics.
- if (!CI->onlyReadsMemory())
- return MadeChange;
-
- // Conversion happens only for FP calls.
- if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
- return MadeChange;
-
- switch (Func) {
- default: return MadeChange;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
- continue; // ISD::FABS is never a library call.
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
- Opcode = ISD::FSQRT; break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
- Opcode = ISD::FFLOOR; break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
- Opcode = ISD::FNEARBYINT; break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
- Opcode = ISD::FCEIL; break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
- Opcode = ISD::FRINT; break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
- Opcode = ISD::FTRUNC; break;
- }
-
- MVT VTy =
- TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
- if (VTy == MVT::Other)
- return MadeChange;
-
- if (TLI->isOperationLegalOrCustom(Opcode, VTy))
- continue;
- else if (VTy.isVector() &&
- TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
- continue;
-
- return MadeChange;
- }
- }
-
- return MadeChange;
- } else if (isa<BinaryOperator>(J) &&
- J->getType()->getScalarType()->isPPC_FP128Ty()) {
- // Most operations on ppc_f128 values become calls.
- return MadeChange;
- } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
- isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
- CastInst *CI = cast<CastInst>(J);
- if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
- CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
- return MadeChange;
- } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
- // On PowerPC, indirect jumps use the counter register.
- return MadeChange;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (!TM)
- return MadeChange;
- const TargetLowering *TLI = TM->getTargetLowering();
-
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
- return MadeChange;
- }
- }
- }
+ I != IE; ++I)
+ if (mightUseCTR(TT, *I))
+ return MadeChange;
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -391,7 +398,12 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
return MadeChange;
BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
+
+ // If we don't have a preheader, then insert one. If we already have a
+ // preheader, then we can use it (except if the preheader contains a use of
+ // the CTR register because some such uses might be reordered by the
+ // selection DAG after the mtctr instruction).
+ if (!Preheader || mightUseCTR(TT, Preheader))
Preheader = InsertPreheaderForLoop(L);
if (!Preheader)
return MadeChange;
diff --git a/test/CodeGen/PowerPC/ctrloop-fp64.ll b/test/CodeGen/PowerPC/ctrloop-fp64.ll
index 78974248da..313177c91e 100644
--- a/test/CodeGen/PowerPC/ctrloop-fp64.ll
+++ b/test/CodeGen/PowerPC/ctrloop-fp64.ll
@@ -26,3 +26,35 @@ for.end: ; preds = %for.body
; CHECK: @foo
; CHECK-NOT: mtctr
+@init_value = global double 1.000000e+00, align 8
+@data64 = global [8000 x i64] zeroinitializer, align 8
+
+define i32 @main(i32 %argc, i8** nocapture %argv) {
+entry:
+ %0 = load double* @init_value, align 8
+ %conv = fptosi double %0 to i64
+ %broadcast.splatinsert.i = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %broadcast.splat.i = shufflevector <2 x i64> %broadcast.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ br label %vector.body.i
+
+vector.body.i: ; preds = %vector.body.i, %entry
+ %index.i = phi i32 [ 0, %entry ], [ %index.next.i, %vector.body.i ]
+ %next.gep.i = getelementptr [8000 x i64]* @data64, i32 0, i32 %index.i
+ %1 = bitcast i64* %next.gep.i to <2 x i64>*
+ store <2 x i64> %broadcast.splat.i, <2 x i64>* %1, align 8
+ %next.gep.sum24.i = or i32 %index.i, 2
+ %2 = getelementptr [8000 x i64]* @data64, i32 0, i32 %next.gep.sum24.i
+ %3 = bitcast i64* %2 to <2 x i64>*
+ store <2 x i64> %broadcast.splat.i, <2 x i64>* %3, align 8
+ %index.next.i = add i32 %index.i, 4
+ %4 = icmp eq i32 %index.next.i, 8000
+ br i1 %4, label %_Z4fillIPxxEvT_S1_T0_.exit, label %vector.body.i
+
+_Z4fillIPxxEvT_S1_T0_.exit: ; preds = %vector.body.i
+ ret i32 0
+}
+
+; CHECK: @main
+; CHECK: __fixdfdi
+; CHECK: mtctr
+