author    Hal Finkel <hfinkel@anl.gov>  2013-05-16 19:58:38 +0000
committer Hal Finkel <hfinkel@anl.gov>  2013-05-16 19:58:38 +0000
commit    c482454e3cc2a33a2cf2d1cf0881c7c5e2641c80 (patch)
tree      6880887689abe3c97fd79a896ab470b8da4090b7
parent    02e168003f45cf8e0a277c6b8c85c1a3032b1dec (diff)
Create a new preheader in PPCCTRLoops to avoid counter register clobbers
Some IR-level instructions (such as FP <-> i64 conversions) are not chained w.r.t. the mtctr intrinsic and yet may become function calls that clobber the counter register. At the selection-DAG level, these might be reordered with the mtctr intrinsic, causing miscompiles. To avoid this situation, if an existing preheader has instructions that might use the counter register, create a new preheader for the mtctr intrinsic. This extra block will be re-merged with the old preheader at the MI level, but it will prevent unwanted reordering at the selection-DAG level.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182045 91177308-0d34-0410-b5e6-96231b3b80d8
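For illustration (a reduced sketch, not part of this commit; @init_value, @sink, and @example are hypothetical names mirroring the new test below), this is the kind of IR shape the new check guards against on 32-bit PowerPC: an fptosi in the loop preheader that lowers to a __fixdfdi libcall, clobbering CTR, while remaining unordered with respect to the mtctr intrinsic placed in the same block.

@init_value = global double 1.000000e+00, align 8
@sink = global i64 0, align 8

define void @example() {
entry:                                       ; the loop preheader
  %v = load double* @init_value, align 8
  %conv = fptosi double %v to i64            ; on 32-bit PPC this lowers to a call to
                                             ; __fixdfdi, which clobbers CTR and is not
                                             ; chained w.r.t. the mtctr intrinsic
  store i64 %conv, i64* @sink, align 8
  br label %loop

loop:                                        ; candidate for a CTR (bdnz) loop
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %i.next = add i32 %i, 1
  %exitcond = icmp eq i32 %i.next, 8000
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

With this change, mightUseCTR() flags the conversion in the would-be preheader, so InsertPreheaderForLoop() splits off a fresh block to hold the mtctr; the two blocks are merged back together at the MI level, as described above.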
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp    | 318
-rw-r--r--  test/CodeGen/PowerPC/ctrloop-fp64.ll  |  32
2 files changed, 197 insertions, 153 deletions
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 6bbd571894..5d001fc072 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -96,6 +96,7 @@ namespace {
SmallVectorImpl<BasicBlock*> &SplitPreds,
Loop *L);
+ bool mightUseCTR(const Triple &TT, BasicBlock *BB);
bool convertToCTRLoop(Loop *L);
private:
PPCTargetMachine *TM;
@@ -143,6 +144,161 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
return MadeChange;
}
+bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ }
+ }
+
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ Opcode = ISD::FTRUNC; break;
+ }
+
+ MVT VTy =
+ TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ if (VTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, VTy))
+ continue;
+ else if (VTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
+ continue;
+
+ return true;
+ }
+ }
+
+ return true;
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ (TT.isArch32Bit() &&
+ (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
+ CI->getDestTy()->getScalarType()->isIntegerTy(64))
+ ))
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (TLI->supportJumpTables() &&
+ SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
@@ -173,158 +329,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// We don't want to spill/restore the counter register, and so we don't
// want to use the counter register if the loop contains calls.
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
- J != JE; ++J) {
- if (CallInst *CI = dyn_cast<CallInst>(J)) {
- if (!TM)
- return MadeChange;
- const TargetLowering *TLI = TM->getTargetLowering();
-
- if (Function *F = CI->getCalledFunction()) {
- // Most intrinsics don't become function calls, but some might.
- // sin, cos, exp and log are always calls.
- unsigned Opcode;
- if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
- switch (F->getIntrinsicID()) {
- default: continue;
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::longjmp:
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- case Intrinsic::powi:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::sin:
- case Intrinsic::cos:
- return MadeChange;
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- }
- }
-
- // PowerPC does not use [US]DIVREM or other library calls for
- // operations on regular types which are not otherwise library calls
- // (i.e. soft float or atomics). If adapting for targets that do,
- // additional care is required here.
-
- LibFunc::Func Func;
- if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
- LibInfo->getLibFunc(F->getName(), Func) &&
- LibInfo->hasOptimizedCodeGen(Func)) {
- // Non-read-only functions are never treated as intrinsics.
- if (!CI->onlyReadsMemory())
- return MadeChange;
-
- // Conversion happens only for FP calls.
- if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
- return MadeChange;
-
- switch (Func) {
- default: return MadeChange;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
- continue; // ISD::FABS is never a library call.
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
- Opcode = ISD::FSQRT; break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
- Opcode = ISD::FFLOOR; break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
- Opcode = ISD::FNEARBYINT; break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
- Opcode = ISD::FCEIL; break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
- Opcode = ISD::FRINT; break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
- Opcode = ISD::FTRUNC; break;
- }
-
- MVT VTy =
- TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
- if (VTy == MVT::Other)
- return MadeChange;
-
- if (TLI->isOperationLegalOrCustom(Opcode, VTy))
- continue;
- else if (VTy.isVector() &&
- TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
- continue;
-
- return MadeChange;
- }
- }
-
- return MadeChange;
- } else if (isa<BinaryOperator>(J) &&
- J->getType()->getScalarType()->isPPC_FP128Ty()) {
- // Most operations on ppc_f128 values become calls.
- return MadeChange;
- } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
- isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
- CastInst *CI = cast<CastInst>(J);
- if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
- CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- (TT.isArch32Bit() &&
- (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
- CI->getDestTy()->getScalarType()->isIntegerTy(64))
- ))
- return MadeChange;
- } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
- // On PowerPC, indirect jumps use the counter register.
- return MadeChange;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (!TM)
- return MadeChange;
- const TargetLowering *TLI = TM->getTargetLowering();
-
- if (TLI->supportJumpTables() &&
- SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
- return MadeChange;
- }
- }
- }
+ I != IE; ++I)
+ if (mightUseCTR(TT, *I))
+ return MadeChange;
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -391,7 +398,12 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
return MadeChange;
BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader)
+
+ // If we don't have a preheader, then insert one. If we already have a
+ // preheader, then we can use it (except if the preheader contains a use of
+ // the CTR register because some such uses might be reordered by the
+ // selection DAG after the mtctr instruction).
+ if (!Preheader || mightUseCTR(TT, Preheader))
Preheader = InsertPreheaderForLoop(L);
if (!Preheader)
return MadeChange;
diff --git a/test/CodeGen/PowerPC/ctrloop-fp64.ll b/test/CodeGen/PowerPC/ctrloop-fp64.ll
index 78974248da..313177c91e 100644
--- a/test/CodeGen/PowerPC/ctrloop-fp64.ll
+++ b/test/CodeGen/PowerPC/ctrloop-fp64.ll
@@ -26,3 +26,35 @@ for.end: ; preds = %for.body
; CHECK: @foo
; CHECK-NOT: mtctr
+@init_value = global double 1.000000e+00, align 8
+@data64 = global [8000 x i64] zeroinitializer, align 8
+
+define i32 @main(i32 %argc, i8** nocapture %argv) {
+entry:
+ %0 = load double* @init_value, align 8
+ %conv = fptosi double %0 to i64
+ %broadcast.splatinsert.i = insertelement <2 x i64> undef, i64 %conv, i32 0
+ %broadcast.splat.i = shufflevector <2 x i64> %broadcast.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ br label %vector.body.i
+
+vector.body.i: ; preds = %vector.body.i, %entry
+ %index.i = phi i32 [ 0, %entry ], [ %index.next.i, %vector.body.i ]
+ %next.gep.i = getelementptr [8000 x i64]* @data64, i32 0, i32 %index.i
+ %1 = bitcast i64* %next.gep.i to <2 x i64>*
+ store <2 x i64> %broadcast.splat.i, <2 x i64>* %1, align 8
+ %next.gep.sum24.i = or i32 %index.i, 2
+ %2 = getelementptr [8000 x i64]* @data64, i32 0, i32 %next.gep.sum24.i
+ %3 = bitcast i64* %2 to <2 x i64>*
+ store <2 x i64> %broadcast.splat.i, <2 x i64>* %3, align 8
+ %index.next.i = add i32 %index.i, 4
+ %4 = icmp eq i32 %index.next.i, 8000
+ br i1 %4, label %_Z4fillIPxxEvT_S1_T0_.exit, label %vector.body.i
+
+_Z4fillIPxxEvT_S1_T0_.exit: ; preds = %vector.body.i
+ ret i32 0
+}
+
+; CHECK: @main
+; CHECK: __fixdfdi
+; CHECK: mtctr
+