diff options
author | Eric Christopher <echristo@apple.com> | 2011-06-18 00:09:57 +0000 |
---|---|---|
committer | Eric Christopher <echristo@apple.com> | 2011-06-18 00:09:57 +0000 |
commit | a3071455e540530ecb644a2e59098129f70ce705 (patch) | |
tree | a51df0e3e0ceb2871401063337954c2c46a7a20f | |
parent | ccfae86da1e78e5985bdc9d64d1116c539cf19ba (diff) | |
download | llvm-a3071455e540530ecb644a2e59098129f70ce705.tar.gz llvm-a3071455e540530ecb644a2e59098129f70ce705.tar.bz2 llvm-a3071455e540530ecb644a2e59098129f70ce705.tar.xz |
Fix UMULO support for 2x register width to allow the full
range without requiring a new mulo<mode> libcall that we'd
have to create.
Finishes the rest of rdar://9090077 and rdar://9210061
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133318 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 21 | ||||
-rw-r--r-- | test/CodeGen/X86/muloti.ll | 47 |
2 files changed, 66 insertions, 2 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 6797bece13..cbb8da8f18 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2160,6 +2160,27 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); DebugLoc dl = N->getDebugLoc(); + // A divide for UMULO should be faster than a function call. + if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + RHS, DAG.getConstant(0, VT), ISD::SETNE); + SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, + DAG.getConstant(1, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero); + SDValue Overflow; + Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + // Replace this with a libcall that will check overflow. 
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll index eb9b6460cd..2f0986e831 100644 --- a/test/CodeGen/X86/muloti.ll +++ b/test/CodeGen/X86/muloti.ll @@ -2,9 +2,8 @@ %0 = type { i64, i64 } %1 = type { i128, i1 } -@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1 - define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +; CHECK: x entry: %tmp16 = zext i64 %a.coerce0 to i128 %tmp11 = zext i64 %a.coerce1 to i128 @@ -33,6 +32,50 @@ nooverflow: ; preds = %entry ret %0 %tmp24 } +define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp { +entry: +; CHECK: foo + %retval = alloca i128, align 16 + %coerce = alloca i128, align 16 + %a.addr = alloca i128, align 16 + %coerce1 = alloca i128, align 16 + %b.addr = alloca i128, align 16 + %0 = bitcast i128* %coerce to %0* + %1 = getelementptr %0* %0, i32 0, i32 0 + store i64 %a.coerce0, i64* %1 + %2 = getelementptr %0* %0, i32 0, i32 1 + store i64 %a.coerce1, i64* %2 + %a = load i128* %coerce, align 16 + store i128 %a, i128* %a.addr, align 16 + %3 = bitcast i128* %coerce1 to %0* + %4 = getelementptr %0* %3, i32 0, i32 0 + store i64 %b.coerce0, i64* %4 + %5 = getelementptr %0* %3, i32 0, i32 1 + store i64 %b.coerce1, i64* %5 + %b = load i128* %coerce1, align 16 + store i128 %b, i128* %b.addr, align 16 + %tmp = load i128* %a.addr, align 16 + %tmp2 = load i128* %b.addr, align 16 + %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2) +; CHECK: cmov +; CHECK: divti3 + %7 = extractvalue %1 %6, 0 + %8 = extractvalue %1 %6, 1 + br i1 %8, label %overflow, label %nooverflow + +overflow: ; preds = %entry + call void @llvm.trap() + unreachable + +nooverflow: ; preds = %entry + store i128 %7, i128* %retval + %9 = bitcast i128* %retval to %0* + %10 = load %0* %9, align 1 + ret %0 %10 +} + +declare %1 @llvm.umul.with.overflow.i128(i128, 
i128) nounwind readnone + declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone declare void @llvm.trap() nounwind |