summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Christopher <echristo@apple.com> 2011-06-18 00:09:57 +0000
committer Eric Christopher <echristo@apple.com> 2011-06-18 00:09:57 +0000
commit a3071455e540530ecb644a2e59098129f70ce705 (patch)
tree a51df0e3e0ceb2871401063337954c2c46a7a20f
parent ccfae86da1e78e5985bdc9d64d1116c539cf19ba (diff)
download llvm-a3071455e540530ecb644a2e59098129f70ce705.tar.gz
llvm-a3071455e540530ecb644a2e59098129f70ce705.tar.bz2
llvm-a3071455e540530ecb644a2e59098129f70ce705.tar.xz
Fix UMULO support for 2x register width to allow the full
range without requiring a new mulo<mode> libcall that we'd have to create.
Finishes the rest of rdar://9090077 and rdar://9210061.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133318 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 21
-rw-r--r-- test/CodeGen/X86/muloti.ll 47
2 files changed, 66 insertions, 2 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 6797bece13..cbb8da8f18 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2160,6 +2160,27 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
DebugLoc dl = N->getDebugLoc();
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+ // A divide for UMULO will be faster than a function call. Select to
+ // make sure we aren't using 0.
+ SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, VT), ISD::SETNE);
+ SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+ DAG.getConstant(1, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
+ SDValue Overflow;
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
// Replace this with a libcall that will check overflow.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll
index eb9b6460cd..2f0986e831 100644
--- a/test/CodeGen/X86/muloti.ll
+++ b/test/CodeGen/X86/muloti.ll
@@ -2,9 +2,8 @@
%0 = type { i64, i64 }
%1 = type { i128, i1 }
-@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1
-
define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+; CHECK: x
entry:
%tmp16 = zext i64 %a.coerce0 to i128
%tmp11 = zext i64 %a.coerce1 to i128
@@ -33,6 +32,50 @@ nooverflow: ; preds = %entry
ret %0 %tmp24
}
+define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+entry:
+; CHECK: foo
+ %retval = alloca i128, align 16
+ %coerce = alloca i128, align 16
+ %a.addr = alloca i128, align 16
+ %coerce1 = alloca i128, align 16
+ %b.addr = alloca i128, align 16
+ %0 = bitcast i128* %coerce to %0*
+ %1 = getelementptr %0* %0, i32 0, i32 0
+ store i64 %a.coerce0, i64* %1
+ %2 = getelementptr %0* %0, i32 0, i32 1
+ store i64 %a.coerce1, i64* %2
+ %a = load i128* %coerce, align 16
+ store i128 %a, i128* %a.addr, align 16
+ %3 = bitcast i128* %coerce1 to %0*
+ %4 = getelementptr %0* %3, i32 0, i32 0
+ store i64 %b.coerce0, i64* %4
+ %5 = getelementptr %0* %3, i32 0, i32 1
+ store i64 %b.coerce1, i64* %5
+ %b = load i128* %coerce1, align 16
+ store i128 %b, i128* %b.addr, align 16
+ %tmp = load i128* %a.addr, align 16
+ %tmp2 = load i128* %b.addr, align 16
+ %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
+; CHECK: cmov
+; CHECK: divti3
+ %7 = extractvalue %1 %6, 0
+ %8 = extractvalue %1 %6, 1
+ br i1 %8, label %overflow, label %nooverflow
+
+overflow: ; preds = %entry
+ call void @llvm.trap()
+ unreachable
+
+nooverflow: ; preds = %entry
+ store i128 %7, i128* %retval
+ %9 = bitcast i128* %retval to %0*
+ %10 = load %0* %9, align 1
+ ret %0 %10
+}
+
+declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
+
declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
declare void @llvm.trap() nounwind