summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-02-26 22:48:07 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-02-26 22:48:07 +0000
commit7466678003f38f985d5b2dffd0917643137b11cf (patch)
tree8a0315a7b150080615e132dfa9cfc54c82fa42b1
parent981b1c4c62054e4e39570a9230c0318f13f9a0a9 (diff)
downloadllvm-7466678003f38f985d5b2dffd0917643137b11cf.tar.gz
llvm-7466678003f38f985d5b2dffd0917643137b11cf.tar.bz2
llvm-7466678003f38f985d5b2dffd0917643137b11cf.tar.xz
Add some DAGCombines for (adde 0, 0, glue), which are useful to optimize legalized code for large integer arithmetic.
1. Inform users of ADDEs with two 0 operands that it never sets carry 2. Fold other ADDs or ADDCs into the ADDE if possible It would be neat if we could do the same thing for SETCC+ADD eventually, but we can't do that in target independent code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126557 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp38
-rw-r--r--lib/Target/README.txt37
-rw-r--r--test/CodeGen/X86/adde-carry.ll26
3 files changed, 64 insertions, 37 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9cc70a3092..943731fe5a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1290,6 +1290,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
return SDValue();
}
+/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
+/// return 0 or 1 depending on the carry flag.
+static bool isCarryMaterialization(SDValue V) {
+ if (V.getOpcode() != ISD::ADDE)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
+ return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
+}
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1453,6 +1463,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
+ // add (adde 0, 0, glue), X -> adde X, 0, glue
+ if (N0->hasOneUse() && isCarryMaterialization(N0))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
+ DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
+ N0.getOperand(2));
+
+ // add X, (adde 0, 0, glue) -> adde X, 0, glue
+ if (N1->hasOneUse() && isCarryMaterialization(N1))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
+ DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
+ N1.getOperand(2));
+
return SDValue();
}
@@ -1496,6 +1518,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
N->getDebugLoc(), MVT::Glue));
}
+ // addc (adde 0, 0, glue), X -> adde X, 0, glue
+ if (N0->hasOneUse() && isCarryMaterialization(N0))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
+ DAG.getConstant(0, VT), N0.getOperand(2));
+
+ // addc X, (adde 0, 0, glue) -> adde X, 0, glue
+ if (N1->hasOneUse() && isCarryMaterialization(N1))
+ return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
+ DAG.getConstant(0, VT), N1.getOperand(2));
+
return SDValue();
}
@@ -1506,6 +1538,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ // If both operands are null we know that carry out will always be false.
+ if (N0C && N0C->isNullValue() && N0 == N1)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE,
+ N->getDebugLoc(),
+ MVT::Glue));
+
// canonicalize constant to RHS
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index f85914b61d..e01df01043 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1780,43 +1780,6 @@ case it choses instead to keep the max operation obvious.
//===---------------------------------------------------------------------===//
-Take the following testcase on x86-64 (similar testcases exist for all targets
-with addc/adde):
-
-define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
-i64 %c) nounwind {
-entry:
- %0 = zext i64 %a to i128 ; <i128> [#uses=1]
- %1 = zext i64 %b to i128 ; <i128> [#uses=1]
- %2 = add i128 %1, %0 ; <i128> [#uses=2]
- %3 = zext i64 %c to i128 ; <i128> [#uses=1]
- %4 = shl i128 %3, 64 ; <i128> [#uses=1]
- %5 = add i128 %4, %2 ; <i128> [#uses=1]
- %6 = lshr i128 %5, 64 ; <i128> [#uses=1]
- %7 = trunc i128 %6 to i64 ; <i64> [#uses=1]
- store i64 %7, i64* %s, align 8
- %8 = trunc i128 %2 to i64 ; <i64> [#uses=1]
- store i64 %8, i64* %t, align 8
- ret void
-}
-
-Generated code:
- addq %rcx, %rdx
- sbbq %rax, %rax
- subq %rax, %r8
- movq %r8, (%rdi)
- movq %rdx, (%rsi)
- ret
-
-Expected code:
- addq %rcx, %rdx
- adcq $0, %r8
- movq %r8, (%rdi)
- movq %rdx, (%rsi)
- ret
-
-//===---------------------------------------------------------------------===//
-
Switch lowering generates less than ideal code for the following switch:
define void @a(i32 %x) nounwind {
entry:
diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/adde-carry.ll
new file mode 100644
index 0000000000..98c4f99343
--- /dev/null
+++ b/test/CodeGen/X86/adde-carry.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64
+; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32
+
+define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+ %0 = zext i64 %a to i128
+ %1 = zext i64 %b to i128
+ %2 = add i128 %1, %0
+ %3 = zext i64 %c to i128
+ %4 = shl i128 %3, 64
+ %5 = add i128 %4, %2
+ %6 = lshr i128 %5, 64
+ %7 = trunc i128 %6 to i64
+ store i64 %7, i64* %s, align 8
+ %8 = trunc i128 %2 to i64
+ store i64 %8, i64* %t, align 8
+ ret void
+
+; CHECK-32: addl
+; CHECK-32: adcl
+; CHECK-32: adcl $0
+; CHECK-32: adcl $0
+
+; CHECK-64: addq
+; CHECK-64: adcq $0
+}