summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-04-22 18:47:44 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-04-22 18:47:44 +0000
commite7cf062537e898f830565db5dbf99ae9c928399e (patch)
tree3c87d0506e38a3039f8e6df7fc0ed061d232dd8c /lib
parenteac0c9dc7759b013bbe831ace1afa37bc46915c6 (diff)
downloadllvm-e7cf062537e898f830565db5dbf99ae9c928399e.tar.gz
llvm-e7cf062537e898f830565db5dbf99ae9c928399e.tar.bz2
llvm-e7cf062537e898f830565db5dbf99ae9c928399e.tar.xz
DAGCombine: fold "(zext x) == C" into "x == (trunc C)" if the trunc is lossless.
On x86 this allows folding a load into the cmp, greatly reducing register pressure: "movzbl (%rdi), %eax; cmpl $47, %eax" -> "cmpb $47, (%rdi)". This shaves 8k off gcc.o on i386. I'll leave applying the patch in README.txt to Chris :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130005 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp36
-rw-r--r--lib/Target/README.txt31
2 files changed, 37 insertions, 30 deletions
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 843ed96cba..15606af787 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1916,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
}
+ // (zext x) == C --> x == (trunc C)
+ if (DCI.isBeforeLegalize() && N0->hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreZExt;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreZExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreZExt = N0->getOperand(0);
+ }
+ } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreZExt = N0;
+ }
+ }
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+ }
+ }
+
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 5032b2bbbb..c345ce50c0 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2259,34 +2259,6 @@ icmp transform.
//===---------------------------------------------------------------------===//
-These functions:
-int foo(int *X) {
- if ((*X & 255) == 47)
- bar();
-}
-int foo2(int X) {
- if ((X & 255) == 47)
- bar();
-}
-
-codegen to:
-
- movzbl (%rdi), %eax
- cmpl $47, %eax
- jne LBB0_2
-
-and:
- movzbl %dil, %eax
- cmpl $47, %eax
- jne LBB1_2
-
-If a dag combine shrunk the compare to a byte compare, then we'd fold the load
-in the first example, and eliminate the movzbl in the second, saving a register.
-This can be a target independent dag combine that works on ISD::SETCC, it would
-catch this before the legalize ops pass.
-
-//===---------------------------------------------------------------------===//
-
We should optimize this:
%tmp = load i16* %arrayidx, align 4, !tbaa !0
@@ -2329,8 +2301,7 @@ Index: InstCombine/InstCombineCompares.cpp
{
-but we can't do that until the dag combine above is added. Not having this
-is blocking resolving PR6627.
+Not having this is blocking resolving PR6627.
//===---------------------------------------------------------------------===//