InstCombine: Turn (zext A) == (B & (1<<X)-1) into A == (trunc B), narrowing the compare.

This saves a cast, and zext is more expensive than trunc on platforms with subreg support. The pattern occurs in the BSD implementation of memchr(3); see PR12750. On the synthetic benchmark from that bug, stupid_memchr and bsd_memchr now have the same performance when neither function is inlined: stupid_memchr: 323.0us, bsd_memchr: 321.0us, memchr: 479.0us, where memchr is the llvm-gcc compiled bsd_memchr from OS X Lion's libc. When inlining is enabled, bsd_memchr still regresses down to the llvm-gcc memchr time. I haven't fully understood the issue yet; something is grossly mangling the loop after inlining. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158297 91177308-0d34-0410-b5e6-96231b3b80d8
author: Benjamin Kramer <benny.kra@googlemail.com> 2012-06-10 20:35:00 +0000
committer: Benjamin Kramer <benny.kra@googlemail.com> 2012-06-10 20:35:00 +0000
commit: 66821d902040f838a97bf04a4b910fdc11ba49a9 (patch)
tree: 4e71e22e3b9c24402a519c93918d9d17ff9ff548
parent: 71ffcfe9f8602785d4d9133e029c37f2fac78cc3 (diff)
download: llvm-66821d902040f838a97bf04a4b910fdc11ba49a9.tar.gz
llvm-66821d902040f838a97bf04a4b910fdc11ba49a9.tar.bz2
llvm-66821d902040f838a97bf04a4b910fdc11ba49a9.tar.xz
2 files changed, 45 insertions, 1 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7bbdd450f1..3fa0aba012 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2580,10 +2580,32 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       }
     }
 
+    // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
+    ConstantInt *Cst1;
+    if (Op0->hasOneUse() &&
+        match(Op0, m_ZExt(m_Value(A))) &&
+        match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) {
+      APInt Pow2 = Cst1->getValue() + 1;
+      if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
+          Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
+        return new ICmpInst(I.getPredicate(), A,
+                            Builder->CreateTrunc(B, A->getType()));
+    }
+
+    // Transform (B & (1<<X)-1) == (zext A) --> A == (trunc B)
+    if (Op1->hasOneUse() &&
+        match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
+        match(Op1, m_ZExt(m_Value(A)))) {
+      APInt Pow2 = Cst1->getValue() + 1;
+      if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
+          Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
+        return new ICmpInst(I.getPredicate(), A,
+                            Builder->CreateTrunc(B, A->getType()));
+    }
+
     // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
     // "icmp (and X, mask), cst"
     uint64_t ShAmt = 0;
-    ConstantInt *Cst1;
     if (Op0->hasOneUse() &&
         match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
                                            m_ConstantInt(ShAmt))))) &&
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index a9ae221d8f..eaff87d695 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -637,3 +637,25 @@ define i1 @test62(i8* %a) {
 ; CHECK: @test62
 ; CHECK-NEXT: ret i1 true
 }
+
+define i1 @test63(i8 %a, i32 %b) nounwind {
+  %z = zext i8 %a to i32
+  %t = and i32 %b, 255
+  %c = icmp eq i32 %z, %t
+  ret i1 %c
+; CHECK: @test63
+; CHECK-NEXT: %1 = trunc i32 %b to i8
+; CHECK-NEXT: %c = icmp eq i8 %1, %a
+; CHECK-NEXT: ret i1 %c
+}
+
+define i1 @test64(i8 %a, i32 %b) nounwind {
+  %t = and i32 %b, 255
+  %z = zext i8 %a to i32
+  %c = icmp eq i32 %t, %z
+  ret i1 %c
+; CHECK: @test64
+; CHECK-NEXT: %1 = trunc i32 %b to i8
+; CHECK-NEXT: %c = icmp eq i8 %1, %a
+; CHECK-NEXT: ret i1 %c
+}
author	Benjamin Kramer <benny.kra@googlemail.com>	2012-06-10 20:35:00 +0000
committer	Benjamin Kramer <benny.kra@googlemail.com>	2012-06-10 20:35:00 +0000
commit	66821d902040f838a97bf04a4b910fdc11ba49a9 (patch)
tree	4e71e22e3b9c24402a519c93918d9d17ff9ff548
parent	71ffcfe9f8602785d4d9133e029c37f2fac78cc3 (diff)
download	llvm-66821d902040f838a97bf04a4b910fdc11ba49a9.tar.gz llvm-66821d902040f838a97bf04a4b910fdc11ba49a9.tar.bz2 llvm-66821d902040f838a97bf04a4b910fdc11ba49a9.tar.xz