summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2012-10-13 10:39:49 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2012-10-13 10:39:49 +0000
commit444dccecfc6c6d0dad4f400141a12f9ca76943d8 (patch)
treecf6c680b2f5dcd504d8c2e431788672778843a34 /lib/Target/X86/X86ISelLowering.cpp
parentc2fcf1a67126a079e34f04bd09a50ae85fdd9469 (diff)
downloadllvm-444dccecfc6c6d0dad4f400141a12f9ca76943d8.tar.gz
llvm-444dccecfc6c6d0dad4f400141a12f9ca76943d8.tar.bz2
llvm-444dccecfc6c6d0dad4f400141a12f9ca76943d8.tar.xz
X86: Promote i8 cmov when both operands are coming from truncates of the same width.
X86 doesn't have i8 cmovs so isel would emit a branch. Emitting branches at this level is often not a good idea because it's too late for many optimizations to kick in. This solution doesn't add any extensions (truncs are free) and tries to avoid introducing partial register stalls by filtering direct copyfromregs. I'm seeing a ~10% speedup on reading a random .png file with libpng15 via graphicsmagick on x86_64/westmere, but YMMV depending on the microarchitecture. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165868 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp15
1 files changed, 15 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1580935fb2..b7a9f6f317 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9148,6 +9148,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // X86 doesn't have an i8 cmov. If both operands are the result of a truncate
+ // widen the cmov and push the truncate through. This avoids introducing a new
+ // branch during isel and doesn't add any extensions.
+ if (Op.getValueType() == MVT::i8 &&
+ Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+ SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
+ if (T1.getValueType() == T2.getValueType() &&
+ // Blacklist CopyFromReg to avoid partial register stalls.
+ T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
+ SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T1, T2, CC, Cond);
+ return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+ }
+ }
+
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);