summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eli Friedman <eli.friedman@gmail.com> 2011-11-01 21:18:39 +0000
committer: Eli Friedman <eli.friedman@gmail.com> 2011-11-01 21:18:39 +0000
commit: f6aa6b12f132b41b1337fef14110696458a0f323 (patch)
tree: dcc7831b3c74c83f80c67910e8a1177d05601ab7
parent: fadfd7b9776c723357894af00442a35d0a4d0acf (diff)
download: llvm-f6aa6b12f132b41b1337fef14110696458a0f323.tar.gz
llvm-f6aa6b12f132b41b1337fef14110696458a0f323.tar.bz2
llvm-f6aa6b12f132b41b1337fef14110696458a0f323.tar.xz
Teach the x86 backend a couple tricks for dealing with v16i8 sra by a constant splat value. Fixes PR11289.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143498 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 18
-rw-r--r-- test/CodeGen/X86/x86-shifts.ll 20
2 files changed, 38 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2ddb1b7163..c9b642242a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -966,6 +966,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i8, Custom);
}
if (Subtarget->hasSSE42() || Subtarget->hasAVX())
@@ -9994,6 +9995,23 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
+
+ if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
}
}
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index 3e44eafa24..20bccab8ff 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -170,3 +170,23 @@ define <16 x i8> @shr9(<16 x i8> %A) nounwind {
; CHECK: pand
; CHECK: ret
}
+
+define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
+ %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %B
+; CHECK: sra_v16i8_7:
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
+ %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %B
+; CHECK: sra_v16i8:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: ret
+}