summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 7
-rw-r--r-- test/CodeGen/X86/vec_compare-sse4.ll | 35
2 files changed, 42 insertions, 0 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cab9161f5a..fc2d5ce64e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8555,6 +8555,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Swap)
std::swap(Op0, Op1);
+ // Check that the operation in question is available (most are plain SSE2,
+ // but PCMPGTQ and PCMPEQQ have different requirements).
+ if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ return SDValue();
+
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
diff --git a/test/CodeGen/X86/vec_compare-sse4.ll b/test/CodeGen/X86/vec_compare-sse4.ll
new file mode 100644
index 0000000000..b4a4a4cfa7
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare-sse4.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86 -mattr=-sse3,+sse2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -march=x86 -mattr=-sse42,+sse41 | FileCheck %s -check-prefix=SSE41
+; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s -check-prefix=SSE42
+
+define <2 x i64> @test1(<2 x i64> %A, <2 x i64> %B) nounwind { ; signed greater-than on two 64-bit lanes; only the SSE4.2 configuration is expected to emit pcmpgtq
+; SSE42: test1:
+; SSE42: pcmpgtq
+; SSE42: ret
+; SSE41: test1:
+; SSE41-NOT: pcmpgtq
+; SSE41: ret
+; SSE2: test1:
+; SSE2-NOT: pcmpgtq
+; SSE2: ret
+
+ %C = icmp sgt <2 x i64> %A, %B ; per-lane signed compare, producing a <2 x i1> mask
+ %D = sext <2 x i1> %C to <2 x i64> ; sign-extend each i1 so a true lane becomes all-ones and a false lane becomes zero
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test2(<2 x i64> %A, <2 x i64> %B) nounwind { ; equality on two 64-bit lanes; the SSE4.1 and SSE4.2 configurations are expected to emit pcmpeqq, the plain SSE2 one is not
+; SSE42: test2:
+; SSE42: pcmpeqq
+; SSE42: ret
+; SSE41: test2:
+; SSE41: pcmpeqq
+; SSE41: ret
+; SSE2: test2:
+; SSE2-NOT: pcmpeqq
+; SSE2: ret
+
+ %C = icmp eq <2 x i64> %A, %B ; per-lane equality compare, producing a <2 x i1> mask
+ %D = sext <2 x i1> %C to <2 x i64> ; sign-extend each i1 so a true lane becomes all-ones and a false lane becomes zero
+ ret <2 x i64> %D
+}