summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp55
-rw-r--r--test/CodeGen/X86/vec_compare.ll136
2 files changed, 176 insertions, 15 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3564ce3920..296bcf6ed3 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9350,11 +9350,49 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (Swap)
std::swap(Op0, Op1);
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
+ if (FlipSigns) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
- if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
- return SDValue();
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
+ assert(Subtarget->hasSSE2() && "Don't know how to lower!");
+
+ // First cast everything to the right type,
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
+ SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+ SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
+
+ // Create masks for only the low parts/high parts of the 64 bit integers.
+ const int MaskHi[] = { 1, 1, 3, 3 };
+ const int MaskLo[] = { 0, 0, 2, 2 };
+ SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
+ SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
+ SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+ SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
+ Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
@@ -9379,19 +9417,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
}
}
- // Since SSE has no unsigned integer comparisons, we need to flip the sign
- // bits of the inputs before performing those operations.
- if (FlipSigns) {
- EVT EltVT = VT.getVectorElementType();
- SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
- EltVT);
- std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
- SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
- SignBits.size());
- Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
- Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
- }
-
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index b6d91a3f77..85d8b2cea3 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -65,3 +65,139 @@ define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
%D = sext <2 x i1> %C to <2 x i64>
ret <2 x i64> %D
}
+
+define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test7:
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+ %C = icmp sgt <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test8:
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+ %C = icmp slt <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test9:
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+ %C = icmp sge <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test10:
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+ %C = icmp sle <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test11:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+ %C = icmp ugt <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test12:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+ %C = icmp ult <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test13:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+ %C = icmp uge <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}
+
+define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test14:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+ %C = icmp ule <2 x i64> %A, %B
+ %D = sext <2 x i1> %C to <2 x i64>
+ ret <2 x i64> %D
+}