summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOwen Anderson <resistor@mac.com>2010-11-08 23:21:22 +0000
committerOwen Anderson <resistor@mac.com>2010-11-08 23:21:22 +0000
commitc24cb3551ed66830b53362f593269873cb53a0c4 (patch)
treefcfad3ffe5894ca621031ee680dce6de429780a8
parent2c2304c62346e17f4963f6b609dc7ae6f8b91962 (diff)
downloadllvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.gz
llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.bz2
llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.xz
Add support for ARM's specialized vector-compare-against-zero instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118453 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp33
-rw-r--r--lib/Target/ARM/ARMISelLowering.h5
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td54
-rw-r--r--test/CodeGen/ARM/vceq.ll11
-rw-r--r--test/CodeGen/ARM/vcge.ll22
-rw-r--r--test/CodeGen/ARM/vcgt.ll22
-rw-r--r--test/MC/ARM/neon-cmp-encoding.s11
7 files changed, 134 insertions, 24 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index fa3170cc2e..6577a246e1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3074,7 +3074,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (Swap)
std::swap(Op0, Op1);
- SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ // If one of the operands is a constant vector zero, attempt to fold the
+ // comparison to a specialized compare-against-zero form.
+ SDValue SingleOp;
+ if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+ SingleOp = Op0;
+ else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+ if (Opc == ARMISD::VCGE)
+ Opc = ARMISD::VCLEZ;
+ else if (Opc == ARMISD::VCGT)
+ Opc = ARMISD::VCLTZ;
+ SingleOp = Op1;
+ }
+
+ SDValue Result;
+ if (SingleOp.getNode()) {
+ switch (Opc) {
+ case ARMISD::VCEQ:
+ Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGE:
+ Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLEZ:
+ Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+ case ARMISD::VCGT:
+ Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+ case ARMISD::VCLTZ:
+ Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+ default:
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
+ } else {
+ Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+ }
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index dd2a1ad14f..4a4b83db0f 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -87,9 +87,14 @@ namespace llvm {
PRELOAD, // Preload
VCEQ, // Vector compare equal.
+ VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
+ VCGEZ, // Vector compare greater than or equal to zero.
+ VCLEZ, // Vector compare less than or equal to zero.
VCGEU, // Vector compare unsigned greater than or equal.
VCGT, // Vector compare greater than.
+ VCGTZ, // Vector compare greater than zero.
+ VCLTZ, // Vector compare less than zero.
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e64fefc6ca..d3e83f42b1 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -16,11 +16,17 @@
//===----------------------------------------------------------------------===//
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
@@ -2150,36 +2156,44 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// First with only element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt,
- string asm> {
+ string asm, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "8"), asm, "", []>;
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set DPR:$dst, (v8i8 (OpNode (v8i8 DPR:$src))))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "16"), asm, "", []>;
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set DPR:$dst, (v4i16 (OpNode (v4i16 DPR:$src))))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, !strconcat(Dt, "32"), asm, "", []>;
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set DPR:$dst, (v2i32 (OpNode (v2i32 DPR:$src))))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$dst), (ins DPR:$src), NoItinerary,
- opc, "f32", asm, "", []> {
+ opc, "f32", asm, "",
+ [(set DPR:$dst, (v2f32 (OpNode (v2f32 DPR:$src))))]> {
let Inst{10} = 1; // overwrite F = 1
}
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "8"), asm, "", []>;
+ opc, !strconcat(Dt, "8"), asm, "",
+ [(set QPR:$dst, (v16i8 (OpNode (v16i8 QPR:$src))))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "16"), asm, "", []>;
+ opc, !strconcat(Dt, "16"), asm, "",
+ [(set QPR:$dst, (v8i16 (OpNode (v8i16 QPR:$src))))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, !strconcat(Dt, "32"), asm, "", []>;
+ opc, !strconcat(Dt, "32"), asm, "",
+ [(set QPR:$dst, (v4i32 (OpNode (v4i32 QPR:$src))))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$dst), (ins QPR:$src), NoItinerary,
- opc, "f32", asm, "", []> {
+ opc, "f32", asm, "",
+ [(set QPR:$dst, (v4f32 (OpNode (v4f32 QPR:$src))))]> {
let Inst{10} = 1; // overwrite F = 1
}
}
@@ -3220,9 +3234,9 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
-// For disassembly only.
+
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$dst, $src, #0">;
+ "$dst, $src, #0", NEONvceqz>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -3233,14 +3247,11 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
- "$dst, $src, #0">;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+ "$dst, $src, #0", NEONvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
- "$dst, $src, #0">;
+ "$dst, $src, #0", NEONvclez>;
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -3251,14 +3262,11 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
- "$dst, $src, #0">;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+ "$dst, $src, #0", NEONvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
- "$dst, $src, #0">;
+ "$dst, $src, #0", NEONvcltz>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index e4787518e7..051c349a06 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -79,3 +79,14 @@ define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
}
+
+define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
+;CHECK: vceqi8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vceq.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 2c161113c1..f190931f1b 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -160,3 +160,25 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcge.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcle.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index e3318cd206..7663da3c61 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -173,3 +173,25 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcgt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vclt.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
+}
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index 27ebc8a936..da59ee7209 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -102,3 +102,14 @@
vtst.16 q8, q8, q9
@ CHECK: vtst.32 q8, q8, q9 @ encoding: [0xf2,0x08,0x60,0xf2]
vtst.32 q8, q8, q9
+
+@ CHECK: vceq.i8 d16, d16, #0 @ encoding: [0x20,0x01,0xf1,0xf3]
+ vceq.i8 d16, d16, #0
+@ CHECK: vcge.s8 d16, d16, #0 @ encoding: [0xa0,0x00,0xf1,0xf3]
+ vcge.s8 d16, d16, #0
+@ CHECK: vcle.s8 d16, d16, #0 @ encoding: [0xa0,0x01,0xf1,0xf3]
+ vcle.s8 d16, d16, #0
+@ CHECK: vcgt.s8 d16, d16, #0 @ encoding: [0x20,0x00,0xf1,0xf3]
+ vcgt.s8 d16, d16, #0
+@ CHECK: vclt.s8 d16, d16, #0 @ encoding: [0x20,0x02,0xf1,0xf3]
+ vclt.s8 d16, d16, #0