Add support for ARM's specialized vector-compare-against-zero instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118453 91177308-0d34-0410-b5e6-96231b3b80d8
author: Owen Anderson <resistor@mac.com> 2010-11-08 23:21:22 +0000
committer: Owen Anderson <resistor@mac.com> 2010-11-08 23:21:22 +0000
commit: c24cb3551ed66830b53362f593269873cb53a0c4 (patch)
tree: fcfad3ffe5894ca621031ee680dce6de429780a8
parent: 2c2304c62346e17f4963f6b609dc7ae6f8b91962 (diff)
download: llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.gz
llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.bz2
llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.xz
7 files changed, 134 insertions, 24 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index fa3170cc2e..6577a246e1 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3074,7 +3074,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   if (Swap)
     std::swap(Op0, Op1);
 
-  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+  // If one of the operands is a constant vector zero, attempt to fold the
+  // comparison to a specialized compare-against-zero form.
+  SDValue SingleOp;
+  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+    SingleOp = Op0;
+  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+    if (Opc == ARMISD::VCGE)
+      Opc = ARMISD::VCLEZ;
+    else if (Opc == ARMISD::VCGT)
+      Opc = ARMISD::VCLTZ;
+    SingleOp = Op1;
+  }
+  
+  SDValue Result;
+  if (SingleOp.getNode()) {
+    switch (Opc) {
+    case ARMISD::VCEQ:
+      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+    case ARMISD::VCGE:
+      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+    case ARMISD::VCLEZ:
+      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+    case ARMISD::VCGT:
+      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+    case ARMISD::VCLTZ:
+      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+    default:
+      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+    }
+  } else {
+     Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+  }
 
   if (Invert)
     Result = DAG.getNOT(dl, Result, VT);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index dd2a1ad14f..4a4b83db0f 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -87,9 +87,14 @@ namespace llvm {
       PRELOAD,      // Preload
       
       VCEQ,         // Vector compare equal.
+      VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.
+      VCGEZ,        // Vector compare greater than or equal to zero.
+      VCLEZ,        // Vector compare less than or equal to zero.
       VCGEU,        // Vector compare unsigned greater than or equal.
       VCGT,         // Vector compare greater than.
+      VCGTZ,        // Vector compare greater than zero.
+      VCLTZ,        // Vector compare less than zero.
       VCGTU,        // Vector compare unsigned greater than.
       VTST,         // Vector test bits.
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e64fefc6ca..d3e83f42b1 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -16,11 +16,17 @@
 //===----------------------------------------------------------------------===//
 
 def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;
 
 def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
 def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
 def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
 def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
 def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
 
@@ -2150,36 +2156,44 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
 // First with only element sizes of 8, 16 and 32 bits:
 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4, string opc, string Dt,
-                       string asm> {
+                       string asm, SDNode OpNode> {
   // 64-bit vector types.
   def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                   (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "8"), asm, "", []>;
+                  opc, !strconcat(Dt, "8"), asm, "",
+                  [(set DPR:$dst, (v8i8 (OpNode (v8i8 DPR:$src))))]>;
   def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                   (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "16"), asm, "", []>;
+                  opc, !strconcat(Dt, "16"), asm, "",
+                  [(set DPR:$dst, (v4i16 (OpNode (v4i16 DPR:$src))))]>;
   def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                   (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "32"), asm, "", []>;
+                  opc, !strconcat(Dt, "32"), asm, "",
+                  [(set DPR:$dst, (v2i32 (OpNode (v2i32 DPR:$src))))]>;
   def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                   (outs DPR:$dst), (ins DPR:$src), NoItinerary,
-                  opc, "f32", asm, "", []> {
+                  opc, "f32", asm, "",
+                  [(set DPR:$dst, (v2f32 (OpNode (v2f32 DPR:$src))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
 
   // 128-bit vector types.
   def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                   (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "8"), asm, "", []>;
+                  opc, !strconcat(Dt, "8"), asm, "",
+                  [(set QPR:$dst, (v16i8 (OpNode (v16i8 QPR:$src))))]>;
   def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                   (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "16"), asm, "", []>;
+                  opc, !strconcat(Dt, "16"), asm, "",
+                  [(set QPR:$dst, (v8i16 (OpNode (v8i16 QPR:$src))))]>;
   def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                   (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, !strconcat(Dt, "32"), asm, "", []>;
+                  opc, !strconcat(Dt, "32"), asm, "",
+                  [(set QPR:$dst, (v4i32 (OpNode (v4i32 QPR:$src))))]>;
   def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                   (outs QPR:$dst), (ins QPR:$src), NoItinerary,
-                  opc, "f32", asm, "", []> {
+                  opc, "f32", asm, "",
+                  [(set QPR:$dst, (v4f32 (OpNode (v4f32 QPR:$src))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
 }
@@ -3220,9 +3234,9 @@ def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      NEONvceq, 1>;
 def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                      NEONvceq, 1>;
-// For disassembly only.
+
 defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
-                            "$dst, $src, #0">;
+                            "$dst, $src, #0", NEONvceqz>;
 
 //   VCGE     : Vector Compare Greater Than or Equal
 defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -3233,14 +3247,11 @@ def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      NEONvcge, 0>;
 def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                      NEONvcge, 0>;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+
 defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
-                            "$dst, $src, #0">;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+                            "$dst, $src, #0", NEONvcgez>;
 defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
-                            "$dst, $src, #0">;
+                            "$dst, $src, #0", NEONvclez>;
 
 //   VCGT     : Vector Compare Greater Than
 defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -3251,14 +3262,11 @@ def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      NEONvcgt, 0>;
 def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      NEONvcgt, 0>;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+
 defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
-                            "$dst, $src, #0">;
-// For disassembly only.
-// FIXME: This instruction's encoding MAY NOT BE correct.
+                            "$dst, $src, #0", NEONvcgtz>;
 defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
-                            "$dst, $src, #0">;
+                            "$dst, $src, #0", NEONvcltz>;
 
 //   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
 def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index e4787518e7..051c349a06 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -79,3 +79,14 @@ define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
 }
+
+define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
+;CHECK: vceqi8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vceq.i8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 2c161113c1..f190931f1b 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -160,3 +160,25 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 
 declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcge.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclei8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcle.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index e3318cd206..7663da3c61 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -173,3 +173,25 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
 
 declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vcgti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vcgt.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
+
+define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
+;CHECK: vclti8Z:
+;CHECK-NOT: vmov
+;CHECK-NOT: vmvn
+;CHECK: vclt.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+        %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
+}
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index 27ebc8a936..da59ee7209 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -102,3 +102,14 @@
 	vtst.16	q8, q8, q9
 @ CHECK: vtst.32	q8, q8, q9              @ encoding: [0xf2,0x08,0x60,0xf2]
 	vtst.32	q8, q8, q9
+
+@ CHECK: vceq.i8	d16, d16, #0            @ encoding: [0x20,0x01,0xf1,0xf3]
+  vceq.i8	d16, d16, #0
+@ CHECK: vcge.s8	d16, d16, #0            @ encoding: [0xa0,0x00,0xf1,0xf3]
+  vcge.s8	d16, d16, #0
+@ CHECK: vcle.s8	d16, d16, #0            @ encoding: [0xa0,0x01,0xf1,0xf3]
+  vcle.s8	d16, d16, #0
+@ CHECK: vcgt.s8	d16, d16, #0            @ encoding: [0x20,0x00,0xf1,0xf3]
+  vcgt.s8	d16, d16, #0
+@ CHECK: vclt.s8	d16, d16, #0            @ encoding: [0x20,0x02,0xf1,0xf3]
+  vclt.s8	d16, d16, #0
author	Owen Anderson <resistor@mac.com>	2010-11-08 23:21:22 +0000
committer	Owen Anderson <resistor@mac.com>	2010-11-08 23:21:22 +0000
commit	c24cb3551ed66830b53362f593269873cb53a0c4 (patch)
tree	fcfad3ffe5894ca621031ee680dce6de429780a8
parent	2c2304c62346e17f4963f6b609dc7ae6f8b91962 (diff)
download	llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.gz llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.bz2 llvm-c24cb3551ed66830b53362f593269873cb53a0c4.tar.xz