Merging r195355:

------------------------------------------------------------------------ r195355 | dsanders | 2013-11-21 13:24:49 +0000 (Thu, 21 Nov 2013) | 20 lines Add support for legalizing SETNE/SETEQ by inverting the condition code and the result of the comparison. Summary: LegalizeSetCCCondCode can now legalize SETEQ and SETNE by returning the inverse condition and requesting that the caller invert the result of the condition. The caller of LegalizeSetCCCondCode must handle the inverted CC, and they do so as follows: SETCC, BR_CC: Invert the result of the SETCC with SelectionDAG::getNOT() SELECT_CC: Swap the true/false operands. This is necessary for MSA which lacks an integer SETNE instruction. Reviewers: resistor CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D2229 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195363 91177308-0d34-0410-b5e6-96231b3b80d8
author: Daniel Sanders <daniel.sanders@imgtec.com> 2013-11-21 15:03:54 +0000
committer: Daniel Sanders <daniel.sanders@imgtec.com> 2013-11-21 15:03:54 +0000
commit: 4e2d2f091e88dd83d1685173d2c0692d8ae155e6 (patch)
tree: 9e93aac3ffb4e24e7d1657fff20f394c63cfb995 /test/CodeGen
parent: 9eaade8b56b124150d189e1786b14306cdaf5641 (diff)
download: llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.gz
llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.bz2
llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.xz
2 files changed, 234 insertions, 0 deletions
diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll
index 36569a984b..6408d7ba09 100644
--- a/test/CodeGen/Mips/msa/compare.ll
+++ b/test/CodeGen/Mips/msa/compare.ll
@@ -341,6 +341,91 @@ define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: .size clt_u_v2i64
 }
 
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: cne_v16i8:
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <16 x i8> %1, %2
+  %4 = sext <16 x i1> %3 to <16 x i8>
+  ; CHECK-DAG: ceq.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; CHECK-DAG: xori.b [[R3]], [[R3]], 255
+  store <16 x i8> %4, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v16i8
+}
+
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: cne_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <8 x i16> %1, %2
+  %4 = sext <8 x i1> %3 to <8 x i16>
+  ; CHECK-DAG: ceq.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue
+  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+  store <8 x i16> %4, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v8i16
+}
+
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: cne_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <4 x i32> %1, %2
+  %4 = sext <4 x i1> %3 to <4 x i32>
+  ; CHECK-DAG: ceq.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue
+  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+  store <4 x i32> %4, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v4i32
+}
+
+; There is no != comparison, but test it anyway since we've had legalizer
+; issues in this area.
+define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: cne_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = icmp ne <2 x i64> %1, %2
+  %4 = sext <2 x i1> %3 to <2 x i64>
+  ; CHECK-DAG: ceq.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  ; TODO: This should be an 'xori.b [[R3]], [[R3]], 255' but thats an optimisation issue
+  ; CHECK-DAG: ldi.b [[R4:\$w[0-9]+]], -1
+  ; CHECK-DAG: xor.v [[R3]], [[R3]], [[R4]]
+  store <2 x i64> %4, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size cne_v2i64
+}
+
 define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: ceqi_v16i8:
 
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
new file mode 100644
index 0000000000..1a03e55d9d
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
@@ -0,0 +1,149 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Don't know how to expand this condition!" unreachable.
+; It should at least successfully build.
+
+define void @autogen_SD3861334421(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca <2 x i32>
+  %A3 = alloca <2 x double>
+  %A2 = alloca i64
+  %A1 = alloca i64
+  %A = alloca double
+  %L = load i8* %0
+  store i8 -101, i8* %0
+  %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
+  %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1>
+  %I = insertelement <8 x i64> zeroinitializer, i64 %4, i32 5
+  %B = and i64 116376, 57247
+  %FC = uitofp i8 7 to double
+  %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %L5 = load i8* %0
+  store i8 %L, i8* %0
+  %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3
+  %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
+  %I8 = insertelement <8 x i8> %Sl, i8 7, i32 4
+  %B9 = or <8 x i64> zeroinitializer, zeroinitializer
+  %Sl10 = select i1 false, i64 116376, i64 380809
+  %Cmp = icmp sgt i32 394647, 17081
+  br label %CF
+
+CF:                                               ; preds = %CF, %BB
+  %L11 = load i8* %0
+  store i8 -87, i8* %0
+  %E12 = extractelement <4 x i64> zeroinitializer, i32 0
+  %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5>
+  %I14 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 1
+  %B15 = srem i64 %Sl10, 380809
+  %FC16 = sitofp i64 57247 to float
+  %Sl17 = select i1 false, double 0x87A9374869A78EC6, double 0.000000e+00
+  %Cmp18 = icmp uge i8 %L, %5
+  br i1 %Cmp18, label %CF, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF88, %CF
+  %L19 = load i8* %0
+  store i8 -101, i8* %0
+  %E20 = extractelement <4 x i64> zeroinitializer, i32 0
+  %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+  %I22 = insertelement <4 x i64> zeroinitializer, i64 127438, i32 1
+  %B23 = fdiv double %Sl17, 0.000000e+00
+  %Sl24 = select i1 %Cmp18, i32 420510, i32 492085
+  %Cmp25 = icmp ugt i1 %Cmp18, false
+  br i1 %Cmp25, label %CF80, label %CF83
+
+CF83:                                             ; preds = %CF83, %CF80
+  %L26 = load i8* %0
+  store i8 -87, i8* %0
+  %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
+  %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
+  %I29 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 492085, i32 1
+  %B30 = lshr <8 x i8> %I8, %I8
+  %FC31 = sitofp <4 x i32> %Shuff28 to <4 x double>
+  %Sl32 = select i1 false, <8 x i8> %I8, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %Cmp33 = icmp eq i64 %B, 116376
+  br i1 %Cmp33, label %CF83, label %CF88
+
+CF88:                                             ; preds = %CF83
+  %L34 = load i8* %0
+  store i8 -87, i8* %0
+  %E35 = extractelement <8 x i64> %Shuff, i32 7
+  %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
+  %I37 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 0
+  %B38 = xor <8 x i64> %B9, %B9
+  %ZE = zext i32 0 to i64
+  %Sl39 = select i1 %Cmp33, i8 %L11, i8 %L5
+  %Cmp40 = icmp sgt i1 %Cmp, false
+  br i1 %Cmp40, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF81, %CF85, %CF87, %CF88
+  %L41 = load i8* %0
+  store i8 %L34, i8* %0
+  %E42 = extractelement <8 x i64> %Shuff13, i32 6
+  %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7>
+  %I44 = insertelement <4 x i64> zeroinitializer, i64 116376, i32 3
+  %B45 = fsub float %FC16, 0x3AC86DCC40000000
+  %Tr = trunc <4 x i64> %I14 to <4 x i32>
+  %Sl46 = select i1 false, <8 x i64> %B38, <8 x i64> zeroinitializer
+  %Cmp47 = icmp sgt i1 %Cmp18, %Cmp18
+  br i1 %Cmp47, label %CF81, label %CF85
+
+CF85:                                             ; preds = %CF81
+  %L48 = load i8* %0
+  store i8 -101, i8* %0
+  %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2
+  %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+  %I51 = insertelement <4 x i64> zeroinitializer, i64 %E20, i32 3
+  %B52 = or i32 336955, %Sl24
+  %FC53 = uitofp i8 %L48 to double
+  %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24
+  %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer
+  %L56 = load i8* %0
+  store i8 %L11, i8* %0
+  %E57 = extractelement <4 x i64> %Shuff21, i32 1
+  %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2>
+  %I59 = insertelement <4 x i64> zeroinitializer, i64 %E42, i32 2
+  %B60 = udiv <8 x i8> %Sl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %Tr61 = trunc i8 49 to i1
+  br i1 %Tr61, label %CF81, label %CF84
+
+CF84:                                             ; preds = %CF84, %CF85
+  %Sl62 = select i1 false, i8 %L, i8 %L48
+  %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer
+  %L64 = load i8* %0
+  store i8 %5, i8* %0
+  %E65 = extractelement <8 x i1> %Cmp55, i32 0
+  br i1 %E65, label %CF84, label %CF87
+
+CF87:                                             ; preds = %CF84
+  %Shuff66 = shufflevector <4 x i64> %Shuff21, <4 x i64> %I14, <4 x i32> <i32 3, i32 undef, i32 7, i32 1>
+  %I67 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %Sl54, i32 1
+  %B68 = frem double %B23, %Sl17
+  %ZE69 = zext <8 x i8> %Sl32 to <8 x i64>
+  %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12
+  %Cmp71 = icmp slt <8 x i64> %I, %Shuff
+  %L72 = load i8* %0
+  store i8 %L72, i8* %0
+  %E73 = extractelement <8 x i1> %Cmp55, i32 6
+  br i1 %E73, label %CF81, label %CF82
+
+CF82:                                             ; preds = %CF82, %CF87
+  %Shuff74 = shufflevector <4 x i32> %I67, <4 x i32> %I29, <4 x i32> <i32 1, i32 3, i32 undef, i32 7>
+  %I75 = insertelement <4 x i64> zeroinitializer, i64 380809, i32 3
+  %B76 = fsub double 0.000000e+00, %FC53
+  %Tr77 = trunc i32 %E to i8
+  %Sl78 = select i1 %Cmp18, i64* %A2, i64* %2
+  %Cmp79 = icmp eq i32 394647, 492085
+  br i1 %Cmp79, label %CF82, label %CF86
+
+CF86:                                             ; preds = %CF82
+  store i64 %Sl70, i64* %Sl78
+  store i64 %E57, i64* %Sl78
+  store i64 %Sl70, i64* %Sl78
+  store i64 %B, i64* %Sl78
+  store i64 %Sl10, i64* %Sl78
+  ret void
+}
author	Daniel Sanders <daniel.sanders@imgtec.com>	2013-11-21 15:03:54 +0000
committer	Daniel Sanders <daniel.sanders@imgtec.com>	2013-11-21 15:03:54 +0000
commit	4e2d2f091e88dd83d1685173d2c0692d8ae155e6 (patch)
tree	9e93aac3ffb4e24e7d1657fff20f394c63cfb995 /test/CodeGen
parent	9eaade8b56b124150d189e1786b14306cdaf5641 (diff)
download	llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.gz llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.bz2 llvm-4e2d2f091e88dd83d1685173d2c0692d8ae155e6.tar.xz