From 2fd3e67dc6438cee5e32e0d7d7d42891df7edd96 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 28 Aug 2013 12:04:29 +0000 Subject: [mips][msa] Added load/store intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189476 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsMips.td | 52 ++++++++++++ lib/Target/Mips/MipsISelDAGToDAG.cpp | 6 ++ lib/Target/Mips/MipsISelDAGToDAG.h | 5 ++ lib/Target/Mips/MipsInstrInfo.td | 3 + lib/Target/Mips/MipsMSAInstrInfo.td | 109 +++++++++++++++++++----- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 14 ++++ lib/Target/Mips/MipsSEISelDAGToDAG.h | 3 + lib/Target/Mips/MipsSEISelLowering.cpp | 62 +++++++++++++- lib/Target/Mips/MipsSEISelLowering.h | 1 + test/CodeGen/Mips/msa/3r_ld_st.ll | 149 +++++++++++++++++++++++++++++++++ test/CodeGen/Mips/msa/i5_ld_st.ll | 149 +++++++++++++++++++++++++++++++++ 11 files changed, 532 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/Mips/msa/3r_ld_st.ll create mode 100644 test/CodeGen/Mips/msa/i5_ld_st.ll diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 6e03a19408..c8178a91ac 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -1200,6 +1200,32 @@ def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">, [llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; +def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; +def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; +def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + +def int_mips_ldx_b : GCCBuiltin<"__builtin_msa_ldx_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; +def int_mips_ldx_h : GCCBuiltin<"__builtin_msa_ldx_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; +def int_mips_ldx_w : GCCBuiltin<"__builtin_msa_ldx_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; +def int_mips_ldx_d : GCCBuiltin<"__builtin_msa_ldx_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">, @@ -1601,6 +1627,32 @@ def int_mips_srlri_w : GCCBuiltin<"__builtin_msa_srlri_w">, def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">, + Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">, + Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">, + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">, + Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + +def int_mips_stx_b : GCCBuiltin<"__builtin_msa_stx_b">, + Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_mips_stx_h : GCCBuiltin<"__builtin_msa_stx_h">, + Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_mips_stx_w : GCCBuiltin<"__builtin_msa_stx_w">, + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_mips_stx_d : GCCBuiltin<"__builtin_msa_stx_d">, + Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [Commutative]>; def int_mips_subs_s_h : GCCBuiltin<"__builtin_msa_subs_s_h">, diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 15e43d48cf..725d9b4686 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -69,6 +69,12 @@ bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, return false; } +bool MipsDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + llvm_unreachable("Unimplemented function."); + return false; +} + bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { llvm_unreachable("Unimplemented function."); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index a137a6080d..e98d590a27 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -57,6 +57,11 @@ private: virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; + // Complex Pattern. + /// (reg + reg). + virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + /// Fall back on this function if all else fails. virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 5518b8c532..9b6c8575ee 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -376,6 +376,9 @@ def addr : def addrRegImm : ComplexPattern; +def addrRegReg : + ComplexPattern; + def addrDefault : ComplexPattern; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index eb95c7878a..53fceb79ef 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -466,10 +466,15 @@ class LD_H_ENC : MSA_I5_FMT<0b110, 0b01, 0b000111>; class LD_W_ENC : MSA_I5_FMT<0b110, 0b10, 0b000111>; class LD_D_ENC : MSA_I5_FMT<0b110, 0b11, 0b000111>; -class LDI_B_ENC : MSA_I10_FMT<0b010, 0b00, 0b001100>; -class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>; -class LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>; -class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>; +class LDI_B_ENC : MSA_I10_FMT<0b010, 0b00, 0b001100>; +class LDI_H_ENC : MSA_I10_FMT<0b010, 0b01, 0b001100>; +class LDI_W_ENC : MSA_I10_FMT<0b010, 0b10, 0b001100>; +class LDI_D_ENC : MSA_I10_FMT<0b010, 0b11, 0b001100>; + +class LDX_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001111>; +class LDX_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001111>; +class LDX_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001111>; +class LDX_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001111>; class MADD_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>; class MADD_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>; @@ -688,6 +693,11 @@ class ST_H_ENC : MSA_I5_FMT<0b111, 0b01, 0b000111>; class ST_W_ENC : MSA_I5_FMT<0b111, 0b10, 0b000111>; class ST_D_ENC : MSA_I5_FMT<0b111, 0b11, 0b000111>; +class STX_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001111>; +class STX_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001111>; +class STX_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001111>; +class STX_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001111>; + class SUBS_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010001>; class SUBS_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010001>; class SUBS_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010001>; @@ -1705,7 +1715,7 @@ class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, class LD_DESC_BASE { + Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm> { dag OutOperandList = (outs RCWD:$wd); dag InOperandList = (ins MemOpnd:$addr); string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); @@ -1727,6 +1737,21 @@ class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, NoItinerary, MSA128D>; +class LDX_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); + list Pattern = [(set RCWD:$wd, (TyNode (OpNode Addr:$addr)))]; + InstrItinClass Itinerary = itin; +} + +class LDX_B_DESC : LDX_DESC_BASE<"ldx.b", load, v16i8, NoItinerary, MSA128B>; +class LDX_H_DESC : LDX_DESC_BASE<"ldx.h", load, v8i16, NoItinerary, MSA128H>; +class LDX_W_DESC : LDX_DESC_BASE<"ldx.w", load, v4i32, NoItinerary, MSA128W>; +class LDX_D_DESC : LDX_DESC_BASE<"ldx.d", load, v2i64, NoItinerary, MSA128D>; + class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h, NoItinerary, MSA128H, MSA128H>; class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w, @@ -2110,7 +2135,7 @@ class SRLRI_D_DESC : MSA_BIT_D_DESC_BASE<"srlri.d", int_mips_srlri_d, class ST_DESC_BASE { + Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm> { dag OutOperandList = (outs); dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr); string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); @@ -2118,12 +2143,26 @@ class ST_DESC_BASE; class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, NoItinerary, MSA128H>; class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, NoItinerary, MSA128W>; class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, NoItinerary, MSA128D>; +class STX_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins RCWD:$wd, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$wd, $addr"); + list Pattern = [(OpNode (TyNode RCWD:$wd), Addr:$addr)]; + InstrItinClass Itinerary = itin; +} + +class STX_B_DESC : STX_DESC_BASE<"stx.b", store, v16i8, NoItinerary, MSA128B>; +class STX_H_DESC : STX_DESC_BASE<"stx.h", store, v8i16, NoItinerary, MSA128H>; +class STX_W_DESC : STX_DESC_BASE<"stx.w", store, v4i32, NoItinerary, MSA128W>; +class STX_D_DESC : STX_DESC_BASE<"stx.d", store, v2i64, NoItinerary, MSA128D>; + class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b, NoItinerary, MSA128B, MSA128B>; class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h, @@ -2628,6 +2667,11 @@ def LDI_B : LDI_B_ENC, LDI_B_DESC, Requires<[HasMSA]>; def LDI_H : LDI_H_ENC, LDI_H_DESC, Requires<[HasMSA]>; def LDI_W : LDI_W_ENC, LDI_W_DESC, Requires<[HasMSA]>; +def LDX_B: LDX_B_ENC, LDX_B_DESC, Requires<[HasMSA]>; +def LDX_H: LDX_H_ENC, LDX_H_DESC, Requires<[HasMSA]>; +def LDX_W: LDX_W_ENC, LDX_W_DESC, Requires<[HasMSA]>; +def LDX_D: LDX_D_ENC, LDX_D_DESC, Requires<[HasMSA]>; + def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC, Requires<[HasMSA]>; def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC, Requires<[HasMSA]>; @@ -2845,6 +2889,11 @@ def ST_H: ST_H_ENC, ST_H_DESC, Requires<[HasMSA]>; def ST_W: ST_W_ENC, ST_W_DESC, Requires<[HasMSA]>; def ST_D: ST_D_ENC, ST_D_DESC, Requires<[HasMSA]>; +def STX_B: STX_B_ENC, STX_B_DESC, Requires<[HasMSA]>; +def STX_H: STX_H_ENC, STX_H_DESC, Requires<[HasMSA]>; +def STX_W: STX_W_ENC, STX_W_DESC, Requires<[HasMSA]>; +def STX_D: STX_D_ENC, STX_D_DESC, Requires<[HasMSA]>; + def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC, Requires<[HasMSA]>; def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC, Requires<[HasMSA]>; def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC, Requires<[HasMSA]>; @@ -2888,19 +2937,39 @@ def XORI_B : XORI_B_ENC, XORI_B_DESC, Requires<[HasMSA]>; class MSAPat pred = [HasMSA]> : Pat, Requires; -def LD_FH : MSAPat<(v8f16 (load addr:$addr)), - (LD_H addr:$addr)>; -def LD_FW : MSAPat<(v4f32 (load addr:$addr)), - (LD_W addr:$addr)>; -def LD_FD : MSAPat<(v2f64 (load addr:$addr)), - (LD_D addr:$addr)>; - -def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr), - (ST_H MSA128H:$ws, addr:$addr)>; -def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr), - (ST_W MSA128W:$ws, addr:$addr)>; -def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr), - (ST_D MSA128D:$ws, addr:$addr)>; +def : MSAPat<(v16i8 (load addr:$addr)), (LD_B addr:$addr)>; +def : MSAPat<(v8i16 (load addr:$addr)), (LD_H addr:$addr)>; +def : MSAPat<(v4i32 (load addr:$addr)), (LD_W addr:$addr)>; +def : MSAPat<(v2i64 (load addr:$addr)), (LD_D addr:$addr)>; +def : MSAPat<(v8f16 (load addr:$addr)), (LD_H addr:$addr)>; +def : MSAPat<(v4f32 (load addr:$addr)), (LD_W addr:$addr)>; +def : MSAPat<(v2f64 (load addr:$addr)), (LD_D addr:$addr)>; + +def : MSAPat<(v8f16 (load addrRegImm:$addr)), (LD_H addrRegImm:$addr)>; +def : MSAPat<(v4f32 (load addrRegImm:$addr)), (LD_W addrRegImm:$addr)>; +def : MSAPat<(v2f64 (load addrRegImm:$addr)), (LD_D addrRegImm:$addr)>; + +def : MSAPat<(store (v16i8 MSA128B:$ws), addr:$addr), + (ST_B MSA128B:$ws, addr:$addr)>; +def : MSAPat<(store (v8i16 MSA128H:$ws), addr:$addr), + (ST_H MSA128H:$ws, addr:$addr)>; +def : MSAPat<(store (v4i32 MSA128W:$ws), addr:$addr), + (ST_W MSA128W:$ws, addr:$addr)>; +def : MSAPat<(store (v2i64 MSA128D:$ws), addr:$addr), + (ST_D MSA128D:$ws, addr:$addr)>; +def : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr), + (ST_H MSA128H:$ws, addr:$addr)>; +def : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr), + (ST_W MSA128W:$ws, addr:$addr)>; +def : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr), + (ST_D MSA128D:$ws, addr:$addr)>; + +def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addrRegImm:$addr), + (ST_H MSA128H:$ws, addrRegImm:$addr)>; +def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrRegImm:$addr), + (ST_W MSA128W:$ws, addrRegImm:$addr)>; +def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr), + (ST_D MSA128D:$ws, addrRegImm:$addr)>; class MSABitconvertPat preds = [HasMSA]> : diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 27fcaa3391..7161850079 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -316,6 +316,20 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, return false; } +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + return false; +} + bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { Base = Addr; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 9961934bd8..22e597ebf8 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -43,6 +43,9 @@ private: virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; + virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index dfa16c0a3d..5341277d2d 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -125,6 +125,9 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + computeRegisterProperties(); } @@ -174,6 +177,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); } return MipsTargetLowering::LowerOperation(Op, DAG); @@ -726,9 +730,24 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, } } +static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false, + false, false, 16); +} + SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { - switch (cast(Op->getOperand(1))->getZExtValue()) { + unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); + switch (Intr) { default: return SDValue(); case Intrinsic::mips_extp: @@ -771,6 +790,47 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); case Intrinsic::mips_dpsqx_sa_w_ph: return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); + case Intrinsic::mips_ld_b: + case Intrinsic::mips_ld_h: + case Intrinsic::mips_ld_w: + case Intrinsic::mips_ld_d: + case Intrinsic::mips_ldx_b: + case Intrinsic::mips_ldx_h: + case Intrinsic::mips_ldx_w: + case Intrinsic::mips_ldx_d: + return lowerMSALoadIntr(Op, DAG, Intr); + } +} + +static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false, + false, 16); +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + unsigned Intr = cast(Op->getOperand(1))->getZExtValue(); + switch (Intr) { + default: + return SDValue(); + case Intrinsic::mips_st_b: + case Intrinsic::mips_st_h: + case Intrinsic::mips_st_w: + case Intrinsic::mips_st_d: + case Intrinsic::mips_stx_b: + case Intrinsic::mips_stx_h: + case Intrinsic::mips_stx_w: + case Intrinsic::mips_stx_d: + return lowerMSAStoreIntr(Op, DAG, Intr); } } diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index d672677edc..de4309236c 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -63,6 +63,7 @@ namespace llvm { SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/test/CodeGen/Mips/msa/3r_ld_st.ll b/test/CodeGen/Mips/msa/3r_ld_st.ll new file mode 100644 index 0000000000..9b57675e21 --- /dev/null +++ b/test/CodeGen/Mips/msa/3r_ld_st.ll @@ -0,0 +1,149 @@ +; Test the MSA intrinsics that are encoded with the 3R instruction format and +; are loads or stores. + +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_ldx_b_ARG = global <16 x i8> , align 16 +@llvm_mips_ldx_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_ldx_b_test(i32 %a1) nounwind { +entry: + %0 = bitcast <16 x i8>* @llvm_mips_ldx_b_ARG to i8* + %1 = tail call <16 x i8> @llvm.mips.ldx.b(i8* %0, i32 %a1) + store <16 x i8> %1, <16 x i8>* @llvm_mips_ldx_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ldx.b(i8*, i32) nounwind + +; CHECK: llvm_mips_ldx_b_test: +; CHECK: ldx.b [[R1:\$w[0-9]+]], $4( +; CHECK: st.b +; CHECK: .size llvm_mips_ldx_b_test +; +@llvm_mips_ldx_h_ARG = global <8 x i16> , align 16 +@llvm_mips_ldx_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_ldx_h_test(i32 %a1) nounwind { +entry: + %0 = bitcast <8 x i16>* @llvm_mips_ldx_h_ARG to i8* + %1 = tail call <8 x i16> @llvm.mips.ldx.h(i8* %0, i32 %a1) + store <8 x i16> %1, <8 x i16>* @llvm_mips_ldx_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ldx.h(i8*, i32) nounwind + +; CHECK: llvm_mips_ldx_h_test: +; CHECK: ldx.h [[R1:\$w[0-9]+]], $4( +; CHECK: st.h +; CHECK: .size llvm_mips_ldx_h_test +; +@llvm_mips_ldx_w_ARG = global <4 x i32> , align 16 +@llvm_mips_ldx_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_ldx_w_test(i32 %a1) nounwind { +entry: + %0 = bitcast <4 x i32>* @llvm_mips_ldx_w_ARG to i8* + %1 = tail call <4 x i32> @llvm.mips.ldx.w(i8* %0, i32 %a1) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ldx_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ldx.w(i8*, i32) nounwind + +; CHECK: llvm_mips_ldx_w_test: +; CHECK: ldx.w [[R1:\$w[0-9]+]], $4( +; CHECK: st.w +; CHECK: .size llvm_mips_ldx_w_test +; +@llvm_mips_ldx_d_ARG = global <2 x i64> , align 16 +@llvm_mips_ldx_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_ldx_d_test(i32 %a1) nounwind { +entry: + %0 = bitcast <2 x i64>* @llvm_mips_ldx_d_ARG to i8* + %1 = tail call <2 x i64> @llvm.mips.ldx.d(i8* %0, i32 %a1) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ldx_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ldx.d(i8*, i32) nounwind + +; CHECK: llvm_mips_ldx_d_test: +; CHECK: ldx.d [[R1:\$w[0-9]+]], $4( +; CHECK: st.d +; CHECK: .size llvm_mips_ldx_d_test +; +@llvm_mips_stx_b_ARG = global <16 x i8> , align 16 +@llvm_mips_stx_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_stx_b_test(i32 %a1) nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_stx_b_ARG + %1 = bitcast <16 x i8>* @llvm_mips_stx_b_RES to i8* + tail call void @llvm.mips.stx.b(<16 x i8> %0, i8* %1, i32 %a1) + ret void +} + +declare void @llvm.mips.stx.b(<16 x i8>, i8*, i32) nounwind + +; CHECK: llvm_mips_stx_b_test: +; CHECK: ld.b +; CHECK: stx.b [[R1:\$w[0-9]+]], $4( +; CHECK: .size llvm_mips_stx_b_test +; +@llvm_mips_stx_h_ARG = global <8 x i16> , align 16 +@llvm_mips_stx_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_stx_h_test(i32 %a1) nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_stx_h_ARG + %1 = bitcast <8 x i16>* @llvm_mips_stx_h_RES to i8* + tail call void @llvm.mips.stx.h(<8 x i16> %0, i8* %1, i32 %a1) + ret void +} + +declare void @llvm.mips.stx.h(<8 x i16>, i8*, i32) nounwind + +; CHECK: llvm_mips_stx_h_test: +; CHECK: ld.h +; CHECK: stx.h [[R1:\$w[0-9]+]], $4( +; CHECK: .size llvm_mips_stx_h_test +; +@llvm_mips_stx_w_ARG = global <4 x i32> , align 16 +@llvm_mips_stx_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_stx_w_test(i32 %a1) nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_stx_w_ARG + %1 = bitcast <4 x i32>* @llvm_mips_stx_w_RES to i8* + tail call void @llvm.mips.stx.w(<4 x i32> %0, i8* %1, i32 %a1) + ret void +} + +declare void @llvm.mips.stx.w(<4 x i32>, i8*, i32) nounwind + +; CHECK: llvm_mips_stx_w_test: +; CHECK: ld.w +; CHECK: stx.w [[R1:\$w[0-9]+]], $4( +; CHECK: .size llvm_mips_stx_w_test +; +@llvm_mips_stx_d_ARG = global <2 x i64> , align 16 +@llvm_mips_stx_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_stx_d_test(i32 %a1) nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_stx_d_ARG + %1 = bitcast <2 x i64>* @llvm_mips_stx_d_RES to i8* + tail call void @llvm.mips.stx.d(<2 x i64> %0, i8* %1, i32 %a1) + ret void +} + +declare void @llvm.mips.stx.d(<2 x i64>, i8*, i32) nounwind + +; CHECK: llvm_mips_stx_d_test: +; CHECK: ld.d +; CHECK: stx.d [[R1:\$w[0-9]+]], $4( +; CHECK: .size llvm_mips_stx_d_test +; diff --git a/test/CodeGen/Mips/msa/i5_ld_st.ll b/test/CodeGen/Mips/msa/i5_ld_st.ll new file mode 100644 index 0000000000..7a26326fc0 --- /dev/null +++ b/test/CodeGen/Mips/msa/i5_ld_st.ll @@ -0,0 +1,149 @@ +; Test the MSA intrinsics that are encoded with the I5 instruction format and +; are loads or stores. + +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s + +@llvm_mips_ld_b_ARG = global <16 x i8> , align 16 +@llvm_mips_ld_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_ld_b_test() nounwind { +entry: + %0 = bitcast <16 x i8>* @llvm_mips_ld_b_ARG to i8* + %1 = tail call <16 x i8> @llvm.mips.ld.b(i8* %0, i32 16) + store <16 x i8> %1, <16 x i8>* @llvm_mips_ld_b_RES + ret void +} + +declare <16 x i8> @llvm.mips.ld.b(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_b_test: +; CHECK: ld.b [[R1:\$w[0-9]+]], 16( +; CHECK: st.b +; CHECK: .size llvm_mips_ld_b_test +; +@llvm_mips_ld_h_ARG = global <8 x i16> , align 16 +@llvm_mips_ld_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_ld_h_test() nounwind { +entry: + %0 = bitcast <8 x i16>* @llvm_mips_ld_h_ARG to i8* + %1 = tail call <8 x i16> @llvm.mips.ld.h(i8* %0, i32 16) + store <8 x i16> %1, <8 x i16>* @llvm_mips_ld_h_RES + ret void +} + +declare <8 x i16> @llvm.mips.ld.h(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_h_test: +; CHECK: ld.h [[R1:\$w[0-9]+]], 16( +; CHECK: st.h +; CHECK: .size llvm_mips_ld_h_test +; +@llvm_mips_ld_w_ARG = global <4 x i32> , align 16 +@llvm_mips_ld_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_ld_w_test() nounwind { +entry: + %0 = bitcast <4 x i32>* @llvm_mips_ld_w_ARG to i8* + %1 = tail call <4 x i32> @llvm.mips.ld.w(i8* %0, i32 16) + store <4 x i32> %1, <4 x i32>* @llvm_mips_ld_w_RES + ret void +} + +declare <4 x i32> @llvm.mips.ld.w(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_w_test: +; CHECK: ld.w [[R1:\$w[0-9]+]], 16( +; CHECK: st.w +; CHECK: .size llvm_mips_ld_w_test +; +@llvm_mips_ld_d_ARG = global <2 x i64> , align 16 +@llvm_mips_ld_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_ld_d_test() nounwind { +entry: + %0 = bitcast <2 x i64>* @llvm_mips_ld_d_ARG to i8* + %1 = tail call <2 x i64> @llvm.mips.ld.d(i8* %0, i32 16) + store <2 x i64> %1, <2 x i64>* @llvm_mips_ld_d_RES + ret void +} + +declare <2 x i64> @llvm.mips.ld.d(i8*, i32) nounwind + +; CHECK: llvm_mips_ld_d_test: +; CHECK: ld.d [[R1:\$w[0-9]+]], 16( +; CHECK: st.d +; CHECK: .size llvm_mips_ld_d_test +; +@llvm_mips_st_b_ARG = global <16 x i8> , align 16 +@llvm_mips_st_b_RES = global <16 x i8> , align 16 + +define void @llvm_mips_st_b_test() nounwind { +entry: + %0 = load <16 x i8>* @llvm_mips_st_b_ARG + %1 = bitcast <16 x i8>* @llvm_mips_st_b_RES to i8* + tail call void @llvm.mips.st.b(<16 x i8> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.b(<16 x i8>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_b_test: +; CHECK: ld.b +; CHECK: st.b [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_b_test +; +@llvm_mips_st_h_ARG = global <8 x i16> , align 16 +@llvm_mips_st_h_RES = global <8 x i16> , align 16 + +define void @llvm_mips_st_h_test() nounwind { +entry: + %0 = load <8 x i16>* @llvm_mips_st_h_ARG + %1 = bitcast <8 x i16>* @llvm_mips_st_h_RES to i8* + tail call void @llvm.mips.st.h(<8 x i16> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.h(<8 x i16>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_h_test: +; CHECK: ld.h +; CHECK: st.h [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_h_test +; +@llvm_mips_st_w_ARG = global <4 x i32> , align 16 +@llvm_mips_st_w_RES = global <4 x i32> , align 16 + +define void @llvm_mips_st_w_test() nounwind { +entry: + %0 = load <4 x i32>* @llvm_mips_st_w_ARG + %1 = bitcast <4 x i32>* @llvm_mips_st_w_RES to i8* + tail call void @llvm.mips.st.w(<4 x i32> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.w(<4 x i32>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_w_test: +; CHECK: ld.w +; CHECK: st.w [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_w_test +; +@llvm_mips_st_d_ARG = global <2 x i64> , align 16 +@llvm_mips_st_d_RES = global <2 x i64> , align 16 + +define void @llvm_mips_st_d_test() nounwind { +entry: + %0 = load <2 x i64>* @llvm_mips_st_d_ARG + %1 = bitcast <2 x i64>* @llvm_mips_st_d_RES to i8* + tail call void @llvm.mips.st.d(<2 x i64> %0, i8* %1, i32 16) + ret void +} + +declare void @llvm.mips.st.d(<2 x i64>, i8*, i32) nounwind + +; CHECK: llvm_mips_st_d_test: +; CHECK: ld.d +; CHECK: st.d [[R1:\$w[0-9]+]], 16( +; CHECK: .size llvm_mips_st_d_test +; -- cgit v1.2.3