From f96f832a3ca939aba0098b3343f2f48aab49f6d0 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Fri, 31 Jan 2014 21:29:19 +0000 Subject: Replace X86 FMA intrinsic pseduo-instructions with def pats. It looks like these pseudos were only used for pattern matching. Def pats are the appropriate way to do that. As a bonus, these intrinsics will now have memory operands folded properly, and better FMA3 variants selected where appropriate (see r199933). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200577 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFMA.td | 47 +++++++++++++++++-------------------- lib/Target/X86/X86InstrInfo.cpp | 8 ------- test/CodeGen/X86/fma3-intrinsics.ll | 4 ++-- 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 206f7b600f..033033ca69 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -125,6 +125,7 @@ multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; + let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, X86MemOperand x86memop, (OpVT (OpNode RC:$src2, RC:$src1, (mem_frag addr:$src3))))]>; } - -multiclass fma3s_rm_int opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic IntId, - RegisterClass RC> { - let isCodeGenOnly = 1 in { - let isCommutable = 1 in - def r_Int : FMA3; - def m_Int : FMA3; - } // isCodeGenOnly -} } // Constraints = "$src1 = $dst" multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, - string OpStr, string PackTy, Intrinsic Int, + string OpStr, string PackTy, string PT2, Intrinsic Int, SDNode OpNode, RegisterClass RC, ValueType OpVT, X86MemOperand x86memop, Operand memop, PatFrag mem_frag, ComplexPattern mem_cpat> { @@ -169,18 +150,32 @@ let neverHasSideEffects = 1 in { } defm r213 : fma3s_rm, - fma3s_rm_int; + x86memop, RC, OpVT, mem_frag, OpNode>; } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, string OpStr, Intrinsic IntF32, Intrinsic IntF64, SDNode OpNode> { - defm SS : fma3s_forms; - defm SD : fma3s_forms, VEX_W; + + def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), + (COPY_TO_REGCLASS + (!cast(NAME#"SSr213r") + (COPY_TO_REGCLASS $src2, FR32), + (COPY_TO_REGCLASS $src1, FR32), + (COPY_TO_REGCLASS $src3, FR32)), + VR128)>; + + def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), + (COPY_TO_REGCLASS + (!cast(NAME#"SDr213r") + (COPY_TO_REGCLASS $src2, FR64), + (COPY_TO_REGCLASS $src1, FR64), + (COPY_TO_REGCLASS $src3, FR64)), + VR128)>; } defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2bf3972838..816804656e 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1274,8 +1274,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 }, { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 }, { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 }, - { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 }, - { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 }, { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 }, { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 }, @@ -1296,8 +1294,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 }, { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 }, { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 }, - { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 }, - { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 }, { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 }, { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 }, @@ -1318,8 +1314,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 }, { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 }, { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 }, - { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 }, - { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 }, { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 }, { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 }, @@ -1340,8 +1334,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 }, { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 }, { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 }, - { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 }, - { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 }, { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 }, { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 }, diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll index e3910a6935..9a25096c7a 100644 --- a/test/CodeGen/X86/fma3-intrinsics.ll +++ b/test/CodeGen/X86/fma3-intrinsics.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmadd213ss %xmm + ; CHECK: fmadd213ss (%r8), %xmm %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } @@ -24,7 +24,7 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmadd213ss %xmm + ; CHECK: fnmadd213ss (%r8), %xmm %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } -- cgit v1.2.3