From 177cf1e1a3685209ab805f82897902a8d2b61661 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Thu, 31 May 2012 09:20:20 +0000 Subject: Added FMA3 Intel instructions. I disabled FMA3 autodetection, since the result may differ from expected for some benchmarks. I added tests for GodeGen and intrinsics. I did not change llvm.fma.f32/64 - it may be done later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157737 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/fma3-intrinsics.ll | 132 ++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100755 test/CodeGen/X86/fma3-intrinsics.ll (limited to 'test/CodeGen/X86/fma3-intrinsics.ll') diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll new file mode 100755 index 0000000000..183d1882b9 --- /dev/null +++ b/test/CodeGen/X86/fma3-intrinsics.ll @@ -0,0 +1,132 @@ +; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma3 | FileCheck %s + +define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fmadd132ss {{.*\(%r.*}}, %xmm + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fmadd132ps + %res = call <4 x float> @llvm.x86.fma4.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: fmadd132ps {{.*\(%r.*}}, %ymm + %res = call <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma4.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + +define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fnmadd132ss {{.*\(%r.*}}, %xmm + %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fnmadd132ps + %res = call <4 x float> @llvm.x86.fma4.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + ; CHECK: fnmadd132ps {{.*\(%r.*}}, %ymm + %res = call <8 x float> @llvm.x86.fma4.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.fma4.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fmsub132ss + %res = call <4 x float> @llvm.x86.fma4.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fmsub132ps + %res = call <4 x float> @llvm.x86.fma4.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fnmsub132ss + %res = call <4 x float> @llvm.x86.fma4.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + ; CHECK: fnmsub132ps + %res = call <4 x float> @llvm.x86.fma4.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.fma4.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + +;;;; + +define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fmadd132sd + %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fmadd132pd + %res = call <2 x double> @llvm.x86.fma4.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fnmadd132sd + %res = call <2 x double> @llvm.x86.fma4.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fnmadd132pd + %res = call <2 x double> @llvm.x86.fma4.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + + +define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fmsub132sd + %res = call <2 x double> @llvm.x86.fma4.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fmsub132pd + %res = call <2 x double> @llvm.x86.fma4.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fnmsub132sd + %res = call <2 x double> @llvm.x86.fma4.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + +define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + ; CHECK: fnmsub132pd + %res = call <2 x double> @llvm.x86.fma4.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.fma4.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone -- cgit v1.2.3