Fix the load size for the FMA4 SS/SD instructions. Their memory operands need to use f32 and f64 sizes, but with special handling so that they stay compatible with the intrinsics, which expect a vector operand. Similar handling is already used elsewhere.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147360 91177308-0d34-0410-b5e6-96231b3b80d8
author: Craig Topper <craig.topper@gmail.com> 2011-12-30 01:49:53 +0000
committer: Craig Topper <craig.topper@gmail.com> 2011-12-30 01:49:53 +0000
commit: 57d4b3315fb7a84379778c4727594cd7480dbfe1 (patch)
tree: 74526a95f6d729920af006a55e5b7453a27a42f4 /test/CodeGen/X86/fma4-intrinsics-x86_64.ll
parent: 2e95afa04cd1c89de7e4abaeead66cd8e51ec929 (diff)
download: llvm-57d4b3315fb7a84379778c4727594cd7480dbfe1.tar.gz
llvm-57d4b3315fb7a84379778c4727594cd7480dbfe1.tar.bz2
llvm-57d4b3315fb7a84379778c4727594cd7480dbfe1.tar.xz
1 files changed, 28 insertions, 0 deletions
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index bd94c134ce..a4b9cc6666 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -6,6 +6,20 @@ define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %
   %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
   ret < 4 x float > %res
 }
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) { ; third intrinsic operand is built from a scalar float load
+  ; CHECK: vfmaddss (%{{.*}})
+  %x = load float *%a2 ; scalar float load through the pointer argument
+  %y = insertelement <4 x float> undef, float %x, i32 0 ; put the scalar in element 0; the other elements stay undef
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <<4 x float>> [#uses=1]
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) { ; second intrinsic operand is built from a scalar float load
+  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+  %x = load float *%a1 ; scalar float load through the pointer argument
+  %y = insertelement <4 x float> undef, float %x, i32 0 ; put the scalar in element 0; the other elements stay undef
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <<4 x float>> [#uses=1]
+  ret < 4 x float > %res
+}
 declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
 define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
@@ -13,6 +27,20 @@ define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double
   %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
   ret < 2 x double > %res
 }
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) { ; third intrinsic operand is built from a scalar double load
+  ; CHECK: vfmaddsd (%{{.*}})
+  %x = load double *%a2 ; scalar double load through the pointer argument
+  %y = insertelement <2 x double> undef, double %x, i32 0 ; put the scalar in element 0; element 1 stays undef
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <<2 x double>> [#uses=1]
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) { ; second intrinsic operand is built from a scalar double load
+  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+  %x = load double *%a1 ; scalar double load through the pointer argument
+  %y = insertelement <2 x double> undef, double %x, i32 0 ; put the scalar in element 0; element 1 stays undef
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <<2 x double>> [#uses=1]
+  ret < 2 x double > %res
+}
 declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
 define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
author	Craig Topper <craig.topper@gmail.com>	2011-12-30 01:49:53 +0000
committer	Craig Topper <craig.topper@gmail.com>	2011-12-30 01:49:53 +0000
commit	57d4b3315fb7a84379778c4727594cd7480dbfe1 (patch)
tree	74526a95f6d729920af006a55e5b7453a27a42f4 /test/CodeGen/X86/fma4-intrinsics-x86_64.ll
parent	2e95afa04cd1c89de7e4abaeead66cd8e51ec929 (diff)
download	llvm-57d4b3315fb7a84379778c4727594cd7480dbfe1.tar.gz llvm-57d4b3315fb7a84379778c4727594cd7480dbfe1.tar.bz2 llvm-57d4b3315fb7a84379778c4727594cd7480dbfe1.tar.xz