summaryrefslogtreecommitdiff
path: root/test/CodeGen/PowerPC
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2014-03-29 05:29:01 +0000
committerHal Finkel <hfinkel@anl.gov>2014-03-29 05:29:01 +0000
commit44b2b9dc1a6192fda90990ec9eec922e3f8d2049 (patch)
tree799b084ff01548b0c8e4e2a051363a6a4ac11fde /test/CodeGen/PowerPC
parentc06afdcb65acd3f1fb28ce6280fed3a2d0db764c (diff)
downloadllvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.gz
llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.bz2
llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.xz
[PowerPC] Add subregister classes for f64 VSX values
[PowerPC] Add subregister classes for f64 VSX values We had stored both f64 values and v2f64, etc. values in the VSX registers. This worked, but was suboptimal because we would always spill 16-byte values even though we almost always had scalar 8-byte values. This resulted in an increase in stack-size use, extra memory bandwidth, etc. To fix this, I've added 64-bit subregisters of the Altivec registers, and combined those with the existing scalar floating-point registers to form a class of VSX scalar floating-point registers. The ABI code has also been enhanced to use this register class and some other necessary improvements have been made. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205075 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PowerPC')
-rw-r--r--test/CodeGen/PowerPC/vsx-fma-m.ll120
-rw-r--r--test/CodeGen/PowerPC/vsx-spill.ll49
2 files changed, 166 insertions, 3 deletions
diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll
index 1e123407d1..da4a20481e 100644
--- a/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-m.ll
@@ -64,7 +64,7 @@ entry:
ret void
; CHECK-LABEL: @test3
-; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 24
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: li [[C3:[0-9]+]], 8
@@ -80,7 +80,7 @@ entry:
; CHECK-DAG: stxsdx 2, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK-DAG: stxsdx 4, 8, [[C3]]
-; CHECK-DAG: blr
+; CHECK: blr
}
define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
@@ -99,7 +99,7 @@ entry:
ret void
; CHECK-LABEL: @test4
-; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: xsmaddmdp 4, 2, 1
@@ -120,5 +120,119 @@ entry:
declare double @llvm.fma.f64(double, double, double) #0
+define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+ ret void
+
+; CHECK-LABEL: @testv1
+; CHECK-DAG: xvmaddmdp 36, 35, 34
+; CHECK-DAG: xvmaddadp 34, 35, 37
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: stxvd2x 36, 0, 3
+; CHECK-DAG: stxvd2x 34, 3, [[C1:[0-9]+]]
+; CHECK: blr
+}
+
+define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+ %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+ %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+ store <2 x double> %2, <2 x double>* %arrayidx2, align 8
+ ret void
+
+; CHECK-LABEL: @testv2
+; CHECK-DAG: xvmaddmdp 36, 35, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: stxvd2x 36, 0, 3
+; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
+; CHECK: blr
+}
+
+define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
+ store <2 x double> %2, <2 x double>* %arrayidx1, align 8
+ %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+ %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+ store <2 x double> %3, <2 x double>* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx3, align 8
+ ret void
+
+; CHECK-LABEL: @testv3
+; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 48
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: li [[C3:[0-9]+]], 16
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xvmaddadp [[V1]], 35, 36
+
+; CHECK-DAG: xvmaddmdp 35, 36, 37
+; CHECK-DAG: stxvd2x 32, 0, 3
+; CHECK-DAG: stxvd2x 35, 3, [[C1]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK-DAG: stxvd2x 37, 3, [[C3]]
+; CHECK: blr
+}
+
+define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+ %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
+ %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
+ store <2 x double> %2, <2 x double>* %arrayidx3, align 8
+ %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+ %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
+ store <2 x double> %3, <2 x double>* %arrayidx4, align 8
+ ret void
+
+; CHECK-LABEL: @testv4
+; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xvmaddadp [[V1]], 35, 36
+
+; CHECK-DAG: stxvd2x 32, 0, 3
+; CHECK-DAG: stxvd2x 37, 3, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 48
+; CHECK-DAG: xvmaddadp 37, 35, 36
+; CHECK-DAG: stxvd2x 37, 3, [[C3]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK: blr
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+
attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/vsx-spill.ll b/test/CodeGen/PowerPC/vsx-spill.ll
new file mode 100644
index 0000000000..29bc6fcc71
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-spill.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @foo1(double %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
+ br label %return
+
+; CHECK: @foo1
+; CHECK: xxlor [[R1:[0-9]+]], 1, 1
+; CHECK: xxlor 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return: ; preds = %entry
+ ret double %a
+}
+
+define double @foo2(double %a) nounwind {
+entry:
+ %b = fadd double %a, %a
+ call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
+ br label %return
+
+; CHECK: @foo2
+; CHECK: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
+; CHECK: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return: ; preds = %entry
+ ret double %b
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31},~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind
+ br label %return
+
+; CHECK: @foo3
+; CHECK: stxsdx 1,
+; CHECK: lxsdx [[R1:[0-9]+]],
+; CHECK: xsadddp 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return: ; preds = %entry
+ %b = fadd double %a, %a
+ ret double %b
+}
+