summaryrefslogtreecommitdiff
path: root/test/CodeGen/PowerPC
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2014-03-29 05:29:01 +0000
committerHal Finkel <hfinkel@anl.gov>2014-03-29 05:29:01 +0000
commit44b2b9dc1a6192fda90990ec9eec922e3f8d2049 (patch)
tree799b084ff01548b0c8e4e2a051363a6a4ac11fde /test/CodeGen/PowerPC
parentc06afdcb65acd3f1fb28ce6280fed3a2d0db764c (diff)
downloadllvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.gz
llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.bz2
llvm-44b2b9dc1a6192fda90990ec9eec922e3f8d2049.tar.xz
[PowerPC] Add subregister classes for f64 VSX values
[PowerPC] Add subregister classes for f64 VSX values We had stored both f64 values and v2f64, etc. values in the VSX registers. This worked, but was suboptimal because we would always spill 16-byte values even though we almost always had scalar 8-byte values. This resulted in an increase in stack-size use, extra memory bandwidth, etc. To fix this, I've added 64-bit subregisters of the Altivec registers, and combined those with the existing scalar floating-point registers to form a class of VSX scalar floating-point registers. The ABI code has also been enhanced to use this register class and some other necessary improvements have been made. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205075 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PowerPC')
-rw-r--r--test/CodeGen/PowerPC/vsx-fma-m.ll120
-rw-r--r--test/CodeGen/PowerPC/vsx-spill.ll49
2 files changed, 166 insertions, 3 deletions
diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll
index 1e123407d1..da4a20481e 100644
--- a/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-m.ll
@@ -64,7 +64,7 @@ entry:
ret void
; CHECK-LABEL: @test3
-; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 24
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: li [[C3:[0-9]+]], 8
@@ -80,7 +80,7 @@ entry:
; CHECK-DAG: stxsdx 2, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK-DAG: stxsdx 4, 8, [[C3]]
-; CHECK-DAG: blr
+; CHECK: blr
}
define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
@@ -99,7 +99,7 @@ entry:
ret void
; CHECK-LABEL: @test4
-; CHECK-DAG: xxlor [[F1:[0-9]+]], 1, 1
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: xsmaddmdp 4, 2, 1
@@ -120,5 +120,119 @@ entry:
declare double @llvm.fma.f64(double, double, double) #0
+define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+ ret void
+
+; CHECK-LABEL: @testv1
+; CHECK-DAG: xvmaddmdp 36, 35, 34
+; CHECK-DAG: xvmaddadp 34, 35, 37
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: stxvd2x 36, 0, 3
+; CHECK-DAG: stxvd2x 34, 3, [[C1:[0-9]+]]
+; CHECK: blr
+}
+
+define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+ %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+ %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+ store <2 x double> %2, <2 x double>* %arrayidx2, align 8
+ ret void
+
+; CHECK-LABEL: @testv2
+; CHECK-DAG: xvmaddmdp 36, 35, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: stxvd2x 36, 0, 3
+; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
+; CHECK: blr
+}
+
+define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
+ store <2 x double> %2, <2 x double>* %arrayidx1, align 8
+ %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+ %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+ store <2 x double> %3, <2 x double>* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx3, align 8
+ ret void
+
+; CHECK-LABEL: @testv3
+; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 48
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: li [[C3:[0-9]+]], 16
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xvmaddadp [[V1]], 35, 36
+
+; CHECK-DAG: xvmaddmdp 35, 36, 37
+; CHECK-DAG: stxvd2x 32, 0, 3
+; CHECK-DAG: stxvd2x 35, 3, [[C1]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK-DAG: stxvd2x 37, 3, [[C3]]
+; CHECK: blr
+}
+
+define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
+entry:
+ %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
+ store <2 x double> %0, <2 x double>* %d, align 8
+ %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
+ %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ store <2 x double> %1, <2 x double>* %arrayidx1, align 8
+ %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
+ %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
+ store <2 x double> %2, <2 x double>* %arrayidx3, align 8
+ %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
+ %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
+ store <2 x double> %3, <2 x double>* %arrayidx4, align 8
+ ret void
+
+; CHECK-LABEL: @testv4
+; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-DAG: xvmaddmdp 37, 35, 34
+; CHECK-DAG: li [[C1:[0-9]+]], 16
+; CHECK-DAG: li [[C2:[0-9]+]], 32
+; CHECK-DAG: xvmaddadp 34, 35, 38
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xvmaddadp [[V1]], 35, 36
+
+; CHECK-DAG: stxvd2x 32, 0, 3
+; CHECK-DAG: stxvd2x 37, 3, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 48
+; CHECK-DAG: xvmaddadp 37, 35, 36
+; CHECK-DAG: stxvd2x 37, 3, [[C3]]
+; CHECK-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK: blr
+}
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
+
attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/vsx-spill.ll b/test/CodeGen/PowerPC/vsx-spill.ll
new file mode 100644
index 0000000000..29bc6fcc71
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-spill.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @foo1(double %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
+ br label %return
+
+; CHECK: @foo1
+; CHECK: xxlor [[R1:[0-9]+]], 1, 1
+; CHECK: xxlor 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return: ; preds = %entry
+ ret double %a
+}
+
+define double @foo2(double %a) nounwind {
+entry:
+ %b = fadd double %a, %a
+ call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
+ br label %return
+
+; CHECK: @foo2
+; CHECK: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
+; CHECK: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return: ; preds = %entry
+ ret double %b
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+ call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31},~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() nounwind
+ br label %return
+
+; CHECK: @foo3
+; CHECK: stxsdx 1,
+; CHECK: lxsdx [[R1:[0-9]+]],
+; CHECK: xsadddp 1, [[R1]], [[R1]]
+; CHECK: blr
+
+return: ; preds = %entry
+ %b = fadd double %a, %a
+ ret double %b
+}
+