diff options
author | Hal Finkel <hfinkel@anl.gov> | 2014-04-04 23:51:18 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2014-04-04 23:51:18 +0000 |
commit | e6a5b33e6e3e9626634f08f2dab8cbc0866e30b5 (patch) | |
tree | 48a6115dc1972a6184a8c04732dd3b94235975e8 /test/Analysis | |
parent | cef9f7ef271e9d95c8151ed8e683ef272b6b8c18 (diff) | |
download | llvm-e6a5b33e6e3e9626634f08f2dab8cbc0866e30b5.tar.gz llvm-e6a5b33e6e3e9626634f08f2dab8cbc0866e30b5.tar.bz2 llvm-e6a5b33e6e3e9626634f08f2dab8cbc0866e30b5.tar.xz |
[PowerPC] Adjust load/store costs in PPCTTI
This provides more realistic costs for the insert/extractelement instructions
(which are load/store pairs), accounts for the cheap unaligned Altivec load
sequence, and for unaligned VSX load/stores.
Bad news:
MultiSource/Applications/sgefa/sgefa - 35% slowdown (this will require more investigation)
SingleSource/Benchmarks/McGill/queens - 20% slowdown (we no longer vectorize this, but it was a constant store that was scalarized)
MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 - 2% slowdown
Good news:
SingleSource/Benchmarks/Shootout/ary3 - 54% speedup
SingleSource/Benchmarks/Shootout-C++/ary - 40% speedup
MultiSource/Benchmarks/Ptrdist/ks/ks - 35% speedup
MultiSource/Benchmarks/FreeBench/neural/neural - 30% speedup
MultiSource/Benchmarks/TSVC/Symbolics-flt/Symbolics-flt - 20% speedup
Unfortunately, estimating the costs of the stack-based scalarization sequences
is hard, and adjusting these costs is like a game of whac-a-mole :( I'll
revisit this again after we have better codegen for vector extloads and
truncstores and unaligned load/stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205658 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Analysis')
 test/Analysis/CostModel/PowerPC/ext.ll            | 2 +-
 test/Analysis/CostModel/PowerPC/insert_extract.ll | 4 ++--
 test/Analysis/CostModel/PowerPC/load_store.ll     | 5 ++++-
3 files changed, 7 insertions, 4 deletions
diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll
index daaa8f5bac..7d6a14e93c 100644
--- a/test/Analysis/CostModel/PowerPC/ext.ll
+++ b/test/Analysis/CostModel/PowerPC/ext.ll
@@ -13,7 +13,7 @@ define void @exts() {
   ; CHECK: cost of 1 {{.*}} sext
   %v3 = sext <4 x i16> undef to <4 x i32>

-  ; CHECK: cost of 216 {{.*}} sext
+  ; CHECK: cost of 112 {{.*}} sext
   %v4 = sext <8 x i16> undef to <8 x i32>

   ret void
diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll
index f51963d56f..8dc003153a 100644
--- a/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -3,13 +3,13 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "powerpc64-unknown-linux-gnu"

 define i32 @insert(i32 %arg) {
-  ; CHECK: cost of 13 {{.*}} insertelement
+  ; CHECK: cost of 10 {{.*}} insertelement
   %x = insertelement <4 x i32> undef, i32 %arg, i32 0
   ret i32 undef
 }

 define i32 @extract(<4 x i32> %arg) {
-  ; CHECK: cost of 13 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
   %x = extractelement <4 x i32> %arg, i32 0
   ret i32 %x
 }
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 8145a1dc71..40862780fa 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -31,9 +31,12 @@ define i32 @loads(i32 %arg) {
   ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
   ; this with a small expense, but we don't currently.
-  ; CHECK: cost of 60 {{.*}} load
+  ; CHECK: cost of 48 {{.*}} load
   load <4 x i16>* undef, align 2

+  ; CHECK: cost of 1 {{.*}} load
+  load <4 x i32>* undef, align 4
+
   ret i32 undef
 }