summaryrefslogtreecommitdiff
path: root/test/Transforms/LoopVectorize/ARM
diff options
context:
space:
mode:
authorRenato Golin <renato.golin@linaro.org>2013-01-29 23:31:38 +0000
committerRenato Golin <renato.golin@linaro.org>2013-01-29 23:31:38 +0000
commit0261cea689c71a15175faf37fdc6bd1d9f69c46e (patch)
tree4171018f32ce26398acda35d0de58bfecd57beea /test/Transforms/LoopVectorize/ARM
parent76122f9c13e7b840687de2b8d5767c73dac02a3b (diff)
downloadllvm-0261cea689c71a15175faf37fdc6bd1d9f69c46e.tar.gz
llvm-0261cea689c71a15175faf37fdc6bd1d9f69c46e.tar.bz2
llvm-0261cea689c71a15175faf37fdc6bd1d9f69c46e.tar.xz
Adding simple cast cost to ARM
Changing ARMBaseTargetMachine to return ARMTargetLowering intead of the generic one (similar to x86 code). Tests showing which instructions were added to cast when necessary or cost zero when not. Downcast to 16 bits are not lowered in NEON, so costs are not there yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173849 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/LoopVectorize/ARM')
-rw-r--r--test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll114
1 files changed, 114 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
new file mode 100644
index 0000000000..d2e3de279f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s
+; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s
+; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM
+
+; ModuleID = 'arm.ll'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+%T216 = type <2 x i16>
+%T232 = type <2 x i32>
+%T264 = type <2 x i64>
+
+%T416 = type <4 x i16>
+%T432 = type <4 x i32>
+%T464 = type <4 x i64>
+
+define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'direct':
+ %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+ %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+ %r3 = mul %T432 %v0, %v1
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmul.i32
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'ups1632':
+ %v0 = load %T416* %loadaddr
+; ASM: vldr
+ %v1 = load %T416* %loadaddr2
+; ASM: vldr
+ %r1 = sext %T416 %v0 to %T432
+ %r2 = sext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32>
+ %r3 = mul %T432 %r1, %r2
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.s16
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+; COST: function 'upu1632':
+ %v0 = load %T416* %loadaddr
+; ASM: vldr
+ %v1 = load %T416* %loadaddr2
+; ASM: vldr
+ %r1 = zext %T416 %v0 to %T432
+ %r2 = zext %T416 %v1 to %T432
+; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32>
+ %r3 = mul %T432 %r1, %r2
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+; ASM: vmull.u16
+ store %T432 %r3, %T432* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'ups3264':
+ %v0 = load %T232* %loadaddr
+; ASM: vldr
+ %v1 = load %T232* %loadaddr2
+; ASM: vldr
+ %r3 = mul %T232 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+ %st = sext %T232 %r3 to %T264
+; ASM: vmovl.s32
+; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
+ store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+; COST: function 'upu3264':
+ %v0 = load %T232* %loadaddr
+; ASM: vldr
+ %v1 = load %T232* %loadaddr2
+; ASM: vldr
+ %r3 = mul %T232 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 1 for instruction: {{.*}} mul <2 x i32>
+ %st = zext %T232 %r3 to %T264
+; ASM: vmovl.u32
+; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
+ store %T264 %st, %T264* %storeaddr
+; ASM: vst1.64
+ ret void
+}
+
+define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+; COST: function 'dn3216':
+ %v0 = load %T432* %loadaddr
+; ASM: vld1.64
+ %v1 = load %T432* %loadaddr2
+; ASM: vld1.64
+ %r3 = mul %T432 %v0, %v1
+; ASM: vmul.i32
+; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
+ %st = trunc %T432 %r3 to %T416
+; ASM: vmovn.i32
+; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
+ store %T416 %st, %T416* %storeaddr
+; ASM: vstr
+ ret void
+}