diff options
Diffstat (limited to 'test')
268 files changed, 6 insertions, 27975 deletions
diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll index 2360e858b5..56f67873f8 100644 --- a/test/CodeGen/AArch64/128bit_load_store.ll +++ b/test/CodeGen/AArch64/128bit_load_store.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon| FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define void @test_store_f128(fp128* %ptr, fp128 %val) #0 { @@ -21,9 +20,6 @@ define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 { ; CHECK-ARM64-LABEL: test_vstrq_p128 ; CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-AARCH64-LABEL: test_vstrq_p128 -; CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}, #8] -; CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}] entry: %0 = bitcast i128* %ptr to fp128* %1 = bitcast i128 %val to fp128 @@ -35,9 +31,6 @@ define i128 @test_vldrq_p128(i128* readonly %ptr) #2 { ; CHECK-ARM64-LABEL: test_vldrq_p128 ; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-AARCH64-LABEL: test_vldrq_p128 -; CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8] entry: %0 = bitcast i128* %ptr to fp128* %1 = load fp128* %0, align 16 diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll index 8742e45089..892573ba06 100644 --- a/test/CodeGen/AArch64/adc.ll +++ b/test/CodeGen/AArch64/adc.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll index f3fdbefb47..0a93edd829 100644 --- a/test/CodeGen/AArch64/addsub-shifted.ll +++ b/test/CodeGen/AArch64/addsub-shifted.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs %s -o - -mtriple=arm64-apple-ios7.0 | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll index b64ad2a83d..3aa427c352 100644 --- a/test/CodeGen/AArch64/addsub.ll +++ b/test/CodeGen/AArch64/addsub.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-linux-gnu | FileCheck %s ; Note that this should be refactored (for efficiency if nothing else) diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index d33933e922..cd01f594dc 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs %s -o - -mtriple=arm64-linux-gnu | FileCheck %s @var8 = global i8 0 diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index f73365b20c..7cab200b1e 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,6 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-AARCH64 %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) @@ -54,8 +52,6 @@ define i64 @test_alloca_with_local(i64 %n) { ; CHECK: bl use_addr %val = load i64* %loc -; CHECK-AARCH64: sub x[[TMP:[0-9]+]], x29, #[[LOC_FROM_FP]] -; CHECK-AARCH64: ldr x0, [x[[TMP]]] ; CHECK-ARM64: ldur x0, [x29, #-[[LOC_FROM_FP]]] @@ -68,13 +64,7 @@ define i64 @test_alloca_with_local(i64 %n) { define void @test_variadic_alloca(i64 %n, ...) { ; CHECK-LABEL: test_variadic_alloca: -; CHECK-AARCH64: sub sp, sp, #{{[0-9]+}} -; CHECK-AARCH64: add x29, sp, #192 -; CHECK-AARCH64: sub [[TMP:x[0-9]+]], x29, #192 -; CHECK-AARCH64: add x8, [[TMP]], #0 -; CHECK-AARCH64-FP: str q7, [x8, #112] ; [...] -; CHECK-AARCH64-FP: str q1, [x8, #16] ; CHECK-NOFP-AARCH64: sub sp, sp, #80 @@ -112,9 +102,6 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK: bl use_addr ret void -; CHECK-AARCH64: sub sp, x29, #192 -; CHECK-AARCH64: ldp x29, x30, [sp, #192] -; CHECK-AARCH64: add sp, sp, #208 ; CHECK-NOFP-AARCH64: sub sp, x29, #64 ; CHECK-NOFP-AARCH64: ldp x29, x30, [sp, #64] @@ -127,11 +114,6 @@ define void @test_variadic_alloca(i64 %n, ...) { define void @test_alloca_large_frame(i64 %n) { ; CHECK-LABEL: test_alloca_large_frame: -; CHECK-AARCH64: sub sp, sp, #496 -; CHECK-AARCH64: stp x29, x30, [sp, #480] -; CHECK-AARCH64: add x29, sp, #480 -; CHECK-AARCH64: sub sp, sp, #48 -; CHECK-AARCH64: sub sp, sp, #1953, lsl #12 ; CHECK-ARM64: stp x20, x19, [sp, #-32]! ; CHECK-ARM64: stp x29, x30, [sp, #16] @@ -145,9 +127,6 @@ define void @test_alloca_large_frame(i64 %n) { call void @use_addr_loc(i8* %addr1, i64* %addr2) ret void -; CHECK-AARCH64: sub sp, x29, #480 -; CHECK-AARCH64: ldp x29, x30, [sp, #480] -; CHECK-AARCH64: add sp, sp, #496 ; CHECK-ARM64: sub sp, x29, #16 ; CHECK-ARM64: ldp x29, x30, [sp, #16] diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll index b4fbf2edc4..1d4daec5f4 100644 --- a/test/CodeGen/AArch64/analyze-branch.ll +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s ; This test checks that LLVM can do basic stripping and reapplying of branches diff --git a/test/CodeGen/AArch64/andCmpBrToTBZ.ll b/test/CodeGen/AArch64/andCmpBrToTBZ.ll deleted file mode 100644 index f564a5587f..0000000000 --- a/test/CodeGen/AArch64/andCmpBrToTBZ.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: llc -O1 -march=aarch64 -enable-andcmp-sinking=true < %s | FileCheck %s -; arm64 has separate copy of this test - -; ModuleID = 'and-cbz-extr-mr.bc' -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" -target triple = "aarch64-none-linux-gnu" - -define zeroext i1 @foo(i1 %IsEditable, i1 %isTextField, i8* %str1, i8* %str2, i8* %str3, i8* %str4, i8* %str5, i8* %str6, i8* %str7, i8* %str8, i8* %str9, i8* %str10, i8* %str11, i8* %str12, i8* %str13, i32 %int1, i8* %str14) unnamed_addr #0 align 2 { -; CHECK: foo: -entry: - %tobool = icmp eq i8* %str14, null - br i1 %tobool, label %return, label %if.end - -; CHECK: %if.end -; CHECK: tbz -if.end: ; preds = %entry - %and.i.i.i = and i32 %int1, 4 - %tobool.i.i.i = icmp eq i32 %and.i.i.i, 0 - br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i - -land.rhs.i: ; preds = %if.end - %cmp.i.i.i = icmp eq i8* %str12, %str13 - br i1 %cmp.i.i.i, label %if.then3, label %lor.rhs.i.i.i - -lor.rhs.i.i.i: ; preds = %land.rhs.i - %cmp.i13.i.i.i = icmp eq i8* %str10, %str11 - br i1 %cmp.i13.i.i.i, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, label %if.end5 - -_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds = %lor.rhs.i.i.i - %cmp.i.i.i.i = icmp eq i8* %str8, %str9 - br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5 - -if.then3: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i - %tmp11 = load i8* %str14, align 8 - %tmp12 = and i8 %tmp11, 2 - %tmp13 = icmp ne i8 %tmp12, 0 - br label %return - -if.end5: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %lor.rhs.i.i.i -; CHECK: %if.end5 -; CHECK: tbz - br i1 %tobool.i.i.i, label %if.end12, label %land.rhs.i19 - -land.rhs.i19: ; preds = %if.end5 - %cmp.i.i.i18 = icmp eq i8* %str6, %str7 - br i1 %cmp.i.i.i18, label %if.then7, label %lor.rhs.i.i.i23 - -lor.rhs.i.i.i23: ; preds = %land.rhs.i19 - %cmp.i13.i.i.i22 = icmp eq i8* %str3, %str4 - br i1 %cmp.i13.i.i.i22, label %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, label %if.end12 - -_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28: ; preds = %lor.rhs.i.i.i23 - %cmp.i.i.i.i26 = icmp eq i8* %str1, %str2 - br i1 %cmp.i.i.i.i26, label %if.then7, label %if.end12 - -if.then7: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %land.rhs.i19 - br i1 %isTextField, label %if.then9, label %if.end12 - -if.then9: ; preds = %if.then7 - %tmp23 = load i8* %str5, align 8 - %tmp24 = and i8 %tmp23, 2 - %tmp25 = icmp ne i8 %tmp24, 0 - br label %return - -if.end12: ; preds = %if.then7, %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit28, %lor.rhs.i.i.i23, %if.end5, %if.end - %lnot = xor i1 %IsEditable, true - br label %return - -return: ; preds = %if.end12, %if.then9, %if.then3, %entry - %retval.0 = phi i1 [ %tmp13, %if.then3 ], [ %tmp25, %if.then9 ], [ %lnot, %if.end12 ], [ true, %entry ] - ret i1 %retval.0 -} - -attributes #0 = { nounwind ssp } diff --git a/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/test/CodeGen/AArch64/assertion-rc-mismatch.ll index f09203f221..bcf206ec9b 100644 --- a/test/CodeGen/AArch64/assertion-rc-mismatch.ll +++ b/test/CodeGen/AArch64/assertion-rc-mismatch.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s ; Test case related to <rdar://problem/15633429>. diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll index fc4db9097a..162430b9b7 100644 --- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s define i32 @foo(i32* %var, i1 %cond) { diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index f8db05fd41..58ea735c80 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG @@ -502,8 +500,6 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb @@ -528,8 +524,6 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth @@ -555,8 +549,6 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, le @@ -581,8 +573,6 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, le @@ -607,8 +597,6 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-ARM64-NEXT: sxtb w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxtb @@ -634,8 +622,6 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { ; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], sxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-ARM64-NEXT: sxth w[[OLD_EXT:[0-9]+]], w[[OLD]] ; CHECK-ARM64-NEXT: cmp w[[OLD_EXT]], w0, sxth @@ -661,8 +647,6 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt @@ -687,8 +671,6 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt @@ -713,8 +695,6 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { ; CHECK: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls @@ -739,8 +719,6 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { ; CHECK: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls @@ -765,8 +743,6 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, ls @@ -791,8 +767,6 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, ls @@ -817,8 +791,6 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { ; CHECK: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxtb -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxtb ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi @@ -843,8 +815,6 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { ; CHECK: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]], uxth -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0, uxth ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi @@ -869,8 +839,6 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { ; CHECK: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]] ; w0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp w0, w[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo ; CHECK-ARM64-NEXT: cmp w[[OLD]], w0 ; CHECK-ARM64-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi @@ -895,8 +863,6 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] ; x0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-AARCH64-NEXT: cmp x0, x[[OLD]] -; CHECK-AARCH64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo ; CHECK-ARM64-NEXT: cmp x[[OLD]], x0 ; CHECK-ARM64-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll index c63610bcca..2c69bee0d1 100644 --- a/test/CodeGen/AArch64/basic-pic.ll +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s @var = global i32 0 diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll index 9272e1edfb..8959e1b695 100644 --- a/test/CodeGen/AArch64/bitfield-insert-0.ll +++ b/test/CodeGen/AArch64/bitfield-insert-0.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -disassemble - | FileCheck %s ; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index b67aa0fa23..8b0b4dafe6 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; First, a simple example from Clang. The registers could plausibly be @@ -64,8 +63,6 @@ define void @test_whole64(i64* %existing, i64* %new) { define void @test_whole32_from64(i64* %existing, i64* %new) { ; CHECK-LABEL: test_whole32_from64: -; CHECK-AARCH64: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16 -; CHECK-AARCH64-NOT: and ; CHECK-ARM64: bfxil {{x[0-9]+}}, {{x[0-9]+}}, #0, #16 @@ -88,7 +85,6 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) { ; CHECK-ARM64: and ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4 -; CHECK-AARCH64: and {{w[0-9]+}}, [[INSERT]], #0xff %oldval = load volatile i32* %existing %oldval_keep = and i32 %oldval, 135 ; = 0x87 @@ -107,7 +103,6 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) { ; CHECK-LABEL: test_64bit_masked: ; CHECK-ARM64: and ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8 -; CHECK-AARCH64: and {{x[0-9]+}}, [[INSERT]], #0xffff00000000 %oldval = load volatile i64* %existing %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000 @@ -128,7 +123,6 @@ define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { ; CHECK-ARM64: and ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4 -; CHECK-AARCH64: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} %oldval = load volatile i32* %existing %oldval_keep = and i32 %oldval, 647 ; = 0x287 diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 92f6d74908..71ffe30c92 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 @var32 = global i32 0 @@ -24,7 +23,6 @@ define void @test_extendb(i8 %var) { %uxt64 = zext i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-AARCH64: uxtb {{x[0-9]+}}, {{w[0-9]+}} ; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff ret void } @@ -49,7 +47,6 @@ define void @test_extendh(i16 %var) { %uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-AARCH64: uxth {{x[0-9]+}}, {{w[0-9]+}} ; CHECK-ARM64: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff ret void } @@ -63,7 +60,6 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK-AARCH64: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32 ; CHECK-ARM64: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 ret void } diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll index c22ce1caf4..0cbdd3988b 100644 --- a/test/CodeGen/AArch64/blockaddress.ll +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -code-model=large -mtriple=arm64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll index 37cc8e42f1..5d92ef67d0 100644 --- a/test/CodeGen/AArch64/bool-loads.ll +++ b/test/CodeGen/AArch64/bool-loads.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s @var = global i1 0 diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll index 285c19ddee..137173bc4f 100644 --- a/test/CodeGen/AArch64/breg.ll +++ b/test/CodeGen/AArch64/breg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @stored_label = global i8* null diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll index 6a2832ceaa..9b04a8f979 100644 --- a/test/CodeGen/AArch64/callee-save.ll +++ b/test/CodeGen/AArch64/callee-save.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK-ARM64 @var = global float 0.0 diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll index b2b1fa7a57..0408e6f489 100644 --- a/test/CodeGen/AArch64/code-model-large-abs.ll +++ b/test/CodeGen/AArch64/code-model-large-abs.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -code-model=large -o - %s | FileCheck %s @var8 = global i8 0 diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll index 31b9829d8a..accbadd4d4 100644 --- a/test/CodeGen/AArch64/compare-branch.ll +++ b/test/CodeGen/AArch64/compare-branch.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/complex-copy-noneon.ll b/test/CodeGen/AArch64/complex-copy-noneon.ll index 137ea5f0ff..f65b116128 100644 --- a/test/CodeGen/AArch64/complex-copy-noneon.ll +++ b/test/CodeGen/AArch64/complex-copy-noneon.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=-neon < %s ; The DAG combiner decided to use a vector load/store for this struct copy diff --git a/test/CodeGen/AArch64/concatvector-bugs.ll b/test/CodeGen/AArch64/concatvector-bugs.ll deleted file mode 100644 index 8d167e42c7..0000000000 --- a/test/CodeGen/AArch64/concatvector-bugs.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -; Bug: i8 type in FRP8 register but not registering with register class causes segmentation fault. -; Fix: Removed i8 type from FPR8 register class. - -; Not relevant to arm64. - -define void @test_concatvector_v8i8() { -entry.split: - br i1 undef, label %if.then, label %if.end - -if.then: ; preds = %entry.split - unreachable - -if.end: ; preds = %entry.split - br i1 undef, label %if.then9, label %if.end18 - -if.then9: ; preds = %if.end - unreachable - -if.end18: ; preds = %if.end - br label %for.body - -for.body: ; preds = %for.inc, %if.end18 - br i1 false, label %if.then30, label %for.inc - -if.then30: ; preds = %for.body - unreachable - -for.inc: ; preds = %for.body - br i1 undef, label %for.end, label %for.body - -for.end: ; preds = %for.inc - br label %for.body77 - -for.body77: ; preds = %for.body77, %for.end - br i1 undef, label %for.end106, label %for.body77 - -for.end106: ; preds = %for.body77 - br i1 undef, label %for.body130.us.us, label %stmt.for.body130.us.us - -stmt.for.body130.us.us: ; preds = %stmt.for.body130.us.us, %for.end106 - %_p_splat.us = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer - store <8 x i8> %_p_splat.us, <8 x i8>* undef, align 1 - br label %stmt.for.body130.us.us - -for.body130.us.us: ; preds = %for.body130.us.us, %for.end106 - br label %for.body130.us.us -} - -declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) - -define <8 x i16> @test_splat(i32 %l) nounwind { -; CHECK-LABEL: test_splat: -; CHECK: ret - %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 - %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) - %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %vec -} - - -define <8 x i16> @test_notsplat(<8 x i16> %a, <8 x i16> %b, i32 %l) nounwind { -; CHECK-LABEL: test_notsplat: -; CHECK: ret -entry: - %lhs = insertelement <1 x i32> undef, i32 %l, i32 0 - %shift = tail call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %lhs, i32 11) - %vec = shufflevector <1 x i16> %shift, <1 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0> - ret <8 x i16> %vec -} diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll index 2ee49a2f6e..96e11b12a1 100644 --- a/test/CodeGen/AArch64/cond-sel.ll +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -1,6 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @var32 = global i32 0 @@ -47,7 +45,6 @@ define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %r ; CHECK-NOFP-NOT: fcmp %val2 = select i1 %tst2, i64 9, i64 15 store i64 %val2, i64* @var64 -; CHECK-AARCH64: movz x[[CONST15:[0-9]+]], #15 ; CHECK-ARM64: orr w[[CONST15:[0-9]+]], wzr, #0xf ; CHECK: movz {{[wx]}}[[CONST9:[0-9]+]], #{{9|0x9}} ; CHECK: csel [[MAYBETRUE:x[0-9]+]], x[[CONST9]], x[[CONST15]], eq diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll index 23c06be3a1..f0f36bd5ce 100644 --- a/test/CodeGen/AArch64/cpus.ll +++ b/test/CodeGen/AArch64/cpus.ll @@ -1,9 +1,5 @@ ; This tests that llc accepts all valid AArch64 CPUs -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll index cd9a863bd6..832a01046b 100644 --- a/test/CodeGen/AArch64/directcond.ll +++ b/test/CodeGen/AArch64/directcond.ll @@ -1,13 +1,9 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { ; CHECK-LABEL: test_select_i32: %val = select i1 %bit, i32 %a, i32 %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: csel w0, w1, w2, ne @@ -17,8 +13,6 @@ define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { ; CHECK-LABEL: test_select_i64: %val = select i1 %bit, i64 %a, i64 %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: csel x0, x1, x2, ne @@ -28,8 +22,6 @@ define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { define float @test_select_float(i1 %bit, float %a, float %b) { ; CHECK-LABEL: test_select_float: %val = select i1 %bit, float %a, float %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: fcsel s0, s0, s1, ne ; CHECK-NOFP-NOT: fcsel @@ -39,8 +31,6 @@ define float @test_select_float(i1 %bit, float %a, float %b) { define double @test_select_double(i1 %bit, double %a, double %b) { ; CHECK-LABEL: test_select_double: %val = select i1 %bit, double %a, double %b -; CHECK-AARCH64: movz [[ONE:w[0-9]+]], #1 -; CHECK-AARCH64: tst w0, [[ONE]] ; CHECK-ARM64: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NOFP-NOT: fcsel diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll index 433ce209a7..22bd4a844e 100644 --- a/test/CodeGen/AArch64/dp-3source.ll +++ b/test/CodeGen/AArch64/dp-3source.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) { diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll index 41ef195199..b09ce3668d 100644 --- a/test/CodeGen/AArch64/dp1.ll +++ b/test/CodeGen/AArch64/dp1.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll index 391418d755..71b3169637 100644 --- a/test/CodeGen/AArch64/dp2.ll +++ b/test/CodeGen/AArch64/dp2.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64 | FileCheck %s @var32_0 = global i32 0 diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll index 314a94dda1..02a085acf0 100644 --- a/test/CodeGen/AArch64/eliminate-trunc.ll +++ b/test/CodeGen/AArch64/eliminate-trunc.ll @@ -1,11 +1,7 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-apple-ios7.0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-ARM64 ; Check trunc i64 operation is translated as a subregister access ; eliminating an i32 induction varible. -; CHECK-AARCH64: add {{x[0-9]+}}, {{x[0-9]+}}, #1 -; CHECK-AARCH64-NOT: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-AARCH64-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtw ; CHECK-ARM64-NOT: add {{x[0-9]+}}, {{x[0-9]+}}, #1 ; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll index 7c78f9a815..8f418455ff 100644 --- a/test/CodeGen/AArch64/extern-weak.ll +++ b/test/CodeGen/AArch64/extern-weak.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s --check-prefix=CHECK-AARCH64 -; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK-ARM64 ; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s @@ -9,10 +7,7 @@ define i32()* @foo() { ; The usual ADRP/ADD pair can't be used for a weak reference because it must ; evaluate to 0 if the symbol is undefined. We use a litpool entry. ret i32()* @var -; CHECK-AARCH64: .LCPI0_0: -; CHECK-AARCH64-NEXT: .xword var -; CHECK-AARCH64: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0] ; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:var ; CHECK-ARM64: ldr x0, [x[[ADDRHI]], :got_lo12:var] @@ -30,11 +25,7 @@ define i32()* @foo() { define i32* @bar() { %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 -; CHECK-AARCH64: .LCPI1_0: -; CHECK-AARCH64-NEXT: .xword arr_var -; CHECK-AARCH64: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0] -; CHECK-AARCH64: add x0, [[BASE]], #20 ; CHECK-ARM64: adrp x[[ADDRHI:[0-9]+]], :got:arr_var ; CHECK-ARM64: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var] @@ -54,8 +45,6 @@ define i32* @bar() { define i32* @wibble() { ret i32* @defined_weak_var -; CHECK-AARCH64: adrp [[BASE:x[0-9]+]], defined_weak_var -; CHECK-AARCH64: add x0, [[BASE]], #:lo12:defined_weak_var ; CHECK-ARM64: adrp [[BASE:x[0-9]+]], defined_weak_var ; CHECK-ARM64: add x0, [[BASE]], :lo12:defined_weak_var diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll index f066b59af2..1fc9387fec 100644 --- a/test/CodeGen/AArch64/extract.ll +++ b/test/CodeGen/AArch64/extract.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @ror_i64(i64 %in) { diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index 97410aa502..09a6ae3ccd 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefix=CHECK-ARM64 ; This test is designed to be run in the situation where the diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index fb9b4ac57e..b641de0ee2 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -1,6 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-ARM64-TAIL -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-ARM64 %s ; Without tailcallopt fastcc still means the caller cleans up the diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll index fe2c3260a8..c54e3e6294 100644 --- a/test/CodeGen/AArch64/fcmp.ll +++ b/test/CodeGen/AArch64/fcmp.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s declare void @bar(i32) diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll index 5d7c83ebfb..40800d00e5 100644 --- a/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0 diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll index a85b02538e..d549c7e784 100644 --- a/test/CodeGen/AArch64/fcvt-int.ll +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i32 @test_floattoi32(float %in) { diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll index cae6856d79..667c05d165 100644 --- a/test/CodeGen/AArch64/flags-multiuse.ll +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll index 5d11d3f0e2..8c02787a23 100644 --- a/test/CodeGen/AArch64/floatdp_1source.ll +++ b/test/CodeGen/AArch64/floatdp_1source.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @varhalf = global half 0.0 diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll index 0a0933e0e9..8e98b784bb 100644 --- a/test/CodeGen/AArch64/floatdp_2source.ll +++ b/test/CodeGen/AArch64/floatdp_2source.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu -mcpu=cyclone | FileCheck %s @varfloat = global float 0.0 diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll index ed9f36d948..07cbb4919e 100644 --- a/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 @varfloat = global float 0.0 @@ -13,7 +12,6 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, float 0.0, float 1.0 store float %val1, float* @varfloat -; CHECK-AARCH64: ldr s[[FLT0:[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI ; CHECK-ARM64: movi v[[FLT0:[0-9]+]].2d, #0 ; CHECK: fmov s[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi @@ -22,7 +20,6 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst2 = icmp sle i64 %lhs64, %rhs64 %val2 = select i1 %tst2, double 1.0, double 0.0 store double %val2, double* @vardouble -; CHECK-AARCH64: ldr d[[FLT0:[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI ; FLT0 is reused from above on ARM64. ; CHECK: fmov d[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{d[0-9]+}}, d[[FLT1]], d[[FLT0]], le diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll index e6da7c8762..53113b5912 100644 --- a/test/CodeGen/AArch64/fp-dp3.ll +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -fp-contract=fast | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -check-prefix=CHECK-NOFAST diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll index 91445e2c84..4b19deb976 100644 --- a/test/CodeGen/AArch64/fp128-folding.ll +++ b/test/CodeGen/AArch64/fp128-folding.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s declare void @bar(i8*, i8*, i32*) diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll deleted file mode 100644 index 56089e33e6..0000000000 --- a/test/CodeGen/AArch64/fp128.ll +++ /dev/null @@ -1,282 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; arm64 has a separate copy of this test. -@lhs = global fp128 zeroinitializer -@rhs = global fp128 zeroinitializer - -define fp128 @test_add() { -; CHECK-LABEL: test_add: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fadd fp128 %lhs, %rhs -; CHECK: bl __addtf3 - ret fp128 %val -} - -define fp128 @test_sub() { -; CHECK-LABEL: test_sub: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fsub fp128 %lhs, %rhs -; CHECK: bl __subtf3 - ret fp128 %val -} - -define fp128 @test_mul() { -; CHECK-LABEL: test_mul: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fmul fp128 %lhs, %rhs -; CHECK: bl __multf3 - ret fp128 %val -} - -define fp128 @test_div() { -; CHECK-LABEL: test_div: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - %val = fdiv fp128 %lhs, %rhs -; CHECK: bl __divtf3 - ret fp128 %val -} - -@var32 = global i32 0 -@var64 = global i64 0 - -define void @test_fptosi() { -; CHECK-LABEL: test_fptosi: - %val = load fp128* @lhs - - %val32 = fptosi fp128 %val to i32 - store i32 %val32, i32* @var32 -; CHECK: bl __fixtfsi - - %val64 = fptosi fp128 %val to i64 - store i64 %val64, i64* @var64 -; CHECK: bl __fixtfdi - - ret void -} - -define void @test_fptoui() { -; CHECK-LABEL: test_fptoui: - %val = load fp128* @lhs - - %val32 = fptoui fp128 %val to i32 - store i32 %val32, i32* @var32 -; CHECK: bl __fixunstfsi - - %val64 = fptoui fp128 %val to i64 - store i64 %val64, i64* @var64 -; CHECK: bl __fixunstfdi - - ret void -} - -define void @test_sitofp() { -; CHECK-LABEL: test_sitofp: - - %src32 = load i32* @var32 - %val32 = sitofp i32 %src32 to fp128 - store volatile fp128 %val32, fp128* @lhs -; CHECK: bl __floatsitf - - %src64 = load i64* @var64 - %val64 = sitofp i64 %src64 to fp128 - store volatile fp128 %val64, fp128* @lhs -; CHECK: bl __floatditf - - ret void -} - -define void @test_uitofp() { -; CHECK-LABEL: test_uitofp: - - %src32 = load i32* @var32 - %val32 = uitofp i32 %src32 to fp128 - store volatile fp128 %val32, fp128* @lhs -; CHECK: bl __floatunsitf - - %src64 = load i64* @var64 - %val64 = uitofp i64 %src64 to fp128 - store volatile fp128 %val64, fp128* @lhs -; CHECK: bl __floatunditf - - ret void -} - -define i1 @test_setcc1() { -; CHECK-LABEL: test_setcc1: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - -; Technically, everything after the call to __letf2 is redundant, but we'll let -; LLVM have its fun for now. - %val = fcmp ole fp128 %lhs, %rhs -; CHECK: bl __letf2 -; CHECK: cmp w0, #0 -; CHECK: cset w0, le - - ret i1 %val -; CHECK: ret -} - -define i1 @test_setcc2() { -; CHECK-LABEL: test_setcc2: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - -; Technically, everything after the call to __letf2 is redundant, but we'll let -; LLVM have its fun for now. - %val = fcmp ugt fp128 %lhs, %rhs -; CHECK: bl __gttf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[GT:w[0-9]+]], gt - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne - -; CHECK: orr w0, [[UNORDERED]], [[GT]] - - ret i1 %val -; CHECK: ret -} - -define i32 @test_br_cc() { -; CHECK-LABEL: test_br_cc: - - %lhs = load fp128* @lhs - %rhs = load fp128* @rhs -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] -; CHECK: ldr q1, [{{x[0-9]+}}, {{#?}}:lo12:rhs] - - ; olt == !uge, which LLVM unfortunately "optimizes" this to. - %cond = fcmp olt fp128 %lhs, %rhs -; CHECK: bl __getf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[OGE:w[0-9]+]], ge - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne - -; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] -; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] - br i1 %cond, label %iftrue, label %iffalse - -iftrue: - ret i32 42 -; CHECK-NEXT: BB# -; CHECK-NEXT: movz {{x0|w0}}, #42 -; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] - -iffalse: - ret i32 29 -; CHECK: [[RET29]]: -; CHECK-NEXT: movz {{x0|w0}}, #29 -; CHECK-NEXT: [[REALRET]]: -; CHECK: ret -} - -define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { -; CHECK-LABEL: test_select: - - %val = select i1 %cond, fp128 %lhs, fp128 %rhs - store fp128 %val, fp128* @lhs -; CHECK-AARCH64: cmp {{w[0-9]+}}, #0 -; CHECK-AARCH64: str q1, [sp] -; CHECK-ARM64: tst {{w[0-9]+}}, #0x1 -; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: BB# -; CHECK-AARCH64-NEXT: str q0, [sp] -; CHECK-ARM64-NEXT: orr v[[DEST:[0-9]+]].16b, v0.16b, v0.16b -; CHECK-NEXT: [[IFFALSE]]: -; CHECK-AARCH64-NEXT: ldr q[[DEST:[0-9]+]], [sp] -; CHECK: str q[[DEST]], [{{x[0-9]+}}, {{#?}}:lo12:lhs] - ret void -; CHECK: ret -} - -@varfloat = global float 0.0 -@vardouble = global double 0.0 - -define void @test_round() { -; CHECK-LABEL: test_round: - - %val = load fp128* @lhs - - %float = fptrunc fp128 %val to float - store float %float, float* @varfloat -; CHECK: bl __trunctfsf2 -; CHECK: str s0, [{{x[0-9]+}}, {{#?}}:lo12:varfloat] - - %double = fptrunc fp128 %val to double - store double %double, double* @vardouble -; CHECK: bl __trunctfdf2 -; CHECK: str d0, [{{x[0-9]+}}, {{#?}}:lo12:vardouble] - - ret void -} - -define void @test_extend() { -; CHECK-LABEL: test_extend: - - %val = load fp128* @lhs - - %float = load float* @varfloat - %fromfloat = fpext float %float to fp128 - store volatile fp128 %fromfloat, fp128* @lhs -; CHECK: bl __extendsftf2 -; CHECK: str q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] - - %double = load double* @vardouble - %fromdouble = fpext double %double to fp128 - store volatile fp128 %fromdouble, fp128* @lhs -; CHECK: bl __extenddftf2 -; CHECK: str q0, [{{x[0-9]+}}, {{#?}}:lo12:lhs] - - ret void -; CHECK: ret -} - -define fp128 @test_neg(fp128 %in) { -; CHECK: [[MINUS0:.LCPI[0-9]+_0]]: -; Make sure the weird hex constant below *is* -0.0 -; CHECK-NEXT: fp128 -0 - -; CHECK-LABEL: test_neg: - - ; Could in principle be optimized to fneg which we can't select, this makes - ; sure that doesn't happen. - %ret = fsub fp128 0xL00000000000000008000000000000000, %in -; CHECK-AARCH64: str q0, [sp, #-16] -; CHECK-AARCH64-NEXT: ldr q1, [sp], #16 -; CHECK-ARM64: orr v1.16b, v0.16b, v0.16b -; CHECK: ldr q0, [{{x[0-9]+}}, {{#?}}:lo12:[[MINUS0]]] -; CHECK: bl __subtf3 - - ret fp128 %ret -; CHECK: ret -} diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll index e5aafb5456..e279d5b009 100644 --- a/test/CodeGen/AArch64/fpimm.ll +++ b/test/CodeGen/AArch64/fpimm.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @varf32 = global float 0.0 diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll index 78fc13b37e..85d95e21c9 100644 --- a/test/CodeGen/AArch64/frameaddr.ll +++ b/test/CodeGen/AArch64/frameaddr.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i8* @t() nounwind { diff --git a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll index 584ce2844d..d69105eec3 100644 --- a/test/CodeGen/AArch64/free-zext.ll +++ b/test/CodeGen/AArch64/free-zext.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @test_free_zext(i8* %a, i16* %b) { diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 5b3e6c89db..129ab25c87 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,7 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s @@ -67,8 +63,6 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st %val0 = load volatile i32* %addr0 ; Some weird move means x0 is used for one access -; CHECK-AARCH64: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16 -; CHECK-AARCH64: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12] ; CHECK-ARM64: ldr [[REG32:w[0-9]+]], [sp, #28] store i32 %val0, i32* @var32 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32] @@ -166,9 +160,7 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, ; Beware as above: the offset would be different on big-endian ; machines if the first ldr were changed to use s-registers. ; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] -; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp] ; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] -; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat] ret void } @@ -196,7 +188,6 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24] ; Important point is that we address sp+24 for second dword -; CHECK-AARCH64: ldr {{x[0-9]+}}, [sp, #16] ; CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] ret void diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 807bffe38a..8cb5f97e88 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -1,7 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s @@ -93,10 +89,6 @@ define void @check_stack_args() { ; Want to check that the final double is passed in registers and ; that varstruct is passed on the stack. Rather dependent on how a ; memcpy gets created, but the following works for now. -; CHECK-AARCH64: mov x[[SPREG:[0-9]+]], sp -; CHECK-AARCH64-DAG: str {{w[0-9]+}}, [x[[SPREG]]] -; CHECK-AARCH64-DAG: str {{w[0-9]+}}, [x[[SPREG]], #12] -; CHECK-AARCH64-DAG: fmov d0, ; CHECK-ARM64-DAG: str {{q[0-9]+}}, [sp] ; CHECK-ARM64-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 @@ -112,9 +104,6 @@ define void @check_stack_args() { call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0, float -2.0, float -8.0, float 16.0, float 1.0, float 64.0) -; CHECK-AARCH64: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:.LCPI -; CHECK-AARCH64: mov x0, sp -; CHECK-AARCH64: str d[[STACKEDREG]], [x0] ; CHECK-ARM64: movz [[SIXTY_FOUR:w[0-9]+]], #0x4280, lsl #16 ; CHECK-ARM64: str [[SIXTY_FOUR]], [sp] @@ -142,9 +131,6 @@ define void @check_i128_align() { i32 42, i128 %val) ; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128] ; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] -; CHECK-AARCH64: mov x[[SPREG:[0-9]+]], sp -; CHECK-AARCH64: str [[I128HI]], [x[[SPREG]], #24] -; CHECK-AARCH64: str [[I128LO]], [x[[SPREG]], #16] ; CHECK-ARM64: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK-ARM64-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] ; CHECK: bl check_i128_stackalign diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll index 36b74e5a57..2bf4a2cbce 100644 --- a/test/CodeGen/AArch64/global-alignment.ll +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @var32 = global [3 x i32] zeroinitializer diff --git a/test/CodeGen/AArch64/global_merge_1.ll b/test/CodeGen/AArch64/global_merge_1.ll deleted file mode 100644 index e0587d6b90..0000000000 --- a/test/CodeGen/AArch64/global_merge_1.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s - -@m = internal global i32 0, align 4 -@n = internal global i32 0, align 4 - -define void @f1(i32 %a1, i32 %a2) { -; CHECK-LABEL: f1: -; CHECK: adrp x{{[0-9]+}}, _MergedGlobals -; CHECK-NOT: adrp - store i32 %a1, i32* @m, align 4 - store i32 %a2, i32* @n, align 4 - ret void -} - -; CHECK: .local _MergedGlobals -; CHECK: .comm _MergedGlobals,8,8 - diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll index 216bfef7d5..c23edaf436 100644 --- a/test/CodeGen/AArch64/got-abuse.ll +++ b/test/CodeGen/AArch64/got-abuse.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s ; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj -o - %s diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll index fb363a9591..a1b4d6f5a4 100644 --- a/test/CodeGen/AArch64/i128-align.ll +++ b/test/CodeGen/AArch64/i128-align.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-apple-ios7.0 -verify-machineinstrs -o - %s | FileCheck %s %struct = type { i32, i128, i8 } diff --git a/test/CodeGen/AArch64/i128-shift.ll b/test/CodeGen/AArch64/i128-shift.ll deleted file mode 100644 index bfc9e3c093..0000000000 --- a/test/CodeGen/AArch64/i128-shift.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; arm64 has its own version of this in long-shift.ll. We'll just use that. - -define i128 @test_i128_lsl(i128 %a, i32 %shift) { -; CHECK-LABEL: test_i128_lsl: - - %sh_prom = zext i32 %shift to i128 - %shl = shl i128 %a, %sh_prom - -; CHECK: movz [[SIXTYFOUR:x[0-9]+]], #64 -; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw -; CHECK-NEXT: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[REVSHAMT]] -; CHECK: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[SHAMT:x[0-9]+]] -; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] -; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 -; CHECK-NEXT: lsl [[TMP3:x[0-9]+]], [[LO]], [[EXTRASHAMT]] -; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 -; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], [[TMP3]], [[FALSEVAL]], ge -; CHECK-NEXT: lsl [[TMP4:x[0-9]+]], [[LO]], [[SHAMT]] -; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], xzr, [[TMP4]], ge - - ret i128 %shl -} - -define i128 @test_i128_shr(i128 %a, i32 %shift) { -; CHECK-LABEL: test_i128_shr: - - %sh_prom = zext i32 %shift to i128 - %shr = lshr i128 %a, %sh_prom - -; CHECK: movz [[SIXTYFOUR]], #64 -; CHECK-NEXT: sub [[REVSHAMT:x[0-9]+]], [[SIXTYFOUR]], [[SHAMT_32:w[0-9]+]], uxtw -; CHECK-NEXT: lsl [[TMP2:x[0-9]+]], [[HI:x[0-9]+]], [[REVSHAMT]] -; CHECK: lsr [[TMP1:x[0-9]+]], [[LO:x[0-9]+]], [[SHAMT:x[0-9]+]] -; CHECK-NEXT: orr [[FALSEVAL:x[0-9]+]], [[TMP1]], [[TMP2]] -; CHECK-NEXT: sub [[EXTRASHAMT:x[0-9]+]], [[SHAMT]], #64 -; CHECK-NEXT: lsr [[TRUEVAL:x[0-9]+]], [[HI]], [[EXTRASHAMT]] -; CHECK-NEXT: cmp [[EXTRASHAMT]], #0 -; CHECK-NEXT: csel [[RESULTLO:x[0-9]+]], [[TRUEVAL]], [[FALSEVAL]], ge -; CHECK-NEXT: lsr [[TMP3:x[0-9]+]], [[HI]], [[SHAMT]] -; CHECK-NEXT: csel [[RESULTHI:x[0-9]+]], xzr, [[TMP3]], ge - - ret i128 %shr -} diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll index 49443d2409..8320f3ab04 100644 --- a/test/CodeGen/AArch64/illegal-float-ops.ll +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s @varfloat = global float 0.0 diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll index 22b7cc5cf9..d3ed363821 100644 --- a/test/CodeGen/AArch64/init-array.ll +++ b/test/CodeGen/AArch64/init-array.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll index 91921d5aa3..7ca9ade9cc 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll index cc4558fa54..6ffc05dcbd 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-apple-ios7.0 -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll index 8200633924..6bc633814c 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-none-linux-gnu -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll index e7b8173f6a..3c2f60c1f8 100644 --- a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll +++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll @@ -1,4 +1,3 @@ -; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s ; RUN: not llc -mtriple=arm64-apple-ios7.0 -o - %s define void @foo() { diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll deleted file mode 100644 index 365453c5fe..0000000000 --- a/test/CodeGen/AArch64/inline-asm-constraints.ll +++ /dev/null @@ -1,137 +0,0 @@ -;RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -no-integrated-as < %s | FileCheck %s - -define i64 @test_inline_constraint_r(i64 %base, i32 %offset) { -; CHECK-LABEL: test_inline_constraint_r: - %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset) -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw - ret i64 %val -} - -define i16 @test_small_reg(i16 %lhs, i16 %rhs) { -; CHECK-LABEL: test_small_reg: - %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs) -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth - ret i16 %val -} - -define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) { -; CHECK-LABEL: test_inline_constraint_r_imm: - %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12) -; CHECK: movz [[FOUR:x[0-9]+]], #4 -; CHECK: movz [[TWELVE:w[0-9]+]], #12 -; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw - ret i64 %val -} - -; m is permitted to have a base/offset form. We don't do that -; currently though. -define i32 @test_inline_constraint_m(i32 *%ptr) { -; CHECK-LABEL: test_inline_constraint_m: - %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr) -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] - ret i32 %val -} - -@arr = global [8 x i32] zeroinitializer - -; Q should *never* have base/offset form even if given the chance. -define i32 @test_inline_constraint_Q(i32 *%ptr) { -; CHECK-LABEL: test_inline_constraint_Q: - %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1)) -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] - ret i32 %val -} - -@dump = global fp128 zeroinitializer - -define void @test_inline_constraint_w(<8 x i8> %vec64, <4 x float> %vec128, half %hlf, float %flt, double %dbl, fp128 %quad) { -; CHECK: test_inline_constraint_w: - call <8 x i8> asm sideeffect "add $0.8b, $1.8b, $1.8b", "=w,w"(<8 x i8> %vec64) - call <8 x i8> asm sideeffect "fadd $0.4s, $1.4s, $1.4s", "=w,w"(<4 x float> %vec128) -; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - - ; Arguably semantically dodgy to output "vN", but it's what GCC does - ; so purely for compatibility we want vector registers to be output. - call float asm sideeffect "fcvt ${0:s}, ${1:h}", "=w,w"(half undef) - call float asm sideeffect "fadd $0.2s, $0.2s, $0.2s", "=w,w"(float %flt) - call double asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(double %dbl) - call fp128 asm sideeffect "fadd $0.2d, $0.2d, $0.2d", "=w,w"(fp128 %quad) -; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}} -; CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -; CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - ret void -} - -define void @test_inline_constraint_I() { -; CHECK-LABEL: test_inline_constraint_I: - call void asm sideeffect "add x0, x0, $0", "I"(i32 0) - call void asm sideeffect "add x0, x0, $0", "I"(i64 4095) -; CHECK: add x0, x0, #0 -; CHECK: add x0, x0, #4095 - - ret void -} - -; Skip J because it's useless - -define void @test_inline_constraint_K() { -; CHECK-LABEL: test_inline_constraint_K: - call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa - call void asm sideeffect "and w0, w0, $0", "K"(i32 65535) -; CHECK: and w0, w0, #-1431655766 -; CHECK: and w0, w0, #65535 - - ret void -} - -define void @test_inline_constraint_L() { -; CHECK-LABEL: test_inline_constraint_L: - call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0xaaaaaaaa - call void asm sideeffect "and x0, x0, $0", "L"(i64 65535) -; CHECK: and x0, x0, #4294967296 -; CHECK: and x0, x0, #65535 - - ret void -} - -; Skip M and N because we don't support MOV pseudo-instructions yet. - -@var = global i32 0 - -define void @test_inline_constraint_S() { -; CHECK-LABEL: test_inline_constraint_S: - call void asm sideeffect "adrp x0, $0", "S"(i32* @var) - call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var) - call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var) -; CHECK: adrp x0, var -; CHECK: adrp x0, var -; CHECK: add x0, x0, #:lo12:var - ret void -} - -define i32 @test_inline_constraint_S_label(i1 %in) { -; CHECK-LABEL: test_inline_constraint_S_label: - call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc)) -; CHECK: adr x0, .Ltmp{{[0-9]+}} - br i1 %in, label %loc, label %loc2 -loc: - ret i32 0 -loc2: - ret i32 42 -} - -define void @test_inline_constraint_Y() { -; CHECK-LABEL: test_inline_constraint_Y: - call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0) -; CHECK: fcmp s0, #0.0 - ret void -} - -define void @test_inline_constraint_Z() { -; CHECK-LABEL: test_inline_constraint_Z: - call void asm sideeffect "cmp w0, $0", "Z"(i32 0) -; CHECK: cmp w0, #0 - ret void -} diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll deleted file mode 100644 index cb66335b10..0000000000 --- a/test/CodeGen/AArch64/inline-asm-modifiers.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -no-integrated-as < %s | FileCheck %s - -@var_simple = hidden global i32 0 -@var_got = global i32 0 -@var_tlsgd = thread_local global i32 0 -@var_tlsld = thread_local(localdynamic) global i32 0 -@var_tlsie = thread_local(initialexec) global i32 0 -@var_tlsle = thread_local(localexec) global i32 0 - -define void @test_inline_modifier_L() nounwind { -; CHECK-LABEL: test_inline_modifier_L: - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple) - call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got) - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd) - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld) - call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie) - call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle) -; CHECK: add x0, x0, #:lo12:var_simple -; CHECK: ldr x0, [x0, #:got_lo12:var_got] -; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd -; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld -; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie] -; CHECK: add x0, x0, #:tprel_lo12:var_tlsle - - call void asm sideeffect "add x0, x0, ${0:L}", "Si,~{x0}"(i32 64) - call void asm sideeffect "ldr x0, [x0, ${0:L}]", "Si,~{x0}"(i32 64) -; CHECK: add x0, x0, #64 -; CHECK: ldr x0, [x0, #64] - - ret void -} - -define void @test_inline_modifier_G() nounwind { -; CHECK-LABEL: test_inline_modifier_G: - call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld) - call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle) -; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 -; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 - - call void asm sideeffect "add x0, x0, ${0:G}", "Si,~{x0}"(i32 42) -; CHECK: add x0, x0, #42 - ret void -} - -define void @test_inline_modifier_A() nounwind { -; CHECK-LABEL: test_inline_modifier_A: - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple) - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got) - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd) - call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie) - ; N.b. All tprel and dtprel relocs are modified: lo12 or granules. -; CHECK: adrp x0, var_simple -; CHECK: adrp x0, :got:var_got -; CHECK: adrp x0, :tlsdesc:var_tlsgd -; CHECK: adrp x0, :gottprel:var_tlsie - - call void asm sideeffect "adrp x0, ${0:A}", "Si,~{x0}"(i32 40) -; CHECK: adrp x0, #40 - - ret void -} - -define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind { -; CHECK-LABEL: test_inline_modifier_wx: - call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small) - call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small) - call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small) -; CHECK: //APP -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - - call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big) - call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big) - call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big) -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - - call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0) - call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0) -; CHECK: add {{w[0-9]+}}, wzr, wzr -; CHECK: add {{x[0-9]+}}, xzr, xzr - - call i32 asm sideeffect "add ${0:w}, ${0:w}, ${1:w}", "=r,Ir,0"(i32 123, i32 %small) - call i64 asm sideeffect "add ${0:x}, ${0:x}, ${1:x}", "=r,Ir,0"(i32 456, i64 %big) -; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #123 -; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #456 - - ret void -} - -define void @test_inline_modifier_bhsdq() nounwind { -; CHECK-LABEL: test_inline_modifier_bhsdq: - call float asm sideeffect "ldr ${0:b}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:h}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:s}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:d}, [sp]", "=w"() - call float asm sideeffect "ldr ${0:q}, [sp]", "=w"() -; CHECK: ldr b0, [sp] -; CHECK: ldr h0, [sp] -; CHECK: ldr s0, [sp] -; CHECK: ldr d0, [sp] -; CHECK: ldr q0, [sp] - - call double asm sideeffect "ldr ${0:b}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:h}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:s}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:d}, [sp]", "=w"() - call double asm sideeffect "ldr ${0:q}, [sp]", "=w"() -; CHECK: ldr b0, [sp] -; CHECK: ldr h0, [sp] -; CHECK: ldr s0, [sp] -; CHECK: ldr d0, [sp] -; CHECK: ldr q0, [sp] - - call void asm sideeffect "fcmp b0, ${0:b}", "Yw"(float 0.0) - call void asm sideeffect "fcmp h0, ${0:h}", "Yw"(float 0.0) - call void asm sideeffect "fcmp s0, ${0:s}", "Yw"(float 0.0) - call void asm sideeffect "fcmp d0, ${0:d}", "Yw"(float 0.0) - call void asm sideeffect "fcmp q0, ${0:q}", "Yw"(float 0.0) -; CHECK: fcmp b0, #0 -; CHECK: fcmp h0, #0 -; CHECK: fcmp s0, #0 -; CHECK: fcmp d0, #0 -; CHECK: fcmp q0, #0 - - ret void -} - -define void @test_inline_modifier_c() nounwind { -; CHECK-LABEL: test_inline_modifier_c: - call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3) -; CHECK: adr x0, 3 - - ret void -} - -define void @test_inline_modifier_a() nounwind { -; CHECK-LABEL: test_inline_modifier_a: - call void asm sideeffect "prfm pldl1keep, ${0:a}", "r"(i32* @var_simple) -; CHECK: adrp [[VARHI:x[0-9]+]], var_simple -; CHECK: add x[[VARADDR:[0-9]+]], [[VARHI]], #:lo12:var_simple -; CHECK: prfm pldl1keep, [x[[VARADDR]]] - ret void -} - diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll index 87a42ba60a..a0fcafa451 100644 --- a/test/CodeGen/AArch64/jump-table.ll +++ b/test/CodeGen/AArch64/jump-table.ll @@ -1,6 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic <%s | FileCheck --check-prefix=CHECK-PIC %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -code-model=large -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -o - %s | FileCheck --check-prefix=CHECK-PIC %s diff --git a/test/CodeGen/AArch64/large-consts.ll b/test/CodeGen/AArch64/large-consts.ll index b1f98b9cf9..b5f6c32eef 100644 --- a/test/CodeGen/AArch64/large-consts.ll +++ b/test/CodeGen/AArch64/large-consts.ll @@ -1,14 +1,9 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s --check-prefix=CHECK-ARM64 ; Make sure the shift amount is encoded into the instructions by LLVM because ; it's not the linker's job to put it there. define double @foo() { -; CHECK-AARCH64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [A,A,0xe0'A',0xd2'A'] -; CHECK-AARCH64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [A,A,0xc0'A',0xf2'A'] -; CHECK-AARCH64: movk [[CPADDR]], #:abs_g1_nc:.LCPI0_0 // encoding: [A,A,0xa0'A',0xf2'A'] -; CHECK-AARCH64: movk [[CPADDR]], #:abs_g0_nc:.LCPI0_0 // encoding: [A,A,0x80'A',0xf2'A'] ; CHECK-ARM64: movz [[CPADDR:x[0-9]+]], #:abs_g3:.LCPI0_0 // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] ; CHECK-ARM64: movk [[CPADDR]], #:abs_g2_nc:.LCPI0_0 // encoding: [0bAAA01000,A,0b110AAAAA,0xf2] diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll deleted file mode 100644 index 79dc6487f1..0000000000 --- a/test/CodeGen/AArch64/large-frame.ll +++ /dev/null @@ -1,120 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s -; arm64 has a separate copy: aarch64-large-frame.ll (codegen was too different). -declare void @use_addr(i8*) - -@addr = global i8* null - -define void @test_bigframe() { -; CHECK-LABEL: test_bigframe: -; CHECK: .cfi_startproc - - %var1 = alloca i8, i32 20000000 - %var2 = alloca i8, i32 16 - %var3 = alloca i8, i32 20000000 -; CHECK: sub sp, sp, #496 -; CHECK: .cfi_def_cfa sp, 496 -; CHECK: str x30, [sp, #488] - ; Total adjust is 39999536 -; CHECK: movz [[SUBCONST:x[0-9]+]], #22576 -; CHECK: movk [[SUBCONST]], #610, lsl #16 -; CHECK: sub sp, sp, [[SUBCONST]] -; CHECK: .cfi_def_cfa sp, 40000032 -; CHECK: .cfi_offset x30, -8 - - ; Total offset is 20000024 -; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544 -; CHECK: movk [[VAR1OFFSET]], #305, lsl #16 -; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]] - store volatile i8* %var1, i8** @addr - - %var1plus2 = getelementptr i8* %var1, i32 2 - store volatile i8* %var1plus2, i8** @addr - -; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528 -; CHECK: movk [[VAR2OFFSET]], #305, lsl #16 -; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]] - store volatile i8* %var2, i8** @addr - - %var2plus2 = getelementptr i8* %var2, i32 2 - store volatile i8* %var2plus2, i8** @addr - - store volatile i8* %var3, i8** @addr - - %var3plus2 = getelementptr i8* %var3, i32 2 - store volatile i8* %var3plus2, i8** @addr - -; CHECK: movz [[ADDCONST:x[0-9]+]], #22576 -; CHECK: movk [[ADDCONST]], #610, lsl #16 -; CHECK: add sp, sp, [[ADDCONST]] -; CHECK: .cfi_endproc - ret void -} - -define void @test_mediumframe() { -; CHECK-LABEL: test_mediumframe: - %var1 = alloca i8, i32 1000000 - %var2 = alloca i8, i32 16 - %var3 = alloca i8, i32 1000000 -; CHECK: sub sp, sp, #496 -; CHECK: str x30, [sp, #488] -; CHECK: sub sp, sp, #688 -; CHECK-NEXT: sub sp, sp, #488, lsl #12 - - store volatile i8* %var1, i8** @addr -; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600 -; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12 - - %var1plus2 = getelementptr i8* %var1, i32 2 - store volatile i8* %var1plus2, i8** @addr -; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 - - store volatile i8* %var2, i8** @addr -; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584 -; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12 - - %var2plus2 = getelementptr i8* %var2, i32 2 - store volatile i8* %var2plus2, i8** @addr -; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 - - store volatile i8* %var3, i8** @addr - - %var3plus2 = getelementptr i8* %var3, i32 2 - store volatile i8* %var3plus2, i8** @addr - -; CHECK: add sp, sp, #688 -; CHECK: add sp, sp, #488, lsl #12 -; CHECK: ldr x30, [sp, #488] -; CHECK: add sp, sp, #496 - ret void -} - - -@bigspace = global [8 x i64] zeroinitializer - -; If temporary registers are allocated for adjustment, they should *not* clobber -; argument registers. -define void @test_tempallocation([8 x i64] %val) nounwind { -; CHECK-LABEL: test_tempallocation: - %var = alloca i8, i32 1000000 -; CHECK: sub sp, sp, - -; Make sure the prologue is reasonably efficient -; CHECK-NEXT: stp x29, x30, [sp, -; CHECK-NEXT: stp x25, x26, [sp, -; CHECK-NEXT: stp x23, x24, [sp, -; CHECK-NEXT: stp x21, x22, [sp, -; CHECK-NEXT: stp x19, x20, [sp, - -; Make sure we don't trash an argument register -; CHECK-NOT: movz {{x[0-7],}} -; CHECK: sub sp, sp, - -; CHECK-NOT: movz {{x[0-7],}} - -; CHECK: bl use_addr - call void @use_addr(i8* %var) - - store [8 x i64] %val, [8 x i64]* @bigspace - ret void -; CHECK: ret -} diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll index 2b42d8ec08..b13634ca70 100644 --- a/test/CodeGen/AArch64/ldst-regoffset.ll +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll index 36944ba9a8..d738cfdaa2 100644 --- a/test/CodeGen/AArch64/ldst-unscaledimm.ll +++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll index b3359b34f0..d6475f9042 100644 --- a/test/CodeGen/AArch64/ldst-unsignedimm.ll +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg deleted file mode 100644 index c6f83453ac..0000000000 --- a/test/CodeGen/AArch64/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -targets = set(config.root.targets_to_build.split()) -if 'AArch64' not in targets or 'ARM64' not in targets: - config.unsupported = True - diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll index 769a68bebc..6f9f3fc377 100644 --- a/test/CodeGen/AArch64/literal_pools_float.ll +++ b/test/CodeGen/AArch64/literal_pools_float.ll @@ -1,7 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -mcpu=cyclone | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-none-linux-gnu -code-model=large -mcpu=cyclone | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s diff --git a/test/CodeGen/AArch64/literal_pools_int.ll b/test/CodeGen/AArch64/literal_pools_int.ll deleted file mode 100644 index 33a73d58bb..0000000000 --- a/test/CodeGen/AArch64/literal_pools_int.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s -; arm64 does not use literal pools for integers so there is nothing to check. - -@var32 = global i32 0 -@var64 = global i64 0 - -define void @foo() { -; CHECK-LABEL: foo: - %val32 = load i32* @var32 - %val64 = load i64* @var64 - - %val32_lit32 = and i32 %val32, 123456785 - store volatile i32 %val32_lit32, i32* @var32 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]] - - %val64_lit32 = and i64 %val64, 305402420 - store volatile i64 %val64_lit32, i64* @var64 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]] - - %val64_lit32signed = and i64 %val64, -12345678 - store volatile i64 %val64_lit32signed, i64* @var64 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldrsw {{x[0-9]+}}, [x[[LITADDR]]] - - %val64_lit64 = and i64 %val64, 1234567898765432 - store volatile i64 %val64_lit64, i64* @var64 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] -; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] - -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]] -; CHECK-LARGE: ldr {{x[0-9]+}}, [x[[LITADDR]]] - - ret void -} diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll index 1a76d5bcc1..4518fa2102 100644 --- a/test/CodeGen/AArch64/local_vars.ll +++ b/test/CodeGen/AArch64/local_vars.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-AARCH64 %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP-ARM64 %s diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll index 3ae63ad16f..a5e4a9956d 100644 --- a/test/CodeGen/AArch64/logical-imm.ll +++ b/test/CodeGen/AArch64/logical-imm.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @var32 = global i32 0 diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll index 49b253bcfd..608d44fc9d 100644 --- a/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s @var1_32 = global i32 0 diff --git a/test/CodeGen/AArch64/mature-mc-support.ll b/test/CodeGen/AArch64/mature-mc-support.ll index 3359616fa8..2948da9f20 100644 --- a/test/CodeGen/AArch64/mature-mc-support.ll +++ b/test/CodeGen/AArch64/mature-mc-support.ll @@ -1,10 +1,8 @@ ; Test that inline assembly is parsed by the MC layer when MC support is mature ; (even when the output is assembly). -; RUN: not llc -mtriple=aarch64-pc-linux < %s > /dev/null 2> %t1 ; RUN: FileCheck %s < %t1 -; RUN: not llc -mtriple=aarch64-pc-linux -filetype=obj < %s > /dev/null 2> %t2 ; RUN: FileCheck %s < %t2 ; RUN: not llc -mtriple=arm64-pc-linux < %s > /dev/null 2> %t3 diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll deleted file mode 100644 index f80956e60f..0000000000 --- a/test/CodeGen/AArch64/misched-basic-A53.ll +++ /dev/null @@ -1,113 +0,0 @@ -; REQUIRES: asserts -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s -; arm64 now has a separate copy of this test. -; -; The Cortex-A53 machine model will cause the MADD instruction to be scheduled -; much higher than the ADD instructions in order to hide latency. When not -; specifying a subtarget, the MADD will remain near the end of the block. -; -; CHECK: ********** MI Scheduling ********** -; CHECK: main -; CHECK: *** Final schedule for BB#2 *** -; CHECK: SU(13) -; CHECK: MADDwwww -; CHECK: SU(4) -; CHECK: ADDwwi_lsl0_s -; CHECK: ********** INTERVALS ********** -@main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 -@main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 - -; Function Attrs: nounwind -define i32 @main() #0 { -entry: - %retval = alloca i32, align 4 - %x = alloca [8 x i32], align 4 - %y = alloca [8 x i32], align 4 - %i = alloca i32, align 4 - %xx = alloca i32, align 4 - %yy = alloca i32, align 4 - store i32 0, i32* %retval - %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) - %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) - store i32 0, i32* %xx, align 4 - store i32 0, i32* %yy, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %2 = load i32* %i, align 4 - %cmp = icmp slt i32 %2, 8 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %3 = load i32* %i, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom - %4 = load i32* %arrayidx, align 4 - %add = add nsw i32 %4, 1 - store i32 %add, i32* %xx, align 4 - %5 = load i32* %xx, align 4 - %add1 = add nsw i32 %5, 12 - store i32 %add1, i32* %xx, align 4 - %6 = load i32* %xx, align 4 - %add2 = add nsw i32 %6, 23 - store i32 %add2, i32* %xx, align 4 - %7 = load i32* %xx, align 4 - %add3 = add nsw i32 %7, 34 - store i32 %add3, i32* %xx, align 4 - %8 = load i32* %i, align 4 - %idxprom4 = sext i32 %8 to i64 - %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4 - %9 = load i32* %arrayidx5, align 4 - %10 = load i32* %yy, align 4 - %mul = mul nsw i32 %10, %9 - store i32 %mul, i32* %yy, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %11 = load i32* %i, align 4 - %inc = add nsw i32 %11, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %12 = load i32* %xx, align 4 - %13 = load i32* %yy, align 4 - %add6 = add nsw i32 %12, %13 - ret i32 %add6 -} - - -; The Cortex-A53 machine model will cause the FDIVvvv_42 to be raised to -; hide latency. Whereas normally there would only be a single FADDvvv_4s -; after it, this test checks to make sure there are more than one. -; -; CHECK: ********** MI Scheduling ********** -; CHECK: neon4xfloat:BB#0 -; CHECK: *** Final schedule for BB#0 *** -; CHECK: FDIVvvv_4S -; CHECK: FADDvvv_4S -; CHECK: FADDvvv_4S -; CHECK: ********** INTERVALS ********** -define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { - %tmp1 = fadd <4 x float> %A, %B; - %tmp2 = fadd <4 x float> %A, %tmp1; - %tmp3 = fadd <4 x float> %A, %tmp2; - %tmp4 = fadd <4 x float> %A, %tmp3; - %tmp5 = fadd <4 x float> %A, %tmp4; - %tmp6 = fadd <4 x float> %A, %tmp5; - %tmp7 = fadd <4 x float> %A, %tmp6; - %tmp8 = fadd <4 x float> %A, %tmp7; - %tmp9 = fdiv <4 x float> %A, %B; - %tmp10 = fadd <4 x float> %tmp8, %tmp9; - - ret <4 x float> %tmp10 -} - -; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 - -attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll index 876eb52df6..6fe000974d 100644 --- a/test/CodeGen/AArch64/movw-consts.ll +++ b/test/CodeGen/AArch64/movw-consts.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define i64 @test0() { @@ -10,49 +9,42 @@ define i64 @test0() { define i64 @test1() { ; CHECK-LABEL: test1: -; CHECK-AARCH64: movz x0, #1 ; CHECK-ARM64: orr w0, wzr, #0x1 ret i64 1 } define i64 @test2() { ; CHECK-LABEL: test2: -; CHECK-AARCH64: movz x0, #65535 ; CHECK-ARM64: orr w0, wzr, #0xffff ret i64 65535 } define i64 @test3() { ; CHECK-LABEL: test3: -; CHECK-AARCH64: movz x0, #1, lsl #16 ; CHECK-ARM64: orr w0, wzr, #0x10000 ret i64 65536 } define i64 @test4() { ; CHECK-LABEL: test4: -; CHECK-AARCH64: movz x0, #65535, lsl #16 ; CHECK-ARM64: orr w0, wzr, #0xffff0000 ret i64 4294901760 } define i64 @test5() { ; CHECK-LABEL: test5: -; CHECK-AARCH64: movz x0, #1, lsl #32 ; CHECK-ARM64: orr x0, xzr, #0x100000000 ret i64 4294967296 } define i64 @test6() { ; CHECK-LABEL: test6: -; CHECK-AARCH64: movz x0, #65535, lsl #32 ; CHECK-ARM64: orr x0, xzr, #0xffff00000000 ret i64 281470681743360 } define i64 @test7() { ; CHECK-LABEL: test7: -; CHECK-AARCH64: movz x0, #1, lsl #48 ; CHECK-ARM64: orr x0, xzr, #0x1000000000000 ret i64 281474976710656 } @@ -83,7 +75,6 @@ define i64 @test10() { define void @test11() { ; CHECK-LABEL: test11: -; CHECK-AARCH64: mov {{w[0-9]+}}, wzr ; CHECK-ARM64: str wzr store i32 0, i32* @var32 ret void @@ -91,7 +82,6 @@ define void @test11() { define void @test12() { ; CHECK-LABEL: test12: -; CHECK-AARCH64: movz {{w[0-9]+}}, #1 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x1 store i32 1, i32* @var32 ret void @@ -99,7 +89,6 @@ define void @test12() { define void @test13() { ; CHECK-LABEL: test13: -; CHECK-AARCH64: movz {{w[0-9]+}}, #65535 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff store i32 65535, i32* @var32 ret void @@ -107,7 +96,6 @@ define void @test13() { define void @test14() { ; CHECK-LABEL: test14: -; CHECK-AARCH64: movz {{w[0-9]+}}, #1, lsl #16 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0x10000 store i32 65536, i32* @var32 ret void @@ -115,7 +103,6 @@ define void @test14() { define void @test15() { ; CHECK-LABEL: test15: -; CHECK-AARCH64: movz {{w[0-9]+}}, #65535, lsl #16 ; CHECK-ARM64: orr {{w[0-9]+}}, wzr, #0xffff0000 store i32 4294901760, i32* @var32 ret void @@ -132,7 +119,6 @@ define i64 @test17() { ; CHECK-LABEL: test17: ; Mustn't MOVN w0 here. -; CHECK-AARCH64: movn x0, #2 ; CHECK-ARM64: orr x0, xzr, #0xfffffffffffffffd ret i64 -3 } diff --git a/test/CodeGen/AArch64/movw-shift-encoding.ll b/test/CodeGen/AArch64/movw-shift-encoding.ll index 8a0da4cb93..2fe9dd4516 100644 --- a/test/CodeGen/AArch64/movw-shift-encoding.ll +++ b/test/CodeGen/AArch64/movw-shift-encoding.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s --check-prefix=CHECK-ARM64 @var = global i32 0 @@ -8,10 +7,6 @@ define i32* @get_var() { ret i32* @var -; CHECK-AARCH64: movz x0, #:abs_g3:var // encoding: [A,A,0xe0'A',0xd2'A'] -; CHECK-AARCH64: movk x0, #:abs_g2_nc:var // encoding: [A,A,0xc0'A',0xf2'A'] -; CHECK-AARCH64: movk x0, #:abs_g1_nc:var // encoding: [A,A,0xa0'A',0xf2'A'] -; CHECK-AARCH64: movk x0, #:abs_g0_nc:var // encoding: [A,A,0x80'A',0xf2'A'] ; CHECK-ARM64: movz x0, #:abs_g3:var // encoding: [0bAAA00000,A,0b111AAAAA,0xd2] ; CHECK-ARM64: movk x0, #:abs_g2_nc:var // encoding: [0bAAA00000,A,0b110AAAAA,0xf2] diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll index 3b027f2d4f..0689fbdcc0 100644 --- a/test/CodeGen/AArch64/mul-lohi.ll +++ b/test/CodeGen/AArch64/mul-lohi.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s -; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s ; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s diff --git a/test/CodeGen/AArch64/named-reg-alloc.ll b/test/CodeGen/AArch64/named-reg-alloc.ll deleted file mode 100644 index 31d72f6be0..0000000000 --- a/test/CodeGen/AArch64/named-reg-alloc.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: not llc < %s -mtriple=aarch64-linux-gnueabi 2>&1 | FileCheck %s -; arm64 has separate copy of this test - -define i32 @get_stack() nounwind { -entry: -; FIXME: Include an allocatable-specific error message -; CHECK: Invalid register name global variable - %sp = call i32 @llvm.read_register.i32(metadata !0) - ret i32 %sp -} - -declare i32 @llvm.read_register.i32(metadata) nounwind - -!0 = metadata !{metadata !"x5\00"} diff --git a/test/CodeGen/AArch64/named-reg-notareg.ll b/test/CodeGen/AArch64/named-reg-notareg.ll deleted file mode 100644 index 66d013137c..0000000000 --- a/test/CodeGen/AArch64/named-reg-notareg.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: not llc < %s -mtriple=aarch64-linux-gnueabi 2>&1 | FileCheck %s -; arm64 has separate copy of this test - -define i32 @get_stack() nounwind { -entry: -; CHECK: Invalid register name global variable - %sp = call i32 @llvm.read_register.i32(metadata !0) - ret i32 %sp -} - -declare i32 @llvm.read_register.i32(metadata) nounwind - -!0 = metadata !{metadata !"notareg\00"} diff --git a/test/CodeGen/AArch64/neon-2velem-high.ll b/test/CodeGen/AArch64/neon-2velem-high.ll deleted file mode 100644 index ebdb5b7132..0000000000 --- a/test/CodeGen/AArch64/neon-2velem-high.ll +++ /dev/null @@ -1,331 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has copied test in its directory due to differing intrinsics. -declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) - -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) - -declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) - -define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { -; CHECK: test_vmull_high_n_s16: -; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - ret <4 x i32> %vmull15.i.i -} - -define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { -; CHECK: test_vmull_high_n_s32: -; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - ret <2 x i64> %vmull9.i.i -} - -define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { -; CHECK: test_vmull_high_n_u16: -; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - ret <4 x i32> %vmull15.i.i -} - -define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { -; CHECK: test_vmull_high_n_u32: -; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - ret <2 x i64> %vmull9.i.i -} - -define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { -; CHECK: test_vqdmull_high_n_s16: -; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 - %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - ret <4 x i32> %vqdmull15.i.i -} - -define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { -; CHECK: test_vqdmull_high_n_s32: -; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 - %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - ret <2 x i64> %vqdmull9.i.i -} - -define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlal_high_n_s16: -; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlal_high_n_s32: -; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlal_high_n_u16: -; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlal_high_n_u32: -; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vqdmlal_high_n_s16: -; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) - ret <4 x i32> %vqdmlal17.i.i -} - -define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vqdmlal_high_n_s32: -; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) - ret <2 x i64> %vqdmlal11.i.i -} - -define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlsl_high_n_s16: -; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlsl_high_n_s32: -; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vmlsl_high_n_u16: -; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vmlsl_high_n_u32: -; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { -; CHECK: test_vqdmlsl_high_n_s16: -; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 - %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 - %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 - %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 - %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) - %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) - ret <4 x i32> %vqdmlsl17.i.i -} - -define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { -; CHECK: test_vqdmlsl_high_n_s32: -; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 - %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 - %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) - %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) - ret <2 x i64> %vqdmlsl11.i.i -} - -define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { -; CHECK: test_vmul_n_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -entry: - %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 - %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 - %mul.i = fmul <2 x float> %vecinit1.i, %a - ret <2 x float> %mul.i -} - -define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { -; CHECK: test_vmulq_n_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -entry: - %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 - %mul.i = fmul <4 x float> %vecinit3.i, %a - ret <4 x float> %mul.i -} - -define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { -; CHECK: test_vmulq_n_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -entry: - %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 - %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 - %mul.i = fmul <2 x double> %vecinit1.i, %a - ret <2 x double> %mul.i -} - -define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { -; CHECK: test_vfma_n_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { -; CHECK: test_vfmaq_n_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { -; CHECK: test_vfms_n_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 - %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b - %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %vecinit1.i, <2 x float> %a) - ret <2 x float> %1 -} - -define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { -; CHECK: test_vfmsq_n_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] -entry: - %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 - %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 - %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 - %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 - %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b - %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) - ret <4 x float> %1 -} diff --git a/test/CodeGen/AArch64/neon-2velem.ll b/test/CodeGen/AArch64/neon-2velem.ll deleted file mode 100644 index b9d0e84f16..0000000000 --- a/test/CodeGen/AArch64/neon-2velem.ll +++ /dev/null @@ -1,2854 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has copied test in its directory due to differing intrinsics. - -declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>) - -declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>) - -declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>) - -declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) - -declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) - -declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) - -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmla_lane_s16: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlaq_lane_s16: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmla_lane_s32: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlaq_lane_s32: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmla_laneq_s16: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlaq_laneq_s16: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmla_laneq_s32: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlaq_laneq_s32: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmls_lane_s16: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsq_lane_s16: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmls_lane_s32: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsq_lane_s32: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmls_laneq_s16: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsq_laneq_s16: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmls_laneq_s32: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsq_laneq_s32: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_s16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_s16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_s32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_s32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_u16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_u16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_u32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_u32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_s16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_s16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_s32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_s32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_u16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_u16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_u32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_u32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfma_lane_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) - -define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmaq_lane_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) - -define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfma_laneq_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK: test_vfmaq_laneq_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfms_lane_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmsq_lane_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfms_laneq_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3> - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK: test_vfmsq_laneq_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { -; CHECK: test_vfmaq_lane_f64: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) - -define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK: test_vfmaq_laneq_f64: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { -; CHECK: test_vfmsq_lane_f64: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <1 x double> <double -0.000000e+00>, %v - %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK: test_vfmsq_laneq_f64: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v - %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1> - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmas_laneq_f32 -; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %extract = extractelement <4 x float> %v, i32 3 - %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) - ret float %0 -} - -declare float @llvm.fma.f32(float, float, float) - -define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { -; CHECK-LABEL: test_vfmsd_lane_f64 -; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %extract.rhs = extractelement <1 x double> %v, i32 0 - %extract = fsub double -0.000000e+00, %extract.rhs - %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) - ret double %0 -} - -declare double @llvm.fma.f64(double, double, double) - -define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { -; CHECK: test_vfmss_laneq_f32 -; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %extract.rhs = extractelement <4 x float> %v, i32 3 - %extract = fsub float -0.000000e+00, %extract.rhs - %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) - ret float %0 -} - -define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmsd_laneq_f64 -; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %extract.rhs = extractelement <2 x double> %v, i32 1 - %extract = fsub double -0.000000e+00, %extract.rhs - %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a) - ret double %0 -} - -define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_s16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_s32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_s16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_s32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_s16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_s32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_s16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_s32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_s16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_s32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_s16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_s32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_s16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_s32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_s16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_s32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_u16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_u32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_u16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_u32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_u16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_u32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_u16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_u32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_u16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_u32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_u16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_u32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_u16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_u32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_u16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_u32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_s16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_s32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_u16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_u32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_s16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_s32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_u16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_u32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_s16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_s32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_u16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_u32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_s16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_s32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_u16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_u32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_lane_s16: -; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_lane_s32: -; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_high_lane_s16: -; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_high_lane_s32: -; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_lane_s16: -; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_lane_s32: -; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_high_lane_s16: -; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_high_lane_s32: -; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_lane_s16: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_lane_s32: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_laneq_s16: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_laneq_s32: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_high_lane_s16: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_high_lane_s32: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_high_laneq_s16: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_high_laneq_s32: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulh_lane_s16: -; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqdmulh2.i -} - -define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulhq_lane_s16: -; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> - %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqdmulh2.i -} - -define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulh_lane_s32: -; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqdmulh2.i -} - -define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulhq_lane_s32: -; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqdmulh2.i -} - -define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulh_lane_s16: -; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqrdmulh2.i -} - -define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulhq_lane_s16: -; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqrdmulh2.i -} - -define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulh_lane_s32: -; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqrdmulh2.i -} - -define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulhq_lane_s32: -; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqrdmulh2.i -} - -define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmul_lane_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { -; CHECK: test_vmul_lane_f64: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %0 = bitcast <1 x double> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to double - %extract = extractelement <1 x double> %v, i32 0 - %2 = fmul double %1, %extract - %3 = insertelement <1 x double> undef, double %2, i32 0 - ret <1 x double> %3 -} - -define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulq_lane_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { -; CHECK: test_vmulq_lane_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x double> %shuffle, %a - ret <2 x double> %mul -} - -define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmul_laneq_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { -; CHECK: test_vmul_laneq_f64: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %0 = bitcast <1 x double> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to double - %extract = extractelement <2 x double> %v, i32 1 - %2 = fmul double %1, %extract - %3 = insertelement <1 x double> undef, double %2, i32 0 - ret <1 x double> %3 -} - -define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulq_laneq_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulq_laneq_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> - %mul = fmul <2 x double> %shuffle, %a - ret <2 x double> %mul -} - -define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmulx_lane_f32: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulxq_lane_f32: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { -; CHECK: test_vmulxq_lane_f64: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - -define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmulx_laneq_f32: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulxq_laneq_f32: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulxq_laneq_f64: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - -define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmla_lane_s16_0: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlaq_lane_s16_0: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmla_lane_s32_0: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlaq_lane_s32_0: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmla_laneq_s16_0: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %add = add <4 x i16> %mul, %a - ret <4 x i16> %add -} - -define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlaq_laneq_s16_0: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %add = add <8 x i16> %mul, %a - ret <8 x i16> %add -} - -define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmla_laneq_s32_0: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %add = add <2 x i32> %mul, %a - ret <2 x i32> %add -} - -define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlaq_laneq_s32_0: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %add = add <4 x i32> %mul, %a - ret <4 x i32> %add -} - -define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmls_lane_s16_0: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsq_lane_s16_0: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmls_lane_s32_0: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsq_lane_s32_0: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmls_laneq_s16_0: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %b - %sub = sub <4 x i16> %a, %mul - ret <4 x i16> %sub -} - -define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsq_laneq_s16_0: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %b - %sub = sub <8 x i16> %a, %mul - ret <8 x i16> %sub -} - -define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmls_laneq_s32_0: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %b - %sub = sub <2 x i32> %a, %mul - ret <2 x i32> %sub -} - -define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsq_laneq_s32_0: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %b - %sub = sub <4 x i32> %a, %mul - ret <4 x i32> %sub -} - -define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_s16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_s16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_s32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_s32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmul_lane_u16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmulq_lane_u16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmul_lane_u32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmulq_lane_u32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_s16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_s16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_s32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_s32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmul_laneq_u16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i16> %shuffle, %a - ret <4 x i16> %mul -} - -define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmulq_laneq_u16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer - %mul = mul <8 x i16> %shuffle, %a - ret <8 x i16> %mul -} - -define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmul_laneq_u32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %mul = mul <2 x i32> %shuffle, %a - ret <2 x i32> %mul -} - -define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmulq_laneq_u32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer - %mul = mul <4 x i32> %shuffle, %a - ret <4 x i32> %mul -} - -define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfma_lane_f32_0: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmaq_lane_f32_0: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfma_laneq_f32_0: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK: test_vfmaq_laneq_f32_0: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK: test_vfms_lane_f32_0: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK: test_vfmsq_lane_f32_0: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK: test_vfms_laneq_f32_0: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) - ret <2 x float> %0 -} - -define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK: test_vfmsq_laneq_f32_0: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v - %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer - %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) - ret <4 x float> %0 -} - -define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK: test_vfmaq_laneq_f64_0: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK: test_vfmsq_laneq_f64_0: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v - %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer - %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) - ret <2 x double> %0 -} - -define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_s16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_s32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_s16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_s32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_s16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_s32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_s16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_s32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_s16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_s32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_s16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_s32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_s16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_s32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_s16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_s32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_lane_u16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_lane_u32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_laneq_u16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_laneq_u32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlal_high_lane_u16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlal_high_lane_u32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlal_high_laneq_u16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %add = add <4 x i32> %vmull2.i, %a - ret <4 x i32> %add -} - -define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlal_high_laneq_u32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %add = add <2 x i64> %vmull2.i, %a - ret <2 x i64> %add -} - -define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_lane_u16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_lane_u32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_laneq_u16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_laneq_u32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vmlsl_high_lane_u16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vmlsl_high_lane_u32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { -; CHECK: test_vmlsl_high_laneq_u16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %sub = sub <4 x i32> %a, %vmull2.i - ret <4 x i32> %sub -} - -define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { -; CHECK: test_vmlsl_high_laneq_u32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %sub = sub <2 x i64> %a, %vmull2.i - ret <2 x i64> %sub -} - -define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_s16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_s32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_lane_u16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_lane_u32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_s16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_s32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vmull_high_lane_u16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vmull_high_lane_u32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_s16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_s32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_laneq_u16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_laneq_u32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_s16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_s32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vmull_high_laneq_u16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vmull_high_laneq_u32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vmull2.i -} - -define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_lane_s16_0: -; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_lane_s32_0: -; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlal_high_lane_s16_0: -; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlal_high_lane_s32_0: -; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_lane_s16_0: -; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_lane_s32_0: -; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { -; CHECK: test_vqdmlsl_high_lane_s16_0: -; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { -; CHECK: test_vqdmlsl_high_lane_s32_0: -; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_lane_s16_0: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_lane_s32_0: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_laneq_s16_0: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_laneq_s32_0: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmull_high_lane_s16_0: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmull_high_lane_s32_0: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { -; CHECK: test_vqdmull_high_laneq_s16_0: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { -; CHECK: test_vqdmull_high_laneq_s32_0: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulh_lane_s16_0: -; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqdmulh2.i -} - -define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqdmulhq_lane_s16_0: -; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqdmulh2.i -} - -define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulh_lane_s32_0: -; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqdmulh2.i -} - -define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqdmulhq_lane_s32_0: -; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqdmulh2.i -} - -define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulh_lane_s16_0: -; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) - ret <4 x i16> %vqrdmulh2.i -} - -define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { -; CHECK: test_vqrdmulhq_lane_s16_0: -; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer - %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) - ret <8 x i16> %vqrdmulh2.i -} - -define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulh_lane_s32_0: -; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer - %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) - ret <2 x i32> %vqrdmulh2.i -} - -define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { -; CHECK: test_vqrdmulhq_lane_s32_0: -; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer - %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) - ret <4 x i32> %vqrdmulh2.i -} - -define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmul_lane_f32_0: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulq_lane_f32_0: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmul_laneq_f32_0: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x float> %shuffle, %a - ret <2 x float> %mul -} - -define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { -; CHECK: test_vmul_laneq_f64_0: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %0 = bitcast <1 x double> %a to <8 x i8> - %1 = bitcast <8 x i8> %0 to double - %extract = extractelement <2 x double> %v, i32 0 - %2 = fmul double %1, %extract - %3 = insertelement <1 x double> undef, double %2, i32 0 - ret <1 x double> %3 -} - -define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulq_laneq_f32_0: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %mul = fmul <4 x float> %shuffle, %a - ret <4 x float> %mul -} - -define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulq_laneq_f64_0: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %mul = fmul <2 x double> %shuffle, %a - ret <2 x double> %mul -} - -define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { -; CHECK: test_vmulx_lane_f32_0: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { -; CHECK: test_vmulxq_lane_f32_0: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { -; CHECK: test_vmulxq_lane_f64_0: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - -define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { -; CHECK: test_vmulx_laneq_f32_0: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) - ret <2 x float> %vmulx2.i -} - -define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { -; CHECK: test_vmulxq_laneq_f32_0: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer - %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) - ret <4 x float> %vmulx2.i -} - -define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { -; CHECK: test_vmulxq_laneq_f64_0: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -entry: - %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer - %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) - ret <2 x double> %vmulx2.i -} - diff --git a/test/CodeGen/AArch64/neon-3vdiff.ll b/test/CodeGen/AArch64/neon-3vdiff.ll deleted file mode 100644 index dbe2a726b9..0000000000 --- a/test/CodeGen/AArch64/neon-3vdiff.ll +++ /dev/null @@ -1,1834 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy of this test in its directory. - -declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) - -declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) - -declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) - -declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) - -declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) - -declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) - -declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) - -declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) - -declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) - -declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) - -declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) - -declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) - -declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) - -declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) - -declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) - -declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) - -declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) - -declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vaddl_s8: -; CHECK: saddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = sext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vaddl_s16: -; CHECK: saddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = sext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vaddl_s32: -; CHECK: saddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = sext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vaddl_u8: -; CHECK: uaddl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vaddl_u16: -; CHECK: uaddl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = zext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vaddl_u32: -; CHECK: uaddl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = zext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vaddl_high_s8: -; CHECK: saddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %add.i = add <8 x i16> %0, %1 - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddl_high_s16: -; CHECK: saddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %add.i = add <4 x i32> %0, %1 - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddl_high_s32: -; CHECK: saddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %add.i = add <2 x i64> %0, %1 - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vaddl_high_u8: -; CHECK: uaddl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %add.i = add <8 x i16> %0, %1 - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddl_high_u16: -; CHECK: uaddl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %add.i = add <4 x i32> %0, %1 - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddl_high_u32: -; CHECK: uaddl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %add.i = add <2 x i64> %0, %1 - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vaddw_s8: -; CHECK: saddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vaddw_s16: -; CHECK: saddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vaddw_s32: -; CHECK: saddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vaddw_u8: -; CHECK: uaddw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %b to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vaddw_u16: -; CHECK: uaddw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %b to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vaddw_u32: -; CHECK: uaddw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %b to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vaddw_high_s8: -; CHECK: saddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %add.i = add <8 x i16> %0, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vaddw_high_s16: -; CHECK: saddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %add.i = add <4 x i32> %0, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vaddw_high_s32: -; CHECK: saddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %add.i = add <2 x i64> %0, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vaddw_high_u8: -; CHECK: uaddw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %add.i = add <8 x i16> %0, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vaddw_high_u16: -; CHECK: uaddw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %add.i = add <4 x i32> %0, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vaddw_high_u32: -; CHECK: uaddw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %add.i = add <2 x i64> %0, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsubl_s8: -; CHECK: ssubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = sext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsubl_s16: -; CHECK: ssubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = sext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsubl_s32: -; CHECK: ssubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = sext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsubl_u8: -; CHECK: usubl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %a to <8 x i16> - %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsubl_u16: -; CHECK: usubl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %a to <4 x i32> - %vmovl.i2.i = zext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsubl_u32: -; CHECK: usubl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %a to <2 x i64> - %vmovl.i2.i = zext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsubl_high_s8: -; CHECK: ssubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %sub.i = sub <8 x i16> %0, %1 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubl_high_s16: -; CHECK: ssubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %sub.i = sub <4 x i32> %0, %1 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubl_high_s32: -; CHECK: ssubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %sub.i = sub <2 x i64> %0, %1 - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsubl_high_u8: -; CHECK: usubl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16> - %sub.i = sub <8 x i16> %0, %1 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubl_high_u16: -; CHECK: usubl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32> - %sub.i = sub <4 x i32> %0, %1 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubl_high_u32: -; CHECK: usubl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64> - %sub.i = sub <2 x i64> %0, %1 - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vsubw_s8: -; CHECK: ssubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = sext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %a, %vmovl.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vsubw_s16: -; CHECK: ssubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = sext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %a, %vmovl.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vsubw_s32: -; CHECK: ssubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = sext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %a, %vmovl.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) { -; CHECK: test_vsubw_u8: -; CHECK: usubw {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8b -entry: - %vmovl.i.i = zext <8 x i8> %b to <8 x i16> - %sub.i = sub <8 x i16> %a, %vmovl.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) { -; CHECK: test_vsubw_u16: -; CHECK: usubw {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4h -entry: - %vmovl.i.i = zext <4 x i16> %b to <4 x i32> - %sub.i = sub <4 x i32> %a, %vmovl.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) { -; CHECK: test_vsubw_u32: -; CHECK: usubw {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2s -entry: - %vmovl.i.i = zext <2 x i32> %b to <2 x i64> - %sub.i = sub <2 x i64> %a, %vmovl.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vsubw_high_s8: -; CHECK: ssubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16> - %sub.i = sub <8 x i16> %a, %0 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vsubw_high_s16: -; CHECK: ssubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32> - %sub.i = sub <4 x i32> %a, %0 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vsubw_high_s32: -; CHECK: ssubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64> - %sub.i = sub <2 x i64> %a, %0 - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) { -; CHECK: test_vsubw_high_u8: -; CHECK: usubw2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.16b -entry: - %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> - %sub.i = sub <8 x i16> %a, %0 - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) { -; CHECK: test_vsubw_high_u16: -; CHECK: usubw2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.8h -entry: - %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> - %sub.i = sub <4 x i32> %a, %0 - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) { -; CHECK: test_vsubw_high_u32: -; CHECK: usubw2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.4s -entry: - %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> - %sub.i = sub <2 x i64> %a, %0 - ret <2 x i64> %sub.i -} - -define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_s16: -; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i = add <8 x i16> %a, %b - %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> - ret <8 x i8> %vaddhn2.i -} - -define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_s32: -; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i = add <4 x i32> %a, %b - %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> - %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> - ret <4 x i16> %vaddhn2.i -} - -define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_s64: -; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i = add <2 x i64> %a, %b - %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> - %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> - ret <2 x i32> %vaddhn2.i -} - -define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_u16: -; CHECK: addhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i = add <8 x i16> %a, %b - %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> - ret <8 x i8> %vaddhn2.i -} - -define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_u32: -; CHECK: addhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i = add <4 x i32> %a, %b - %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> - %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> - ret <4 x i16> %vaddhn2.i -} - -define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_u64: -; CHECK: addhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i = add <2 x i64> %a, %b - %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> - %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> - ret <2 x i32> %vaddhn2.i -} - -define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_high_s16: -; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i.i = add <8 x i16> %a, %b - %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_high_s32: -; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i.i = add <4 x i32> %a, %b - %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> - %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_high_s64: -; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i.i = add <2 x i64> %a, %b - %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> - %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vaddhn_high_u16: -; CHECK: addhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vaddhn.i.i = add <8 x i16> %a, %b - %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vaddhn_high_u32: -; CHECK: addhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vaddhn.i.i = add <4 x i32> %a, %b - %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> - %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vaddhn_high_u64: -; CHECK: addhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vaddhn.i.i = add <2 x i64> %a, %b - %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> - %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_s16: -; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vraddhn2.i -} - -define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_s32: -; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vraddhn2.i -} - -define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_s64: -; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vraddhn2.i -} - -define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_u16: -; CHECK: raddhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vraddhn2.i -} - -define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_u32: -; CHECK: raddhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vraddhn2.i -} - -define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_u64: -; CHECK: raddhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vraddhn2.i -} - -define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_high_s16: -; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_high_s32: -; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_high_s64: -; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vraddhn_high_u16: -; CHECK: raddhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vraddhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vraddhn_high_u32: -; CHECK: raddhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vraddhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vraddhn_high_u64: -; CHECK: raddhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vraddhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_s16: -; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i = sub <8 x i16> %a, %b - %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> - ret <8 x i8> %vsubhn2.i -} - -define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_s32: -; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i = sub <4 x i32> %a, %b - %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> - %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> - ret <4 x i16> %vsubhn2.i -} - -define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_s64: -; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i = sub <2 x i64> %a, %b - %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> - %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> - ret <2 x i32> %vsubhn2.i -} - -define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_u16: -; CHECK: subhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i = sub <8 x i16> %a, %b - %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> - ret <8 x i8> %vsubhn2.i -} - -define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_u32: -; CHECK: subhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i = sub <4 x i32> %a, %b - %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> - %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> - ret <4 x i16> %vsubhn2.i -} - -define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_u64: -; CHECK: subhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i = sub <2 x i64> %a, %b - %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> - %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> - ret <2 x i32> %vsubhn2.i -} - -define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_high_s16: -; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i.i = sub <8 x i16> %a, %b - %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_high_s32: -; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i.i = sub <4 x i32> %a, %b - %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> - %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_high_s64: -; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i.i = sub <2 x i64> %a, %b - %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> - %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsubhn_high_u16: -; CHECK: subhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vsubhn.i.i = sub <8 x i16> %a, %b - %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsubhn_high_u32: -; CHECK: subhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vsubhn.i.i = sub <4 x i32> %a, %b - %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> - %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsubhn_high_u64: -; CHECK: subhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vsubhn.i.i = sub <2 x i64> %a, %b - %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> - %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_s16: -; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vrsubhn2.i -} - -define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_s32: -; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vrsubhn2.i -} - -define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_s64: -; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vrsubhn2.i -} - -define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_u16: -; CHECK: rsubhn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - ret <8 x i8> %vrsubhn2.i -} - -define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_u32: -; CHECK: rsubhn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - ret <4 x i16> %vrsubhn2.i -} - -define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_u64: -; CHECK: rsubhn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - ret <2 x i32> %vrsubhn2.i -} - -define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_high_s16: -; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_high_s32: -; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_high_s64: -; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsubhn_high_u16: -; CHECK: rsubhn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %vrsubhn2.i.i = tail call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) - %0 = bitcast <8 x i8> %r to <1 x i64> - %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> - ret <16 x i8> %2 -} - -define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsubhn_high_u32: -; CHECK: rsubhn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsubhn2.i.i = tail call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) - %0 = bitcast <4 x i16> %r to <1 x i64> - %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> - ret <8 x i16> %2 -} - -define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsubhn_high_u64: -; CHECK: rsubhn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %vrsubhn2.i.i = tail call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) - %0 = bitcast <2 x i32> %r to <1 x i64> - %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> - %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> - %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> - ret <4 x i32> %2 -} - -define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vabdl_s8: -; CHECK: sabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) - %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i -} - -define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vabdl_s16: -; CHECK: sabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) - %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i -} - -define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vabdl_s32: -; CHECK: sabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) - %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i -} - -define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vabdl_u8: -; CHECK: uabdl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) - %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i -} - -define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vabdl_u16: -; CHECK: uabdl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) - %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i -} - -define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vabdl_u32: -; CHECK: uabdl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) - %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i -} - -define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vabal_s8: -; CHECK: sabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vabal_s16: -; CHECK: sabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vabal_s32: -; CHECK: sabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vabal_u8: -; CHECK: uabal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - %add.i = add <8 x i16> %vmovl.i.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vabal_u16: -; CHECK: uabal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - %add.i = add <4 x i32> %vmovl.i.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vabal_u32: -; CHECK: uabal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - %add.i = add <2 x i64> %vmovl.i.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vabdl_high_s8: -; CHECK: sabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i.i -} - -define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vabdl_high_s16: -; CHECK: sabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i.i -} - -define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vabdl_high_s32: -; CHECK: sabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i.i -} - -define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vabdl_high_u8: -; CHECK: uabdl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vabd.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16> - ret <8 x i16> %vmovl.i.i.i -} - -define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vabdl_high_u16: -; CHECK: uabdl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vabd2.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i = zext <4 x i16> %vabd2.i.i.i to <4 x i32> - ret <4 x i32> %vmovl.i.i.i -} - -define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vabdl_high_u32: -; CHECK: uabdl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vabd2.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64> - ret <2 x i64> %vmovl.i.i.i -} - -define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vabal_high_s8: -; CHECK: sabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> - %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vabal_high_s16: -; CHECK: sabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> - %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vabal_high_s32: -; CHECK: sabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> - %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vabal_high_u8: -; CHECK: uabal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vabd.i.i.i.i = tail call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16> - %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vabal_high_u16: -; CHECK: uabal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vabd2.i.i.i.i = tail call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <4 x i16> %vabd2.i.i.i.i to <4 x i32> - %add.i.i = add <4 x i32> %vmovl.i.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vabal_high_u32: -; CHECK: uabal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vabd2.i.i.i.i = tail call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vmovl.i.i.i.i = zext <2 x i32> %vabd2.i.i.i.i to <2 x i64> - %add.i.i = add <2 x i64> %vmovl.i.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vmull_s8: -; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) - ret <8 x i16> %vmull.i -} - -define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vmull_s16: -; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vmull_s32: -; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) - ret <2 x i64> %vmull2.i -} - -define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vmull_u8: -; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) - ret <8 x i16> %vmull.i -} - -define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vmull_u16: -; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) - ret <4 x i32> %vmull2.i -} - -define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vmull_u32: -; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b) - ret <2 x i64> %vmull2.i -} - -define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vmull_high_s8: -; CHECK: smull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - ret <8 x i16> %vmull.i.i -} - -define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vmull_high_s16: -; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - ret <4 x i32> %vmull2.i.i -} - -define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vmull_high_s32: -; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - ret <2 x i64> %vmull2.i.i -} - -define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vmull_high_u8: -; CHECK: umull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - ret <8 x i16> %vmull.i.i -} - -define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vmull_high_u16: -; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - ret <4 x i32> %vmull2.i.i -} - -define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vmull_high_u32: -; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - ret <2 x i64> %vmull2.i.i -} - -define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlal_s8: -; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) - %add.i = add <8 x i16> %vmull.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlal_s16: -; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) - %add.i = add <4 x i32> %vmull2.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlal_s32: -; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) - %add.i = add <2 x i64> %vmull2.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlal_u8: -; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) - %add.i = add <8 x i16> %vmull.i.i, %a - ret <8 x i16> %add.i -} - -define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlal_u16: -; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) - %add.i = add <4 x i32> %vmull2.i.i, %a - ret <4 x i32> %add.i -} - -define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlal_u32: -; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) - %add.i = add <2 x i64> %vmull2.i.i, %a - ret <2 x i64> %add.i -} - -define <8 x i16> @test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlal_high_s8: -; CHECK: smlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %add.i.i = add <8 x i16> %vmull.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlal_high_s16: -; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlal_high_s32: -; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlal_high_u8: -; CHECK: umlal2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %add.i.i = add <8 x i16> %vmull.i.i.i, %a - ret <8 x i16> %add.i.i -} - -define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlal_high_u16: -; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %add.i.i = add <4 x i32> %vmull2.i.i.i, %a - ret <4 x i32> %add.i.i -} - -define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlal_high_u32: -; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %add.i.i = add <2 x i64> %vmull2.i.i.i, %a - ret <2 x i64> %add.i.i -} - -define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlsl_s8: -; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) - %sub.i = sub <8 x i16> %a, %vmull.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlsl_s16: -; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) - %sub.i = sub <4 x i32> %a, %vmull2.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlsl_s32: -; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) - %sub.i = sub <2 x i64> %a, %vmull2.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vmlsl_u8: -; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) - %sub.i = sub <8 x i16> %a, %vmull.i.i - ret <8 x i16> %sub.i -} - -define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vmlsl_u16: -; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) - %sub.i = sub <4 x i32> %a, %vmull2.i.i - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vmlsl_u32: -; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) - %sub.i = sub <2 x i64> %a, %vmull2.i.i - ret <2 x i64> %sub.i -} - -define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlsl_high_s8: -; CHECK: smlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i - ret <8 x i16> %sub.i.i -} - -define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlsl_high_s16: -; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlsl_high_s32: -; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vmlsl_high_u8: -; CHECK: umlsl2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - %sub.i.i = sub <8 x i16> %a, %vmull.i.i.i - ret <8 x i16> %sub.i.i -} - -define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vmlsl_high_u16: -; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i - ret <4 x i32> %sub.i.i -} - -define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vmlsl_high_u32: -; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i - ret <2 x i64> %sub.i.i -} - -define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vqdmull_s16: -; CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) - ret <4 x i32> %vqdmull2.i -} - -define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vqdmull_s32: -; CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) - ret <2 x i64> %vqdmull2.i -} - -define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vqdmlal_s16: -; CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlal4.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) - ret <4 x i32> %vqdmlal4.i -} - -define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vqdmlal_s32: -; CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlal4.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) - ret <2 x i64> %vqdmlal4.i -} - -define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { -; CHECK: test_vqdmlsl_s16: -; CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -entry: - %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) - %vqdmlsl4.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) - ret <4 x i32> %vqdmlsl4.i -} - -define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { -; CHECK: test_vqdmlsl_s32: -; CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) - %vqdmlsl4.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) - ret <2 x i64> %vqdmlsl4.i -} - -define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vqdmull_high_s16: -; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vqdmull2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - ret <4 x i32> %vqdmull2.i.i -} - -define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vqdmull_high_s32: -; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vqdmull2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - ret <2 x i64> %vqdmull2.i.i -} - -define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vqdmlal_high_s16: -; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vqdmlal2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) - ret <4 x i32> %vqdmlal4.i.i -} - -define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vqdmlal_high_s32: -; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vqdmlal2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlal4.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) - ret <2 x i64> %vqdmlal4.i.i -} - -define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { -; CHECK: test_vqdmlsl_high_s16: -; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h -entry: - %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vqdmlsl2.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) - ret <4 x i32> %vqdmlsl4.i.i -} - -define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { -; CHECK: test_vqdmlsl_high_s32: -; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %vqdmlsl2.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) - %vqdmlsl4.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) - ret <2 x i64> %vqdmlsl4.i.i -} - -define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vmull_p8: -; CHECK: pmull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) - ret <8 x i16> %vmull.i -} - -define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vmull_high_p8: -; CHECK: pmull2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vmull.i.i = tail call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) - ret <8 x i16> %vmull.i.i -} - -define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { -; CHECK: test_vmull_p64 -; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d -entry: - %vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1 - %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 - ret i128 %vmull3.i -} - -define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { -; CHECK: test_vmull_high_p64 -; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -entry: - %0 = extractelement <2 x i64> %a, i32 1 - %1 = extractelement <2 x i64> %b, i32 1 - %vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0 - %vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0 - %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1 - %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128 - ret i128 %vmull3.i.i -} - -declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5 - - diff --git a/test/CodeGen/AArch64/neon-aba-abd.ll b/test/CodeGen/AArch64/neon-aba-abd.ll deleted file mode 100644 index 1fe52565af..0000000000 --- a/test/CodeGen/AArch64/neon-aba-abd.ll +++ /dev/null @@ -1,237 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has copied test in its own directory (different intrinsic names). - -declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uabd_v8i8: - %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uabd v0.8b, v0.8b, v1.8b - ret <8 x i8> %abd -} - -define <8 x i8> @test_uaba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uaba_v8i8: - %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) - %aba = add <8 x i8> %lhs, %abd -; CHECK: uaba v0.8b, v0.8b, v1.8b - ret <8 x i8> %aba -} - -define <8 x i8> @test_sabd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sabd_v8i8: - %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sabd v0.8b, v0.8b, v1.8b - ret <8 x i8> %abd -} - -define <8 x i8> @test_saba_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_saba_v8i8: - %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) - %aba = add <8 x i8> %lhs, %abd -; CHECK: saba v0.8b, v0.8b, v1.8b - ret <8 x i8> %aba -} - -declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uabd_v16i8: - %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uabd v0.16b, v0.16b, v1.16b - ret <16 x i8> %abd -} - -define <16 x i8> @test_uaba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uaba_v16i8: - %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) - %aba = add <16 x i8> %lhs, %abd -; CHECK: uaba v0.16b, v0.16b, v1.16b - ret <16 x i8> %aba -} - -define <16 x i8> @test_sabd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sabd_v16i8: - %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sabd v0.16b, v0.16b, v1.16b - ret <16 x i8> %abd -} - -define <16 x i8> @test_saba_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_saba_v16i8: - %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) - %aba = add <16 x i8> %lhs, %abd -; CHECK: saba v0.16b, v0.16b, v1.16b - ret <16 x i8> %aba -} - -declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uabd_v4i16: - %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uabd v0.4h, v0.4h, v1.4h - ret <4 x i16> %abd -} - -define <4 x i16> @test_uaba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uaba_v4i16: - %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) - %aba = add <4 x i16> %lhs, %abd -; CHECK: uaba v0.4h, v0.4h, v1.4h - ret <4 x i16> %aba -} - -define <4 x i16> @test_sabd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sabd_v4i16: - %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sabd v0.4h, v0.4h, v1.4h - ret <4 x i16> %abd -} - -define <4 x i16> @test_saba_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_saba_v4i16: - %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) - %aba = add <4 x i16> %lhs, %abd -; CHECK: saba v0.4h, v0.4h, v1.4h - ret <4 x i16> %aba -} - -declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uabd_v8i16: - %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uabd v0.8h, v0.8h, v1.8h - ret <8 x i16> %abd -} - -define <8 x i16> @test_uaba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uaba_v8i16: - %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) - %aba = add <8 x i16> %lhs, %abd -; CHECK: uaba v0.8h, v0.8h, v1.8h - ret <8 x i16> %aba -} - -define <8 x i16> @test_sabd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sabd_v8i16: - %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sabd v0.8h, v0.8h, v1.8h - ret <8 x i16> %abd -} - -define <8 x i16> @test_saba_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_saba_v8i16: - %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) - %aba = add <8 x i16> %lhs, %abd -; CHECK: saba v0.8h, v0.8h, v1.8h - ret <8 x i16> %aba -} - -declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uabd_v2i32: - %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uabd v0.2s, v0.2s, v1.2s - ret <2 x i32> %abd -} - -define <2 x i32> @test_uaba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uaba_v2i32: - %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) - %aba = add <2 x i32> %lhs, %abd -; CHECK: uaba v0.2s, v0.2s, v1.2s - ret <2 x i32> %aba -} - -define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sabd_v2i32: - %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sabd v0.2s, v0.2s, v1.2s - ret <2 x i32> %abd -} - -define <2 x i32> @test_sabd_v2i32_const() { -; CHECK: test_sabd_v2i32_const: -; CHECK: movi d1, #0xffffffff0000 -; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s - %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32( - <2 x i32> <i32 -2147483648, i32 2147450880>, - <2 x i32> <i32 -65536, i32 65535>) - ret <2 x i32> %1 -} - -define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_saba_v2i32: - %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) - %aba = add <2 x i32> %lhs, %abd -; CHECK: saba v0.2s, v0.2s, v1.2s - ret <2 x i32> %aba -} - -declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uabd_v4i32: - %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uabd v0.4s, v0.4s, v1.4s - ret <4 x i32> %abd -} - -define <4 x i32> @test_uaba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uaba_v4i32: - %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) - %aba = add <4 x i32> %lhs, %abd -; CHECK: uaba v0.4s, v0.4s, v1.4s - ret <4 x i32> %aba -} - -define <4 x i32> @test_sabd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sabd_v4i32: - %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sabd v0.4s, v0.4s, v1.4s - ret <4 x i32> %abd -} - -define <4 x i32> @test_saba_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_saba_v4i32: - %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) - %aba = add <4 x i32> %lhs, %abd -; CHECK: saba v0.4s, v0.4s, v1.4s - ret <4 x i32> %aba -} - -declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) - -define <2 x float> @test_fabd_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fabd_v2f32: - %abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fabd v0.2s, v0.2s, v1.2s - ret <2 x float> %abd -} - -declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) - -define <4 x float> @test_fabd_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fabd_v4f32: - %abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fabd v0.4s, v0.4s, v1.4s - ret <4 x float> %abd -} - -declare <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double>, <2 x double>) - -define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fabd_v2f64: - %abd = call <2 x double> @llvm.arm.neon.vabds.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fabd v0.2d, v0.2d, v1.2d - ret <2 x double> %abd -} diff --git a/test/CodeGen/AArch64/neon-across.ll b/test/CodeGen/AArch64/neon-across.ll deleted file mode 100644 index 98444d29a0..0000000000 --- a/test/CodeGen/AArch64/neon-across.ll +++ /dev/null @@ -1,473 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has copied test in its own directory. - -declare float @llvm.aarch64.neon.vminnmv(<4 x float>) - -declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>) - -declare float @llvm.aarch64.neon.vminv(<4 x float>) - -declare float @llvm.aarch64.neon.vmaxv(<4 x float>) - -declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32>) - -declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8>) - -declare <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16>) - -declare <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8>) - -declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32>) - -declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8>) - -declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32>) - -declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8>) - -declare <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16>) - -declare <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8>) - -define i16 @test_vaddlv_s8(<8 x i8> %a) { -; CHECK: test_vaddlv_s8: -; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i16> %saddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlv_s16(<4 x i16> %a) { -; CHECK: test_vaddlv_s16: -; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i32> %saddlv.i, i32 0 - ret i32 %0 -} - -define i16 @test_vaddlv_u8(<8 x i8> %a) { -; CHECK: test_vaddlv_u8: -; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i16> %uaddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlv_u16(<4 x i16> %a) { -; CHECK: test_vaddlv_u16: -; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i32> %uaddlv.i, i32 0 - ret i32 %0 -} - -define i16 @test_vaddlvq_s8(<16 x i8> %a) { -; CHECK: test_vaddlvq_s8: -; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %saddlv.i = tail call <1 x i16> @llvm.aarch64.neon.saddlv.v1i16.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i16> %saddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlvq_s16(<8 x i16> %a) { -; CHECK: test_vaddlvq_s16: -; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %saddlv.i = tail call <1 x i32> @llvm.aarch64.neon.saddlv.v1i32.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i32> %saddlv.i, i32 0 - ret i32 %0 -} - -define i64 @test_vaddlvq_s32(<4 x i32> %a) { -; CHECK: test_vaddlvq_s32: -; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %saddlv.i = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i64> %saddlv.i, i32 0 - ret i64 %0 -} - -define i16 @test_vaddlvq_u8(<16 x i8> %a) { -; CHECK: test_vaddlvq_u8: -; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %uaddlv.i = tail call <1 x i16> @llvm.aarch64.neon.uaddlv.v1i16.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i16> %uaddlv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddlvq_u16(<8 x i16> %a) { -; CHECK: test_vaddlvq_u16: -; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %uaddlv.i = tail call <1 x i32> @llvm.aarch64.neon.uaddlv.v1i32.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i32> %uaddlv.i, i32 0 - ret i32 %0 -} - -define i64 @test_vaddlvq_u32(<4 x i32> %a) { -; CHECK: test_vaddlvq_u32: -; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %uaddlv.i = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i64> %uaddlv.i, i32 0 - ret i64 %0 -} - -define i8 @test_vmaxv_s8(<8 x i8> %a) { -; CHECK: test_vmaxv_s8: -; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %smaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxv_s16(<4 x i16> %a) { -; CHECK: test_vmaxv_s16: -; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %smaxv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vmaxv_u8(<8 x i8> %a) { -; CHECK: test_vmaxv_u8: -; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %umaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxv_u16(<4 x i16> %a) { -; CHECK: test_vmaxv_u16: -; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %umaxv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vmaxvq_s8(<16 x i8> %a) { -; CHECK: test_vmaxvq_s8: -; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %smaxv.i = tail call <1 x i8> @llvm.aarch64.neon.smaxv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %smaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxvq_s16(<8 x i16> %a) { -; CHECK: test_vmaxvq_s16: -; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %smaxv.i = tail call <1 x i16> @llvm.aarch64.neon.smaxv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %smaxv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vmaxvq_s32(<4 x i32> %a) { -; CHECK: test_vmaxvq_s32: -; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %smaxv.i = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %smaxv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vmaxvq_u8(<16 x i8> %a) { -; CHECK: test_vmaxvq_u8: -; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %umaxv.i = tail call <1 x i8> @llvm.aarch64.neon.umaxv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %umaxv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vmaxvq_u16(<8 x i16> %a) { -; CHECK: test_vmaxvq_u16: -; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %umaxv.i = tail call <1 x i16> @llvm.aarch64.neon.umaxv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %umaxv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vmaxvq_u32(<4 x i32> %a) { -; CHECK: test_vmaxvq_u32: -; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %umaxv.i = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %umaxv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vminv_s8(<8 x i8> %a) { -; CHECK: test_vminv_s8: -; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %sminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminv_s16(<4 x i16> %a) { -; CHECK: test_vminv_s16: -; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %sminv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vminv_u8(<8 x i8> %a) { -; CHECK: test_vminv_u8: -; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %uminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminv_u16(<4 x i16> %a) { -; CHECK: test_vminv_u16: -; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %uminv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vminvq_s8(<16 x i8> %a) { -; CHECK: test_vminvq_s8: -; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %sminv.i = tail call <1 x i8> @llvm.aarch64.neon.sminv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %sminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminvq_s16(<8 x i16> %a) { -; CHECK: test_vminvq_s16: -; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %sminv.i = tail call <1 x i16> @llvm.aarch64.neon.sminv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %sminv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vminvq_s32(<4 x i32> %a) { -; CHECK: test_vminvq_s32: -; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sminv.i = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %sminv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vminvq_u8(<16 x i8> %a) { -; CHECK: test_vminvq_u8: -; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %uminv.i = tail call <1 x i8> @llvm.aarch64.neon.uminv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %uminv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vminvq_u16(<8 x i16> %a) { -; CHECK: test_vminvq_u16: -; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %uminv.i = tail call <1 x i16> @llvm.aarch64.neon.uminv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %uminv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vminvq_u32(<4 x i32> %a) { -; CHECK: test_vminvq_u32: -; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %uminv.i = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %uminv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vaddv_s8(<8 x i8> %a) { -; CHECK: test_vaddv_s8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddv_s16(<4 x i16> %a) { -; CHECK: test_vaddv_s16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vaddv_u8(<8 x i8> %a) { -; CHECK: test_vaddv_u8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v8i8(<8 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddv_u16(<4 x i16> %a) { -; CHECK: test_vaddv_u16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v4i16(<4 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i8 @test_vaddvq_s8(<16 x i8> %a) { -; CHECK: test_vaddvq_s8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddvq_s16(<8 x i16> %a) { -; CHECK: test_vaddvq_s16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddvq_s32(<4 x i32> %a) { -; CHECK: test_vaddvq_s32: -; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %vaddv.i, i32 0 - ret i32 %0 -} - -define i8 @test_vaddvq_u8(<16 x i8> %a) { -; CHECK: test_vaddvq_u8: -; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b -entry: - %vaddv.i = tail call <1 x i8> @llvm.aarch64.neon.vaddv.v1i8.v16i8(<16 x i8> %a) - %0 = extractelement <1 x i8> %vaddv.i, i32 0 - ret i8 %0 -} - -define i16 @test_vaddvq_u16(<8 x i16> %a) { -; CHECK: test_vaddvq_u16: -; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h -entry: - %vaddv.i = tail call <1 x i16> @llvm.aarch64.neon.vaddv.v1i16.v8i16(<8 x i16> %a) - %0 = extractelement <1 x i16> %vaddv.i, i32 0 - ret i16 %0 -} - -define i32 @test_vaddvq_u32(<4 x i32> %a) { -; CHECK: test_vaddvq_u32: -; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %vaddv.i = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32> %a) - %0 = extractelement <1 x i32> %vaddv.i, i32 0 - ret i32 %0 -} - -define float @test_vmaxvq_f32(<4 x float> %a) { -; CHECK: test_vmaxvq_f32: -; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a) - ret float %0 -} - -define float @test_vminvq_f32(<4 x float> %a) { -; CHECK: test_vminvq_f32: -; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a) - ret float %0 -} - -define float @test_vmaxnmvq_f32(<4 x float> %a) { -; CHECK: test_vmaxnmvq_f32: -; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a) - ret float %0 -} - -define float @test_vminnmvq_f32(<4 x float> %a) { -; CHECK: test_vminnmvq_f32: -; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s -entry: - %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a) - ret float %0 -} - diff --git a/test/CodeGen/AArch64/neon-add-pairwise.ll b/test/CodeGen/AArch64/neon-add-pairwise.ll deleted file mode 100644 index d304094adb..0000000000 --- a/test/CodeGen/AArch64/neon-add-pairwise.ll +++ /dev/null @@ -1,102 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has a copy of this test in its own directory. - -declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_addp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_addp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: addp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_addp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_addp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: addp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_addp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_addp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: addp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_addp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_addp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: addp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_addp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_addp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: addp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_addp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_addp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: addp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - -declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_addp_v2i64: - %val = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: addp v0.2d, v0.2d, v1.2d - ret <2 x i64> %val -} - -declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_faddp_v2f32: - %val = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: faddp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_faddp_v4f32: - %val = call <4 x float> @llvm.arm.neon.vpadd.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: faddp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_faddp_v2f64: - %val = call <2 x double> @llvm.arm.neon.vpadd.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: faddp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -define i32 @test_vaddv.v2i32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddv.v2i32 -; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-add-sub.ll b/test/CodeGen/AArch64/neon-add-sub.ll deleted file mode 100644 index eebad4df10..0000000000 --- a/test/CodeGen/AArch64/neon-add-sub.ll +++ /dev/null @@ -1,280 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy of this test - -define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = add <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = add <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = add <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = add <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = add <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = add <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = add <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fadd <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fadd <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fadd <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = sub <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = sub <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = sub <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = sub <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = sub <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = sub <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = sub <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fsub <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fsub <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fsub <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vadd_f64 -; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fadd <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmul_f64 -; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fmul <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vdiv_f64 -; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fdiv <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vmla_f64 -; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fmul <1 x double> %b, %c - %2 = fadd <1 x double> %1, %a - ret <1 x double> %2 -} - -define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vmls_f64 -; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fmul <1 x double> %b, %c - %2 = fsub <1 x double> %a, %1 - ret <1 x double> %2 -} - -define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vfms_f64 -; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fsub <1 x double> <double -0.000000e+00>, %b - %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a) - ret <1 x double> %2 -} - -define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) { -; CHECK-LABEL: test_vfma_f64 -; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vsub_f64 -; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fsub <1 x double> %a, %b - ret <1 x double> %1 -} - -define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vabd_f64 -; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmax_f64 -; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmin_f64 -; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vmaxnm_f64 -; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vminnm_f64 -; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vabs_f64(<1 x double> %a) { -; CHECK-LABEL: test_vabs_f64 -; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vneg_f64(<1 x double> %a) { -; CHECK-LABEL: test_vneg_f64 -; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}} - %1 = fsub <1 x double> <double -0.000000e+00>, %a - ret <1 x double> %1 -} - -declare <1 x double> @llvm.fabs.v1f64(<1 x double>) -declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>) - -define <1 x i8> @test_add_v1i8(<1 x i8> %a, <1 x i8> %b) { -;CHECK-LABEL: test_add_v1i8: -;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %c = add <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @test_add_v1i16(<1 x i16> %a, <1 x i16> %b) { -;CHECK-LABEL: test_add_v1i16: -;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %c = add <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @test_add_v1i32(<1 x i32> %a, <1 x i32> %b) { -;CHECK-LABEL: test_add_v1i32: -;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %c = add <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @test_sub_v1i8(<1 x i8> %a, <1 x i8> %b) { -;CHECK-LABEL: test_sub_v1i8: -;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %c = sub <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @test_sub_v1i16(<1 x i16> %a, <1 x i16> %b) { -;CHECK-LABEL: test_sub_v1i16: -;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %c = sub <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @test_sub_v1i32(<1 x i32> %a, <1 x i32> %b) { -;CHECK-LABEL: test_sub_v1i32: -;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %c = sub <1 x i32> %a, %b - ret <1 x i32> %c -} diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll index 25819b3793..b70cda3175 100644 --- a/test/CodeGen/AArch64/neon-bitcast.ll +++ b/test/CodeGen/AArch64/neon-bitcast.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; From <8 x i8> diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 228a6bfdf5..dfaf1f2517 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) { diff --git a/test/CodeGen/AArch64/neon-bsl.ll b/test/CodeGen/AArch64/neon-bsl.ll deleted file mode 100644 index 3182b700d8..0000000000 --- a/test/CodeGen/AArch64/neon-bsl.ll +++ /dev/null @@ -1,237 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has no equivalent vbsl intrinsic, always using the and/or IR. The final -; two tests are duplicated by ARM64's vselect.ll test. - -declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>) - -declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) - -declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - -declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) - -declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) - -declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) - -declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) - -declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>) - -declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) - -declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) - -declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) - -define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: test_vbsl_s8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -; CHECK-LABEL: test_vbsl_s16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) - %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8> - ret <8 x i8> %0 -} - -define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: test_vbsl_s32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) - ret <2 x i32> %vbsl3.i -} - -define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_vbsl_s64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) - ret <1 x i64> %vbsl3.i -} - -define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: test_vbsl_u8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) - ret <8 x i8> %vbsl.i -} - -define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -; CHECK-LABEL: test_vbsl_u16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) - ret <4 x i16> %vbsl3.i -} - -define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -; CHECK-LABEL: test_vbsl_u32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) - ret <2 x i32> %vbsl3.i -} - -define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_vbsl_u64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) - ret <1 x i64> %vbsl3.i -} - -define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) { -; CHECK-LABEL: test_vbsl_f32: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) - ret <2 x float> %vbsl3.i -} - -define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) { -; CHECK-LABEL: test_vbsl_f64: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = bitcast <1 x i64> %v1 to <1 x double> - %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3) - ret <1 x double> %vbsl3.i -} - -define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -; CHECK-LABEL: test_vbsl_p8: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) - ret <8 x i8> %vbsl.i -} - -define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -; CHECK-LABEL: test_vbsl_p16: -; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -entry: - %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) - ret <4 x i16> %vbsl3.i -} - -define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -; CHECK-LABEL: test_vbslq_s8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) - ret <16 x i8> %vbsl.i -} - -define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -; CHECK-LABEL: test_vbslq_s16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) - ret <8 x i16> %vbsl3.i -} - -define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -; CHECK-LABEL: test_vbslq_s32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) - ret <4 x i32> %vbsl3.i -} - -define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { -; CHECK-LABEL: test_vbslq_s64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) - ret <2 x i64> %vbsl3.i -} - -define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -; CHECK-LABEL: test_vbslq_u8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) - ret <16 x i8> %vbsl.i -} - -define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -; CHECK-LABEL: test_vbslq_u16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) - ret <8 x i16> %vbsl3.i -} - -define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -; CHECK-LABEL: test_vbslq_u32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) - ret <4 x i32> %vbsl3.i -} - -define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { -; CHECK-LABEL: test_vbslq_u64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) - ret <2 x i64> %vbsl3.i -} - -define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) { -; CHECK-LABEL: test_vbslq_f32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = bitcast <4 x i32> %v1 to <4 x float> - %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3) - ret <4 x float> %vbsl3.i -} - -define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -; CHECK-LABEL: test_vbslq_p8: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) - ret <16 x i8> %vbsl.i -} - -define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -; CHECK-LABEL: test_vbslq_p16: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) - ret <8 x i16> %vbsl3.i -} - -define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) { -; CHECK-LABEL: test_vbslq_f64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %vbsl.i = bitcast <2 x i64> %v1 to <2 x double> - %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3) - ret <2 x double> %vbsl3.i -} - -define <2 x double> @test_bsl_v2f64(<2 x i1> %v1, <2 x double> %v2, <2 x double> %v3) { -; CHECK-LABEL: test_bsl_v2f64: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %1 = select <2 x i1> %v1, <2 x double> %v2, <2 x double> %v3 - ret <2 x double> %1 -} - -define <4 x float> @test_bsl_v4f32(<4 x i1> %v1, <4 x float> %v2, <4 x float> %v3) { -; CHECK-LABEL: test_bsl_v4f32: -; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %1 = select <4 x i1> %v1, <4 x float> %v2, <4 x float> %v3 - ret <4 x float> %1 -} diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index e029cfcf33..b99057ebf2 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) { diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll deleted file mode 100644 index 096018ab88..0000000000 --- a/test/CodeGen/AArch64/neon-copy.ll +++ /dev/null @@ -1,1402 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has copied equivalent test due to intrinsics. - -define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}} - %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15 - ret <16 x i8> %tmp3 -} - -define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}} - %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6 - ret <8 x i16> %tmp3 -} - -define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}} - %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2 - ret <4 x i32> %tmp3 -} - -define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} - %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1 - ret <2 x i64> %tmp3 -} - -define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}} - %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5 - ret <8 x i8> %tmp3 -} - -define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} - %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3 - ret <4 x i16> %tmp3 -} - -define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} - %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 - ret <2 x i32> %tmp3 -} - -define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <16 x i8> %tmp1, i32 2 - %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 - ret <16 x i8> %tmp4 -} - -define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 - ret <8 x i16> %tmp4 -} - -define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 - ret <4 x i32> %tmp4 -} - -define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x i64> %tmp1, i32 0 - %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 - ret <2 x i64> %tmp4 -} - -define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x float> %tmp1, i32 2 - %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 - ret <4 x float> %tmp4 -} - -define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x double> %tmp1, i32 0 - %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 - ret <2 x double> %tmp4 -} - -define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <8 x i8> %tmp1, i32 2 - %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 - ret <16 x i8> %tmp4 -} - -define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 - ret <8 x i16> %tmp4 -} - -define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x i32> %tmp1, i32 1 - %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 - ret <4 x i32> %tmp4 -} - -define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x i64> %tmp1, i32 0 - %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 - ret <2 x i64> %tmp4 -} - -define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x float> %tmp1, i32 1 - %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 - ret <4 x float> %tmp4 -} - -define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x double> %tmp1, i32 0 - %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 - ret <2 x double> %tmp4 -} - -define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <16 x i8> %tmp1, i32 2 - %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 - ret <8 x i8> %tmp4 -} - -define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 - ret <4 x i16> %tmp4 -} - -define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 - ret <2 x i32> %tmp4 -} - -define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x i64> %tmp1, i32 0 - %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 - ret <1 x i64> %tmp4 -} - -define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x float> %tmp1, i32 2 - %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 - ret <2 x float> %tmp4 -} - -define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x double> %tmp1, i32 0 - %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 - ret <1 x double> %tmp4 -} - -define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2] - %tmp3 = extractelement <8 x i8> %tmp1, i32 2 - %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 - ret <8 x i8> %tmp4 -} - -define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 - ret <4 x i16> %tmp4 -} - -define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] - %tmp3 = extractelement <2 x i32> %tmp1, i32 0 - %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 - ret <2 x i32> %tmp4 -} - -define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x i64> %tmp1, i32 0 - %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 - ret <1 x i64> %tmp4 -} - -define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] - %tmp3 = extractelement <2 x float> %tmp1, i32 0 - %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 - ret <2 x float> %tmp4 -} - -define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] - %tmp3 = extractelement <1 x double> %tmp1, i32 0 - %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 - ret <1 x double> %tmp4 -} - -define i32 @umovw16b(<16 x i8> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8] - %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = zext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw8h(<8 x i16> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = zext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw4s(<4 x i32> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - ret i32 %tmp3 -} - -define i64 @umovx2d(<2 x i64> %tmp1) { -;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp3 = extractelement <2 x i64> %tmp1, i32 0 - ret i64 %tmp3 -} - -define i32 @umovw8b(<8 x i8> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] - %tmp3 = extractelement <8 x i8> %tmp1, i32 7 - %tmp4 = zext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw4h(<4 x i16> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = zext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @umovw2s(<2 x i32> %tmp1) { -;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x i32> %tmp1, i32 1 - ret i32 %tmp3 -} - -define i64 @umovx1d(<1 x i64> %tmp1) { -;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - %tmp3 = extractelement <1 x i64> %tmp1, i32 0 - ret i64 %tmp3 -} - -define i32 @smovw16b(<16 x i8> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8] - %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = sext i8 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovw8h(<8 x i16> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovx16b(<16 x i8> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8] - %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = sext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @smovx8h(<8 x i16> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i64 @smovx4s(<4 x i32> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] - %tmp3 = extractelement <4 x i32> %tmp1, i32 2 - %tmp4 = sext i32 %tmp3 to i64 - ret i64 %tmp4 -} - -define i32 @smovw8b(<8 x i8> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4] - %tmp3 = extractelement <8 x i8> %tmp1, i32 4 - %tmp4 = sext i8 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovw4h(<4 x i16> %tmp1) { -;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - %tmp5 = add i32 5, %tmp4 - ret i32 %tmp5 -} - -define i32 @smovx8b(<8 x i8> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6] - %tmp3 = extractelement <8 x i8> %tmp1, i32 6 - %tmp4 = sext i8 %tmp3 to i32 - ret i32 %tmp4 -} - -define i32 @smovx4h(<4 x i16> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] - %tmp3 = extractelement <4 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - ret i32 %tmp4 -} - -define i64 @smovx2s(<2 x i32> %tmp1) { -;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp3 = extractelement <2 x i32> %tmp1, i32 1 - %tmp4 = sext i32 %tmp3 to i64 - ret i64 %tmp4 -} - -define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] - %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7> - ret <8 x i8> %vset_lane -} - -define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6] - %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15> - ret <16 x i8> %vset_lane -} - -define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0] - %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0> - ret <8 x i8> %vset_lane -} - -define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) { -;CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15] - %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> - ret <16 x i8> %vset_lane -} - -define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} - %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0 - %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1 - %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2 - %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3 - %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4 - %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5 - %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6 - %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7 - ret <8 x i8> %vecinit7.i -} - -define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} - %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}} - %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1 - ret <2 x i32> %vecinit1.i -} - -define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 { -;CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0 - ret <1 x i64> %vecinit.i -} - -define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}} - %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0 - %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1 - %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2 - %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3 - %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4 - %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5 - %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6 - %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7 - %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8 - %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9 - %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10 - %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11 - %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12 - %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13 - %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14 - %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15 - ret <16 x i8> %vecinit15.i -} - -define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} - %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0 - %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1 - %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2 - %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3 - %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4 - %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5 - %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6 - %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7 - ret <8 x i16> %vecinit7.i -} - -define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} - %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0 - %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1 - %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2 - %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3 - ret <4 x i32> %vecinit3.i -} - -define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}} - %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0 - %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1 - ret <2 x i64> %vecinit1.i -} - -define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> - ret <8 x i8> %shuffle -} - -define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> - ret <4 x i16> %shuffle -} - -define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1> - ret <2 x i32> %shuffle -} - -define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 { -;CHECK: {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> - ret <16 x i8> %shuffle -} - -define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 { -;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> - ret <8 x i16> %shuffle -} - -define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 { -;CHECK: {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - ret <4 x i32> %shuffle -} - -define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 { -;CHECK: {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] - %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer - ret <2 x i64> %shuffle -} - -define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> - ret <8 x i8> %shuffle -} - -define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> - ret <4 x i16> %shuffle -} - -define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1> - ret <2 x i32> %shuffle -} - -define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5] - %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> - ret <16 x i8> %shuffle -} - -define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 { -;CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2] - %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> - ret <8 x i16> %shuffle -} - -define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] - %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - ret <4 x i32> %shuffle -} - -define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 { -;CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] - %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer - ret <2 x i64> %shuffle -} - -define i64 @test_bitcastv8i8toi64(<8 x i8> %in) { -; CHECK-LABEL: test_bitcastv8i8toi64: - %res = bitcast <8 x i8> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv4i16toi64(<4 x i16> %in) { -; CHECK-LABEL: test_bitcastv4i16toi64: - %res = bitcast <4 x i16> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv2i32toi64(<2 x i32> %in) { -; CHECK-LABEL: test_bitcastv2i32toi64: - %res = bitcast <2 x i32> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv2f32toi64(<2 x float> %in) { -; CHECK-LABEL: test_bitcastv2f32toi64: - %res = bitcast <2 x float> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv1i64toi64(<1 x i64> %in) { -; CHECK-LABEL: test_bitcastv1i64toi64: - %res = bitcast <1 x i64> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define i64 @test_bitcastv1f64toi64(<1 x double> %in) { -; CHECK-LABEL: test_bitcastv1f64toi64: - %res = bitcast <1 x double> %in to i64 -; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} - ret i64 %res -} - -define <8 x i8> @test_bitcasti64tov8i8(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov8i8: - %res = bitcast i64 %in to <8 x i8> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <8 x i8> %res -} - -define <4 x i16> @test_bitcasti64tov4i16(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov4i16: - %res = bitcast i64 %in to <4 x i16> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <4 x i16> %res -} - -define <2 x i32> @test_bitcasti64tov2i32(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov2i32: - %res = bitcast i64 %in to <2 x i32> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <2 x i32> %res -} - -define <2 x float> @test_bitcasti64tov2f32(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov2f32: - %res = bitcast i64 %in to <2 x float> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <2 x float> %res -} - -define <1 x i64> @test_bitcasti64tov1i64(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov1i64: - %res = bitcast i64 %in to <1 x i64> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <1 x i64> %res -} - -define <1 x double> @test_bitcasti64tov1f64(i64 %in) { -; CHECK-LABEL: test_bitcasti64tov1f64: - %res = bitcast i64 %in to <1 x double> -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} - ret <1 x double> %res -} - -define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { -; CHECK-LABEL: test_bitcastv8i8tov1f64: -; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <8 x i8> zeroinitializer, %a - %1 = bitcast <8 x i8> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { -; CHECK-LABEL: test_bitcastv4i16tov1f64: -; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <4 x i16> zeroinitializer, %a - %1 = bitcast <4 x i16> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { -; CHECK-LABEL: test_bitcastv2i32tov1f64: -; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <2 x i32> zeroinitializer, %a - %1 = bitcast <2 x i32> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1i64tov1f64: -; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = sub <1 x i64> zeroinitializer, %a - %1 = bitcast <1 x i64> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 { -; CHECK-LABEL: test_bitcastv2f32tov1f64: -; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -; CHECK-NEXT: fcvtzs {{d[0-9]+}}, {{d[0-9]+}} - %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a - %1 = bitcast <2 x float> %sub.i to <1 x double> - %vcvt.i = fptosi <1 x double> %1 to <1 x i64> - ret <1 x i64> %vcvt.i -} - -define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov8i8: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <8 x i8> - %sub.i = sub <8 x i8> zeroinitializer, %1 - ret <8 x i8> %sub.i -} - -define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov4i16: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <4 x i16> - %sub.i = sub <4 x i16> zeroinitializer, %1 - ret <4 x i16> %sub.i -} - -define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov2i32: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <2 x i32> - %sub.i = sub <2 x i32> zeroinitializer, %1 - ret <2 x i32> %sub.i -} - -define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov1i64: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}} - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <1 x i64> - %sub.i = sub <1 x i64> zeroinitializer, %1 - ret <1 x i64> %sub.i -} - -define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 { -; CHECK-LABEL: test_bitcastv1f64tov2f32: -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %vcvt.i = sitofp <1 x i64> %a to <1 x double> - %1 = bitcast <1 x double> %vcvt.i to <2 x float> - %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1 - ret <2 x float> %sub.i -} - -; Test insert element into an undef vector -define <8 x i8> @scalar_to_vector.v8i8(i8 %a) { -; CHECK-LABEL: scalar_to_vector.v8i8: -; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} - %b = insertelement <8 x i8> undef, i8 %a, i32 0 - ret <8 x i8> %b -} - -define <16 x i8> @scalar_to_vector.v16i8(i8 %a) { -; CHECK-LABEL: scalar_to_vector.v16i8: -; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} - %b = insertelement <16 x i8> undef, i8 %a, i32 0 - ret <16 x i8> %b -} - -define <4 x i16> @scalar_to_vector.v4i16(i16 %a) { -; CHECK-LABEL: scalar_to_vector.v4i16: -; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} - %b = insertelement <4 x i16> undef, i16 %a, i32 0 - ret <4 x i16> %b -} - -define <8 x i16> @scalar_to_vector.v8i16(i16 %a) { -; CHECK-LABEL: scalar_to_vector.v8i16: -; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} - %b = insertelement <8 x i16> undef, i16 %a, i32 0 - ret <8 x i16> %b -} - -define <2 x i32> @scalar_to_vector.v2i32(i32 %a) { -; CHECK-LABEL: scalar_to_vector.v2i32: -; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} - %b = insertelement <2 x i32> undef, i32 %a, i32 0 - ret <2 x i32> %b -} - -define <4 x i32> @scalar_to_vector.v4i32(i32 %a) { -; CHECK-LABEL: scalar_to_vector.v4i32: -; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} - %b = insertelement <4 x i32> undef, i32 %a, i32 0 - ret <4 x i32> %b -} - -define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { -; CHECK-LABEL: scalar_to_vector.v2i64: -; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} - %b = insertelement <2 x i64> undef, i64 %a, i32 0 - ret <2 x i64> %b -} - -define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { -; CHECK-LABEL: testDUP.v1i8: -; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} - %b = extractelement <1 x i8> %a, i32 0 - %c = insertelement <8 x i8> undef, i8 %b, i32 0 - %d = insertelement <8 x i8> %c, i8 %b, i32 1 - %e = insertelement <8 x i8> %d, i8 %b, i32 2 - %f = insertelement <8 x i8> %e, i8 %b, i32 3 - %g = insertelement <8 x i8> %f, i8 %b, i32 4 - %h = insertelement <8 x i8> %g, i8 %b, i32 5 - %i = insertelement <8 x i8> %h, i8 %b, i32 6 - %j = insertelement <8 x i8> %i, i8 %b, i32 7 - ret <8 x i8> %j -} - -define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { -; CHECK-LABEL: testDUP.v1i16: -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} - %b = extractelement <1 x i16> %a, i32 0 - %c = insertelement <8 x i16> undef, i16 %b, i32 0 - %d = insertelement <8 x i16> %c, i16 %b, i32 1 - %e = insertelement <8 x i16> %d, i16 %b, i32 2 - %f = insertelement <8 x i16> %e, i16 %b, i32 3 - %g = insertelement <8 x i16> %f, i16 %b, i32 4 - %h = insertelement <8 x i16> %g, i16 %b, i32 5 - %i = insertelement <8 x i16> %h, i16 %b, i32 6 - %j = insertelement <8 x i16> %i, i16 %b, i32 7 - ret <8 x i16> %j -} - -define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { -; CHECK-LABEL: testDUP.v1i32: -; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} - %b = extractelement <1 x i32> %a, i32 0 - %c = insertelement <4 x i32> undef, i32 %b, i32 0 - %d = insertelement <4 x i32> %c, i32 %b, i32 1 - %e = insertelement <4 x i32> %d, i32 %b, i32 2 - %f = insertelement <4 x i32> %e, i32 %b, i32 3 - ret <4 x i32> %f -} - -define <8 x i8> @getl(<16 x i8> %x) #0 { -; CHECK-LABEL: getl: -; CHECK: ret - %vecext = extractelement <16 x i8> %x, i32 0 - %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <16 x i8> %x, i32 1 - %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <16 x i8> %x, i32 2 - %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <16 x i8> %x, i32 3 - %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <16 x i8> %x, i32 4 - %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <16 x i8> %x, i32 5 - %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <16 x i8> %x, i32 6 - %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <16 x i8> %x, i32 7 - %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7 - ret <8 x i8> %vecinit14 -} - -define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) { -; CHECK-LABEL: test_dup_v2i32_v4i16: -; CHECK: dup v0.4h, v0.h[2] -entry: - %x = extractelement <2 x i32> %a, i32 1 - %vget_lane = trunc i32 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) { -; CHECK-LABEL: test_dup_v4i32_v8i16: -; CHECK: dup v0.8h, v0.h[6] -entry: - %x = extractelement <4 x i32> %a, i32 3 - %vget_lane = trunc i32 %x to i16 - %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 - %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 - %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 - %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 - ret <8 x i16> %vecinit7.i -} - -define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) { -; CHECK-LABEL: test_dup_v1i64_v4i16: -; CHECK: dup v0.4h, v0.h[0] -entry: - %x = extractelement <1 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) { -; CHECK-LABEL: test_dup_v1i64_v2i32: -; CHECK: dup v0.2s, v0.s[0] -entry: - %x = extractelement <1 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i32 - %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 - ret <2 x i32> %vecinit1.i -} - -define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v8i16: -; CHECK: dup v0.8h, v0.h[4] -entry: - %x = extractelement <2 x i64> %a, i32 1 - %vget_lane = trunc i64 %x to i16 - %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 - %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 - %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 - %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 - ret <8 x i16> %vecinit7.i -} - -define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v4i32: -; CHECK: dup v0.4s, v0.s[2] -entry: - %x = extractelement <2 x i64> %a, i32 1 - %vget_lane = trunc i64 %x to i32 - %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 - ret <4 x i32> %vecinit3.i -} - -define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { -; CHECK-LABEL: test_dup_v4i32_v4i16: -; CHECK: dup v0.4h, v0.h[2] -entry: - %x = extractelement <4 x i32> %a, i32 1 - %vget_lane = trunc i32 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v4i16: -; CHECK: dup v0.4h, v0.h[0] -entry: - %x = extractelement <2 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i16 - %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 - %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 - %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 - %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 - ret <4 x i16> %vecinit3.i -} - -define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { -; CHECK-LABEL: test_dup_v2i64_v2i32: -; CHECK: dup v0.2s, v0.s[0] -entry: - %x = extractelement <2 x i64> %a, i32 0 - %vget_lane = trunc i64 %x to i32 - %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 - ret <2 x i32> %vecinit1.i -} - - -define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { -; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32: -; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s -; CHECK-NEXT: ret -entry: - %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - %1 = insertelement <1 x float> undef, float %0, i32 0 - %2 = extractelement <1 x float> %1, i32 0 - %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 - ret <2 x float> %vecinit1.i -} - -define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { -; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32: -; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s -; CHECK-NEXT: ret -entry: - %0 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - %1 = insertelement <1 x float> undef, float %0, i32 0 - %2 = extractelement <1 x float> %1, i32 0 - %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 - ret <4 x float> %vecinit1.i -} - -declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) - -define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) { -; CHECK-LABEL: test_concat_undef_v1i32: -; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0] -entry: - %0 = extractelement <1 x i32> %a, i32 0 - %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 - ret <2 x i32> %vecinit1.i -} - -declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) #4 - -define <2 x i32> @test_concat_v1i32_undef(<1 x i32> %a) { -; CHECK-LABEL: test_concat_v1i32_undef: -; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} -; CHECK-NEXT: ret -entry: - %b = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a) - %0 = extractelement <1 x i32> %b, i32 0 - %vecinit.i432 = insertelement <2 x i32> undef, i32 %0, i32 0 - ret <2 x i32> %vecinit.i432 -} - -define <2 x i32> @test_concat_same_v1i32_v1i32(<1 x i32> %a) { -; CHECK-LABEL: test_concat_same_v1i32_v1i32: -; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] -entry: - %0 = extractelement <1 x i32> %a, i32 0 - %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 - %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 - ret <2 x i32> %vecinit1.i -} - -define <2 x i32> @test_concat_diff_v1i32_v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: test_concat_diff_v1i32_v1i32: -; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}} -; CHECK-NEXT: sqabs s{{[0-9]+}}, s{{[0-9]+}} -; CHECK-NEXT: ins v0.s[1], v1.s[0] -entry: - %c = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %a) - %d = extractelement <1 x i32> %c, i32 0 - %e = tail call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %b) - %f = extractelement <1 x i32> %e, i32 0 - %h = shufflevector <1 x i32> %c, <1 x i32> %e, <2 x i32> <i32 0, i32 1> - ret <2 x i32> %h -} - -define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <8 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <8 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <8 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <8 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <8 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <8 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <8 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <16 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <16 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <16 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <16 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <16 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <16 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <16 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <16 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecext15 = extractelement <8 x i8> %y, i32 0 - %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 - %vecext17 = extractelement <8 x i8> %y, i32 1 - %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 - %vecext19 = extractelement <8 x i8> %y, i32 2 - %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 - %vecext21 = extractelement <8 x i8> %y, i32 3 - %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 - %vecext23 = extractelement <8 x i8> %y, i32 4 - %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 - %vecext25 = extractelement <8 x i8> %y, i32 5 - %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 - %vecext27 = extractelement <8 x i8> %y, i32 6 - %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 - %vecext29 = extractelement <8 x i8> %y, i32 7 - %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 - ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i8> %x, i32 0 - %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 - %vecext1 = extractelement <8 x i8> %x, i32 1 - %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 - %vecext3 = extractelement <8 x i8> %x, i32 2 - %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 - %vecext5 = extractelement <8 x i8> %x, i32 3 - %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 - %vecext7 = extractelement <8 x i8> %x, i32 4 - %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 - %vecext9 = extractelement <8 x i8> %x, i32 5 - %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 - %vecext11 = extractelement <8 x i8> %x, i32 6 - %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 - %vecext13 = extractelement <8 x i8> %x, i32 7 - %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 - %vecext15 = extractelement <8 x i8> %y, i32 0 - %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 - %vecext17 = extractelement <8 x i8> %y, i32 1 - %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 - %vecext19 = extractelement <8 x i8> %y, i32 2 - %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 - %vecext21 = extractelement <8 x i8> %y, i32 3 - %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 - %vecext23 = extractelement <8 x i8> %y, i32 4 - %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 - %vecext25 = extractelement <8 x i8> %y, i32 5 - %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 - %vecext27 = extractelement <8 x i8> %y, i32 6 - %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 - %vecext29 = extractelement <8 x i8> %y, i32 7 - %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 - ret <16 x i8> %vecinit30 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <4 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <4 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <4 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <8 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <8 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <8 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <8 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecext7 = extractelement <4 x i16> %y, i32 0 - %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 - %vecext9 = extractelement <4 x i16> %y, i32 1 - %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 - %vecext11 = extractelement <4 x i16> %y, i32 2 - %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 - %vecext13 = extractelement <4 x i16> %y, i32 3 - %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 - ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 - %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 - %vecext1 = extractelement <4 x i16> %x, i32 1 - %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 - %vecext3 = extractelement <4 x i16> %x, i32 2 - %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 - %vecext5 = extractelement <4 x i16> %x, i32 3 - %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 - %vecext7 = extractelement <4 x i16> %y, i32 0 - %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 - %vecext9 = extractelement <4 x i16> %y, i32 1 - %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 - %vecext11 = extractelement <4 x i16> %y, i32 2 - %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 - %vecext13 = extractelement <4 x i16> %y, i32 3 - %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 - ret <8 x i16> %vecinit14 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <2 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <4 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecext3 = extractelement <2 x i32> %y, i32 0 - %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 - %vecext5 = extractelement <2 x i32> %y, i32 1 - %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 - ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i32> %x, i32 0 - %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 - %vecext1 = extractelement <2 x i32> %x, i32 1 - %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 - %vecext3 = extractelement <2 x i32> %y, i32 0 - %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 - %vecext5 = extractelement <2 x i32> %y, i32 1 - %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 - ret <4 x i32> %vecinit6 -} - -define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> - ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <1 x i64> %x, i32 0 - %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 - %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> - ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <2 x i64> %x, i32 0 - %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 - %vecext1 = extractelement <1 x i64> %y, i32 0 - %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 - ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <1 x i64> %x, i32 0 - %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 - %vecext1 = extractelement <1 x i64> %y, i32 0 - %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 - ret <2 x i64> %vecinit2 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) - -; This case tests the copy of two FPR8 registers, which is implemented by fmov -; of two FPR32 registers. -define <1 x i8> @test_copy_FPR8_FPR8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: test_copy_FPR8_FPR8: -; CHECK: usqadd b1, b0 -; CHECK-NEXT: fmov s0, s1 -entry: - %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %b, <1 x i8> %a) - ret <1 x i8> %vsqadd2.i -} - -declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: test_copy_FPR16_FPR16: -; CHECK: usqadd h1, h0 -; CHECK-NEXT: fmov s0, s1 -entry: - %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %b, <1 x i16> %a) - ret <1 x i16> %vsqadd2.i -} - -define <4 x i16> @concat_vector_v4i16_const() { -; CHECK-LABEL: concat_vector_v4i16_const: -; CHECK: dup {{v[0-9]+}}.4h, wzr - %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %r -} - -define <4 x i16> @concat_vector_v4i16_const_one() { -; CHECK-LABEL: concat_vector_v4i16_const_one: -; CHECK: movz {{w[0-9]+}}, #1 -; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}} - %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %r -} - -define <4 x i32> @concat_vector_v4i32_const() { -; CHECK-LABEL: concat_vector_v4i32_const: -; CHECK: dup {{v[0-9]+}}.4s, wzr - %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer - ret <4 x i32> %r -} - -define <8 x i8> @concat_vector_v8i8_const() { -; CHECK-LABEL: concat_vector_v8i8_const: -; CHECK: dup {{v[0-9]+}}.8b, wzr - %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer - ret <8 x i8> %r -} - -define <8 x i16> @concat_vector_v8i16_const() { -; CHECK-LABEL: concat_vector_v8i16_const: -; CHECK: dup {{v[0-9]+}}.8h, wzr - %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %r -} - -define <8 x i16> @concat_vector_v8i16_const_one() { -; CHECK-LABEL: concat_vector_v8i16_const_one: -; CHECK: movz {{w[0-9]+}}, #1 -; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} - %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %r -} - -define <16 x i8> @concat_vector_v16i8_const() { -; CHECK-LABEL: concat_vector_v16i8_const: -; CHECK: dup {{v[0-9]+}}.16b, wzr - %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %r -} - -define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { -; CHECK-LABEL: concat_vector_v4i16: -; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] - %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %r -} - -define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { -; CHECK-LABEL: concat_vector_v4i32: -; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] - %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer - ret <4 x i32> %r -} - -define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { -; CHECK-LABEL: concat_vector_v8i8: -; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[0] - %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer - ret <8 x i8> %r -} - -define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { -; CHECK-LABEL: concat_vector_v8i16: -; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] - %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %r -} - -define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { -; CHECK-LABEL: concat_vector_v16i8: -; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[0] - %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %r -} diff --git a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll deleted file mode 100644 index 1256b2b650..0000000000 --- a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has a separate copy due to intrinsics - -define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) { -; CHECK-LABEL: copyTuple.QPair: -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i32 0, i32 4) - %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 1, i32 4) - %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0 - ret <4 x i32> %vld1.fca.0.extract -} - -define <4 x i32> @copyTuple.QTriple(i8* %a, i8* %b, <4 x i32> %c) { -; CHECK-LABEL: copyTuple.QTriple: -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4) - %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, i32 1, i32 4) - %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 - ret <4 x i32> %vld1.fca.0.extract -} - -define <4 x i32> @copyTuple.QQuad(i8* %a, i8* %b, <4 x i32> %c) { -; CHECK-LABEL: copyTuple.QQuad: -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: mov v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i32 0, i32 4) - %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0 - %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %b, <4 x i32> %extract, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i32 1, i32 4) - %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0 - ret <4 x i32> %vld1.fca.0.extract -} - -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) diff --git a/test/CodeGen/AArch64/neon-crypto.ll b/test/CodeGen/AArch64/neon-crypto.ll deleted file mode 100644 index 5f1491eb1e..0000000000 --- a/test/CodeGen/AArch64/neon-crypto.ll +++ /dev/null @@ -1,145 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s -; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s -; arm64 has a separate test for this, covering the same features (crypto.ll). N.b. NO-CRYPTO will need porting. - -declare <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha256h(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32>, <4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1m(<4 x i32>, i32, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1p(<4 x i32>, i32, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1c(<4 x i32>, i32, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32>, <4 x i32>) #1 - -declare <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32>, <4 x i32>) #1 - -declare i32 @llvm.arm.neon.sha1h(i32) #1 - -declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) #1 - -declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) #1 - -declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1 - -declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1 - -define <16 x i8> @test_vaeseq_u8(<16 x i8> %data, <16 x i8> %key) { -; CHECK: test_vaeseq_u8: -; CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -; CHECK-NO-CRYPTO: Cannot select: intrinsic %llvm.arm.neon.aese -entry: - %aese.i = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key) - ret <16 x i8> %aese.i -} - -define <16 x i8> @test_vaesdq_u8(<16 x i8> %data, <16 x i8> %key) { -; CHECK: test_vaesdq_u8: -; CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %aesd.i = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key) - ret <16 x i8> %aesd.i -} - -define <16 x i8> @test_vaesmcq_u8(<16 x i8> %data) { -; CHECK: test_vaesmcq_u8: -; CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %aesmc.i = tail call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %data) - ret <16 x i8> %aesmc.i -} - -define <16 x i8> @test_vaesimcq_u8(<16 x i8> %data) { -; CHECK: test_vaesimcq_u8: -; CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b -entry: - %aesimc.i = tail call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %data) - ret <16 x i8> %aesimc.i -} - -define i32 @test_vsha1h_u32(i32 %hash_e) { -; CHECK: test_vsha1h_u32: -; CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}} -entry: - %sha1h1.i = tail call i32 @llvm.arm.neon.sha1h(i32 %hash_e) - ret i32 %sha1h1.i -} - -define <4 x i32> @test_vsha1su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w12_15) { -; CHECK: test_vsha1su1q_u32: -; CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha1su12.i = tail call <4 x i32> @llvm.arm.neon.sha1su1(<4 x i32> %tw0_3, <4 x i32> %w12_15) - ret <4 x i32> %sha1su12.i -} - -define <4 x i32> @test_vsha256su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7) { -; CHECK: test_vsha256su0q_u32: -; CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha256su02.i = tail call <4 x i32> @llvm.arm.neon.sha256su0(<4 x i32> %w0_3, <4 x i32> %w4_7) - ret <4 x i32> %sha256su02.i -} - -define <4 x i32> @test_vsha1cq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { -; CHECK: test_vsha1cq_u32: -; CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha1c1.i = tail call <4 x i32> @llvm.arm.neon.sha1c(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) - ret <4 x i32> %sha1c1.i -} - -define <4 x i32> @test_vsha1pq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { -; CHECK: test_vsha1pq_u32: -; CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha1p1.i = tail call <4 x i32> @llvm.arm.neon.sha1p(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) - ret <4 x i32> %sha1p1.i -} - -define <4 x i32> @test_vsha1mq_u32(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) { -; CHECK: test_vsha1mq_u32: -; CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha1m1.i = tail call <4 x i32> @llvm.arm.neon.sha1m(<4 x i32> %hash_abcd, i32 %hash_e, <4 x i32> %wk) - ret <4 x i32> %sha1m1.i -} - -define <4 x i32> @test_vsha1su0q_u32(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) { -; CHECK: test_vsha1su0q_u32: -; CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha1su03.i = tail call <4 x i32> @llvm.arm.neon.sha1su0(<4 x i32> %w0_3, <4 x i32> %w4_7, <4 x i32> %w8_11) - ret <4 x i32> %sha1su03.i -} - -define <4 x i32> @test_vsha256hq_u32(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) { -; CHECK: test_vsha256hq_u32: -; CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha256h3.i = tail call <4 x i32> @llvm.arm.neon.sha256h(<4 x i32> %hash_abcd, <4 x i32> %hash_efgh, <4 x i32> %wk) - ret <4 x i32> %sha256h3.i -} - -define <4 x i32> @test_vsha256h2q_u32(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) { -; CHECK: test_vsha256h2q_u32: -; CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s -entry: - %sha256h23.i = tail call <4 x i32> @llvm.arm.neon.sha256h2(<4 x i32> %hash_efgh, <4 x i32> %hash_abcd, <4 x i32> %wk) - ret <4 x i32> %sha256h23.i -} - -define <4 x i32> @test_vsha256su1q_u32(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) { -; CHECK: test_vsha256su1q_u32: -; CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %sha256su13.i = tail call <4 x i32> @llvm.arm.neon.sha256su1(<4 x i32> %tw0_3, <4 x i32> %w8_11, <4 x i32> %w12_15) - ret <4 x i32> %sha256su13.i -} - diff --git a/test/CodeGen/AArch64/neon-diagnostics.ll b/test/CodeGen/AArch64/neon-diagnostics.ll index 470bff771e..e28df29f3e 100644 --- a/test/CodeGen/AArch64/neon-diagnostics.ll +++ b/test/CodeGen/AArch64/neon-diagnostics.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll index f16b0365c8..96b4084a25 100644 --- a/test/CodeGen/AArch64/neon-extract.ll +++ b/test/CodeGen/AArch64/neon-extract.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) { diff --git a/test/CodeGen/AArch64/neon-facge-facgt.ll b/test/CodeGen/AArch64/neon-facge-facgt.ll deleted file mode 100644 index bf43e51cc2..0000000000 --- a/test/CodeGen/AArch64/neon-facge-facgt.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has duplicates for this functionality in vcmp.ll. - -declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double>, <2 x double>) - -define <2 x i32> @facge_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: facge_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %A, <2 x float> %B) -; CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - ret <2 x i32> %val -} -define <4 x i32> @facge_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: facge_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %A, <4 x float> %B) -; CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - ret <4 x i32> %val -} - -define <2 x i64> @facge_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: facge_from_intr_v2i64: - %val = call <2 x i64> @llvm.arm.neon.vacge.v2i64.v2f64(<2 x double> %A, <2 x double> %B) -; CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - ret <2 x i64> %val -} - -declare <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float>, <2 x float>) -declare <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float>, <4 x float>) -declare <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double>, <2 x double>) - -define <2 x i32> @facgt_from_intr_v2i32(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: facgt_from_intr_v2i32: - %val = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %A, <2 x float> %B) -; CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - ret <2 x i32> %val -} -define <4 x i32> @facgt_from_intr_v4i32( <4 x float> %A, <4 x float> %B) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: facgt_from_intr_v4i32: - %val = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %A, <4 x float> %B) -; CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - ret <4 x i32> %val -} - -define <2 x i64> @facgt_from_intr_v2i64(<2 x double> %A, <2 x double> %B) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: facgt_from_intr_v2i64: - %val = call <2 x i64> @llvm.arm.neon.vacgt.v2i64.v2f64(<2 x double> %A, <2 x double> %B) -; CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - ret <2 x i64> %val -} - diff --git a/test/CodeGen/AArch64/neon-fma.ll b/test/CodeGen/AArch64/neon-fma.ll index 9b1657c36f..6df494deda 100644 --- a/test/CodeGen/AArch64/neon-fma.ll +++ b/test/CodeGen/AArch64/neon-fma.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll index f6c0d06872..e48dbbaec9 100644 --- a/test/CodeGen/AArch64/neon-fpround_f128.ll +++ b/test/CodeGen/AArch64/neon-fpround_f128.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) { diff --git a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll b/test/CodeGen/AArch64/neon-frsqrt-frecp.ll deleted file mode 100644 index 199258d60e..0000000000 --- a/test/CodeGen/AArch64/neon-frsqrt-frecp.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has a duplicate for all these tests in vsqrt.ll - -; Set of tests for when the intrinsic is used. - -declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @frsqrts_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frsqrts v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %lhs, <2 x float> %rhs) - ret <2 x float> %val -} - -define <4 x float> @frsqrts_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frsqrts v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %lhs, <4 x float> %rhs) - ret <4 x float> %val -} - -define <2 x double> @frsqrts_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frsqrts v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.arm.neon.vrsqrts.v2f64(<2 x double> %lhs, <2 x double> %rhs) - ret <2 x double> %val -} - -declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @frecps_from_intr_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frecps v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %lhs, <2 x float> %rhs) - ret <2 x float> %val -} - -define <4 x float> @frecps_from_intr_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frecps v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %lhs, <4 x float> %rhs) - ret <4 x float> %val -} - -define <2 x double> @frecps_from_intr_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: frecps v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.arm.neon.vrecps.v2f64(<2 x double> %lhs, <2 x double> %rhs) - ret <2 x double> %val -} - diff --git a/test/CodeGen/AArch64/neon-halving-add-sub.ll b/test/CodeGen/AArch64/neon-halving-add-sub.ll deleted file mode 100644 index 4d9ffe5dbd..0000000000 --- a/test/CodeGen/AArch64/neon-halving-add-sub.ll +++ /dev/null @@ -1,208 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 duplicates these in vhadd.ll and vhsub.ll - -declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uhadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uhadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_shadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_shadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: shadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uhadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uhadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_shadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_shadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: shadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uhadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uhadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_shadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_shadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: shadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uhadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uhadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_shadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_shadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: shadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uhadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uhadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_shadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_shadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: shadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uhadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uhadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_shadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_shadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: shadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - -declare <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uhsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uhsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uhsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_shsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_shsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: shsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uhsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uhsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uhsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_shsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_shsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: shsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uhsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uhsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uhsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_shsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_shsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: shsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uhsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uhsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uhsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_shsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_shsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: shsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uhsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uhsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uhsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_shsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_shsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: shsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uhsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uhsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uhsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_shsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_shsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: shsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll index 9c9758a81f..11e1af7e14 100644 --- a/test/CodeGen/AArch64/neon-idiv.ll +++ b/test/CodeGen/AArch64/neon-idiv.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s define <4 x i32> @test1(<4 x i32> %a) { diff --git a/test/CodeGen/AArch64/neon-load-store-v1i32.ll b/test/CodeGen/AArch64/neon-load-store-v1i32.ll deleted file mode 100644 index 12361ba008..0000000000 --- a/test/CodeGen/AArch64/neon-load-store-v1i32.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 does not use these pseudo-vectors, and they're not blessed by the PCS. Skipping. - -; Test load/store of v1i8, v1i16, v1i32 types can be selected correctly -define void @load.store.v1i8(<1 x i8>* %ptr, <1 x i8>* %ptr2) { -; CHECK-LABEL: load.store.v1i8: -; CHECK: ldr b{{[0-9]+}}, [x{{[0-9]+|sp}}] -; CHECK: str b{{[0-9]+}}, [x{{[0-9]+|sp}}] - %a = load <1 x i8>* %ptr - store <1 x i8> %a, <1 x i8>* %ptr2 - ret void -} - -define void @load.store.v1i16(<1 x i16>* %ptr, <1 x i16>* %ptr2) { -; CHECK-LABEL: load.store.v1i16: -; CHECK: ldr h{{[0-9]+}}, [x{{[0-9]+|sp}}] -; CHECK: str h{{[0-9]+}}, [x{{[0-9]+|sp}}] - %a = load <1 x i16>* %ptr - store <1 x i16> %a, <1 x i16>* %ptr2 - ret void -} - -define void @load.store.v1i32(<1 x i32>* %ptr, <1 x i32>* %ptr2) { -; CHECK-LABEL: load.store.v1i32: -; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+|sp}}] -; CHECK: str s{{[0-9]+}}, [x{{[0-9]+|sp}}] - %a = load <1 x i32>* %ptr - store <1 x i32> %a, <1 x i32>* %ptr2 - ret void -} diff --git a/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/test/CodeGen/AArch64/neon-max-min-pairwise.ll deleted file mode 100644 index 8642f09c4e..0000000000 --- a/test/CodeGen/AArch64/neon-max-min-pairwise.ll +++ /dev/null @@ -1,347 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; These duplicate arm64 tests in vmax.ll - -declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_smaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_smaxp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: smaxp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_umaxp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: umaxp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_smaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_smaxp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: smaxp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_umaxp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_umaxp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: umaxp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_smaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_smaxp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: smaxp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_umaxp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_umaxp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: umaxp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_smaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_smaxp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: smaxp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_umaxp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_umaxp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: umaxp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_smaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_smaxp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: smaxp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_umaxp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_umaxp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: umaxp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_smaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_smaxp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: smaxp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_umaxp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_umaxp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: umaxp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_sminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_sminp_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sminp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_uminp_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uminp v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_sminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sminp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sminp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_uminp_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uminp_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vpminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uminp v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_sminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sminp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sminp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_uminp_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uminp_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uminp v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_sminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sminp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sminp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_uminp_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uminp_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vpminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uminp v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_sminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sminp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sminp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_uminp_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uminp_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uminp v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_sminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sminp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sminp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_uminp_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uminp_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vpminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uminp v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmaxp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmaxp_v2f32: - %val = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmaxp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmaxp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmaxp_v4f32: - %val = call <4 x float> @llvm.arm.neon.vpmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmaxp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmaxp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmaxp_v2f64: - %val = call <2 x double> @llvm.arm.neon.vpmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmaxp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fminp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fminp_v2f32: - %val = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fminp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fminp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fminp_v4f32: - %val = call <4 x float> @llvm.arm.neon.vpmins.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fminp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fminp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fminp_v2f64: - %val = call <2 x double> @llvm.arm.neon.vpmins.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fminp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmaxnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmaxnmp_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vpmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmaxnmp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmaxnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmaxnmp_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vpmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmaxnmp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmaxnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmaxnmp_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vpmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmaxnmp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fminnmp_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fminnmp_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vpminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fminnmp v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fminnmp_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fminnmp_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vpminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fminnmp v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fminnmp_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vpminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fminnmp v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -define i32 @test_vminv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vminv_s32 -; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -define i32 @test_vminv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vminv_u32 -; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -define i32 @test_vmaxv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vmaxv_s32 -; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -define i32 @test_vmaxv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vmaxv_u32 -; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i32> %1, i32 0 - ret i32 %2 -} - -declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>) -declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>) -declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>) -declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-max-min.ll b/test/CodeGen/AArch64/neon-max-min.ll deleted file mode 100644 index f9a50f4e5d..0000000000 --- a/test/CodeGen/AArch64/neon-max-min.ll +++ /dev/null @@ -1,311 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; These duplicate tests in arm64's vmax.ll - -declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_smax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_smax_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: smax v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_umax_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: umax v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_smax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_smax_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: smax v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_umax_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_umax_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: umax v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_smax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_smax_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: smax v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_umax_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_umax_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: umax v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_smax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_smax_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: smax v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_umax_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_umax_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: umax v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_smax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_smax_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: smax v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_umax_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_umax_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: umax v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_smax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_smax_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: smax v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_umax_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_umax_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: umax v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_smin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; Using registers other than v0, v1 are possible, but would be odd. -; CHECK: test_smin_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: smin v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_umin_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { - %tmp1 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: umin v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_smin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_smin_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: smin v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_umin_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_umin_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: umin v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_smin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_smin_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: smin v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_umin_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_umin_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: umin v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - - -declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_smin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_smin_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: smin v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_umin_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_umin_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: umin v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - - -declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_smin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_smin_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: smin v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_umin_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_umin_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: umin v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_smin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_smin_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: smin v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_umin_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_umin_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: umin v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmax_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmax_v2f32: - %val = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmax v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmax_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmax_v4f32: - %val = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmax v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmax_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmax_v2f64: - %val = call <2 x double> @llvm.arm.neon.vmaxs.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmax v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmin_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmin_v2f32: - %val = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmin v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmin_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmin_v4f32: - %val = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmin v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmin_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmin_v2f64: - %val = call <2 x double> @llvm.arm.neon.vmins.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmin v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - - -declare <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fmaxnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fmaxnm_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vmaxnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fmaxnm v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fmaxnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fmaxnm_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vmaxnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fmaxnm v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fmaxnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fmaxnm_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vmaxnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fmaxnm v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} - -declare <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @test_fminnm_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; CHECK: test_fminnm_v2f32: - %val = call <2 x float> @llvm.aarch64.neon.vminnm.v2f32(<2 x float> %lhs, <2 x float> %rhs) -; CHECK: fminnm v0.2s, v0.2s, v1.2s - ret <2 x float> %val -} - -define <4 x float> @test_fminnm_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; CHECK: test_fminnm_v4f32: - %val = call <4 x float> @llvm.aarch64.neon.vminnm.v4f32(<4 x float> %lhs, <4 x float> %rhs) -; CHECK: fminnm v0.4s, v0.4s, v1.4s - ret <4 x float> %val -} - -define <2 x double> @test_fminnm_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; CHECK: test_fminnm_v2f64: - %val = call <2 x double> @llvm.aarch64.neon.vminnm.v2f64(<2 x double> %lhs, <2 x double> %rhs) -; CHECK: fminnm v0.2d, v0.2d, v1.2d - ret <2 x double> %val -} diff --git a/test/CodeGen/AArch64/neon-misc-scalar.ll b/test/CodeGen/AArch64/neon-misc-scalar.ll deleted file mode 100644 index 3472c5f07b..0000000000 --- a/test/CodeGen/AArch64/neon-misc-scalar.ll +++ /dev/null @@ -1,61 +0,0 @@ -;RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 already has copies of these tests (scattered). - -declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) - -declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) - -declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>) - -declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) - -declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) { -entry: - ; CHECK: test_vuqadd_s64 - %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) - ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vuqadd2.i -} - -define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) { -entry: - ; CHECK: test_vsqadd_u64 - %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) - ; CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vsqadd2.i -} - -define <1 x i64> @test_vabs_s64(<1 x i64> %a) { - ; CHECK: test_vabs_s64 -entry: - %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a) - ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vabs1.i -} - -define <1 x i64> @test_vqabs_s64(<1 x i64> %a) { - ; CHECK: test_vqabs_s64 -entry: - %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a) - ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vqabs1.i -} - -define <1 x i64> @test_vqneg_s64(<1 x i64> %a) { - ; CHECK: test_vqneg_s64 -entry: - %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a) - ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %vqneg1.i -} - -define <1 x i64> @test_vneg_s64(<1 x i64> %a) { - ; CHECK: test_vneg_s64 -entry: - %sub.i = sub <1 x i64> zeroinitializer, %a - ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} - ret <1 x i64> %sub.i -} - diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll deleted file mode 100644 index 5682f103e9..0000000000 --- a/test/CodeGen/AArch64/neon-misc.ll +++ /dev/null @@ -1,2014 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has a separate copy of these in aarch64-neon-misc.ll due to different intrinsics. - -define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { -; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> - ret <8 x i8> %shuffle.i -} - -define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 { -; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> - ret <16 x i8> %shuffle.i -} - -define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> - ret <4 x i16> %shuffle.i -} - -define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 { -; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> - ret <8 x i16> %shuffle.i -} - -define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0> - ret <2 x i32> %shuffle.i -} - -define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0> - ret <2 x float> %shuffle.i -} - -define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> - ret <4 x i32> %shuffle.i -} - -define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 { -; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> - ret <4 x float> %shuffle.i -} - -define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4 - ret <4 x i16> %vpaddl.i -} - -define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4 - ret <2 x i32> %vpaddl1.i -} - -define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4 - ret <1 x i64> %vpaddl1.i -} - -define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpaddl.i = tail call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4 - ret <4 x i16> %vpaddl.i -} - -define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpaddl1.i = tail call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4 - ret <2 x i32> %vpaddl1.i -} - -define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpaddl1.i = tail call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4 - ret <1 x i64> %vpaddl1.i -} - -define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4 - ret <8 x i16> %vpaddl.i -} - -define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4 - ret <4 x i32> %vpaddl1.i -} - -define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { -; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4 - ret <2 x i64> %vpaddl1.i -} - -define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpaddl.i = tail call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4 - ret <8 x i16> %vpaddl.i -} - -define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpaddl1.i = tail call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4 - ret <4 x i32> %vpaddl1.i -} - -define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { -; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpaddl1.i = tail call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4 - ret <2 x i64> %vpaddl1.i -} - -define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 - ret <4 x i16> %vpadal1.i -} - -define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 - ret <2 x i32> %vpadal2.i -} - -define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 - ret <1 x i64> %vpadal2.i -} - -define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b - %vpadal1.i = tail call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 - ret <4 x i16> %vpadal1.i -} - -define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h - %vpadal2.i = tail call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 - ret <2 x i32> %vpadal2.i -} - -define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s - %vpadal2.i = tail call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 - ret <1 x i64> %vpadal2.i -} - -define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 - ret <8 x i16> %vpadal1.i -} - -define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 - ret <4 x i32> %vpadal2.i -} - -define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 { -; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 - ret <2 x i64> %vpadal2.i -} - -define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b - %vpadal1.i = tail call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 - ret <8 x i16> %vpadal1.i -} - -define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %vpadal2.i = tail call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 - ret <4 x i32> %vpadal2.i -} - -define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { -; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %vpadal2.i = tail call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 - ret <2 x i64> %vpadal2.i -} - -define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vqabs.i = tail call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vqabs.i -} - -define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vqabs.i = tail call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vqabs.i -} - -define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vqabs1.i = tail call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vqabs1.i -} - -define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vqabs1.i = tail call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vqabs1.i -} - -define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vqabs1.i = tail call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vqabs1.i -} - -define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vqabs1.i = tail call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vqabs1.i -} - -define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 { -; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vqabs1.i = tail call <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vqabs1.i -} - -define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vqneg.i = tail call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vqneg.i -} - -define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vqneg.i = tail call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vqneg.i -} - -define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vqneg1.i = tail call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vqneg1.i -} - -define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vqneg1.i = tail call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vqneg1.i -} - -define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vqneg1.i = tail call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vqneg1.i -} - -define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vqneg1.i = tail call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vqneg1.i -} - -define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 { -; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vqneg1.i = tail call <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vqneg1.i -} - -define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %sub.i = sub <8 x i8> zeroinitializer, %a - ret <8 x i8> %sub.i -} - -define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %sub.i = sub <16 x i8> zeroinitializer, %a - ret <16 x i8> %sub.i -} - -define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %sub.i = sub <4 x i16> zeroinitializer, %a - ret <4 x i16> %sub.i -} - -define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %sub.i = sub <8 x i16> zeroinitializer, %a - ret <8 x i16> %sub.i -} - -define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %sub.i = sub <2 x i32> zeroinitializer, %a - ret <2 x i32> %sub.i -} - -define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %sub.i = sub <4 x i32> zeroinitializer, %a - ret <4 x i32> %sub.i -} - -define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 { -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %sub.i = sub <2 x i64> zeroinitializer, %a - ret <2 x i64> %sub.i -} - -define <2 x float> @test_vneg_f32(<2 x float> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a - ret <2 x float> %sub.i -} - -define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a - ret <4 x float> %sub.i -} - -define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 { -; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a - ret <2 x double> %sub.i -} - -define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { -; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vabs.i = tail call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vabs.i -} - -define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 { -; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vabs.i = tail call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vabs.i -} - -define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { -; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vabs1.i = tail call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vabs1.i -} - -define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { -; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vabs1.i = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vabs1.i -} - -define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { -; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vabs1.i = tail call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vabs1.i -} - -define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { -; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vabs1.i = tail call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vabs1.i -} - -define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 { -; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vabs1.i = tail call <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64> %a) #4 - ret <2 x i64> %vabs1.i -} - -define <2 x float> @test_vabs_f32(<2 x float> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4 - ret <2 x float> %vabs1.i -} - -define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4 - ret <4 x float> %vabs1.i -} - -define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 { -; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4 - ret <2 x double> %vabs1.i -} - -define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vuqadd.i = tail call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 - ret <8 x i8> %vuqadd.i -} - -define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vuqadd.i = tail call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4 - ret <16 x i8> %vuqadd.i -} - -define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vuqadd2.i = tail call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 - ret <4 x i16> %vuqadd2.i -} - -define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vuqadd2.i = tail call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4 - ret <8 x i16> %vuqadd2.i -} - -define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vuqadd2.i = tail call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 - ret <2 x i32> %vuqadd2.i -} - -define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vuqadd2.i = tail call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4 - ret <4 x i32> %vuqadd2.i -} - -define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { -; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vuqadd2.i = tail call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4 - ret <2 x i64> %vuqadd2.i -} - -define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { -; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vcls.i = tail call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vcls.i -} - -define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { -; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vcls.i = tail call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vcls.i -} - -define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { -; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vcls1.i = tail call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4 - ret <4 x i16> %vcls1.i -} - -define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { -; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vcls1.i = tail call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4 - ret <8 x i16> %vcls1.i -} - -define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { -; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcls1.i = tail call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vcls1.i -} - -define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { -; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcls1.i = tail call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vcls1.i -} - -define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { -; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 - ret <8 x i8> %vclz.i -} - -define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { -; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 - ret <16 x i8> %vclz.i -} - -define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { -; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 - ret <4 x i16> %vclz1.i -} - -define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { -; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 - ret <8 x i16> %vclz1.i -} - -define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { -; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 - ret <2 x i32> %vclz1.i -} - -define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { -; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 - ret <4 x i32> %vclz1.i -} - -define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { -; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vctpop.i -} - -define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { -; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vctpop.i -} - -define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> - ret <8 x i8> %neg.i -} - -define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> - ret <16 x i8> %neg.i -} - -define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1> - ret <4 x i16> %neg.i -} - -define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> - ret <8 x i16> %neg.i -} - -define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { -; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1> - ret <2 x i32> %neg.i -} - -define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { -; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> - ret <4 x i32> %neg.i -} - -define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 { -; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %vrbit.i = tail call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #4 - ret <8 x i8> %vrbit.i -} - -define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 { -; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %vrbit.i = tail call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #4 - ret <16 x i8> %vrbit.i -} - -define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vmovn.i = trunc <8 x i16> %a to <8 x i8> - ret <8 x i8> %vmovn.i -} - -define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vmovn.i = trunc <4 x i32> %a to <4 x i16> - ret <4 x i16> %vmovn.i -} - -define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vmovn.i = trunc <2 x i64> %a to <2 x i32> - ret <2 x i32> %vmovn.i -} - -define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vmovn.i.i = trunc <8 x i16> %b to <8 x i8> - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vmovn.i.i = trunc <4 x i32> %b to <4 x i16> - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vmovn.i.i = trunc <2 x i64> %b to <2 x i32> - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqdmull1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqdmull1.i -} - -define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqdmull1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqdmull1.i -} - -define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 { -; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqdmull1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqdmull1.i -} - -define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqdmull1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vqdmull1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vqdmull1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqmovn1.i -} - -define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqmovn1.i -} - -define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { -; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqmovn1.i -} - -define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: test_vqmovn_high_s32 - %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: test_vqmovn_high_s64 - %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i32> %shuffle.i -} - -define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h - %vqmovn1.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4 - ret <8 x i8> %vqmovn1.i -} - -define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vqmovn1.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4 - ret <4 x i16> %vqmovn1.i -} - -define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { -; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vqmovn1.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4 - ret <2 x i32> %vqmovn1.i -} - -define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h - %vqmovn1.i.i = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %b) #4 - %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <16 x i8> %shuffle.i -} - -define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vqmovn1.i.i = tail call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i16> %shuffle.i -} - -define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 { -; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vqmovn1.i.i = tail call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %b) #4 - %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i32> %shuffle.i -} - -define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { -; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 - %1 = sext <8 x i8> %a to <8 x i16> - %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { -; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 - %1 = sext <4 x i16> %a to <4 x i32> - %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { -; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 - %1 = sext <2 x i32> %a to <2 x i64> - %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { -; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8 - %1 = zext <8 x i8> %a to <8 x i16> - %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { -; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16 - %1 = zext <4 x i16> %a to <4 x i32> - %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { -; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32 - %1 = zext <2 x i32> %a to <2 x i64> - %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %1 = sext <8 x i8> %shuffle.i to <8 x i16> - %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %1 = sext <4 x i16> %shuffle.i to <4 x i32> - %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %1 = sext <2 x i32> %shuffle.i to <2 x i64> - %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> - ret <2 x i64> %vshll_n -} - -define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8 - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %1 = zext <8 x i8> %shuffle.i to <8 x i16> - %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> - ret <8 x i16> %vshll_n -} - -define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16 - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %1 = zext <4 x i16> %shuffle.i to <4 x i32> - %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16> - ret <4 x i32> %vshll_n -} - -define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 { -; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32 - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - %1 = zext <2 x i32> %shuffle.i to <2 x i64> - %vshll_n = shl <2 x i64> %1, <i64 32, i64 32> - ret <2 x i64> %vshll_n -} - -define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 { -; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt1.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4 - ret <4 x i16> %vcvt1.i -} - -define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 { -; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s - %vcvt1.i.i = tail call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %b) #4 - %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i16> %shuffle.i -} - -define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h - %vcvt1.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %a) #4 - ret <4 x float> %vcvt1.i -} - -define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 { -; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h - %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - %vcvt1.i.i = tail call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4 - ret <4 x float> %vcvt1.i.i -} - -define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 { -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptrunc <2 x double> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { -; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvt.i.i = fptrunc <2 x double> %b to <2 x float> - %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x float> %shuffle.i -} - -define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { -; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4 - ret <2 x float> %vcvtx_f32_f641.i -} - -define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { -; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4 - %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x float> %shuffle.i -} - -define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s - %vcvt.i = fpext <2 x float> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 { -; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s - %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3> - %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double> - ret <2 x double> %vcvt.i.i -} - -define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndn1.i = tail call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndn1.i -} - -define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndn1.i = tail call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndn1.i -} - -define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 { -; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndn1.i = tail call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndn1.i -} - -define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrnda1.i -} - -define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrnda1.i -} - -define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 { -; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrnda1.i -} - -define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndp1.i -} - -define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndp1.i -} - -define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 { -; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndp1.i -} - -define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndm1.i -} - -define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndm1.i -} - -define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 { -; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndm1.i -} - -define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndx1.i -} - -define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndx1.i -} - -define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 { -; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndx1.i -} - -define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrnd1.i -} - -define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrnd1.i -} - -define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 { -; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrnd1.i -} - -define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrndi1.i -} - -define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrndi1.i -} - -define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 { -; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrndi1.i -} - -define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = fptosi <2 x float> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = fptosi <4 x float> %a to <4 x i32> - ret <4 x i32> %vcvt.i -} - -define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x double> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = fptoui <2 x float> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = fptoui <4 x float> %a to <4 x i32> - ret <4 x i32> %vcvt.i -} - -define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x double> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x float> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 { -; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x float> %a to <2 x i64> - ret <2 x i64> %vcvt.i -} - -define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt.i = fptosi <4 x float> %a to <4 x i16> - ret <4 x i16> %vcvt.i -} - -define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s - %vcvt.i = fptoui <4 x float> %a to <4 x i16> - ret <4 x i16> %vcvt.i -} - -define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 { -; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptosi <2 x double> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 { -; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = fptoui <2 x double> %a to <2 x i32> - ret <2 x i32> %vcvt.i -} - -define <1 x i8> @test_vcvt_s8_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}} - %vcvt.i = fptosi <1 x double> %a to <1 x i8> - ret <1 x i8> %vcvt.i -} - -define <1 x i8> @test_vcvt_u8_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}} - %vcvt.i = fptoui <1 x double> %a to <1 x i8> - ret <1 x i8> %vcvt.i -} - -define <1 x i16> @test_vcvt_s16_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}} - %vcvt.i = fptosi <1 x double> %a to <1 x i16> - ret <1 x i16> %vcvt.i -} - -define <1 x i16> @test_vcvt_u16_f64(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}} - %vcvt.i = fptoui <1 x double> %a to <1 x i16> - ret <1 x i16> %vcvt.i -} - -define <1 x i32> @test_vcvt_s32_f64_v1(<1 x double> %a) #0 { -; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = fptosi <1 x double> %a to <1 x i32> - ret <1 x i32> %vcvt.i -} - -define <1 x i32> @test_vcvt_u32_f64_v1(<1 x double> %a) #0 { -; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}} -; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = fptoui <1 x double> %a to <1 x i32> - ret <1 x i32> %vcvt.i -} - -define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtn_s32_f32 -; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtns_f321.i -} - -define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtnq_s32_f32 -; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtns_f321.i -} - -define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtnq_s64_f64 -; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtns_f641.i -} - -define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtn_u32_f32 -; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtnu_f321.i -} - -define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtnq_u32_f32 -; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtnu_f321.i -} - -define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtnq_u64_f64 -; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtnu_f641.i -} - -define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtp_s32_f32 -; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtps_f321.i -} - -define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtpq_s32_f32 -; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtps_f321.i -} - -define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtpq_s64_f64 -; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtps_f641.i -} - -define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtp_u32_f32 -; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtpu_f321.i -} - -define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtpq_u32_f32 -; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtpu_f321.i -} - -define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtpq_u64_f64 -; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtpu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtpu_f641.i -} - -define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtm_s32_f32 -; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtms_f321.i -} - -define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtmq_s32_f32 -; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtms_f321.i -} - -define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtmq_s64_f64 -; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtms_f641.i -} - -define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvtm_u32_f32 -; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtmu_f321.i -} - -define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtmq_u32_f32 -; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtmu_f321.i -} - -define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtmq_u64_f64 -; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtmu_f641.i -} - -define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvta_s32_f32 -; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtas_f321.i -} - -define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtaq_s32_f32 -; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtas_f321.i -} - -define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtaq_s64_f64 -; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtas_f641.i -} - -define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) { -; CHECK-LABEL: test_vcvta_u32_f32 -; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) - ret <2 x i32> %vcvtau_f321.i -} - -define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) { -; CHECK-LABEL: test_vcvtaq_u32_f32 -; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) - ret <4 x i32> %vcvtau_f321.i -} - -define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) { -; CHECK-LABEL: test_vcvtaq_u64_f64 -; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a) - ret <2 x i64> %vcvtau_f641.i -} - -define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrsqrte1.i = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrsqrte1.i -} - -define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrsqrte1.i = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrsqrte1.i -} - -define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 { -; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrsqrte1.i = tail call <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrsqrte1.i -} - -define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrecpe1.i = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4 - ret <2 x float> %vrecpe1.i -} - -define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrecpe1.i = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4 - ret <4 x float> %vrecpe1.i -} - -define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 { -; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vrecpe1.i = tail call <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double> %a) #4 - ret <2 x double> %vrecpe1.i -} - -define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 { -; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vrecpe1.i = tail call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4 - ret <2 x i32> %vrecpe1.i -} - -define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { -; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vrecpe1.i = tail call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4 - ret <4 x i32> %vrecpe1.i -} - -define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4 - ret <2 x float> %vsqrt1.i -} - -define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4 - ret <4 x float> %vsqrt1.i -} - -define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 { -; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4 - ret <2 x double> %vsqrt1.i -} - -define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = sitofp <2 x i32> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvt.i = uitofp <2 x i32> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = sitofp <4 x i32> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = uitofp <4 x i32> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i64> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i64> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_s64(<2 x i64> %a) #0 { -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i64> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <2 x float> @test_vcvt_f32_u64(<2 x i64> %a) #0 { -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i64> %a to <2 x float> - ret <2 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 { -; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 -; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = sitofp <4 x i16> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 { -; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0 -; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvt.i = uitofp <4 x i16> %a to <4 x float> - ret <4 x float> %vcvt.i -} - -define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 { -; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 -; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = sitofp <2 x i32> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 { -; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0 -; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvt.i = uitofp <2 x i32> %a to <2 x double> - ret <2 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_s8(<1 x i8> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0] -; CHECK: sxtb w{{[0-9]+}}, w{{[0-9]+}} -; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = sitofp <1 x i8> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_u8(<1 x i8> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0] -; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xff -; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = uitofp <1 x i8> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_s16(<1 x i16> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0] -; CHECK: sxth w{{[0-9]+}}, w{{[0-9]+}} -; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = sitofp <1 x i16> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_u16(<1 x i16> %a) #0 { -; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0] -; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xffff -; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = uitofp <1 x i16> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_s32_v1(<1 x i32> %a) #0 { -; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}} -; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = sitofp <1 x i32> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -define <1 x double> @test_vcvt_f64_u32_v1(<1 x i32> %a) #0 { -; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}} -; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}} - %vcvt.i = uitofp <1 x i32> %a to <1 x double> - ret <1 x double> %vcvt.i -} - -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2 - -declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) #2 - -declare <2 x double> @llvm.arm.neon.vrecpe.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) #2 - -declare <2 x double> @llvm.arm.neon.vrsqrte.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2 - -declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>) - -declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>) - -declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>) - -declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>) - -declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.round.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.round.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.round.v2f32(<2 x float>) #3 - -declare <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double>) #2 - -declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2 - -declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2 - -declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2 - -declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 - -declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) #2 - -declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) #2 - -declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) #2 - -declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) #2 - -declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) #2 - -declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8>) #2 - -declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2 - -declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2 - -declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2 - -declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2 - -declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2 - -declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2 - -declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2 - -declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2 - -declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2 - -declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2 - -declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2 - -declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2 - -declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2 - -declare <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2 - -declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3 - -declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3 - -declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3 - -declare <2 x i64> @llvm.arm.neon.vabs.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vqneg.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vqabs.v2i64(<2 x i64>) #2 - -declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) #2 - -declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) #2 - -declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) #2 - -declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64>, <4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32>, <8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16>, <16 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64>, <4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32>, <8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16>, <16 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64>, <2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32>, <4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16>, <8 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64>, <2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32>, <4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16>, <8 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) #2 - -declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) #2 - -declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) #2 - -declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) #2 - -declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) #2 - -declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) #2 - -declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2 - -declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2 - - -define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_s64_f64 -; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}} - %1 = fptosi <1 x double> %a to <1 x i64> - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_u64_f64 -; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}} - %1 = fptoui <1 x double> %a to <1 x i64> - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtn_s64_f64 -; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtn_u64_f64 -; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtp_s64_f64 -; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtp_u64_f64 -; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtm_s64_f64 -; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvtm_u64_f64 -; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvta_s64_f64 -; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvta_u64_f64 -; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} - %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a) - ret <1 x i64> %1 -} - -define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_f64_s64 -; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}} - %1 = sitofp <1 x i64> %a to <1 x double> - ret <1 x double> %1 -} - -define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_f64_u64 -; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}} - %1 = uitofp <1 x i64> %a to <1 x double> - ret <1 x double> %1 -} - -declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>) -declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>) - -define <1 x double> @test_vrndn_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndn_f64 -; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrnda_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrnda_f64 -; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndp_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndp_f64 -; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndm_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndm_f64 -; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndx_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndx_f64 -; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrnd_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrnd_f64 -; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrndi_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrndi_f64 -; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) -declare <1 x double> @llvm.trunc.v1f64(<1 x double>) -declare <1 x double> @llvm.rint.v1f64(<1 x double>) -declare <1 x double> @llvm.floor.v1f64(<1 x double>) -declare <1 x double> @llvm.ceil.v1f64(<1 x double>) -declare <1 x double> @llvm.round.v1f64(<1 x double>) -declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>) - -define <1 x double> @test_vrsqrte_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrsqrte_f64 -; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrecpe_f64(<1 x double> %a) { -; CHECK-LABEL: test_vrecpe_f64 -; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vsqrt_f64(<1 x double> %a) { -; CHECK-LABEL: test_vsqrt_f64 -; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) - ret <1 x double> %1 -} - -define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vrecps_f64 -; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) { -; CHECK-LABEL: test_vrsqrts_f64 -; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b) - ret <1 x double> %1 -} - -declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) -declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) -declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) -declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) - -define i64 @test_vaddlv_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddlv_s32 -; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s - %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -define i64 @test_vaddlv_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vaddlv_u32 -; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s - %1 = tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>) -declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-mla-mls.ll b/test/CodeGen/AArch64/neon-mla-mls.ll index 37daadef0b..e7bff748ad 100644 --- a/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/test/CodeGen/AArch64/neon-mla-mls.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index 7eadde4816..b7baf25f80 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define <8 x i8> @movi8b() { @@ -15,21 +14,18 @@ define <16 x i8> @movi16b() { define <2 x i32> @movi2s_lsl0() { ; CHECK-LABEL: movi2s_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff ; CHECK-ARM64: movi {{d[0-9]+}}, #0x0000ff000000ff ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { ; CHECK-LABEL: movi2s_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #8 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ff000000ff00 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { ; CHECK-LABEL: movi2s_lsl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #16 ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff0000 ret <2 x i32> < i32 16711680, i32 16711680 > @@ -37,28 +33,24 @@ define <2 x i32> @movi2s_lsl16() { define <2 x i32> @movi2s_lsl24() { ; CHECK-LABEL: movi2s_lsl24: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, lsl #24 ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff000000ff000000 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { ; CHECK-LABEL: movi4s_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x0000ff000000ff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { ; CHECK-LABEL: movi4s_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #8 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0x00ff000000ff00 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { ; CHECK-LABEL: movi4s_lsl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #16 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff0000 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > @@ -66,35 +58,30 @@ define <4 x i32> @movi4s_lsl16() { define <4 x i32> @movi4s_lsl24() { ; CHECK-LABEL: movi4s_lsl24: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, lsl #24 ; CHECK-ARM64: movi {{v[0-9]+}}.2d, #0xff000000ff000000 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { ; CHECK-LABEL: movi4h_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.4h, #0xff ; CHECK-ARM64: movi {{d[0-9]+}}, #0xff00ff00ff00ff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { ; CHECK-LABEL: movi4h_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.4h, #{{0xff|255}}, lsl #8 ; CHECK-ARM64: movi d0, #0xff00ff00ff00ff00 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { ; CHECK-LABEL: movi8h_lsl0: -; CHECK-AARCH64: movi {{v[0-9]+}}.8h, #{{0xff|255}} ; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { ; CHECK-LABEL: movi8h_lsl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.8h, #{{0xff|255}}, lsl #8 ; CHECK-ARM64: movi v0.2d, #0xff00ff00ff00ff00 ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } @@ -177,14 +164,12 @@ define <8 x i16> @mvni8h_lsl8() { define <2 x i32> @movi2s_msl8(<2 x i32> %a) { ; CHECK-LABEL: movi2s_msl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, msl #8 ; CHECK-ARM64: movi {{d[0-9]+}}, #0x00ffff0000ffff ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { ; CHECK-LABEL: movi2s_msl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.2s, #0xff, msl #16 ; CHECK-ARM64: movi d0, #0xffffff00ffffff ret <2 x i32> < i32 16777215, i32 16777215 > } @@ -192,14 +177,12 @@ define <2 x i32> @movi2s_msl16() { define <4 x i32> @movi4s_msl8() { ; CHECK-LABEL: movi4s_msl8: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, msl #8 ; CHECK-ARM64: movi v0.2d, #0x00ffff0000ffff ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { ; CHECK-LABEL: movi4s_msl16: -; CHECK-AARCH64: movi {{v[0-9]+}}.4s, #0xff, msl #16 ; CHECK-ARM64: movi v0.2d, #0xffffff00ffffff ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } diff --git a/test/CodeGen/AArch64/neon-mul-div.ll b/test/CodeGen/AArch64/neon-mul-div.ll deleted file mode 100644 index 869bd445c7..0000000000 --- a/test/CodeGen/AArch64/neon-mul-div.ll +++ /dev/null @@ -1,754 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy of this because of the intrinsics - -define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp3 = mul <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp3 = mul <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %tmp3 = mul <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h - %tmp3 = mul <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = mul <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = mul <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK-LABEL: mul1xi64: -;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} - %tmp3 = mul <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { -;CHECK-LABEL: mul2xi64: -;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} -;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} - %tmp3 = mul <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - - define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fmul <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fmul <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fmul <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - - - define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %tmp3 = fdiv <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s - %tmp3 = fdiv <4 x float> %A, %B; - ret <4 x float> %tmp3 -} -define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %tmp3 = fdiv <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = sdiv <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = sdiv <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = sdiv <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = udiv <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = udiv <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = udiv <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = srem <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = srem <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = srem <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <1 x i8> %A, %B; - ret <1 x i8> %tmp3 -} - -define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <8 x i8> %A, %B; - ret <8 x i8> %tmp3 -} - -define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <16 x i8> %A, %B; - ret <16 x i8> %tmp3 -} - -define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <1 x i16> %A, %B; - ret <1 x i16> %tmp3 -} - -define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <4 x i16> %A, %B; - ret <4 x i16> %tmp3 -} - -define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <8 x i16> %A, %B; - ret <8 x i16> %tmp3 -} - -define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <1 x i32> %A, %B; - ret <1 x i32> %tmp3 -} - -define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <2 x i32> %A, %B; - ret <2 x i32> %tmp3 -} - -define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} -;CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - %tmp3 = urem <4 x i32> %A, %B; - ret <4 x i32> %tmp3 -} - -define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = urem <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) { -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} -;CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} - %tmp3 = urem <2 x i64> %A, %B; - ret <2 x i64> %tmp3 -} - -define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { -; CHECK: bl fmodf -; CHECK: bl fmodf - %tmp3 = frem <2 x float> %A, %B; - ret <2 x float> %tmp3 -} - -define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { -; CHECK: bl fmodf -; CHECK: bl fmodf -; CHECK: bl fmodf -; CHECK: bl fmodf - %tmp3 = frem <4 x float> %A, %B; - ret <4 x float> %tmp3 -} - -define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) { -; CHECK: bl fmod - %tmp3 = frem <1 x double> %A, %B; - ret <1 x double> %tmp3 -} - -define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { -; CHECK: bl fmod -; CHECK: bl fmod - %tmp3 = frem <2 x double> %A, %B; - ret <2 x double> %tmp3 -} - -declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) -declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) - -define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: poly_mulv8i8: - %prod = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: pmul v0.8b, v0.8b, v1.8b - ret <8 x i8> %prod -} - -define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: poly_mulv16i8: - %prod = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: pmul v0.16b, v0.16b, v1.16b - ret <16 x i8> %prod -} - -declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqdmulh v0.4h, v0.4h, v1.4h - ret <4 x i16> %prod -} - -define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqdmulh v0.8h, v0.8h, v1.8h - ret <8 x i16> %prod -} - -define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqdmulh v0.2s, v0.2s, v1.2s - ret <2 x i32> %prod -} - -define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqdmulh v0.4s, v0.4s, v1.4s - ret <4 x i32> %prod -} - -declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqrdmulh_v4i16: - %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqrdmulh v0.4h, v0.4h, v1.4h - ret <4 x i16> %prod -} - -define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqrdmulh_v8i16: - %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqrdmulh v0.8h, v0.8h, v1.8h - ret <8 x i16> %prod -} - -define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqrdmulh_v2i32: - %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqrdmulh v0.2s, v0.2s, v1.2s - ret <2 x i32> %prod -} - -define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqrdmulh_v4i32: - %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqrdmulh v0.4s, v0.4s, v1.4s - ret <4 x i32> %prod -} - -declare <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float>, <4 x float>) -declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>) - -define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: fmulx v0.2s, v0.2s, v1.2s - %val = call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) - ret <2 x float> %val -} - -define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: fmulx v0.4s, v0.4s, v1.4s - %val = call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) - ret <4 x float> %val -} - -define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { -; Using registers other than v0, v1 and v2 are possible, but would be odd. -; CHECK: fmulx v0.2d, v0.2d, v1.2d - %val = call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) - ret <2 x double> %val -} - -define <1 x i8> @test_mul_v1i8(<1 x i8> %a, <1 x i8> %b) { -;CHECK-LABEL: test_mul_v1i8: -;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %c = mul <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @test_mul_v1i16(<1 x i16> %a, <1 x i16> %b) { -;CHECK-LABEL: test_mul_v1i16: -;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h - %c = mul <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @test_mul_v1i32(<1 x i32> %a, <1 x i32> %b) { -;CHECK-LABEL: test_mul_v1i32: -;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s - %c = mul <1 x i32> %a, %b - ret <1 x i32> %c -} diff --git a/test/CodeGen/AArch64/neon-or-combine.ll b/test/CodeGen/AArch64/neon-or-combine.ll index e8da72f42c..d98c12802a 100644 --- a/test/CodeGen/AArch64/neon-or-combine.ll +++ b/test/CodeGen/AArch64/neon-or-combine.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s ; Check that the DAGCombiner does not crash with an assertion failure diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll index 99507cecf1..d45dde649e 100644 --- a/test/CodeGen/AArch64/neon-perm.ll +++ b/test/CodeGen/AArch64/neon-perm.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 %struct.int8x8x2_t = type { [2 x <8 x i8>] } @@ -54,7 +53,6 @@ entry: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> @@ -71,7 +69,6 @@ entry: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> @@ -112,7 +109,6 @@ entry: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp1_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> @@ -129,7 +125,6 @@ entry: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp1q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> @@ -138,7 +133,6 @@ entry: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp1_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> @@ -155,7 +149,6 @@ entry: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp1q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> @@ -228,7 +221,6 @@ entry: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> @@ -245,8 +237,6 @@ entry: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> @@ -287,7 +277,6 @@ entry: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp2_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> @@ -304,8 +293,6 @@ entry: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vuzp2q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> @@ -314,7 +301,6 @@ entry: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp2_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> @@ -331,8 +317,6 @@ entry: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vuzp2q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: mov {{v[0-9]+}}.16b, {{v[0-9]+}}.16b ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3> @@ -405,7 +389,6 @@ entry: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> @@ -422,7 +405,6 @@ entry: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> @@ -463,7 +445,6 @@ entry: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip1_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> @@ -480,7 +461,6 @@ entry: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip1q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> @@ -489,7 +469,6 @@ entry: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip1_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> @@ -506,7 +485,6 @@ entry: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip1q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> @@ -579,7 +557,6 @@ entry: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> @@ -596,7 +573,6 @@ entry: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> @@ -637,7 +613,6 @@ entry: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip2_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> @@ -654,7 +629,6 @@ entry: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vzip2q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> @@ -663,7 +637,6 @@ entry: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip2_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> @@ -680,7 +653,6 @@ entry: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vzip2q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3> @@ -753,7 +725,6 @@ entry: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> @@ -770,7 +741,6 @@ entry: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> @@ -811,7 +781,6 @@ entry: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn1_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> @@ -828,7 +797,6 @@ entry: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn1q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> @@ -837,7 +805,6 @@ entry: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn1_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> @@ -854,7 +821,6 @@ entry: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn1q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> @@ -927,7 +893,6 @@ entry: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> @@ -944,7 +909,6 @@ entry: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_s64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> @@ -985,7 +949,6 @@ entry: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn2_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> @@ -1002,7 +965,6 @@ entry: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vtrn2q_u64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> @@ -1011,7 +973,6 @@ entry: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn2_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> @@ -1028,7 +989,6 @@ entry: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vtrn2q_f64: -; CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3> @@ -2534,8 +2494,6 @@ entry: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2572,8 +2530,6 @@ entry: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vuzp_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2586,8 +2542,6 @@ entry: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vuzp_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2756,8 +2710,6 @@ entry: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2794,8 +2746,6 @@ entry: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vzip_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2808,8 +2758,6 @@ entry: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vzip_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -2978,8 +2926,6 @@ entry: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_s32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -3016,8 +2962,6 @@ entry: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vtrn_u32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -3030,8 +2974,6 @@ entry: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: test_vtrn_f32: -; CHECK-AARCH64: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -; CHECK-AARCH64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] ; CHECK-ARM64: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s ; CHECK-ARM64: zip2 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s entry: @@ -3183,7 +3125,4 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) { %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 ret %struct.uint8x8x2_t %.fca.0.1.insert -; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-AARCH64-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b -; CHECK-AARCH64-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b } diff --git a/test/CodeGen/AArch64/neon-rounding-halving-add.ll b/test/CodeGen/AArch64/neon-rounding-halving-add.ll deleted file mode 100644 index 5c99ba1e4d..0000000000 --- a/test/CodeGen/AArch64/neon-rounding-halving-add.ll +++ /dev/null @@ -1,106 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vhadd.ll - -declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_urhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_urhadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: urhadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_srhadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_srhadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: srhadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_urhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_urhadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: urhadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_srhadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_srhadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: srhadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_urhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_urhadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: urhadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_srhadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_srhadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: srhadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_urhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_urhadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: urhadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_srhadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_srhadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: srhadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_urhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_urhadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: urhadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_srhadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_srhadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: srhadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_urhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_urhadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: urhadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_srhadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_srhadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: srhadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - diff --git a/test/CodeGen/AArch64/neon-rounding-shift.ll b/test/CodeGen/AArch64/neon-rounding-shift.ll deleted file mode 100644 index 692df988cf..0000000000 --- a/test/CodeGen/AArch64/neon-rounding-shift.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vshift.ll - -declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_urshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_urshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: urshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_srshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_srshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: srshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_urshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_urshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: urshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_srshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_srshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: srshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_urshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_urshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: urshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_srshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_srshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: srshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_urshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_urshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: urshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_srshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_srshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: srshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_urshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_urshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: urshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_srshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_srshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: srshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_urshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_urshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: urshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_srshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: srshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_urshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_urshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: urshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_srshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_srshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: srshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-saturating-add-sub.ll deleted file mode 100644 index 996835bfc5..0000000000 --- a/test/CodeGen/AArch64/neon-saturating-add-sub.ll +++ /dev/null @@ -1,241 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vqadd.ll -declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqadd_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqadd_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqadd v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqadd_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqadd_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqadd v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqadd_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqadd_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqadd v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqadd_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqadd_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqadd v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqadd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqadd_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqadd v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqadd_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqadd v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - - - -declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqadd_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqadd v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqadd_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqadd_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqadd v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqsub_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqsub_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqsub v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqsub_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqsub_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqsub v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqsub_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqsub_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqsub v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqsub_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqsub_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqsub v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqsub_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqsub_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqsub v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sqsub_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqsub_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqsub v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqsub_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqsub v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqsub_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqsub v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} diff --git a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll deleted file mode 100644 index a59eebd55d..0000000000 --- a/test/CodeGen/AArch64/neon-saturating-rounding-shift.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vshift.ll - -declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqrshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqrshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqrshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqrshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqrshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqrshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqrshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqrshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqrshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqrshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqrshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqrshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqrshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqrshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqrshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqrshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqrshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqrshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqrshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqrshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqrshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqrshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqrshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqrshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqrshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqrshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqrshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqrshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqrshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqrshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqrshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqrshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqrshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqrshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-saturating-shift.ll b/test/CodeGen/AArch64/neon-saturating-shift.ll deleted file mode 100644 index 035740cba5..0000000000 --- a/test/CodeGen/AArch64/neon-saturating-shift.ll +++ /dev/null @@ -1,122 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic calls: arm64 has similar in vshift.ll - -declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: uqshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sqshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_uqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_uqshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: uqshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sqshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sqshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sqshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_uqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_uqshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: uqshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sqshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sqshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sqshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_uqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_uqshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: uqshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sqshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sqshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sqshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_uqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_uqshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: uqshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sqshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sqshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sqshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_uqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_uqshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: uqshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sqshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sqshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_uqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_uqshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: uqshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sqshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sqshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sqshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - diff --git a/test/CodeGen/AArch64/neon-scalar-abs.ll b/test/CodeGen/AArch64/neon-scalar-abs.ll deleted file mode 100644 index bb351ab86f..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-abs.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has tests for i64 versions, uses different approach for others. - -define i64 @test_vabsd_s64(i64 %a) { -; CHECK: test_vabsd_s64 -; CHECK: abs {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vabs.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vabs1.i = tail call <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64> %vabs.i) - %0 = extractelement <1 x i64> %vabs1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vabs(<1 x i64>) - -define i8 @test_vqabsb_s8(i8 %a) { -; CHECK: test_vqabsb_s8 -; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i) - %0 = extractelement <1 x i8> %vqabs1.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>) - -define i16 @test_vqabsh_s16(i16 %a) { -; CHECK: test_vqabsh_s16 -; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i) - %0 = extractelement <1 x i16> %vqabs1.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>) - -define i32 @test_vqabss_s32(i32 %a) { -; CHECK: test_vqabss_s32 -; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i) - %0 = extractelement <1 x i32> %vqabs1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>) - -define i64 @test_vqabsd_s64(i64 %a) { -; CHECK: test_vqabsd_s64 -; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i) - %0 = extractelement <1 x i64> %vqabs1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-add-sub.ll deleted file mode 100644 index 7e262cb8bd..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-add-sub.ll +++ /dev/null @@ -1,51 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has a copy of the key parts in AdvSIMD-Scalar.ll - -define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %tmp3 = add <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %tmp3 = sub <1 x i64> %A, %B; - ret <1 x i64> %tmp3 -} - -declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_add_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uadd_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sub_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_usub_v1i64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - - diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll index f5636db5e1..6cfdc5be13 100644 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll +++ b/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s declare float @llvm.fma.f32(float, float, float) diff --git a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll deleted file mode 100644 index ff29413252..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll +++ /dev/null @@ -1,124 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has separate copy due to intrinsics (aarch64-neon-scalar-by-elem-mul.ll) -define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { - ; CHECK: test_fmul_lane_ss2S - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = fmul float %a, %tmp1; - ret float %tmp2; -} - -define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { - ; CHECK: test_fmul_lane_ss2S_swap - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = fmul float %tmp1, %a; - ret float %tmp2; -} - - -define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { - ; CHECK: test_fmul_lane_ss4S - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = fmul float %a, %tmp1; - ret float %tmp2; -} - -define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { - ; CHECK: test_fmul_lane_ss4S_swap - ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = fmul float %tmp1, %a; - ret float %tmp2; -} - - -define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { - ; CHECK: test_fmul_lane_ddD - ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp1 = extractelement <1 x double> %v, i32 0 - %tmp2 = fmul double %a, %tmp1; - ret double %tmp2; -} - - - -define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { - ; CHECK: test_fmul_lane_dd2D - ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = fmul double %a, %tmp1; - ret double %tmp2; -} - - -define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { - ; CHECK: test_fmul_lane_dd2D_swap - ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = fmul double %tmp1, %a; - ret double %tmp2; -} - -declare float @llvm.aarch64.neon.vmulx.f32(float, float) - -define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { - ; CHECK: test_fmulx_lane_f32 - ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] - %tmp1 = extractelement <2 x float> %v, i32 1 - %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) - ret float %tmp2; -} - -define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { - ; CHECK: test_fmulx_laneq_f32 - ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) - ret float %tmp2; -} - -define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { - ; CHECK: test_fmulx_laneq_f32_swap - ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] - %tmp1 = extractelement <4 x float> %v, i32 3 - %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a) - ret float %tmp2; -} - -declare double @llvm.aarch64.neon.vmulx.f64(double, double) - -define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { - ; CHECK: test_fmulx_lane_f64 - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp1 = extractelement <1 x double> %v, i32 0 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) - ret double %tmp2; -} - -define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { - ; CHECK: test_fmulx_laneq_f64_0 - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] - %tmp1 = extractelement <2 x double> %v, i32 0 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) - ret double %tmp2; -} - - -define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { - ; CHECK: test_fmulx_laneq_f64_1 - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) - ret double %tmp2; -} - -define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { - ; CHECK: test_fmulx_laneq_f64_1_swap - ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] - %tmp1 = extractelement <2 x double> %v, i32 1 - %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a) - ret double %tmp2; -} - diff --git a/test/CodeGen/AArch64/neon-scalar-compare.ll b/test/CodeGen/AArch64/neon-scalar-compare.ll deleted file mode 100644 index 2ecde91d7e..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-compare.ll +++ /dev/null @@ -1,344 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has (the non-trivial parts of) this test covered by vcmp.ll - -;; Scalar Integer Compare - -define i64 @test_vceqd(i64 %a, i64 %b) { -; CHECK: test_vceqd -; CHECK: cmeq {{d[0-9]+}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vceq.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vceq1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceq.i, <1 x i64> %vceq1.i) - %0 = extractelement <1 x i64> %vceq2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vceqzd(i64 %a) { -; CHECK: test_vceqzd -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vceqz.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vceqz1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64> %vceqz.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vceqz1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcged(i64 %a, i64 %b) { -; CHECK: test_vcged -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcge.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcge1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcgezd(i64 %a) { -; CHECK: test_vcgezd -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vcgez.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgez1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcgez.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vcgez1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcgtd(i64 %a, i64 %b) { -; CHECK: test_vcgtd -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcgt.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgt1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcgtzd(i64 %a) { -; CHECK: test_vcgtzd -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vcgtz.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgtz1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgtz.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vcgtz1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcled(i64 %a, i64 %b) { -; CHECK: test_vcled -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcgt.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcgt1.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64> %vcgt.i, <1 x i64> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vclezd(i64 %a) { -; CHECK: test_vclezd -; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vclez.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vclez1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64> %vclez.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vclez1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcltd(i64 %a, i64 %b) { -; CHECK: test_vcltd -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vcge.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vcge1.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64> %vcge.i, <1 x i64> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 - ret i64 %0 -} - -define i64 @test_vcltzd(i64 %a) { -; CHECK: test_vcltzd -; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0x0 -entry: - %vcltz.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vcltz1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64> %vcltz.i, <1 x i64> zeroinitializer) - %0 = extractelement <1 x i64> %vcltz1.i, i32 0 - ret i64 %0 -} - -define i64 @test_vtstd(i64 %a, i64 %b) { -; CHECK: test_vtstd -; CHECK: cmtst {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %vtst.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vtst1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vtst2.i = call <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64> %vtst.i, <1 x i64> %vtst1.i) - %0 = extractelement <1 x i64> %vtst2.i, i32 0 - ret i64 %0 -} - - -define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcage_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 - ret <1 x i64> %vcage2.i -} - -define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcagt_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) #2 - ret <1 x i64> %vcagt2.i -} - -define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcale_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcage2.i = tail call <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 - ret <1 x i64> %vcage2.i -} - -define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcalt_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %vcagt2.i = tail call <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) #2 - ret <1 x i64> %vcagt2.i -} - -define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vceq_s64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp eq <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vceq_u64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp eq <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vceq_f64 -; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp oeq <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcge_s64 -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp sge <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcge_u64 -; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp uge <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcge_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp oge <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcle_s64 -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp sle <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcle_u64 -; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp ule <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcle_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp ole <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcgt_s64 -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp sgt <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vcgt_u64 -; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp ugt <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vcgt_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp ogt <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vclt_s64 -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp slt <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 { -; CHECK: test_vclt_u64 -; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = icmp ult <1 x i64> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 { -; CHECK: test_vclt_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} - %cmp.i = fcmp olt <1 x double> %a, %b - %sext.i = sext <1 x i1> %cmp.i to <1 x i64> - ret <1 x i64> %sext.i -} - -define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 { -; CHECK: test_vceqz_s64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 { -; CHECK: test_vceqz_u64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 { -; CHECK: test_vceqz_p64 -; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp eq <1 x i64> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 { -; CHECK: test_vceqzq_p64 -; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0 - %1 = icmp eq <2 x i64> %a, zeroinitializer - %vceqz.i = sext <2 x i1> %1 to <2 x i64> - ret <2 x i64> %vceqz.i -} - -define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 { -; CHECK: test_vcgez_s64 -; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp sge <1 x i64> %a, zeroinitializer - %vcgez.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vcgez.i -} - -define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 { -; CHECK: test_vclez_s64 -; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp sle <1 x i64> %a, zeroinitializer - %vclez.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vclez.i -} - -define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 { -; CHECK: test_vcgtz_s64 -; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 - %1 = icmp sgt <1 x i64> %a, zeroinitializer - %vcgtz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vcgtz.i -} - -define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { -; CHECK: test_vcltz_s64 -; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0 - %1 = icmp slt <1 x i64> %a, zeroinitializer - %vcltz.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vcltz.i -} - -declare <1 x i64> @llvm.arm.neon.vacgt.v1i64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.arm.neon.vacge.v1i64.v1f64(<1 x double>, <1 x double>) -declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vchi.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index a505dafa3e..ab7ea661b4 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -1,10 +1,8 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 ; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 define float @test_dup_sv2S(<2 x float> %v) { ; CHECK-LABEL: test_dup_sv2S - ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] ; CHECK-ARM64: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 @@ -39,7 +37,6 @@ define double @test_dup_dvD(<1 x double> %v) { define double @test_dup_dv2D(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D - ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 @@ -47,7 +44,6 @@ define double @test_dup_dv2D(<2 x double> %v) { define double @test_dup_dv2D_0(<2 x double> %v) { ; CHECK-LABEL: test_dup_dv2D_0 - ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-ARM64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] ; CHECK: ret %tmp1 = extractelement <2 x double> %v, i32 1 @@ -56,49 +52,42 @@ define double @test_dup_dv2D_0(<2 x double> %v) { define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { ; CHECK-LABEL: test_vector_dup_bv16B - ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14] %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> ret <1 x i8> %shuffle.i } define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { ; CHECK-LABEL: test_vector_dup_bv8B - ; CHECK-AARCH64: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7] %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7> ret <1 x i8> %shuffle.i } define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { ; CHECK-LABEL: test_vector_dup_hv8H - ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> ret <1 x i16> %shuffle.i } define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { ; CHECK-LABEL: test_vector_dup_hv4H - ; CHECK-AARCH64: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3> ret <1 x i16> %shuffle.i } define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { ; CHECK-LABEL: test_vector_dup_sv4S - ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3] %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> ret <1 x i32> %shuffle } define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { ; CHECK-LABEL: test_vector_dup_sv2S - ; CHECK-AARCH64: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1> ret <1 x i32> %shuffle } define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ; CHECK-LABEL: test_vector_dup_dv2D - ; CHECK-AARCH64: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-ARM64: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8 %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> ret <1 x i64> %shuffle.i diff --git a/test/CodeGen/AArch64/neon-scalar-cvt.ll b/test/CodeGen/AArch64/neon-scalar-cvt.ll deleted file mode 100644 index c19b0a765c..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-cvt.ll +++ /dev/null @@ -1,134 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 has a different approach to scalars. Discarding. - -define float @test_vcvts_f32_s32(i32 %a) { -; CHECK: test_vcvts_f32_s32 -; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32> %vcvtf.i) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32>) - -define double @test_vcvtd_f64_s64(i64 %a) { -; CHECK: test_vcvtd_f64_s64 -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64> %vcvtf.i) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64>) - -define float @test_vcvts_f32_u32(i32 %a) { -; CHECK: test_vcvts_f32_u32 -; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32> %vcvtf.i) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32>) - -define double @test_vcvtd_f64_u64(i64 %a) { -; CHECK: test_vcvtd_f64_u64 -; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64> %vcvtf.i) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64>) - -define float @test_vcvts_n_f32_s32(i32 %a) { -; CHECK: test_vcvts_n_f32_s32 -; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32>, i32) - -define double @test_vcvtd_n_f64_s64(i64 %a) { -; CHECK: test_vcvtd_n_f64_s64 -; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64>, i32) - -define float @test_vcvts_n_f32_u32(i32 %a) { -; CHECK: test_vcvts_n_f32_u32 -; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) - ret float %0 -} - -declare float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32>, i32) - -define double @test_vcvtd_n_f64_u64(i64 %a) { -; CHECK: test_vcvtd_n_f64_u64 -; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 -entry: - %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) - ret double %0 -} - -declare double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64>, i32) - -define i32 @test_vcvts_n_s32_f32(float %a) { -; CHECK: test_vcvts_n_s32_f32 -; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1 -entry: - %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float %a, i32 1) - %0 = extractelement <1 x i32> %fcvtzs1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float, i32) - -define i64 @test_vcvtd_n_s64_f64(double %a) { -; CHECK: test_vcvtd_n_s64_f64 -; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1 -entry: - %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double %a, i32 1) - %0 = extractelement <1 x i64> %fcvtzs1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double, i32) - -define i32 @test_vcvts_n_u32_f32(float %a) { -; CHECK: test_vcvts_n_u32_f32 -; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32 -entry: - %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float %a, i32 32) - %0 = extractelement <1 x i32> %fcvtzu1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float, i32) - -define i64 @test_vcvtd_n_u64_f64(double %a) { -; CHECK: test_vcvtd_n_u64_f64 -; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64 -entry: - %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double %a, i32 64) - %0 = extractelement <1 x i64> %fcvtzu1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double, i32) diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll deleted file mode 100644 index 502fcdacfc..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-ext.ll +++ /dev/null @@ -1,114 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 doesn't use <1 x iN> types, for N < 64. - -define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i32_v1i64: -; CHECK: ushll v0.2d, v0.2s, #0 - %1 = extractelement <2 x i32> %v, i32 0 - %2 = insertelement <1 x i32> undef, i32 %1, i32 0 - %3 = zext <1 x i32> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i32> @test_zext_v1i16_v1i32(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i16_v1i32: -; CHECK: ushll v0.4s, v0.4h, #0 - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = zext <1 x i16> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i8_v1i16: -; CHECK: ushll v0.8h, v0.8b, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = zext <1 x i8> %2 to <1 x i16> - ret <1 x i16> %3 -} - -define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i8_v1i32: -; CHECK: dup b0, v0.b[0] - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = zext <1 x i8> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i64> @test_zext_v1i16_v1i64(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i16_v1i64: -; CHECK: dup h0, v0.h[0] - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = zext <1 x i16> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i64> @test_zext_v1i8_v1i64(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_zext_v1i8_v1i64: -; CHECK: dup b0, v0.b[0] - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = zext <1 x i8> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i64> @test_sext_v1i32_v1i64(<2 x i32> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i32_v1i64: -; CHECK: sshll v0.2d, v0.2s, #0 - %1 = extractelement <2 x i32> %v, i32 0 - %2 = insertelement <1 x i32> undef, i32 %1, i32 0 - %3 = sext <1 x i32> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i32> @test_sext_v1i16_v1i32(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i16_v1i32: -; CHECK: sshll v0.4s, v0.4h, #0 - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = sext <1 x i16> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i16> @test_sext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i8_v1i16: -; CHECK: sshll v0.8h, v0.8b, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = sext <1 x i8> %2 to <1 x i16> - ret <1 x i16> %3 -} - -define <1 x i32> @test_sext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i8_v1i32: -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK: sshll v0.4s, v0.4h, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = sext <1 x i8> %2 to <1 x i32> - ret <1 x i32> %3 -} - -define <1 x i64> @test_sext_v1i16_v1i64(<4 x i16> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i16_v1i64: -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK: sshll v0.2d, v0.2s, #0 - %1 = extractelement <4 x i16> %v, i32 0 - %2 = insertelement <1 x i16> undef, i16 %1, i32 0 - %3 = sext <1 x i16> %2 to <1 x i64> - ret <1 x i64> %3 -} - -define <1 x i64> @test_sext_v1i8_v1i64(<8 x i8> %v) nounwind readnone { -; CHECK-LABEL: test_sext_v1i8_v1i64: -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK: sshll v0.2d, v0.2s, #0 - %1 = extractelement <8 x i8> %v, i32 0 - %2 = insertelement <1 x i8> undef, i8 %1, i32 0 - %3 = sext <1 x i8> %2 to <1 x i64> - ret <1 x i64> %3 -} diff --git a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll b/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll deleted file mode 100644 index 2004226bd1..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-extract-narrow.ll +++ /dev/null @@ -1,105 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; intrinsic wrangling that arm64 does differently. - -define i8 @test_vqmovunh_s16(i16 %a) { -; CHECK: test_vqmovunh_s16 -; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}} -entry: - %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i) - %0 = extractelement <1 x i8> %vqmovun1.i, i32 0 - ret i8 %0 -} - -define i16 @test_vqmovuns_s32(i32 %a) { -; CHECK: test_vqmovuns_s32 -; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}} -entry: - %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i) - %0 = extractelement <1 x i16> %vqmovun1.i, i32 0 - ret i16 %0 -} - -define i32 @test_vqmovund_s64(i64 %a) { -; CHECK: test_vqmovund_s64 -; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}} -entry: - %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i) - %0 = extractelement <1 x i32> %vqmovun1.i, i32 0 - ret i32 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>) - -define i8 @test_vqmovnh_s16(i16 %a) { -; CHECK: test_vqmovnh_s16 -; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i) - %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 - ret i8 %0 -} - -define i16 @test_vqmovns_s32(i32 %a) { -; CHECK: test_vqmovns_s32 -; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i) - %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 - ret i16 %0 -} - -define i32 @test_vqmovnd_s64(i64 %a) { -; CHECK: test_vqmovnd_s64 -; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i) - %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 - ret i32 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>) - -define i8 @test_vqmovnh_u16(i16 %a) { -; CHECK: test_vqmovnh_u16 -; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i) - %0 = extractelement <1 x i8> %vqmovn1.i, i32 0 - ret i8 %0 -} - - -define i16 @test_vqmovns_u32(i32 %a) { -; CHECK: test_vqmovns_u32 -; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i) - %0 = extractelement <1 x i16> %vqmovn1.i, i32 0 - ret i16 %0 -} - -define i32 @test_vqmovnd_u64(i64 %a) { -; CHECK: test_vqmovnd_u64 -; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}} -entry: - %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i) - %0 = extractelement <1 x i32> %vqmovn1.i, i32 0 - ret i32 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-fabd.ll b/test/CodeGen/AArch64/neon-scalar-fabd.ll deleted file mode 100644 index 9b2ae2bbc0..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-fabd.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has these two tests in vabs.ll - -define float @test_vabds_f32(float %a, float %b) { -; CHECK-LABEL: test_vabds_f32 -; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vabd.f32(float %a, float %a) - ret float %0 -} - -define double @test_vabdd_f64(double %a, double %b) { -; CHECK-LABEL: test_vabdd_f64 -; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vabd.f64(double %a, double %b) - ret double %0 -} - -declare double @llvm.aarch64.neon.vabd.f64(double, double) -declare float @llvm.aarch64.neon.vabd.f32(float, float) diff --git a/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/test/CodeGen/AArch64/neon-scalar-fcvt.ll deleted file mode 100644 index 341ed69b48..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-fcvt.ll +++ /dev/null @@ -1,234 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 duplicates these tests in cvt.ll - -;; Scalar Floating-point Convert - -define float @test_vcvtxn(double %a) { -; CHECK: test_vcvtxn -; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}} -entry: - %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a) - ret float %vcvtf -} - -declare float @llvm.aarch64.neon.fcvtxn(double) - -define i32 @test_vcvtass(float %a) { -; CHECK: test_vcvtass -; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float) - -define i64 @test_test_vcvtasd(double %a) { -; CHECK: test_test_vcvtasd -; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double) - -define i32 @test_vcvtaus(float %a) { -; CHECK: test_vcvtaus -; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float) - -define i64 @test_vcvtaud(double %a) { -; CHECK: test_vcvtaud -; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double) - -define i32 @test_vcvtmss(float %a) { -; CHECK: test_vcvtmss -; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtms1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float) - -define i64 @test_vcvtmd_s64_f64(double %a) { -; CHECK: test_vcvtmd_s64_f64 -; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double) - -define i32 @test_vcvtmus(float %a) { -; CHECK: test_vcvtmus -; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float) - -define i64 @test_vcvtmud(double %a) { -; CHECK: test_vcvtmud -; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double) - -define i32 @test_vcvtnss(float %a) { -; CHECK: test_vcvtnss -; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float) - -define i64 @test_vcvtnd_s64_f64(double %a) { -; CHECK: test_vcvtnd_s64_f64 -; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double) - -define i32 @test_vcvtnus(float %a) { -; CHECK: test_vcvtnus -; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float) - -define i64 @test_vcvtnud(double %a) { -; CHECK: test_vcvtnud -; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double) - -define i32 @test_vcvtpss(float %a) { -; CHECK: test_vcvtpss -; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float) - -define i64 @test_vcvtpd_s64_f64(double %a) { -; CHECK: test_vcvtpd_s64_f64 -; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double) - -define i32 @test_vcvtpus(float %a) { -; CHECK: test_vcvtpus -; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float) - -define i64 @test_vcvtpud(double %a) { -; CHECK: test_vcvtpud -; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double) - -define i32 @test_vcvtss(float %a) { -; CHECK: test_vcvtss -; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float) - -define i64 @test_vcvtd_s64_f64(double %a) { -; CHECK: test_vcvtd_s64_f64 -; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double) - -define i32 @test_vcvtus(float %a) { -; CHECK: test_vcvtus -; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}} -entry: - %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a) - %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float) - -define i64 @test_vcvtud(double %a) { -; CHECK: test_vcvtud -; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}} -entry: - %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a) - %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double) diff --git a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/test/CodeGen/AArch64/neon-scalar-fp-compare.ll deleted file mode 100644 index b17d8655c6..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-fp-compare.ll +++ /dev/null @@ -1,283 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; arm64 does not use intrinsics for comparisons. - -;; Scalar Floating-point Compare - -define i32 @test_vceqs_f32(float %a, float %b) { -; CHECK-LABEL: test_vceqs_f32 -; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fceq2.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fceq2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vceqd_f64(double %a, double %b) { -; CHECK-LABEL: test_vceqd_f64 -; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fceq2.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fceq2.i, i32 0 - ret i64 %0 -} - -define <1 x i64> @test_vceqz_f64(<1 x double> %a) { -; CHECK-LABEL: test_vceqz_f64 -; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 -entry: - %0 = fcmp oeq <1 x double> %a, zeroinitializer - %vceqz.i = sext <1 x i1> %0 to <1 x i64> - ret <1 x i64> %vceqz.i -} - -define i32 @test_vceqzs_f32(float %a) { -; CHECK-LABEL: test_vceqzs_f32 -; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fceq1.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fceq1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vceqzd_f64(double %a) { -; CHECK-LABEL: test_vceqzd_f64 -; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fceq1.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fceq1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcges_f32(float %a, float %b) { -; CHECK-LABEL: test_vcges_f32 -; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcge2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcged_f64(double %a, double %b) { -; CHECK-LABEL: test_vcged_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcge2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcgezs_f32(float %a) { -; CHECK-LABEL: test_vcgezs_f32 -; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fcge1.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fcge1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcgezd_f64(double %a) { -; CHECK-LABEL: test_vcgezd_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fcge1.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fcge1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcgts_f32(float %a, float %b) { -; CHECK-LABEL: test_vcgts_f32 -; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcgt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcgtd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcgtd_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcgt2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcgtzs_f32(float %a) { -; CHECK-LABEL: test_vcgtzs_f32 -; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fcgt1.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fcgt1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcgtzd_f64(double %a) { -; CHECK-LABEL: test_vcgtzd_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fcgt1.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fcgt1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcles_f32(float %a, float %b) { -; CHECK-LABEL: test_vcles_f32 -; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcge2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcled_f64(double %a, double %b) { -; CHECK-LABEL: test_vcled_f64 -; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcge2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vclezs_f32(float %a) { -; CHECK-LABEL: test_vclezs_f32 -; CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fcle1.i = call <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fcle1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vclezd_f64(double %a) { -; CHECK-LABEL: test_vclezd_f64 -; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fcle1.i = call <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fcle1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vclts_f32(float %a, float %b) { -; CHECK-LABEL: test_vclts_f32 -; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcgt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcltd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcltd_f64 -; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcgt2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcltzs_f32(float %a) { -; CHECK-LABEL: test_vcltzs_f32 -; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0 -entry: - %fclt1.i = call <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float %a, float 0.0) - %0 = extractelement <1 x i32> %fclt1.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcltzd_f64(double %a) { -; CHECK-LABEL: test_vcltzd_f64 -; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0 -entry: - %fclt1.i = call <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double %a, float 0.0) - %0 = extractelement <1 x i64> %fclt1.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcages_f32(float %a, float %b) { -; CHECK-LABEL: test_vcages_f32 -; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcage2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcaged_f64(double %a, double %b) { -; CHECK-LABEL: test_vcaged_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcage2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcagts_f32(float %a, float %b) { -; CHECK-LABEL: test_vcagts_f32 -; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcagt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcagt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcagtd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcagtd_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcagt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcagt2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcales_f32(float %a, float %b) { -; CHECK-LABEL: test_vcales_f32 -; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcage2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcaled_f64(double %a, double %b) { -; CHECK-LABEL: test_vcaled_f64 -; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcage2.i, i32 0 - ret i64 %0 -} - -define i32 @test_vcalts_f32(float %a, float %b) { -; CHECK-LABEL: test_vcalts_f32 -; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} -entry: - %fcalt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) - %0 = extractelement <1 x i32> %fcalt2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vcaltd_f64(double %a, double %b) { -; CHECK-LABEL: test_vcaltd_f64 -; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} -entry: - %fcalt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) - %0 = extractelement <1 x i64> %fcalt2.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double, float) -declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double, float) -declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double, float) -declare <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double, float) -declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double, float) -declare <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double, double) -declare <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float, float) -declare <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double, double) diff --git a/test/CodeGen/AArch64/neon-scalar-mul.ll b/test/CodeGen/AArch64/neon-scalar-mul.ll deleted file mode 100644 index ac44c090b4..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-mul.ll +++ /dev/null @@ -1,144 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Just intrinsic wrangling, and arm64 does scalar differently anyway. - -define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) { -; CHECK: test_vqdmulhh_s16 -; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - %1 = insertelement <1 x i16> undef, i16 %a, i32 0 - %2 = insertelement <1 x i16> undef, i16 %b, i32 0 - %3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2) - %4 = extractelement <1 x i16> %3, i32 0 - ret i16 %4 -} - -define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) { -; CHECK: test_vqdmulhs_s32 -; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x i32> undef, i32 %a, i32 0 - %2 = insertelement <1 x i32> undef, i32 %b, i32 0 - %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2) - %4 = extractelement <1 x i32> %3, i32 0 - ret i32 %4 -} - -declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>) - -define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) { -; CHECK: test_vqrdmulhh_s16 -; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - %1 = insertelement <1 x i16> undef, i16 %a, i32 0 - %2 = insertelement <1 x i16> undef, i16 %b, i32 0 - %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2) - %4 = extractelement <1 x i16> %3, i32 0 - ret i16 %4 -} - -define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) { -; CHECK: test_vqrdmulhs_s32 -; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x i32> undef, i32 %a, i32 0 - %2 = insertelement <1 x i32> undef, i32 %b, i32 0 - %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2) - %4 = extractelement <1 x i32> %3, i32 0 - ret i32 %4 -} - -declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>) - -define float @test_vmulxs_f32(float %a, float %b) { -; CHECK: test_vmulxs_f32 -; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b) - ret float %1 -} - -define double @test_vmulxd_f64(double %a, double %b) { -; CHECK: test_vmulxd_f64 -; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %b) - ret double %1 -} - -declare float @llvm.aarch64.neon.vmulx.f32(float, float) -declare double @llvm.aarch64.neon.vmulx.f64(double, double) - -define i32 @test_vqdmlalh_s16(i32 %a, i16 %b, i16 %c) { -; CHECK: test_vqdmlalh_s16 -; CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqdmlal.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqdmlal1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vqdmlal2.i = insertelement <1 x i16> undef, i16 %c, i32 0 - %vqdmlal3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32> %vqdmlal.i, <1 x i16> %vqdmlal1.i, <1 x i16> %vqdmlal2.i) - %0 = extractelement <1 x i32> %vqdmlal3.i, i32 0 - ret i32 %0 -} - -define i64 @test_vqdmlals_s32(i64 %a, i32 %b, i32 %c) { -; CHECK: test_vqdmlals_s32 -; CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqdmlal.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqdmlal1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vqdmlal2.i = insertelement <1 x i32> undef, i32 %c, i32 0 - %vqdmlal3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64> %vqdmlal.i, <1 x i32> %vqdmlal1.i, <1 x i32> %vqdmlal2.i) - %0 = extractelement <1 x i64> %vqdmlal3.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqdmlal.v1i32(<1 x i32>, <1 x i16>, <1 x i16>) -declare <1 x i64> @llvm.aarch64.neon.vqdmlal.v1i64(<1 x i64>, <1 x i32>, <1 x i32>) - -define i32 @test_vqdmlslh_s16(i32 %a, i16 %b, i16 %c) { -; CHECK: test_vqdmlslh_s16 -; CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqdmlsl.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqdmlsl1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vqdmlsl2.i = insertelement <1 x i16> undef, i16 %c, i32 0 - %vqdmlsl3.i = call <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32> %vqdmlsl.i, <1 x i16> %vqdmlsl1.i, <1 x i16> %vqdmlsl2.i) - %0 = extractelement <1 x i32> %vqdmlsl3.i, i32 0 - ret i32 %0 -} - -define i64 @test_vqdmlsls_s32(i64 %a, i32 %b, i32 %c) { -; CHECK: test_vqdmlsls_s32 -; CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqdmlsl.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqdmlsl1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vqdmlsl2.i = insertelement <1 x i32> undef, i32 %c, i32 0 - %vqdmlsl3.i = call <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64> %vqdmlsl.i, <1 x i32> %vqdmlsl1.i, <1 x i32> %vqdmlsl2.i) - %0 = extractelement <1 x i64> %vqdmlsl3.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqdmlsl.v1i32(<1 x i32>, <1 x i16>, <1 x i16>) -declare <1 x i64> @llvm.aarch64.neon.vqdmlsl.v1i64(<1 x i64>, <1 x i32>, <1 x i32>) - -define i32 @test_vqdmullh_s16(i16 %a, i16 %b) { -; CHECK: test_vqdmullh_s16 -; CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqdmull.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqdmull1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vqdmull2.i = call <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16> %vqdmull.i, <1 x i16> %vqdmull1.i) - %0 = extractelement <1 x i32> %vqdmull2.i, i32 0 - ret i32 %0 -} - -define i64 @test_vqdmulls_s32(i32 %a, i32 %b) { -; CHECK: test_vqdmulls_s32 -; CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqdmull.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqdmull1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vqdmull2.i = call <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32> %vqdmull.i, <1 x i32> %vqdmull1.i) - %0 = extractelement <1 x i64> %vqdmull2.i, i32 0 - ret i64 %0 -} - -declare <1 x i32> @llvm.arm.neon.vqdmull.v1i32(<1 x i16>, <1 x i16>) -declare <1 x i64> @llvm.arm.neon.vqdmull.v1i64(<1 x i32>, <1 x i32>) diff --git a/test/CodeGen/AArch64/neon-scalar-neg.ll b/test/CodeGen/AArch64/neon-scalar-neg.ll deleted file mode 100644 index 6eb0a1a152..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-neg.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Intrinsic wrangling. arm64 does it differently. - -define i64 @test_vnegd_s64(i64 %a) { -; CHECK: test_vnegd_s64 -; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vneg1.i = tail call <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64> %vneg.i) - %0 = extractelement <1 x i64> %vneg1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vneg(<1 x i64>) - -define i8 @test_vqnegb_s8(i8 %a) { -; CHECK: test_vqnegb_s8 -; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i) - %0 = extractelement <1 x i8> %vqneg1.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>) - -define i16 @test_vqnegh_s16(i16 %a) { -; CHECK: test_vqnegh_s16 -; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i) - %0 = extractelement <1 x i16> %vqneg1.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>) - -define i32 @test_vqnegs_s32(i32 %a) { -; CHECK: test_vqnegs_s32 -; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i) - %0 = extractelement <1 x i32> %vqneg1.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>) - -define i64 @test_vqnegd_s64(i64 %a) { -; CHECK: test_vqnegd_s64 -; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i) - %0 = extractelement <1 x i64> %vqneg1.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll deleted file mode 100644 index 4b1ca6e91c..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-recip.ll +++ /dev/null @@ -1,93 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; duplicates arm64 tests in vsqrt.ll - -define float @test_vrecpss_f32(float %a, float %b) { -; CHECK: test_vrecpss_f32 -; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b) - ret float %1 -} - -define double @test_vrecpsd_f64(double %a, double %b) { -; CHECK: test_vrecpsd_f64 -; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b) - ret double %1 -} - -declare float @llvm.aarch64.neon.vrecps.f32(float, float) -declare double @llvm.aarch64.neon.vrecps.f64(double, double) - -define float @test_vrsqrtss_f32(float %a, float %b) { -; CHECK: test_vrsqrtss_f32 -; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b) - ret float %1 -} - -define double @test_vrsqrtsd_f64(double %a, double %b) { -; CHECK: test_vrsqrtsd_f64 -; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b) - ret double %1 -} - -declare float @llvm.aarch64.neon.vrsqrts.f32(float, float) -declare double @llvm.aarch64.neon.vrsqrts.f64(double, double) - -define float @test_vrecpes_f32(float %a) { -; CHECK: test_vrecpes_f32 -; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a) - ret float %0 -} - -define double @test_vrecped_f64(double %a) { -; CHECK: test_vrecped_f64 -; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a) - ret double %0 -} - -declare float @llvm.aarch64.neon.vrecpe.f32(float) -declare double @llvm.aarch64.neon.vrecpe.f64(double) - -define float @test_vrecpxs_f32(float %a) { -; CHECK: test_vrecpxs_f32 -; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vrecpx.f32(float %a) - ret float %0 -} - -define double @test_vrecpxd_f64(double %a) { -; CHECK: test_vrecpxd_f64 -; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a) - ret double %0 -} - -declare float @llvm.aarch64.neon.vrecpx.f32(float) -declare double @llvm.aarch64.neon.vrecpx.f64(double) - -define float @test_vrsqrtes_f32(float %a) { -; CHECK: test_vrsqrtes_f32 -; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} -entry: - %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a) - ret float %0 -} - -define double @test_vrsqrted_f64(double %a) { -; CHECK: test_vrsqrted_f64 -; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a) - ret double %0 -} - -declare float @llvm.aarch64.neon.vrsqrte.f32(float) -declare double @llvm.aarch64.neon.vrsqrte.f64(double) diff --git a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll deleted file mode 100644 index 2b94d7524e..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll +++ /dev/null @@ -1,216 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling. Duplicates various arm64 tests. - -declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) - -define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { -; CHECK: test_addp_v1i64: -; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) - ret <1 x i64> %val -} - -declare float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float>) - -define float @test_faddp_f32(<2 x float> %a) { -; CHECK: test_faddp_f32: -; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double>) - -define double @test_faddp_f64(<2 x double> %a) { -; CHECK: test_faddp_f64: -; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) - ret double %val -} - - -declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) - -define float @test_fmaxp_f32(<2 x float> %a) { -; CHECK: test_fmaxp_f32: -; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double>) - -define double @test_fmaxp_f64(<2 x double> %a) { -; CHECK: test_fmaxp_f64: -; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) - ret double %val -} - -declare float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float>) - -define float @test_fminp_f32(<2 x float> %a) { -; CHECK: test_fminp_f32: -; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double>) - -define double @test_fminp_f64(<2 x double> %a) { -; CHECK: test_fminp_f64: -; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) - ret double %val -} - -declare float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float>) - -define float @test_fmaxnmp_f32(<2 x float> %a) { -; CHECK: test_fmaxnmp_f32: -; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double>) - -define double @test_fmaxnmp_f64(<2 x double> %a) { -; CHECK: test_fmaxnmp_f64: -; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) - ret double %val -} - -declare float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float>) - -define float @test_fminnmp_f32(<2 x float> %a) { -; CHECK: test_fminnmp_f32: -; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %val = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) - ret float %val -} - -declare double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double>) - -define double @test_fminnmp_f64(<2 x double> %a) { -; CHECK: test_fminnmp_f64: -; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %val = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) - ret double %val -} - -define float @test_vaddv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vaddv_f32 -; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define float @test_vaddvq_f32(<4 x float> %a) { -; CHECK-LABEL: test_vaddvq_f32 -; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float> %a) - ret float %1 -} - -define double @test_vaddvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vaddvq_f64 -; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vmaxv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vmaxv_f32 -; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define double @test_vmaxvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vmaxvq_f64 -; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vminv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vminv_f32 -; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define double @test_vminvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vminvq_f64 -; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define double @test_vmaxnmvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vmaxnmvq_f64 -; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vmaxnmv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vmaxnmv_f32 -; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define double @test_vminnmvq_f64(<2 x double> %a) { -; CHECK-LABEL: test_vminnmvq_f64 -; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) - ret double %1 -} - -define float @test_vminnmv_f32(<2 x float> %a) { -; CHECK-LABEL: test_vminnmv_f32 -; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s - %1 = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) - ret float %1 -} - -define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vpaddq_s64 -; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) - ret <2 x i64> %1 -} - -define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vpaddq_u64 -; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d - %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) - ret <2 x i64> %1 -} - -define i64 @test_vaddvq_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vaddvq_s64 -; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -define i64 @test_vaddvq_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vaddvq_u64 -; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d - %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) - %2 = extractelement <1 x i64> %1, i32 0 - ret i64 %2 -} - -declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>) - -declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) - -declare float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float>) diff --git a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll deleted file mode 100644 index ae097afb3a..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll +++ /dev/null @@ -1,39 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Duplicates arm64'd vshift.ll - -declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_urshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_srshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - - diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll deleted file mode 100644 index ea5f8f9286..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll +++ /dev/null @@ -1,243 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling and arm64 does it differently. - -declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqadd_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqadd_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqsub_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqsub_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqadd_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqadd_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqsub_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqsub_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqadd_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqadd_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqsub_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - - -define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqsub_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqadd_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqadd_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqsub_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqsub_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define i8 @test_vuqaddb_s8(i8 %a, i8 %b) { -; CHECK: test_vuqaddb_s8 -; CHECK: suqadd {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vuqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 - %vuqadd2.i = call <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8> %vuqadd.i, <1 x i8> %vuqadd1.i) - %0 = extractelement <1 x i8> %vuqadd2.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8>, <1 x i8>) - -define i16 @test_vuqaddh_s16(i16 %a, i16 %b) { -; CHECK: test_vuqaddh_s16 -; CHECK: suqadd {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vuqadd2.i = call <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16> %vuqadd.i, <1 x i16> %vuqadd1.i) - %0 = extractelement <1 x i16> %vuqadd2.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) - -define i32 @test_vuqadds_s32(i32 %a, i32 %b) { -; CHECK: test_vuqadds_s32 -; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vuqadd2.i = call <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32> %vuqadd.i, <1 x i32> %vuqadd1.i) - %0 = extractelement <1 x i32> %vuqadd2.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32>, <1 x i32>) - -define i64 @test_vuqaddd_s64(i64 %a, i64 %b) { -; CHECK: test_vuqaddd_s64 -; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vuqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vuqadd2.i = call <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64> %vuqadd.i, <1 x i64> %vuqadd1.i) - %0 = extractelement <1 x i64> %vuqadd2.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64>, <1 x i64>) - -define i8 @test_vsqaddb_u8(i8 %a, i8 %b) { -; CHECK: test_vsqaddb_u8 -; CHECK: usqadd {{b[0-9]+}}, {{b[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i8> undef, i8 %a, i32 0 - %vsqadd1.i = insertelement <1 x i8> undef, i8 %b, i32 0 - %vsqadd2.i = call <1 x i8> @llvm.aarch64.neon.vsqadd.v1i8(<1 x i8> %vsqadd.i, <1 x i8> %vsqadd1.i) - %0 = extractelement <1 x i8> %vsqadd2.i, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vuqadd.v1i8(<1 x i8>, <1 x i8>) - -define i16 @test_vsqaddh_u16(i16 %a, i16 %b) { -; CHECK: test_vsqaddh_u16 -; CHECK: usqadd {{h[0-9]+}}, {{h[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqadd1.i = insertelement <1 x i16> undef, i16 %b, i32 0 - %vsqadd2.i = call <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16> %vsqadd.i, <1 x i16> %vsqadd1.i) - %0 = extractelement <1 x i16> %vsqadd2.i, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vuqadd.v1i16(<1 x i16>, <1 x i16>) - -define i32 @test_vsqadds_u32(i32 %a, i32 %b) { -; CHECK: test_vsqadds_u32 -; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqadd1.i = insertelement <1 x i32> undef, i32 %b, i32 0 - %vsqadd2.i = call <1 x i32> @llvm.aarch64.neon.vsqadd.v1i32(<1 x i32> %vsqadd.i, <1 x i32> %vsqadd1.i) - %0 = extractelement <1 x i32> %vsqadd2.i, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vuqadd.v1i32(<1 x i32>, <1 x i32>) - -define i64 @test_vsqaddd_u64(i64 %a, i64 %b) { -; CHECK: test_vsqaddd_u64 -; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}} -entry: - %vsqadd.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqadd1.i = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsqadd2.i = call <1 x i64> @llvm.aarch64.neon.vsqadd.v1i64(<1 x i64> %vsqadd.i, <1 x i64> %vsqadd1.i) - %0 = extractelement <1 x i64> %vsqadd2.i, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vuqadd.v1i64(<1 x i64>, <1 x i64>) diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll deleted file mode 100644 index e78c55bfe1..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll +++ /dev/null @@ -1,95 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling and arm64 does it differently. - -declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqrshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqrshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqrshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqrshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqrshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - - ret <1 x i32> %tmp1 -} - -define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqrshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqrshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqrshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - - diff --git a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll deleted file mode 100644 index b7f956cf61..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll +++ /dev/null @@ -1,89 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; Intrinsic wrangling and arm64 does it differently. - -declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>) -declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) - -define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_uqshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { -; CHECK: test_sqshl_v1i8_aarch64: - %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} - ret <1 x i8> %tmp1 -} - -declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>) -declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) - -define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_uqshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { -; CHECK: test_sqshl_v1i16_aarch64: - %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} - ret <1 x i16> %tmp1 -} - -declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>) -declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) - -define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_uqshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { -; CHECK: test_sqshl_v1i32_aarch64: - %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret <1 x i32> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_uqshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sqshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - - diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll deleted file mode 100644 index a2bdae5f52..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll +++ /dev/null @@ -1,532 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Intrinsic wrangling & arm64 does it differently. - -define i64 @test_vshrd_n_s64(i64 %a) { -; CHECK: test_vshrd_n_s64 -; CHECK: sshr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsshr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsshr1 = call <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64> %vsshr, i32 63) - %0 = extractelement <1 x i64> %vsshr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vshrds.n(<1 x i64>, i32) - -define i64 @test_vshrd_n_u64(i64 %a) { -; CHECK: test_vshrd_n_u64 -; CHECK: ushr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vushr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vushr1 = call <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64> %vushr, i32 63) - %0 = extractelement <1 x i64> %vushr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vshrdu.n(<1 x i64>, i32) - -define i64 @test_vrshrd_n_s64(i64 %a) { -; CHECK: test_vrshrd_n_s64 -; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63) - %0 = extractelement <1 x i64> %vsrshr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32) - -define i64 @test_vrshrd_n_u64(i64 %a) { -; CHECK: test_vrshrd_n_u64 -; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0 - %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63) - %0 = extractelement <1 x i64> %vurshr1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32) - -define i64 @test_vsrad_n_s64(i64 %a, i64 %b) { -; CHECK: test_vsrad_n_s64 -; CHECK: ssra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vssra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vssra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vssra2 = call <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64> %vssra, <1 x i64> %vssra1, i32 63) - %0 = extractelement <1 x i64> %vssra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsrads.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vsrad_n_u64(i64 %a, i64 %b) { -; CHECK: test_vsrad_n_u64 -; CHECK: usra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vusra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vusra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vusra2 = call <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64> %vusra, <1 x i64> %vusra1, i32 63) - %0 = extractelement <1 x i64> %vusra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsradu.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vrsrad_n_s64(i64 %a, i64 %b) { -; CHECK: test_vrsrad_n_s64 -; CHECK: srsra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsrsra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsrsra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsrsra2 = call <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64> %vsrsra, <1 x i64> %vsrsra1, i32 63) - %0 = extractelement <1 x i64> %vsrsra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vrsrads.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vrsrad_n_u64(i64 %a, i64 %b) { -; CHECK: test_vrsrad_n_u64 -; CHECK: ursra {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vursra = insertelement <1 x i64> undef, i64 %a, i32 0 - %vursra1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vursra2 = call <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64> %vursra, <1 x i64> %vursra1, i32 63) - %0 = extractelement <1 x i64> %vursra2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vrsradu.n(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vshld_n_s64(i64 %a) { -; CHECK: test_vshld_n_s64 -; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) - %0 = extractelement <1 x i64> %vshl1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64>, i32) - -define i64 @test_vshld_n_u64(i64 %a) { -; CHECK: test_vshld_n_u64 -; CHECK: shl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vshl1 = call <1 x i64> @llvm.aarch64.neon.vshld.n(<1 x i64> %vshl, i32 63) - %0 = extractelement <1 x i64> %vshl1, i32 0 - ret i64 %0 -} - -define i8 @test_vqshlb_n_s8(i8 %a) { -; CHECK: test_vqshlb_n_s8 -; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 -entry: - %vsqshl = insertelement <1 x i8> undef, i8 %a, i32 0 - %vsqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8> %vsqshl, i32 7) - %0 = extractelement <1 x i8> %vsqshl1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vqshls.n.v1i8(<1 x i8>, i32) - -define i16 @test_vqshlh_n_s16(i16 %a) { -; CHECK: test_vqshlh_n_s16 -; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 -entry: - %vsqshl = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16> %vsqshl, i32 15) - %0 = extractelement <1 x i16> %vsqshl1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vqshls.n.v1i16(<1 x i16>, i32) - -define i32 @test_vqshls_n_s32(i32 %a) { -; CHECK: test_vqshls_n_s32 -; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 -entry: - %vsqshl = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32> %vsqshl, i32 31) - %0 = extractelement <1 x i32> %vsqshl1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqshls.n.v1i32(<1 x i32>, i32) - -define i64 @test_vqshld_n_s64(i64 %a) { -; CHECK: test_vqshld_n_s64 -; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsqshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64> %vsqshl, i32 63) - %0 = extractelement <1 x i64> %vsqshl1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vqshls.n.v1i64(<1 x i64>, i32) - -define i8 @test_vqshlb_n_u8(i8 %a) { -; CHECK: test_vqshlb_n_u8 -; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, #7 -entry: - %vuqshl = insertelement <1 x i8> undef, i8 %a, i32 0 - %vuqshl1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8> %vuqshl, i32 7) - %0 = extractelement <1 x i8> %vuqshl1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vqshlu.n.v1i8(<1 x i8>, i32) - -define i16 @test_vqshlh_n_u16(i16 %a) { -; CHECK: test_vqshlh_n_u16 -; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, #15 -entry: - %vuqshl = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqshl1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16> %vuqshl, i32 15) - %0 = extractelement <1 x i16> %vuqshl1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vqshlu.n.v1i16(<1 x i16>, i32) - -define i32 @test_vqshls_n_u32(i32 %a) { -; CHECK: test_vqshls_n_u32 -; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, #31 -entry: - %vuqshl = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqshl1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32> %vuqshl, i32 31) - %0 = extractelement <1 x i32> %vuqshl1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vqshlu.n.v1i32(<1 x i32>, i32) - -define i64 @test_vqshld_n_u64(i64 %a) { -; CHECK: test_vqshld_n_u64 -; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vuqshl = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqshl1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64> %vuqshl, i32 63) - %0 = extractelement <1 x i64> %vuqshl1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vqshlu.n.v1i64(<1 x i64>, i32) - -define i8 @test_vqshlub_n_s8(i8 %a) { -; CHECK: test_vqshlub_n_s8 -; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7 -entry: - %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0 - %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7) - %0 = extractelement <1 x i8> %vsqshlu1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32) - -define i16 @test_vqshluh_n_s16(i16 %a) { -; CHECK: test_vqshluh_n_s16 -; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15 -entry: - %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15) - %0 = extractelement <1 x i16> %vsqshlu1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16>, i32) - -define i32 @test_vqshlus_n_s32(i32 %a) { -; CHECK: test_vqshlus_n_s32 -; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31 -entry: - %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31) - %0 = extractelement <1 x i32> %vsqshlu1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32) - -define i64 @test_vqshlud_n_s64(i64 %a) { -; CHECK: test_vqshlud_n_s64 -; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63) - %0 = extractelement <1 x i64> %vsqshlu1, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32) - -define i64 @test_vsrid_n_s64(i64 %a, i64 %b) { -; CHECK: test_vsrid_n_s64 -; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) - %0 = extractelement <1 x i64> %vsri2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vsrid_n_u64(i64 %a, i64 %b) { -; CHECK: test_vsrid_n_u64 -; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsri = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63) - %0 = extractelement <1 x i64> %vsri2, i32 0 - ret i64 %0 -} - -define i64 @test_vslid_n_s64(i64 %a, i64 %b) { -; CHECK: test_vslid_n_s64 -; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) - %0 = extractelement <1 x i64> %vsli2, i32 0 - ret i64 %0 -} - -declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) - -define i64 @test_vslid_n_u64(i64 %a, i64 %b) { -; CHECK: test_vslid_n_u64 -; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63 -entry: - %vsli = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0 - %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63) - %0 = extractelement <1 x i64> %vsli2, i32 0 - ret i64 %0 -} - -define i8 @test_vqshrnh_n_s16(i16 %a) { -; CHECK: test_vqshrnh_n_s16 -; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8) - %0 = extractelement <1 x i8> %vsqshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqshrns_n_s32(i32 %a) { -; CHECK: test_vqshrns_n_s32 -; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16) - %0 = extractelement <1 x i16> %vsqshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqshrnd_n_s64(i64 %a) { -; CHECK: test_vqshrnd_n_s64 -; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32) - %0 = extractelement <1 x i32> %vsqshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqshrnh_n_u16(i16 %a) { -; CHECK: test_vqshrnh_n_u16 -; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8) - %0 = extractelement <1 x i8> %vuqshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqshrns_n_u32(i32 %a) { -; CHECK: test_vqshrns_n_u32 -; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16) - %0 = extractelement <1 x i16> %vuqshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqshrnd_n_u64(i64 %a) { -; CHECK: test_vqshrnd_n_u64 -; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32) - %0 = extractelement <1 x i32> %vuqshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqrshrnh_n_s16(i16 %a) { -; CHECK: test_vqrshrnh_n_s16 -; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8) - %0 = extractelement <1 x i8> %vsqrshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqrshrns_n_s32(i32 %a) { -; CHECK: test_vqrshrns_n_s32 -; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16) - %0 = extractelement <1 x i16> %vsqrshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqrshrnd_n_s64(i64 %a) { -; CHECK: test_vqrshrnd_n_s64 -; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32) - %0 = extractelement <1 x i32> %vsqrshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqrshrnh_n_u16(i16 %a) { -; CHECK: test_vqrshrnh_n_u16 -; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0 - %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8) - %0 = extractelement <1 x i8> %vuqrshrn1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32) - -define i16 @test_vqrshrns_n_u32(i32 %a) { -; CHECK: test_vqrshrns_n_u32 -; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0 - %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16) - %0 = extractelement <1 x i16> %vuqrshrn1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32) - -define i32 @test_vqrshrnd_n_u64(i64 %a) { -; CHECK: test_vqrshrnd_n_u64 -; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0 - %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32) - %0 = extractelement <1 x i32> %vuqrshrn1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32) - -define i8 @test_vqshrunh_n_s16(i16 %a) { -; CHECK: test_vqshrunh_n_s16 -; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8) - %0 = extractelement <1 x i8> %vsqshrun1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32) - -define i16 @test_vqshruns_n_s32(i32 %a) { -; CHECK: test_vqshruns_n_s32 -; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16) - %0 = extractelement <1 x i16> %vsqshrun1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32) - -define i32 @test_vqshrund_n_s64(i64 %a) { -; CHECK: test_vqshrund_n_s64 -; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32) - %0 = extractelement <1 x i32> %vsqshrun1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32) - -define i8 @test_vqrshrunh_n_s16(i16 %a) { -; CHECK: test_vqrshrunh_n_s16 -; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8 -entry: - %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0 - %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8) - %0 = extractelement <1 x i8> %vsqrshrun1, i32 0 - ret i8 %0 -} - -declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32) - -define i16 @test_vqrshruns_n_s32(i32 %a) { -; CHECK: test_vqrshruns_n_s32 -; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16 -entry: - %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0 - %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16) - %0 = extractelement <1 x i16> %vsqrshrun1, i32 0 - ret i16 %0 -} - -declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32) - -define i32 @test_vqrshrund_n_s64(i64 %a) { -; CHECK: test_vqrshrund_n_s64 -; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32 -entry: - %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0 - %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32) - %0 = extractelement <1 x i32> %vsqrshrun1, i32 0 - ret i32 %0 -} - -declare <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64>, i32) diff --git a/test/CodeGen/AArch64/neon-scalar-shift.ll b/test/CodeGen/AArch64/neon-scalar-shift.ll deleted file mode 100644 index cf3fc0c486..0000000000 --- a/test/CodeGen/AArch64/neon-scalar-shift.ll +++ /dev/null @@ -1,237 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Duplicates existing arm64 tests in vshift.ll and vcmp.ll - -declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_ushl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sshl_v1i64: - %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>) -declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>) - -define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_ushl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { -; CHECK: test_sshl_v1i64_aarch64: - %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - ret <1 x i64> %tmp1 -} - -define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: test_vtst_s64 -; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = and <1 x i64> %a, %b - %1 = icmp ne <1 x i64> %0, zeroinitializer - %vtst.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vtst.i -} - -define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: test_vtst_u64 -; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -entry: - %0 = and <1 x i64> %a, %b - %1 = icmp ne <1 x i64> %0, zeroinitializer - %vtst.i = sext <1 x i1> %1 to <1 x i64> - ret <1 x i64> %vtst.i -} - -define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: test_vsli_n_p64 -; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0 -entry: - %vsli_n2 = tail call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %a, <1 x i64> %b, i32 0) - ret <1 x i64> %vsli_n2 -} - -declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) - -define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vsliq_n_p64 -; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 -entry: - %vsli_n2 = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 0) - ret <2 x i64> %vsli_n2 -} - -declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) - -define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vrsqrte_u32 -; CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s -entry: - %vrsqrte1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) - ret <2 x i32> %vrsqrte1.i -} - -define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vrsqrteq_u32 -; CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s -entry: - %vrsqrte1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) - ret <4 x i32> %vrsqrte1.i -} - -define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { -; CHECK-LABEL: test_vqshl_n_s8 -; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 -entry: - %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) - ret <8 x i8> %vqshl_n -} - -declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) - -define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vqshlq_n_s8 -; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 -entry: - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) - ret <16 x i8> %vqshl_n -} - -declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) - -define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { -; CHECK-LABEL: test_vqshl_n_s16 -; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 -entry: - %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) - ret <4 x i16> %vqshl_n1 -} - -declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) - -define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vqshlq_n_s16 -; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 -entry: - %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) - ret <8 x i16> %vqshl_n1 -} - -declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) - -define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { -; CHECK-LABEL: test_vqshl_n_s32 -; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 -entry: - %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) - ret <2 x i32> %vqshl_n1 -} - -declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) - -define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vqshlq_n_s32 -; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 -entry: - %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) - ret <4 x i32> %vqshl_n1 -} - -declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) - -define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vqshlq_n_s64 -; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 -entry: - %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) - ret <2 x i64> %vqshl_n1 -} - -declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) - -define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { -; CHECK-LABEL: test_vqshl_n_u8 -; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 -entry: - %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) - ret <8 x i8> %vqshl_n -} - -declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) - -define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vqshlq_n_u8 -; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 -entry: - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) - ret <16 x i8> %vqshl_n -} - -declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) - -define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { -; CHECK-LABEL: test_vqshl_n_u16 -; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 -entry: - %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) - ret <4 x i16> %vqshl_n1 -} - -declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) - -define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vqshlq_n_u16 -; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 -entry: - %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) - ret <8 x i16> %vqshl_n1 -} - -declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) - -define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { -; CHECK-LABEL: test_vqshl_n_u32 -; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 -entry: - %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) - ret <2 x i32> %vqshl_n1 -} - -declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) - -define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vqshlq_n_u32 -; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 -entry: - %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) - ret <4 x i32> %vqshl_n1 -} - -declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) - -define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vqshlq_n_u64 -; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, -entry: - %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) - ret <2 x i64> %vqshl_n1 -} - -declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) - -declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) - -declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) diff --git a/test/CodeGen/AArch64/neon-select_cc.ll b/test/CodeGen/AArch64/neon-select_cc.ll deleted file mode 100644 index 57a819671b..0000000000 --- a/test/CodeGen/AArch64/neon-select_cc.ll +++ /dev/null @@ -1,202 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has separate copy of this test due to different codegen. -define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) { -; CHECK-LABEL: test_select_cc_v8i8_i8: -; CHECK: and w0, w0, #0xff -; CHECK-NEXT: cmp w0, w1, uxtb -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.8b, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i8 %a, %b - %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d - ret <8x i8> %e -} - -define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) { -; CHECK-LABEL: test_select_cc_v8i8_f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d - ret <8x i8> %e -} - -define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) { -; CHECK-LABEL: test_select_cc_v8i8_f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d - ret <8x i8> %e -} - -define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) { -; CHECK-LABEL: test_select_cc_v16i8_i8: -; CHECK: and w0, w0, #0xff -; CHECK-NEXT: cmp w0, w1, uxtb -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.16b, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i8 %a, %b - %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d - ret <16x i8> %e -} - -define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) { -; CHECK-LABEL: test_select_cc_v16i8_f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d - ret <16x i8> %e -} - -define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) { -; CHECK-LABEL: test_select_cc_v16i8_f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d - ret <16x i8> %e -} - -define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) { -; CHECK-LABEL: test_select_cc_v4i16: -; CHECK: and w0, w0, #0xffff -; CHECK-NEXT: cmp w0, w1, uxth -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.4h, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i16 %a, %b - %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d - ret <4x i16> %e -} - -define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) { -; CHECK-LABEL: test_select_cc_v8i16: -; CHECK: and w0, w0, #0xffff -; CHECK-NEXT: cmp w0, w1, uxth -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.8h, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i16 %a, %b - %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d - ret <8x i16> %e -} - -define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) { -; CHECK-LABEL: test_select_cc_v2i32: -; CHECK: cmp w0, w1, uxtw -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.2s, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i32 %a, %b - %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d - ret <2x i32> %e -} - -define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) { -; CHECK-LABEL: test_select_cc_v4i32: -; CHECK: cmp w0, w1, uxtw -; CHECK-NEXT: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i32 %a, %b - %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d - ret <4x i32> %e -} - -define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) { -; CHECK-LABEL: test_select_cc_v1i64: -; CHECK: cmp x0, x1 -; CHECK-NEXT: csetm x0, eq -; CHECK-NEXT: fmov d{{[0-9]+}}, x0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i64 %a, %b - %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d - ret <1x i64> %e -} - -define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) { -; CHECK-LABEL: test_select_cc_v2i64: -; CHECK: cmp x0, x1 -; CHECK-NEXT: csetm x0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.2d, x0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i64 %a, %b - %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d - ret <2x i64> %e -} - -define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) { -; CHECK-LABEL: test_select_cc_v1f32: -; CHECK: fcmp s0, s1 -; CHECK-NEXT: fcsel s0, s2, s3, eq - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d - ret <1 x float> %e -} - -define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) { -; CHECK-LABEL: test_select_cc_v2f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d - ret <2 x float> %e -} - -define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) { -; CHECK-LABEL: test_select_cc_v4f32: -; CHECK: fcmeq v{{[0-9]+}}.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq float %a, %b - %e = select i1 %cmp31, <4x float> %c, <4x float> %d - ret <4x float> %e -} - -define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) { -; CHECK-LABEL: test_select_cc_v4f32_icmp: -; CHECK: cmp w0, w1, uxtw -; CHECK: csetm w0, eq -; CHECK-NEXT: dup v{{[0-9]+}}.4s, w0 -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v0.16b, v1.16b - %cmp31 = icmp eq i32 %a, %b - %e = select i1 %cmp31, <4x float> %c, <4x float> %d - ret <4x float> %e -} - -define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) { -; CHECK-LABEL: test_select_cc_v1f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v2.8b, v3.8b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d - ret <1 x double> %e -} - -define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) { -; CHECK-LABEL: test_select_cc_v1f64_icmp: -; CHECK: cmp x0, x1 -; CHECK-NEXT: csetm x0, eq -; CHECK-NEXT: fmov d{{[0-9]+}}, x0 -; CHECK-NEXT: bsl v{{[0-9]+}}.8b, v0.8b, v1.8b - %cmp31 = icmp eq i64 %a, %b - %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d - ret <1 x double> %e -} - -define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) { -; CHECK-LABEL: test_select_cc_v2f64: -; CHECK: fcmeq v{{[0-9]+}}.2d, v0.2d, v1.2d -; CHECK-NEXT: dup v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0] -; CHECK-NEXT: bsl v{{[0-9]+}}.16b, v2.16b, v3.16b - %cmp31 = fcmp oeq double %a, %b - %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d - ret <2 x double> %e -} diff --git a/test/CodeGen/AArch64/neon-shift-left-long.ll b/test/CodeGen/AArch64/neon-shift-left-long.ll index d16b131559..1d9c92c999 100644 --- a/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) { diff --git a/test/CodeGen/AArch64/neon-shift.ll b/test/CodeGen/AArch64/neon-shift.ll deleted file mode 100644 index 088200d972..0000000000 --- a/test/CodeGen/AArch64/neon-shift.ll +++ /dev/null @@ -1,172 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 already has these tests: pure intrinsics & trivial shifts. - -declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>) -declare <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8>, <8 x i8>) - -define <8 x i8> @test_uqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_uqshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: ushl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -define <8 x i8> @test_sqshl_v8i8(<8 x i8> %lhs, <8 x i8> %rhs) { -; CHECK: test_sqshl_v8i8: - %tmp1 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) -; CHECK: sshl v0.8b, v0.8b, v1.8b - ret <8 x i8> %tmp1 -} - -declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) -declare <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8>, <16 x i8>) - -define <16 x i8> @test_ushl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_ushl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: ushl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -define <16 x i8> @test_sshl_v16i8(<16 x i8> %lhs, <16 x i8> %rhs) { -; CHECK: test_sshl_v16i8: - %tmp1 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) -; CHECK: sshl v0.16b, v0.16b, v1.16b - ret <16 x i8> %tmp1 -} - -declare <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16>, <4 x i16>) -declare <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16>, <4 x i16>) - -define <4 x i16> @test_ushl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_ushl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: ushl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -define <4 x i16> @test_sshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { -; CHECK: test_sshl_v4i16: - %tmp1 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) -; CHECK: sshl v0.4h, v0.4h, v1.4h - ret <4 x i16> %tmp1 -} - -declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) -declare <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16>, <8 x i16>) - -define <8 x i16> @test_ushl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_ushl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: ushl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -define <8 x i16> @test_sshl_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK: test_sshl_v8i16: - %tmp1 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) -; CHECK: sshl v0.8h, v0.8h, v1.8h - ret <8 x i16> %tmp1 -} - -declare <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32>, <2 x i32>) -declare <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32>, <2 x i32>) - -define <2 x i32> @test_ushl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_ushl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: ushl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -define <2 x i32> @test_sshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { -; CHECK: test_sshl_v2i32: - %tmp1 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) -; CHECK: sshl v0.2s, v0.2s, v1.2s - ret <2 x i32> %tmp1 -} - -declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32>, <4 x i32>) - -define <4 x i32> @test_ushl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_ushl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: ushl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK: test_sshl_v4i32: - %tmp1 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) -; CHECK: sshl v0.4s, v0.4s, v1.4s - ret <4 x i32> %tmp1 -} - -declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) -declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) - -define <2 x i64> @test_ushl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_ushl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: ushl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - -define <2 x i64> @test_sshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) { -; CHECK: test_sshl_v2i64: - %tmp1 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %lhs, <2 x i64> %rhs) -; CHECK: sshl v0.2d, v0.2d, v1.2d - ret <2 x i64> %tmp1 -} - - -define <8 x i8> @test_shl_v8i8(<8 x i8> %a) { -; CHECK: test_shl_v8i8: -; CHECK: shl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %tmp = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - ret <8 x i8> %tmp -} - -define <4 x i16> @test_shl_v4i16(<4 x i16> %a) { -; CHECK: test_shl_v4i16: -; CHECK: shl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %tmp = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> - ret <4 x i16> %tmp -} - -define <2 x i32> @test_shl_v2i32(<2 x i32> %a) { -; CHECK: test_shl_v2i32: -; CHECK: shl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %tmp = shl <2 x i32> %a, <i32 3, i32 3> - ret <2 x i32> %tmp -} - -define <16 x i8> @test_shl_v16i8(<16 x i8> %a) { -; CHECK: test_shl_v16i8: -; CHECK: shl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %tmp = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - ret <16 x i8> %tmp -} - -define <8 x i16> @test_shl_v8i16(<8 x i16> %a) { -; CHECK: test_shl_v8i16: -; CHECK: shl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %tmp = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - ret <8 x i16> %tmp -} - -define <4 x i32> @test_shl_v4i32(<4 x i32> %a) { -; CHECK: test_shl_v4i32: -; CHECK: shl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %tmp = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> - ret <4 x i32> %tmp -} - -define <2 x i64> @test_shl_v2i64(<2 x i64> %a) { -; CHECK: test_shl_v2i64: -; CHECK: shl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63 - %tmp = shl <2 x i64> %a, <i64 63, i64 63> - ret <2 x i64> %tmp -} - diff --git a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll deleted file mode 100644 index 628a6760c9..0000000000 --- a/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll +++ /dev/null @@ -1,334 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has all tests not involving v1iN. - -define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: shl.v8i8: -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = shl <8 x i8> %a, %b - ret <8 x i8> %c -} - -define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: shl.v4i16: -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = shl <4 x i16> %a, %b - ret <4 x i16> %c -} - -define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: shl.v2i32: -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = shl <2 x i32> %a, %b - ret <2 x i32> %c -} - -define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: shl.v1i64: -; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %c = shl <1 x i64> %a, %b - ret <1 x i64> %c -} - -define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shl.v16i8: -; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %c = shl <16 x i8> %a, %b - ret <16 x i8> %c -} - -define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shl.v8i16: -; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %c = shl <8 x i16> %a, %b - ret <8 x i16> %c -} - -define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: shl.v4i32: -; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %c = shl <4 x i32> %a, %b - ret <4 x i32> %c -} - -define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: shl.v2i64: -; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %c = shl <2 x i64> %a, %b - ret <2 x i64> %c -} - -define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: lshr.v8i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = lshr <8 x i8> %a, %b - ret <8 x i8> %c -} - -define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: lshr.v4i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = lshr <4 x i16> %a, %b - ret <4 x i16> %c -} - -define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: lshr.v2i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = lshr <2 x i32> %a, %b - ret <2 x i32> %c -} - -define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: lshr.v1i64: -; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %c = lshr <1 x i64> %a, %b - ret <1 x i64> %c -} - -define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: lshr.v16i8: -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %c = lshr <16 x i8> %a, %b - ret <16 x i8> %c -} - -define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: lshr.v8i16: -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h -; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %c = lshr <8 x i16> %a, %b - ret <8 x i16> %c -} - -define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: lshr.v4i32: -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %c = lshr <4 x i32> %a, %b - ret <4 x i32> %c -} - -define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: lshr.v2i64: -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %c = lshr <2 x i64> %a, %b - ret <2 x i64> %c -} - -define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: ashr.v8i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = ashr <8 x i8> %a, %b - ret <8 x i8> %c -} - -define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-LABEL: ashr.v4i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = ashr <4 x i16> %a, %b - ret <4 x i16> %c -} - -define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: ashr.v2i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = ashr <2 x i32> %a, %b - ret <2 x i32> %c -} - -define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: ashr.v1i64: -; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %c = ashr <1 x i64> %a, %b - ret <1 x i64> %c -} - -define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: ashr.v16i8: -; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b -; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b - %c = ashr <16 x i8> %a, %b - ret <16 x i8> %c -} - -define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: ashr.v8i16: -; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h -; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h - %c = ashr <8 x i16> %a, %b - ret <8 x i16> %c -} - -define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: ashr.v4i32: -; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s -; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %c = ashr <4 x i32> %a, %b - ret <4 x i32> %c -} - -define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: ashr.v2i64: -; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d -; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %c = ashr <2 x i64> %a, %b - ret <2 x i64> %c -} - -define <1 x i64> @shl.v1i64.0(<1 x i64> %a) { -; CHECK-LABEL: shl.v1i64.0: -; CHECK-NOT: shl d{{[0-9]+}}, d{{[0-9]+}}, #0 - %c = shl <1 x i64> %a, zeroinitializer - ret <1 x i64> %c -} - -define <2 x i32> @shl.v2i32.0(<2 x i32> %a) { -; CHECK-LABEL: shl.v2i32.0: -; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 - %c = shl <2 x i32> %a, zeroinitializer - ret <2 x i32> %c -} - -; The following test cases test shl/ashr/lshr with v1i8/v1i16/v1i32 types - -define <1 x i8> @shl.v1i8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: shl.v1i8: -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = shl <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @shl.v1i16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: shl.v1i16: -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = shl <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @shl.v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: shl.v1i32: -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = shl <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @ashr.v1i8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: ashr.v1i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = ashr <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @ashr.v1i16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: ashr.v1i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = ashr <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @ashr.v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: ashr.v1i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = ashr <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @lshr.v1i8(<1 x i8> %a, <1 x i8> %b) { -; CHECK-LABEL: lshr.v1i8: -; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b -; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %c = lshr <1 x i8> %a, %b - ret <1 x i8> %c -} - -define <1 x i16> @lshr.v1i16(<1 x i16> %a, <1 x i16> %b) { -; CHECK-LABEL: lshr.v1i16: -; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h -; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h - %c = lshr <1 x i16> %a, %b - ret <1 x i16> %c -} - -define <1 x i32> @lshr.v1i32(<1 x i32> %a, <1 x i32> %b) { -; CHECK-LABEL: lshr.v1i32: -; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s -; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %c = lshr <1 x i32> %a, %b - ret <1 x i32> %c -} - -define <1 x i8> @shl.v1i8.imm(<1 x i8> %a) { -; CHECK-LABEL: shl.v1i8.imm: -; CHECK: shl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 - %c = shl <1 x i8> %a, <i8 3> - ret <1 x i8> %c -} - -define <1 x i16> @shl.v1i16.imm(<1 x i16> %a) { -; CHECK-LABEL: shl.v1i16.imm: -; CHECK: shl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #5 - %c = shl <1 x i16> %a, <i16 5> - ret <1 x i16> %c -} - -define <1 x i32> @shl.v1i32.imm(<1 x i32> %a) { -; CHECK-LABEL: shl.v1i32.imm: -; CHECK-NOT: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 - %c = shl <1 x i32> %a, zeroinitializer - ret <1 x i32> %c -} - -define <1 x i8> @ashr.v1i8.imm(<1 x i8> %a) { -; CHECK-LABEL: ashr.v1i8.imm: -; CHECK: sshr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 - %c = ashr <1 x i8> %a, <i8 3> - ret <1 x i8> %c -} - -define <1 x i16> @ashr.v1i16.imm(<1 x i16> %a) { -; CHECK-LABEL: ashr.v1i16.imm: -; CHECK: sshr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10 - %c = ashr <1 x i16> %a, <i16 10> - ret <1 x i16> %c -} - -define <1 x i32> @ashr.v1i32.imm(<1 x i32> %a) { -; CHECK-LABEL: ashr.v1i32.imm: -; CHECK: sshr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31 - %c = ashr <1 x i32> %a, <i32 31> - ret <1 x i32> %c -} - -define <1 x i8> @lshr.v1i8.imm(<1 x i8> %a) { -; CHECK-LABEL: lshr.v1i8.imm: -; CHECK: ushr v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, #3 - %c = lshr <1 x i8> %a, <i8 3> - ret <1 x i8> %c -} - -define <1 x i16> @lshr.v1i16.imm(<1 x i16> %a) { -; CHECK-LABEL: lshr.v1i16.imm: -; CHECK: ushr v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #10 - %c = lshr <1 x i16> %a, <i16 10> - ret <1 x i16> %c -} - -define <1 x i32> @lshr.v1i32.imm(<1 x i32> %a) { -; CHECK-LABEL: lshr.v1i32.imm: -; CHECK: ushr v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #31 - %c = lshr <1 x i32> %a, <i32 31> - ret <1 x i32> %c -} diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll deleted file mode 100644 index a3b160413f..0000000000 --- a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll +++ /dev/null @@ -1,2317 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - -; arm64 already has these. Essentially just a copy/paste from Clang output from -; arm_neon.h - -define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) { -; CHECK-LABEL: test_ldst1_v16i8: -; CHECK: ld1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %tmp = load <16 x i8>* %ptr - store <16 x i8> %tmp, <16 x i8>* %ptr2 - ret void -} - -define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) { -; CHECK-LABEL: test_ldst1_v8i16: -; CHECK: ld1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %tmp = load <8 x i16>* %ptr - store <8 x i16> %tmp, <8 x i16>* %ptr2 - ret void -} - -define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) { -; CHECK-LABEL: test_ldst1_v4i32: -; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %tmp = load <4 x i32>* %ptr - store <4 x i32> %tmp, <4 x i32>* %ptr2 - ret void -} - -define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) { -; CHECK-LABEL: test_ldst1_v2i64: -; CHECK: ld1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %tmp = load <2 x i64>* %ptr - store <2 x i64> %tmp, <2 x i64>* %ptr2 - ret void -} - -define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) { -; CHECK-LABEL: test_ldst1_v8i8: -; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %tmp = load <8 x i8>* %ptr - store <8 x i8> %tmp, <8 x i8>* %ptr2 - ret void -} - -define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) { -; CHECK-LABEL: test_ldst1_v4i16: -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %tmp = load <4 x i16>* %ptr - store <4 x i16> %tmp, <4 x i16>* %ptr2 - ret void -} - -define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) { -; CHECK-LABEL: test_ldst1_v2i32: -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %tmp = load <2 x i32>* %ptr - store <2 x i32> %tmp, <2 x i32>* %ptr2 - ret void -} - -define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) { -; CHECK-LABEL: test_ldst1_v1i64: -; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] -; CHECK: st1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %tmp = load <1 x i64>* %ptr - store <1 x i64> %tmp, <1 x i64>* %ptr2 - ret void -} - -%struct.int8x16x2_t = type { [2 x <16 x i8>] } -%struct.int16x8x2_t = type { [2 x <8 x i16>] } -%struct.int32x4x2_t = type { [2 x <4 x i32>] } -%struct.int64x2x2_t = type { [2 x <2 x i64>] } -%struct.float32x4x2_t = type { [2 x <4 x float>] } -%struct.float64x2x2_t = type { [2 x <2 x double>] } -%struct.int8x8x2_t = type { [2 x <8 x i8>] } -%struct.int16x4x2_t = type { [2 x <4 x i16>] } -%struct.int32x2x2_t = type { [2 x <2 x i32>] } -%struct.int64x1x2_t = type { [2 x <1 x i64>] } -%struct.float32x2x2_t = type { [2 x <2 x float>] } -%struct.float64x1x2_t = type { [2 x <1 x double>] } -%struct.int8x16x3_t = type { [3 x <16 x i8>] } -%struct.int16x8x3_t = type { [3 x <8 x i16>] } -%struct.int32x4x3_t = type { [3 x <4 x i32>] } -%struct.int64x2x3_t = type { [3 x <2 x i64>] } -%struct.float32x4x3_t = type { [3 x <4 x float>] } -%struct.float64x2x3_t = type { [3 x <2 x double>] } -%struct.int8x8x3_t = type { [3 x <8 x i8>] } -%struct.int16x4x3_t = type { [3 x <4 x i16>] } -%struct.int32x2x3_t = type { [3 x <2 x i32>] } -%struct.int64x1x3_t = type { [3 x <1 x i64>] } -%struct.float32x2x3_t = type { [3 x <2 x float>] } -%struct.float64x1x3_t = type { [3 x <1 x double>] } -%struct.int8x16x4_t = type { [4 x <16 x i8>] } -%struct.int16x8x4_t = type { [4 x <8 x i16>] } -%struct.int32x4x4_t = type { [4 x <4 x i32>] } -%struct.int64x2x4_t = type { [4 x <2 x i64>] } -%struct.float32x4x4_t = type { [4 x <4 x float>] } -%struct.float64x2x4_t = type { [4 x <2 x double>] } -%struct.int8x8x4_t = type { [4 x <8 x i8>] } -%struct.int16x4x4_t = type { [4 x <4 x i16>] } -%struct.int32x2x4_t = type { [4 x <2 x i32>] } -%struct.int64x1x4_t = type { [4 x <1 x i64>] } -%struct.float32x2x4_t = type { [4 x <2 x float>] } -%struct.float64x1x4_t = type { [4 x <1 x double>] } - - -define <16 x i8> @test_vld1q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld1q_s8 -; CHECK: ld1 { v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1) - ret <16 x i8> %vld1 -} - -define <8 x i16> @test_vld1q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld1q_s16 -; CHECK: ld1 { v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2) - ret <8 x i16> %vld1 -} - -define <4 x i32> @test_vld1q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld1q_s32 -; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4) - ret <4 x i32> %vld1 -} - -define <2 x i64> @test_vld1q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld1q_s64 -; CHECK: ld1 { v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8) - ret <2 x i64> %vld1 -} - -define <4 x float> @test_vld1q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld1q_f32 -; CHECK: ld1 { v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4) - ret <4 x float> %vld1 -} - -define <2 x double> @test_vld1q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld1q_f64 -; CHECK: ld1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8) - ret <2 x double> %vld1 -} - -define <8 x i8> @test_vld1_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld1_s8 -; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) - ret <8 x i8> %vld1 -} - -define <4 x i16> @test_vld1_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld1_s16 -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) - ret <4 x i16> %vld1 -} - -define <2 x i32> @test_vld1_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld1_s32 -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4) - ret <2 x i32> %vld1 -} - -define <1 x i64> @test_vld1_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld1_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8) - ret <1 x i64> %vld1 -} - -define <2 x float> @test_vld1_f32(float* readonly %a) { -; CHECK-LABEL: test_vld1_f32 -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4) - ret <2 x float> %vld1 -} - -define <1 x double> @test_vld1_f64(double* readonly %a) { -; CHECK-LABEL: test_vld1_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8) - ret <1 x double> %vld1 -} - -define <8 x i8> @test_vld1_p8(i8* readonly %a) { -; CHECK-LABEL: test_vld1_p8 -; CHECK: ld1 { v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) - ret <8 x i8> %vld1 -} - -define <4 x i16> @test_vld1_p16(i16* readonly %a) { -; CHECK-LABEL: test_vld1_p16 -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) - ret <4 x i16> %vld1 -} - -define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld2q_s8 -; CHECK: ld2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1) - %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1 - ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld2q_s16 -; CHECK: ld2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2) - %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1 - ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld2q_s32 -; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1 - ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld2q_s64 -; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1 - ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld2q_f32 -; CHECK: ld2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1 - ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld2q_f64 -; CHECK: ld2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1 - ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld2_s8 -; CHECK: ld2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1) - %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1 - ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld2_s16 -; CHECK: ld2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2) - %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1 - ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld2_s32 -; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1 - ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld2_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1 - ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) { -; CHECK-LABEL: test_vld2_f32 -; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4) - %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1 - ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) { -; CHECK-LABEL: test_vld2_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8) - %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1 - %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1 - ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld3q_s8 -; CHECK: ld3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1) - %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2 - ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld3q_s16 -; CHECK: ld3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2) - %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2 - ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld3q_s32 -; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2 - ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld3q_s64 -; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2 - ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld3q_f32 -; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2 - ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld3q_f64 -; CHECK: ld3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2 - ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld3_s8 -; CHECK: ld3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1) - %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2 - ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld3_s16 -; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2) - %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2 - ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld3_s32 -; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2 - ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld3_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2 - ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) { -; CHECK-LABEL: test_vld3_f32 -; CHECK: ld3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4) - %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2 - ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) { -; CHECK-LABEL: test_vld3_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8) - %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2 - %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2 - ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld4q_s8 -; CHECK: ld4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}] - %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1) - %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3 - ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld4q_s16 -; CHECK: ld4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2) - %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3 - ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld4q_s32 -; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3 - ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld4q_s64 -; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3 - ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) { -; CHECK-LABEL: test_vld4q_f32 -; CHECK: ld4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3 - ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) { -; CHECK-LABEL: test_vld4q_f64 -; CHECK: ld4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3 - ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) { -; CHECK-LABEL: test_vld4_s8 -; CHECK: ld4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}] - %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1) - %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3 - ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) { -; CHECK-LABEL: test_vld4_s16 -; CHECK: ld4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2) - %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3 - ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) { -; CHECK-LABEL: test_vld4_s32 -; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3 - ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) { -; CHECK-LABEL: test_vld4_s64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3 - ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) { -; CHECK-LABEL: test_vld4_f32 -; CHECK: ld4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4) - %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3 - ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) { -; CHECK-LABEL: test_vld4_f64 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8) - %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3 - %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3 - ret %struct.float64x1x4_t %.fca.0.3.insert -} - -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) -declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32) -declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) -declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) -declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) -declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32) -declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) -declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) -declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) - -define void @test_vst1q_s8(i8* %a, <16 x i8> %b) { -; CHECK-LABEL: test_vst1q_s8 -; CHECK: st1 { v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1) - ret void -} - -define void @test_vst1q_s16(i16* %a, <8 x i16> %b) { -; CHECK-LABEL: test_vst1q_s16 -; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2) - ret void -} - -define void @test_vst1q_s32(i32* %a, <4 x i32> %b) { -; CHECK-LABEL: test_vst1q_s32 -; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4) - ret void -} - -define void @test_vst1q_s64(i64* %a, <2 x i64> %b) { -; CHECK-LABEL: test_vst1q_s64 -; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8) - ret void -} - -define void @test_vst1q_f32(float* %a, <4 x float> %b) { -; CHECK-LABEL: test_vst1q_f32 -; CHECK: st1 { v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4) - ret void -} - -define void @test_vst1q_f64(double* %a, <2 x double> %b) { -; CHECK-LABEL: test_vst1q_f64 -; CHECK: st1 { v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8) - ret void -} - -define void @test_vst1_s8(i8* %a, <8 x i8> %b) { -; CHECK-LABEL: test_vst1_s8 -; CHECK: st1 { v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1) - ret void -} - -define void @test_vst1_s16(i16* %a, <4 x i16> %b) { -; CHECK-LABEL: test_vst1_s16 -; CHECK: st1 { v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2) - ret void -} - -define void @test_vst1_s32(i32* %a, <2 x i32> %b) { -; CHECK-LABEL: test_vst1_s32 -; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4) - ret void -} - -define void @test_vst1_s64(i64* %a, <1 x i64> %b) { -; CHECK-LABEL: test_vst1_s64 -; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8) - ret void -} - -define void @test_vst1_f32(float* %a, <2 x float> %b) { -; CHECK-LABEL: test_vst1_f32 -; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4) - ret void -} - -define void @test_vst1_f64(double* %a, <1 x double> %b) { -; CHECK-LABEL: test_vst1_f64 -; CHECK: st1 { v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8) - ret void -} - -define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s8 -; CHECK: st2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1) - ret void -} - -define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s16 -; CHECK: st2 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2) - ret void -} - -define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s32 -; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2q_s64 -; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2q_f32 -; CHECK: st2 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2q_f64 -; CHECK: st2 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2_s8 -; CHECK: st2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1) - ret void -} - -define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2_s16 -; CHECK: st2 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2) - ret void -} - -define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2_s32 -; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2_s64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2_f32 -; CHECK: st2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4) - ret void -} - -define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2_f64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8) - ret void -} - -define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s8 -; CHECK: st3 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1) - ret void -} - -define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s16 -; CHECK: st3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2) - ret void -} - -define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s32 -; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3q_s64 -; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3q_f32 -; CHECK: st3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3q_f64 -; CHECK: st3 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3_s8 -; CHECK: st3 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1) - ret void -} - -define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3_s16 -; CHECK: st3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2) - ret void -} - -define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3_s32 -; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3_s64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3_f32 -; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4) - ret void -} - -define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3_f64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8) - ret void -} - -define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s8 -; CHECK: st4 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 - tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1) - ret void -} - -define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s16 -; CHECK: st4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2) - ret void -} - -define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s32 -; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4q_s64 -; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8) - ret void -} - -define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4q_f32 -; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4q_f64 -; CHECK: st4 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8) - ret void -} - -define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4_s8 -; CHECK: st4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 - tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1) - ret void -} - -define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4_s16 -; CHECK: st4 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 - %1 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2) - ret void -} - -define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4_s32 -; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 - %1 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4_s64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 - %1 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8) - ret void -} - -define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4_f32 -; CHECK: st4 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 - %1 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4) - ret void -} - -define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4_f64 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 - %1 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8) - ret void -} - -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) -declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) -declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32) -declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) -declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) -declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) -declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32) -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32) -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32) -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32) - -define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) { -; CHECK-LABEL: test_vld1q_s8_x2 -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 - %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 - %4 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %2, 0, 0 - %5 = insertvalue %struct.int8x16x2_t %4, <16 x i8> %3, 0, 1 - ret %struct.int8x16x2_t %5 -} - -define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) { -; CHECK-LABEL: test_vld1q_s16_x2 -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0 - %4 = extractvalue { <8 x i16>, <8 x i16> } %2, 1 - %5 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %3, 0, 0 - %6 = insertvalue %struct.int16x8x2_t %5, <8 x i16> %4, 0, 1 - ret %struct.int16x8x2_t %6 -} - -define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) { -; CHECK-LABEL: test_vld1q_s32_x2 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %1, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0 - %4 = extractvalue { <4 x i32>, <4 x i32> } %2, 1 - %5 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %3, 0, 0 - %6 = insertvalue %struct.int32x4x2_t %5, <4 x i32> %4, 0, 1 - ret %struct.int32x4x2_t %6 -} - -define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) { -; CHECK-LABEL: test_vld1q_s64_x2 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64> } %2, 0 - %4 = extractvalue { <2 x i64>, <2 x i64> } %2, 1 - %5 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %3, 0, 0 - %6 = insertvalue %struct.int64x2x2_t %5, <2 x i64> %4, 0, 1 - ret %struct.int64x2x2_t %6 -} - -define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) { -; CHECK-LABEL: test_vld1q_f32_x2 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float> } %2, 0 - %4 = extractvalue { <4 x float>, <4 x float> } %2, 1 - %5 = insertvalue %struct.float32x4x2_t undef, <4 x float> %3, 0, 0 - %6 = insertvalue %struct.float32x4x2_t %5, <4 x float> %4, 0, 1 - ret %struct.float32x4x2_t %6 -} - - -define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) { -; CHECK-LABEL: test_vld1q_f64_x2 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %1, i32 8) - %3 = extractvalue { <2 x double>, <2 x double> } %2, 0 - %4 = extractvalue { <2 x double>, <2 x double> } %2, 1 - %5 = insertvalue %struct.float64x2x2_t undef, <2 x double> %3, 0, 0 - %6 = insertvalue %struct.float64x2x2_t %5, <2 x double> %4, 0, 1 - ret %struct.float64x2x2_t %6 -} - -define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) { -; CHECK-LABEL: test_vld1_s8_x2 -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1 - %4 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %2, 0, 0 - %5 = insertvalue %struct.int8x8x2_t %4, <8 x i8> %3, 0, 1 - ret %struct.int8x8x2_t %5 -} - -define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) { -; CHECK-LABEL: test_vld1_s16_x2 -; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %1, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16> } %2, 0 - %4 = extractvalue { <4 x i16>, <4 x i16> } %2, 1 - %5 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %3, 0, 0 - %6 = insertvalue %struct.int16x4x2_t %5, <4 x i16> %4, 0, 1 - ret %struct.int16x4x2_t %6 -} - -define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) { -; CHECK-LABEL: test_vld1_s32_x2 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %1, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32> } %2, 0 - %4 = extractvalue { <2 x i32>, <2 x i32> } %2, 1 - %5 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %3, 0, 0 - %6 = insertvalue %struct.int32x2x2_t %5, <2 x i32> %4, 0, 1 - ret %struct.int32x2x2_t %6 -} - -define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) { -; CHECK-LABEL: test_vld1_s64_x2 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %1, i32 8) - %3 = extractvalue { <1 x i64>, <1 x i64> } %2, 0 - %4 = extractvalue { <1 x i64>, <1 x i64> } %2, 1 - %5 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %3, 0, 0 - %6 = insertvalue %struct.int64x1x2_t %5, <1 x i64> %4, 0, 1 - ret %struct.int64x1x2_t %6 -} - -define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) { -; CHECK-LABEL: test_vld1_f32_x2 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %1, i32 4) - %3 = extractvalue { <2 x float>, <2 x float> } %2, 0 - %4 = extractvalue { <2 x float>, <2 x float> } %2, 1 - %5 = insertvalue %struct.float32x2x2_t undef, <2 x float> %3, 0, 0 - %6 = insertvalue %struct.float32x2x2_t %5, <2 x float> %4, 0, 1 - ret %struct.float32x2x2_t %6 -} - -define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) { -; CHECK-LABEL: test_vld1_f64_x2 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %1, i32 8) - %3 = extractvalue { <1 x double>, <1 x double> } %2, 0 - %4 = extractvalue { <1 x double>, <1 x double> } %2, 1 - %5 = insertvalue %struct.float64x1x2_t undef, <1 x double> %3, 0, 0 - %6 = insertvalue %struct.float64x1x2_t %5, <1 x double> %4, 0, 1 - ret %struct.float64x1x2_t %6 -} - -define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) { -; CHECK-LABEL: test_vld1q_s8_x3 -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, -; [{{x[0-9]+|sp}}] - %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 - %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 - %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 - %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0 - %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1 - %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2 - ret %struct.int8x16x3_t %7 -} - -define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) { -; CHECK-LABEL: test_vld1q_s16_x3 -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 - %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1 - %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2 - %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0 - %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1 - %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2 - ret %struct.int16x8x3_t %8 -} - -define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) { -; CHECK-LABEL: test_vld1q_s32_x3 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0 - %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1 - %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2 - %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0 - %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1 - %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2 - ret %struct.int32x4x3_t %8 -} - -define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) { -; CHECK-LABEL: test_vld1q_s64_x3 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 - %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1 - %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2 - %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0 - %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1 - %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2 - ret %struct.int64x2x3_t %8 -} - -define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) { -; CHECK-LABEL: test_vld1q_f32_x3 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0 - %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1 - %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2 - %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0 - %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1 - %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2 - ret %struct.float32x4x3_t %8 -} - - -define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) { -; CHECK-LABEL: test_vld1q_f64_x3 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8) - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0 - %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1 - %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2 - %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0 - %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1 - %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2 - ret %struct.float64x2x3_t %8 -} - -define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) { -; CHECK-LABEL: test_vld1_s8_x3 -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, -; [{{x[0-9]+|sp}}] - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0 - %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1 - %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2 - ret %struct.int8x8x3_t %7 -} - -define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) { -; CHECK-LABEL: test_vld1_s16_x3 -; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 - %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 - %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 - %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0 - %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1 - %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2 - ret %struct.int16x4x3_t %8 -} - -define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) { - %1 = bitcast i32* %a to i8* -; CHECK-LABEL: test_vld1_s32_x3 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 - %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 - %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 - %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0 - %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1 - %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2 - ret %struct.int32x2x3_t %8 -} - -define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) { -; CHECK-LABEL: test_vld1_s64_x3 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8) - %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0 - %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1 - %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2 - %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0 - %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1 - %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2 - ret %struct.int64x1x3_t %8 -} - -define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) { -; CHECK-LABEL: test_vld1_f32_x3 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4) - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0 - %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1 - %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2 - %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0 - %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1 - %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2 - ret %struct.float32x2x3_t %8 -} - - -define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) { -; CHECK-LABEL: test_vld1_f64_x3 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8) - %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0 - %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1 - %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2 - %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0 - %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1 - %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2 - ret %struct.float64x1x3_t %8 -} - -define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) { -; CHECK-LABEL: test_vld1q_s8_x4 -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, -; v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0 - %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1 - %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2 - %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3 - %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0 - %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1 - %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2 - %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3 - ret %struct.int8x16x4_t %9 -} - -define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) { -; CHECK-LABEL: test_vld1q_s16_x4 -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, -; v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 - %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1 - %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2 - %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3 - %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0 - %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1 - %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2 - %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3 - ret %struct.int16x8x4_t %10 -} - -define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) { -; CHECK-LABEL: test_vld1q_s32_x4 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0 - %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1 - %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2 - %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3 - %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0 - %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1 - %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2 - %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3 - ret %struct.int32x4x4_t %10 -} - -define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) { -; CHECK-LABEL: test_vld1q_s64_x4 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 - %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1 - %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2 - %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3 - %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0 - %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1 - %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2 - %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3 - ret %struct.int64x2x4_t %10 -} - -define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) { -; CHECK-LABEL: test_vld1q_f32_x4 -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0 - %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1 - %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2 - %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3 - %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0 - %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1 - %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2 - %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3 - ret %struct.float32x4x4_t %10 -} - -define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) { -; CHECK-LABEL: test_vld1q_f64_x4 -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8) - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0 - %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1 - %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2 - %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3 - %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0 - %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1 - %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2 - %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3 - ret %struct.float64x2x4_t %10 -} - -define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) { -; CHECK-LABEL: test_vld1_s8_x4 -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, -; v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3 - %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0 - %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1 - %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2 - %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3 - ret %struct.int8x8x4_t %9 -} - -define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) { -; CHECK-LABEL: test_vld1_s16_x4 -; CHECK: ld1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, -; v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 - %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 - %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 - %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3 - %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0 - %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1 - %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2 - %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3 - ret %struct.int16x4x4_t %10 -} - -define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) { -; CHECK-LABEL: test_vld1_s32_x4 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast i32* %a to i8* - %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 - %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 - %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 - %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3 - %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0 - %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1 - %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2 - %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3 - ret %struct.int32x2x4_t %10 -} - -define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) { -; CHECK-LABEL: test_vld1_s64_x4 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast i64* %a to i8* - %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8) - %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0 - %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1 - %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2 - %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3 - %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0 - %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1 - %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2 - %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3 - ret %struct.int64x1x4_t %10 -} - -define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) { -; CHECK-LABEL: test_vld1_f32_x4 -; CHECK: ld1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = bitcast float* %a to i8* - %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4) - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0 - %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1 - %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2 - %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3 - %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0 - %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1 - %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2 - %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3 - ret %struct.float32x2x4_t %10 -} - - -define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) { -; CHECK-LABEL: test_vld1_f64_x4 -; CHECK: ld1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = bitcast double* %a to i8* - %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8) - %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0 - %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1 - %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2 - %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3 - %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0 - %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1 - %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2 - %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3 - ret %struct.float64x1x4_t %10 -} - -define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b) { -; CHECK-LABEL: test_vst1q_s8_x2 -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <16 x i8>] %b, 0 - %2 = extractvalue [2 x <16 x i8>] %b, 1 - tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1) - ret void -} - -define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b) { -; CHECK-LABEL: test_vst1q_s16_x2 -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <8 x i16>] %b, 0 - %2 = extractvalue [2 x <8 x i16>] %b, 1 - %3 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2) - ret void -} - -define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b) { -; CHECK-LABEL: test_vst1q_s32_x2 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <4 x i32>] %b, 0 - %2 = extractvalue [2 x <4 x i32>] %b, 1 - %3 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4) - ret void -} - -define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b) { -; CHECK-LABEL: test_vst1q_s64_x2 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x i64>] %b, 0 - %2 = extractvalue [2 x <2 x i64>] %b, 1 - %3 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8) - ret void -} - -define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b) { -; CHECK-LABEL: test_vst1q_f32_x2 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <4 x float>] %b, 0 - %2 = extractvalue [2 x <4 x float>] %b, 1 - %3 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4) - ret void -} - - -define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b) { -; CHECK-LABEL: test_vst1q_f64_x2 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x double>] %b, 0 - %2 = extractvalue [2 x <2 x double>] %b, 1 - %3 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8) - ret void -} - -define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b) { -; CHECK-LABEL: test_vst1_s8_x2 -; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1) - ret void -} - -define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b) { -; CHECK-LABEL: test_vst1_s16_x2 -; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <4 x i16>] %b, 0 - %2 = extractvalue [2 x <4 x i16>] %b, 1 - %3 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2) - ret void -} - -define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b) { -; CHECK-LABEL: test_vst1_s32_x2 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x i32>] %b, 0 - %2 = extractvalue [2 x <2 x i32>] %b, 1 - %3 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4) - ret void -} - -define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b) { -; CHECK-LABEL: test_vst1_s64_x2 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <1 x i64>] %b, 0 - %2 = extractvalue [2 x <1 x i64>] %b, 1 - %3 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8) - ret void -} - -define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b) { -; CHECK-LABEL: test_vst1_f32_x2 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <2 x float>] %b, 0 - %2 = extractvalue [2 x <2 x float>] %b, 1 - %3 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4) - ret void -} - -define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b) { -; CHECK-LABEL: test_vst1_f64_x2 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [2 x <1 x double>] %b, 0 - %2 = extractvalue [2 x <1 x double>] %b, 1 - %3 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8) - ret void -} - -define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b) { -; CHECK-LABEL: test_vst1q_s8_x3 -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <16 x i8>] %b, 0 - %2 = extractvalue [3 x <16 x i8>] %b, 1 - %3 = extractvalue [3 x <16 x i8>] %b, 2 - tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1) - ret void -} - -define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b) { -; CHECK-LABEL: test_vst1q_s16_x3 -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <8 x i16>] %b, 0 - %2 = extractvalue [3 x <8 x i16>] %b, 1 - %3 = extractvalue [3 x <8 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2) - ret void -} - -define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b) { -; CHECK-LABEL: test_vst1q_s32_x3 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <4 x i32>] %b, 0 - %2 = extractvalue [3 x <4 x i32>] %b, 1 - %3 = extractvalue [3 x <4 x i32>] %b, 2 - %4 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4) - ret void -} - -define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b) { -; CHECK-LABEL: test_vst1q_s64_x3 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x i64>] %b, 0 - %2 = extractvalue [3 x <2 x i64>] %b, 1 - %3 = extractvalue [3 x <2 x i64>] %b, 2 - %4 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8) - ret void -} - -define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b) { -; CHECK-LABEL: test_vst1q_f32_x3 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <4 x float>] %b, 0 - %2 = extractvalue [3 x <4 x float>] %b, 1 - %3 = extractvalue [3 x <4 x float>] %b, 2 - %4 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4) - ret void -} - -define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b) { -; CHECK-LABEL: test_vst1q_f64_x3 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x double>] %b, 0 - %2 = extractvalue [3 x <2 x double>] %b, 1 - %3 = extractvalue [3 x <2 x double>] %b, 2 - %4 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8) - ret void -} - -define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b) { -; CHECK-LABEL: test_vst1_s8_x3 -; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <8 x i8>] %b, 0 - %2 = extractvalue [3 x <8 x i8>] %b, 1 - %3 = extractvalue [3 x <8 x i8>] %b, 2 - tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1) - ret void -} - -define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b) { -; CHECK-LABEL: test_vst1_s16_x3 -; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <4 x i16>] %b, 0 - %2 = extractvalue [3 x <4 x i16>] %b, 1 - %3 = extractvalue [3 x <4 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2) - ret void -} - -define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b) { -; CHECK-LABEL: test_vst1_s32_x3 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x i32>] %b, 0 - %2 = extractvalue [3 x <2 x i32>] %b, 1 - %3 = extractvalue [3 x <2 x i32>] %b, 2 - %4 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4) - ret void -} - -define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b) { -; CHECK-LABEL: test_vst1_s64_x3 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <1 x i64>] %b, 0 - %2 = extractvalue [3 x <1 x i64>] %b, 1 - %3 = extractvalue [3 x <1 x i64>] %b, 2 - %4 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8) - ret void -} - -define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b) { -; CHECK-LABEL: test_vst1_f32_x3 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <2 x float>] %b, 0 - %2 = extractvalue [3 x <2 x float>] %b, 1 - %3 = extractvalue [3 x <2 x float>] %b, 2 - %4 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4) - ret void -} - -define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b) { -; CHECK-LABEL: test_vst1_f64_x3 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, -; [{{x[0-9]+|sp}}] - %1 = extractvalue [3 x <1 x double>] %b, 0 - %2 = extractvalue [3 x <1 x double>] %b, 1 - %3 = extractvalue [3 x <1 x double>] %b, 2 - %4 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8) - ret void -} - -define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b) { -; CHECK-LABEL: test_vst1q_s8_x4 -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, -; v{{[0-9]+}}.16b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <16 x i8>] %b, 0 - %2 = extractvalue [4 x <16 x i8>] %b, 1 - %3 = extractvalue [4 x <16 x i8>] %b, 2 - %4 = extractvalue [4 x <16 x i8>] %b, 3 - tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1) - ret void -} - -define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b) { -; CHECK-LABEL: test_vst1q_s16_x4 -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, -; v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <8 x i16>] %b, 0 - %2 = extractvalue [4 x <8 x i16>] %b, 1 - %3 = extractvalue [4 x <8 x i16>] %b, 2 - %4 = extractvalue [4 x <8 x i16>] %b, 3 - %5 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2) - ret void -} - -define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b) { -; CHECK-LABEL: test_vst1q_s32_x4 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <4 x i32>] %b, 0 - %2 = extractvalue [4 x <4 x i32>] %b, 1 - %3 = extractvalue [4 x <4 x i32>] %b, 2 - %4 = extractvalue [4 x <4 x i32>] %b, 3 - %5 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4) - ret void -} - -define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b) { -; CHECK-LABEL: test_vst1q_s64_x4 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x i64>] %b, 0 - %2 = extractvalue [4 x <2 x i64>] %b, 1 - %3 = extractvalue [4 x <2 x i64>] %b, 2 - %4 = extractvalue [4 x <2 x i64>] %b, 3 - %5 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8) - ret void -} - -define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b) { -; CHECK-LABEL: test_vst1q_f32_x4 -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, -; v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <4 x float>] %b, 0 - %2 = extractvalue [4 x <4 x float>] %b, 1 - %3 = extractvalue [4 x <4 x float>] %b, 2 - %4 = extractvalue [4 x <4 x float>] %b, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4) - ret void -} - -define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b) { -; CHECK-LABEL: test_vst1q_f64_x4 -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, -; v{{[0-9]+}}.2d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x double>] %b, 0 - %2 = extractvalue [4 x <2 x double>] %b, 1 - %3 = extractvalue [4 x <2 x double>] %b, 2 - %4 = extractvalue [4 x <2 x double>] %b, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8) - ret void -} - -define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b) { -; CHECK-LABEL: test_vst1_s8_x4 -; CHECK: st1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, -; v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <8 x i8>] %b, 0 - %2 = extractvalue [4 x <8 x i8>] %b, 1 - %3 = extractvalue [4 x <8 x i8>] %b, 2 - %4 = extractvalue [4 x <8 x i8>] %b, 3 - tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1) - ret void -} - -define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b) { -; CHECK-LABEL: test_vst1_s16_x4 -; CHECK: st1 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, -; v{{[0-9]+}}.4h }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <4 x i16>] %b, 0 - %2 = extractvalue [4 x <4 x i16>] %b, 1 - %3 = extractvalue [4 x <4 x i16>] %b, 2 - %4 = extractvalue [4 x <4 x i16>] %b, 3 - %5 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2) - ret void -} - -define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b) { -; CHECK-LABEL: test_vst1_s32_x4 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x i32>] %b, 0 - %2 = extractvalue [4 x <2 x i32>] %b, 1 - %3 = extractvalue [4 x <2 x i32>] %b, 2 - %4 = extractvalue [4 x <2 x i32>] %b, 3 - %5 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4) - ret void -} - -define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b) { -; CHECK-LABEL: test_vst1_s64_x4 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <1 x i64>] %b, 0 - %2 = extractvalue [4 x <1 x i64>] %b, 1 - %3 = extractvalue [4 x <1 x i64>] %b, 2 - %4 = extractvalue [4 x <1 x i64>] %b, 3 - %5 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8) - ret void -} - -define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b) { -; CHECK-LABEL: test_vst1_f32_x4 -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, -; v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <2 x float>] %b, 0 - %2 = extractvalue [4 x <2 x float>] %b, 1 - %3 = extractvalue [4 x <2 x float>] %b, 2 - %4 = extractvalue [4 x <2 x float>] %b, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4) - ret void -} - -define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b) { -; CHECK-LABEL: test_vst1_f64_x4 -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, -; v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}] - %1 = extractvalue [4 x <1 x double>] %b, 0 - %2 = extractvalue [4 x <1 x double>] %b, 1 - %3 = extractvalue [4 x <1 x double>] %b, 2 - %4 = extractvalue [4 x <1 x double>] %b, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8) - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32) -declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32) -declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32) -declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32) -declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32) -declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32) -declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32) -declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32) -declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32) -declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32) diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll deleted file mode 100644 index 75c2a82ab5..0000000000 --- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll +++ /dev/null @@ -1,2300 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; interesting parts copied into arm64 directory as aarch64-neon-simd-ldst-one.ll - -%struct.uint8x16x2_t = type { [2 x <16 x i8>] } -%struct.poly8x16x2_t = type { [2 x <16 x i8>] } -%struct.uint8x16x3_t = type { [3 x <16 x i8>] } -%struct.int8x16x2_t = type { [2 x <16 x i8>] } -%struct.int16x8x2_t = type { [2 x <8 x i16>] } -%struct.int32x4x2_t = type { [2 x <4 x i32>] } -%struct.int64x2x2_t = type { [2 x <2 x i64>] } -%struct.float32x4x2_t = type { [2 x <4 x float>] } -%struct.float64x2x2_t = type { [2 x <2 x double>] } -%struct.int8x8x2_t = type { [2 x <8 x i8>] } -%struct.int16x4x2_t = type { [2 x <4 x i16>] } -%struct.int32x2x2_t = type { [2 x <2 x i32>] } -%struct.int64x1x2_t = type { [2 x <1 x i64>] } -%struct.float32x2x2_t = type { [2 x <2 x float>] } -%struct.float64x1x2_t = type { [2 x <1 x double>] } -%struct.int8x16x3_t = type { [3 x <16 x i8>] } -%struct.int16x8x3_t = type { [3 x <8 x i16>] } -%struct.int32x4x3_t = type { [3 x <4 x i32>] } -%struct.int64x2x3_t = type { [3 x <2 x i64>] } -%struct.float32x4x3_t = type { [3 x <4 x float>] } -%struct.float64x2x3_t = type { [3 x <2 x double>] } -%struct.int8x8x3_t = type { [3 x <8 x i8>] } -%struct.int16x4x3_t = type { [3 x <4 x i16>] } -%struct.int32x2x3_t = type { [3 x <2 x i32>] } -%struct.int64x1x3_t = type { [3 x <1 x i64>] } -%struct.float32x2x3_t = type { [3 x <2 x float>] } -%struct.float64x1x3_t = type { [3 x <1 x double>] } -%struct.int8x16x4_t = type { [4 x <16 x i8>] } -%struct.int16x8x4_t = type { [4 x <8 x i16>] } -%struct.int32x4x4_t = type { [4 x <4 x i32>] } -%struct.int64x2x4_t = type { [4 x <2 x i64>] } -%struct.float32x4x4_t = type { [4 x <4 x float>] } -%struct.float64x2x4_t = type { [4 x <2 x double>] } -%struct.int8x8x4_t = type { [4 x <8 x i8>] } -%struct.int16x4x4_t = type { [4 x <4 x i16>] } -%struct.int32x2x4_t = type { [4 x <2 x i32>] } -%struct.int64x1x4_t = type { [4 x <1 x i64>] } -%struct.float32x2x4_t = type { [4 x <2 x float>] } -%struct.float64x1x4_t = type { [4 x <1 x double>] } - -define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { -; CHECK-LABEL: test_ld_from_poll_v16i8 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 2, i8 13, i8 14, i8 15, i8 16> - ret <16 x i8> %b -} - -define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { -; CHECK-LABEL: test_ld_from_poll_v8i16 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8> - ret <8 x i16> %b -} - -define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { -; CHECK-LABEL: test_ld_from_poll_v4i32 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4> - ret <4 x i32> %b -} - -define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { -; CHECK-LABEL: test_ld_from_poll_v2i64 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <2 x i64> %a, <i64 1, i64 2> - ret <2 x i64> %b -} - -define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { -; CHECK-LABEL: test_ld_from_poll_v4f32 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0> - ret <4 x float> %b -} - -define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { -; CHECK-LABEL: test_ld_from_poll_v2f64 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = fadd <2 x double> %a, <double 1.0, double 2.0> - ret <2 x double> %b -} - -define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { -; CHECK-LABEL: test_ld_from_poll_v8i8 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8> - ret <8 x i8> %b -} - -define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { -; CHECK-LABEL: test_ld_from_poll_v4i16 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4> - ret <4 x i16> %b -} - -define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { -; CHECK-LABEL: test_ld_from_poll_v2i32 -; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} -; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] -entry: - %b = add <2 x i32> %a, <i32 1, i32 2> - ret <2 x i32> %b -} - -define <16 x i8> @test_vld1q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld1q_dup_s8 -; CHECK: ld1r { {{v[0-9]+}}.16b }, [x0] -entry: - %0 = load i8* %a, align 1 - %1 = insertelement <16 x i8> undef, i8 %0, i32 0 - %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - ret <16 x i8> %lane -} - -define <8 x i16> @test_vld1q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld1q_dup_s16 -; CHECK: ld1r { {{v[0-9]+}}.8h }, [x0] -entry: - %0 = load i16* %a, align 2 - %1 = insertelement <8 x i16> undef, i16 %0, i32 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - ret <8 x i16> %lane -} - -define <4 x i32> @test_vld1q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld1q_dup_s32 -; CHECK: ld1r { {{v[0-9]+}}.4s }, [x0] -entry: - %0 = load i32* %a, align 4 - %1 = insertelement <4 x i32> undef, i32 %0, i32 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - ret <4 x i32> %lane -} - -define <2 x i64> @test_vld1q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld1q_dup_s64 -; CHECK: ld1r { {{v[0-9]+}}.2d }, [x0] -entry: - %0 = load i64* %a, align 8 - %1 = insertelement <2 x i64> undef, i64 %0, i32 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - ret <2 x i64> %lane -} - -define <4 x float> @test_vld1q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld1q_dup_f32 -; CHECK: ld1r { {{v[0-9]+}}.4s }, [x0] -entry: - %0 = load float* %a, align 4 - %1 = insertelement <4 x float> undef, float %0, i32 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - ret <4 x float> %lane -} - -define <2 x double> @test_vld1q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld1q_dup_f64 -; CHECK: ld1r { {{v[0-9]+}}.2d }, [x0] -entry: - %0 = load double* %a, align 8 - %1 = insertelement <2 x double> undef, double %0, i32 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - ret <2 x double> %lane -} - -define <8 x i8> @test_vld1_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld1_dup_s8 -; CHECK: ld1r { {{v[0-9]+}}.8b }, [x0] -entry: - %0 = load i8* %a, align 1 - %1 = insertelement <8 x i8> undef, i8 %0, i32 0 - %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - ret <8 x i8> %lane -} - -define <4 x i16> @test_vld1_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld1_dup_s16 -; CHECK: ld1r { {{v[0-9]+}}.4h }, [x0] -entry: - %0 = load i16* %a, align 2 - %1 = insertelement <4 x i16> undef, i16 %0, i32 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - ret <4 x i16> %lane -} - -define <2 x i32> @test_vld1_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld1_dup_s32 -; CHECK: ld1r { {{v[0-9]+}}.2s }, [x0] -entry: - %0 = load i32* %a, align 4 - %1 = insertelement <2 x i32> undef, i32 %0, i32 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - ret <2 x i32> %lane -} - -define <1 x i64> @test_vld1_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld1_dup_s64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load i64* %a, align 8 - %1 = insertelement <1 x i64> undef, i64 %0, i32 0 - ret <1 x i64> %1 -} - -define <2 x float> @test_vld1_dup_f32(float* %a) { -; CHECK-LABEL: test_vld1_dup_f32 -; CHECK: ld1r { {{v[0-9]+}}.2s }, [x0] -entry: - %0 = load float* %a, align 4 - %1 = insertelement <2 x float> undef, float %0, i32 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - ret <2 x float> %lane -} - -define <1 x double> @test_vld1_dup_f64(double* %a) { -; CHECK-LABEL: test_vld1_dup_f64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load double* %a, align 8 - %1 = insertelement <1 x double> undef, double %0, i32 0 - ret <1 x double> %1 -} - -define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 { -; As there is a store operation depending on %1, LD1R pattern can't be selected. -; So LDR and FMOV should be emitted. -; CHECK-LABEL: testDUP.v1i64 -; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] -; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}] - %1 = load i64* %a, align 8 - store i64 %1, i64* %b, align 8 - %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 - ret <1 x i64> %vecinit.i -} - -define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { -; As there is a store operation depending on %1, LD1R pattern can't be selected. -; So LDR and FMOV should be emitted. -; CHECK-LABEL: testDUP.v1f64 -; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] -; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] - %1 = load double* %a, align 8 - store double %1, double* %b, align 8 - %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 - ret <1 x double> %vecinit.i -} - -define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld2q_dup_s8 -; CHECK: ld2r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0] -entry: - %vld_dup = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 0 - %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer - %1 = extractvalue { <16 x i8>, <16 x i8> } %vld_dup, 1 - %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 - ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.int16x8x2_t @test_vld2q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld2q_dup_s16 -; CHECK: ld2r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i16>, <8 x i16> } %vld_dup, 1 - %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 - ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld2q_dup_s32 -; CHECK: ld2r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i32>, <4 x i32> } %vld_dup, 1 - %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 - ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld2q_dup_s64 -; CHECK: ld2r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) - %1 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i64>, <2 x i64> } %vld_dup, 1 - %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 - ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld2q_dup_f32 -; CHECK: ld2r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, i32 0, i32 4) - %1 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x float>, <4 x float> } %vld_dup, 1 - %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 - ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld2q_dup_f64 -; CHECK: ld2r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %1 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x double>, <2 x double> } %vld_dup, 1 - %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 - ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld2_dup_s8 -; CHECK: ld2r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0] -entry: - %vld_dup = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 0 - %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer - %1 = extractvalue { <8 x i8>, <8 x i8> } %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 - ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld2_dup_s16 -; CHECK: ld2r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i16>, <4 x i16> } %vld_dup, 1 - %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 - ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld2_dup_s32 -; CHECK: ld2r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i32>, <2 x i32> } %vld_dup, 1 - %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 - ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld2_dup_s64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld_dup, 1 - %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 - ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_dup_f32(float* %a) { -; CHECK-LABEL: test_vld2_dup_f32 -; CHECK: ld2r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, i32 0, i32 4) - %1 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x float>, <2 x float> } %vld_dup, 1 - %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 - ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_dup_f64(double* %a) { -; CHECK-LABEL: test_vld2_dup_f64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld_dup, 1 - %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 - ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld3q_dup_s8 -; CHECK: ld3r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0] -entry: - %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0 - %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer - %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1 - %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2 - %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2 - ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.int16x8x3_t @test_vld3q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld3q_dup_s16 -; CHECK: ld3r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1 - %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2 - %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2 - ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld3q_dup_s32 -; CHECK: ld3r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1 - %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2 - %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2 - ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld3q_dup_s64 -; CHECK: ld3r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) - %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1 - %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2 - %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2 - ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld3q_dup_f32 -; CHECK: ld3r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4) - %1 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1 - %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2 - %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2 - ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld3q_dup_f64 -; CHECK: ld3r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %1 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1 - %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2 - %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2 - ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld3_dup_s8 -; CHECK: ld3r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0] -entry: - %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0 - %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer - %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2 - %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2 - ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld3_dup_s16 -; CHECK: ld3r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1 - %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2 - %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2 - ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld3_dup_s32 -; CHECK: ld3r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1 - %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2 - %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2 - ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld3_dup_s64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2 - %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2 - ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_dup_f32(float* %a) { -; CHECK-LABEL: test_vld3_dup_f32 -; CHECK: ld3r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) - %1 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1 - %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2 - %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2 - ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_dup_f64(double* %a) { -; CHECK-LABEL: test_vld3_dup_f64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2 - %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2 - ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld4q_dup_s8 -; CHECK: ld4r { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, [x0] -entry: - %vld_dup = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 0 - %lane = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer - %1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 1 - %lane1 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer - %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 2 - %lane2 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld_dup, 3 - %lane3 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %lane3, 0, 3 - ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld4q_dup_s16 -; CHECK: ld4r { {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 0 - %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 1 - %lane1 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 2 - %lane2 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer - %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld_dup, 3 - %lane3 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %lane3, 0, 3 - ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld4q_dup_s32 -; CHECK: ld4r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 0 - %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 1 - %lane1 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 2 - %lane2 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer - %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld_dup, 3 - %lane3 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %lane3, 0, 3 - ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld4q_dup_s64 -; CHECK: ld4r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, <2 x i64> undef, i32 0, i32 8) - %1 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 0 - %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 1 - %lane1 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 2 - %lane2 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld_dup, 3 - %lane3 = shufflevector <2 x i64> %4, <2 x i64> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %lane3, 0, 3 - ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_dup_f32(float* %a) { -; CHECK-LABEL: test_vld4q_dup_f32 -; CHECK: ld4r { {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> undef, <4 x float> undef, <4 x float> undef, <4 x float> undef, i32 0, i32 4) - %1 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 0 - %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 1 - %lane1 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 2 - %lane2 = shufflevector <4 x float> %3, <4 x float> undef, <4 x i32> zeroinitializer - %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld_dup, 3 - %lane3 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %lane3, 0, 3 - ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_dup_f64(double* %a) { -; CHECK-LABEL: test_vld4q_dup_f64 -; CHECK: ld4r { {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %1 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 0 - %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 1 - %lane1 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 2 - %lane2 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld_dup, 3 - %lane3 = shufflevector <2 x double> %4, <2 x double> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %lane3, 0, 3 - ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_dup_s8(i8* %a) { -; CHECK-LABEL: test_vld4_dup_s8 -; CHECK: ld4r { {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b }, [x0] -entry: - %vld_dup = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 0 - %lane = shufflevector <8 x i8> %0, <8 x i8> undef, <8 x i32> zeroinitializer - %1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 1 - %lane1 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 2 - %lane2 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld_dup, 3 - %lane3 = shufflevector <8 x i8> %3, <8 x i8> undef, <8 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %lane3, 0, 3 - ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_dup_s16(i16* %a) { -; CHECK-LABEL: test_vld4_dup_s16 -; CHECK: ld4r { {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h }, [x0] -entry: - %0 = bitcast i16* %a to i8* - %vld_dup = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %1 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 0 - %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer - %2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 1 - %lane1 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> zeroinitializer - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 2 - %lane2 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer - %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld_dup, 3 - %lane3 = shufflevector <4 x i16> %4, <4 x i16> undef, <4 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %lane3, 0, 3 - ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_dup_s32(i32* %a) { -; CHECK-LABEL: test_vld4_dup_s32 -; CHECK: ld4r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast i32* %a to i8* - %vld_dup = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %1 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 0 - %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 1 - %lane1 = shufflevector <2 x i32> %2, <2 x i32> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 2 - %lane2 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld_dup, 3 - %lane3 = shufflevector <2 x i32> %4, <2 x i32> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %lane3, 0, 3 - ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_dup_s64(i64* %a) { -; CHECK-LABEL: test_vld4_dup_s64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast i64* %a to i8* - %vld_dup = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 2 - %vld_dup.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld_dup, 3 - %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld_dup.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld_dup.fca.3.extract, 0, 3 - ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_dup_f32(float* %a) { -; CHECK-LABEL: test_vld4_dup_f32 -; CHECK: ld4r { {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s }, [x0] -entry: - %0 = bitcast float* %a to i8* - %vld_dup = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) - %1 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 0 - %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer - %2 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 1 - %lane1 = shufflevector <2 x float> %2, <2 x float> undef, <2 x i32> zeroinitializer - %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 2 - %lane2 = shufflevector <2 x float> %3, <2 x float> undef, <2 x i32> zeroinitializer - %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld_dup, 3 - %lane3 = shufflevector <2 x float> %4, <2 x float> undef, <2 x i32> zeroinitializer - %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %lane, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %lane1, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %lane2, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %lane3, 0, 3 - ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_dup_f64(double* %a) { -; CHECK-LABEL: test_vld4_dup_f64 -; CHECK: ld1 { {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d }, [x0] -entry: - %0 = bitcast double* %a to i8* - %vld_dup = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %0, i32 8) - %vld_dup.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 0 - %vld_dup.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 1 - %vld_dup.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 2 - %vld_dup.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld_dup, 3 - %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld_dup.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld_dup.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld_dup.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld_dup.fca.3.extract, 0, 3 - ret %struct.float64x1x4_t %.fca.0.3.insert -} - -define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) { -; CHECK-LABEL: test_vld1q_lane_s8 -; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = load i8* %a, align 1 - %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 - ret <16 x i8> %vld1_lane -} - -define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) { -; CHECK-LABEL: test_vld1q_lane_s16 -; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = load i16* %a, align 2 - %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7 - ret <8 x i16> %vld1_lane -} - -define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) { -; CHECK-LABEL: test_vld1q_lane_s32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load i32* %a, align 4 - %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3 - ret <4 x i32> %vld1_lane -} - -define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) { -; CHECK-LABEL: test_vld1q_lane_s64 -; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = load i64* %a, align 8 - %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1 - ret <2 x i64> %vld1_lane -} - -define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) { -; CHECK-LABEL: test_vld1q_lane_f32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load float* %a, align 4 - %vld1_lane = insertelement <4 x float> %b, float %0, i32 3 - ret <4 x float> %vld1_lane -} - -define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) { -; CHECK-LABEL: test_vld1q_lane_f64 -; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = load double* %a, align 8 - %vld1_lane = insertelement <2 x double> %b, double %0, i32 1 - ret <2 x double> %vld1_lane -} - -define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) { -; CHECK-LABEL: test_vld1_lane_s8 -; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = load i8* %a, align 1 - %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 - ret <8 x i8> %vld1_lane -} - -define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) { -; CHECK-LABEL: test_vld1_lane_s16 -; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = load i16* %a, align 2 - %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3 - ret <4 x i16> %vld1_lane -} - -define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) { -; CHECK-LABEL: test_vld1_lane_s32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load i32* %a, align 4 - %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1 - ret <2 x i32> %vld1_lane -} - -define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) { -; CHECK-LABEL: test_vld1_lane_s64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load i64* %a, align 8 - %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0 - ret <1 x i64> %vld1_lane -} - -define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) { -; CHECK-LABEL: test_vld1_lane_f32 -; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = load float* %a, align 4 - %vld1_lane = insertelement <2 x float> %b, float %0, i32 1 - ret <2 x float> %vld1_lane -} - -define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) { -; CHECK-LABEL: test_vld1_lane_f64 -; CHECK: ld1r { {{v[0-9]+}}.1d }, [x0] -entry: - %0 = load double* %a, align 8 - %vld1_lane = insertelement <1 x double> undef, double %0, i32 0 - ret <1 x double> %vld1_lane -} - -define %struct.int16x8x2_t @test_vld2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_s16 -; CHECK: ld2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - %vld2_lane = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2) - %vld2_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_s32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - %vld2_lane = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_s64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - %vld2_lane = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_f32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - %vld2_lane = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vld2q_lane_f64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - %vld2_lane = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vld2_lane = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s16 -; CHECK: ld2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - %vld2_lane = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2) - %vld2_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - %vld2_lane = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_s64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - %vld2_lane = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_f32 -; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - %vld2_lane = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4) - %vld2_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vld2_lane_f64 -; CHECK: ld2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - %vld2_lane = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8) - %vld2_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int16x8x3_t @test_vld3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_s16 -; CHECK: ld3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_s32 -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_s64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_f32 -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vld3q_lane_f64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s8 -; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s16 -; CHECK: ld3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s32 -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_s64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_f32 -; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vld3_lane_f64 -; CHECK: ld3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_lane_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s8 -; CHECK: ld4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s16 -; CHECK: ld4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_s64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_f32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vld4q_lane_f64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s8 -; CHECK: ld4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vld3_lane = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s16 -; CHECK: ld4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - %vld3_lane = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2) - %vld3_lane.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - %vld3_lane = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_s64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - %vld3_lane = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_f32 -; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - %vld3_lane = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4) - %vld3_lane.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vld4_lane_f64 -; CHECK: ld4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - %vld3_lane = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8) - %vld3_lane.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 2 - %vld3_lane.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld3_lane, 3 - %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld3_lane.fca.2.extract, 0, 2 - %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld3_lane.fca.3.extract, 0, 3 - ret %struct.float64x1x4_t %.fca.0.3.insert -} - -define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) { -; CHECK-LABEL: test_vst1q_lane_s8 -; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <16 x i8> %b, i32 15 - store i8 %0, i8* %a, align 1 - ret void -} - -define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) { -; CHECK-LABEL: test_vst1q_lane_s16 -; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <8 x i16> %b, i32 7 - store i16 %0, i16* %a, align 2 - ret void -} - -define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) { -; CHECK-LABEL: test_vst1q_lane_s32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <4 x i32> %b, i32 3 - store i32 %0, i32* %a, align 4 - ret void -} - -define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) { -; CHECK-LABEL: test_vst1q_lane_s64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x i64> %b, i32 1 - store i64 %0, i64* %a, align 8 - ret void -} - -define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) { -; CHECK-LABEL: test_vst1q_lane_f32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <4 x float> %b, i32 3 - store float %0, float* %a, align 4 - ret void -} - -define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) { -; CHECK-LABEL: test_vst1q_lane_f64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x double> %b, i32 1 - store double %0, double* %a, align 8 - ret void -} - -define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) { -; CHECK-LABEL: test_vst1_lane_s8 -; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <8 x i8> %b, i32 7 - store i8 %0, i8* %a, align 1 - ret void -} - -define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) { -; CHECK-LABEL: test_vst1_lane_s16 -; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <4 x i16> %b, i32 3 - store i16 %0, i16* %a, align 2 - ret void -} - -define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) { -; CHECK-LABEL: test_vst1_lane_s32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x i32> %b, i32 1 - store i32 %0, i32* %a, align 4 - ret void -} - -define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) { -; CHECK-LABEL: test_vst1_lane_s64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <1 x i64> %b, i32 0 - store i64 %0, i64* %a, align 8 - ret void -} - -define void @test_vst1_lane_f32(float* %a, <2 x float> %b) { -; CHECK-LABEL: test_vst1_lane_f32 -; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <2 x float> %b, i32 1 - store float %0, float* %a, align 4 - ret void -} - -define void @test_vst1_lane_f64(double* %a, <1 x double> %b) { -; CHECK-LABEL: test_vst1_lane_f64 -; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %0 = extractelement <1 x double> %b, i32 0 - store double %0, double* %a, align 8 - ret void -} - -define void @test_vst2q_lane_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s8 -; CHECK: st2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 15, i32 1) - ret void -} - -define void @test_vst2q_lane_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s16 -; CHECK: st2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 7, i32 2) - ret void -} - -define void @test_vst2q_lane_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 3, i32 4) - ret void -} - -define void @test_vst2q_lane_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_s64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 1, i32 8) - ret void -} - -define void @test_vst2q_lane_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_f32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 3, i32 4) - ret void -} - -define void @test_vst2q_lane_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2q_lane_f64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 1, i32 8) - ret void -} - -define void @test_vst2_lane_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s8 -; CHECK: st2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 - tail call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 7, i32 1) - ret void -} - -define void @test_vst2_lane_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s16 -; CHECK: st2 { {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 3, i32 2) - ret void -} - -define void @test_vst2_lane_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 1, i32 4) - ret void -} - -define void @test_vst2_lane_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_s64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 0, i32 8) - ret void -} - -define void @test_vst2_lane_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_f32 -; CHECK: st2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 1, i32 4) - ret void -} - -define void @test_vst2_lane_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst2_lane_f64 -; CHECK: st2 { {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 0, i32 8) - ret void -} - -define void @test_vst3q_lane_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s8 -; CHECK: st3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3lane.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 15, i32 1) - ret void -} - -define void @test_vst3q_lane_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s16 -; CHECK: st3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 7, i32 2) - ret void -} - -define void @test_vst3q_lane_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 3, i32 4) - ret void -} - -define void @test_vst3q_lane_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_s64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 1, i32 8) - ret void -} - -define void @test_vst3q_lane_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_f32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 3, i32 4) - ret void -} - -define void @test_vst3q_lane_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3q_lane_f64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 1, i32 8) - ret void -} - -define void @test_vst3_lane_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s8 -; CHECK: st3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 - tail call void @llvm.arm.neon.vst3lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 7, i32 1) - ret void -} - -define void @test_vst3_lane_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s16 -; CHECK: st3 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 3, i32 2) - ret void -} - -define void @test_vst3_lane_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 1, i32 4) - ret void -} - -define void @test_vst3_lane_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_s64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 0, i32 8) - ret void -} - -define void @test_vst3_lane_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_f32 -; CHECK: st3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 1, i32 4) - ret void -} - -define void @test_vst3_lane_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst3_lane_f64 -; CHECK: st3 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 0, i32 8) - ret void -} - -define void @test_vst4q_lane_s8(i16* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s8 -; CHECK: st4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v16i8(i8* %0, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 15, i32 2) - ret void -} - -define void @test_vst4q_lane_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s16 -; CHECK: st4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v8i16(i8* %0, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 7, i32 2) - ret void -} - -define void @test_vst4q_lane_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v4i32(i8* %0, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 3, i32 4) - ret void -} - -define void @test_vst4q_lane_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_s64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2i64(i8* %0, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 1, i32 8) - ret void -} - -define void @test_vst4q_lane_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_f32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v4f32(i8* %0, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 3, i32 4) - ret void -} - -define void @test_vst4q_lane_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4q_lane_f64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %0, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 1, i32 8) - ret void -} - -define void @test_vst4_lane_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s8 -; CHECK: st4 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 - tail call void @llvm.arm.neon.vst4lane.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 7, i32 1) - ret void -} - -define void @test_vst4_lane_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s16 -; CHECK: st4 { {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h, {{v[0-9]+}}.h }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 - %0 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v4i16(i8* %0, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 3, i32 2) - ret void -} - -define void @test_vst4_lane_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 - %0 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2i32(i8* %0, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 1, i32 4) - ret void -} - -define void @test_vst4_lane_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_s64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 - %0 = bitcast i64* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %0, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 0, i32 8) - ret void -} - -define void @test_vst4_lane_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_f32 -; CHECK: st4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 - %0 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %0, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 1, i32 4) - ret void -} - -define void @test_vst4_lane_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK-LABEL: test_vst4_lane_f64 -; CHECK: st4 { {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d, {{v[0-9]+}}.d }[{{[0-9]+}}], [x0] -entry: - %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 - %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 - %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 - %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 - %0 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v1f64(i8* %0, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 0, i32 8) - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) -declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32) -declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) -declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32) -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) -declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) -declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) -declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2i64(i8*, <2 x i64>, <2 x i64>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2f64(i8*, <2 x double>, <2 x double>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v1f64(i8*, <1 x double>, <1 x double>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) - -define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld2q_lane_s8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 - %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld2q_lane_u8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 - %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.uint8x16x2_t %.fca.0.1.insert -} - -define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld2q_lane_p8 -; CHECK: ld2 { {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 - %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) - %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 - %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 - %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 - ret %struct.poly8x16x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld3q_lane_s8 -; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 - %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 - %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { -; CHECK-LABEL: test_vld3q_lane_u8 -; CHECK: ld3 { {{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b }[15], [x0] -entry: - %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 - %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 - %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 - %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) - %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 - %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 - %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 - %.fca.0.0.insert = insertvalue %struct.uint8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 - %.fca.0.1.insert = insertvalue %struct.uint8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 - %.fca.0.2.insert = insertvalue %struct.uint8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 - ret %struct.uint8x16x3_t %.fca.0.2.insert -} - diff --git a/test/CodeGen/AArch64/neon-simd-ldst.ll b/test/CodeGen/AArch64/neon-simd-ldst.ll deleted file mode 100644 index 7c78b69334..0000000000 --- a/test/CodeGen/AArch64/neon-simd-ldst.ll +++ /dev/null @@ -1,165 +0,0 @@ -; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; Just intrinsic mashing. Duplicates existing arm64 tests. - -define void @test_ldstq_4v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldstq_4v -; CHECK: ld4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] -; CHECK: st4 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0] -entry: - %tobool62 = icmp eq i32 %count, 0 - br i1 %tobool62, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.063, -1 - %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1) - %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 - tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) - -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) - -define void @test_ldstq_3v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldstq_3v -; CHECK: ld3 { v0.16b, v1.16b, v2.16b }, [x0] -; CHECK: st3 { v0.16b, v1.16b, v2.16b }, [x0] -entry: - %tobool47 = icmp eq i32 %count, 0 - br i1 %tobool47, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.048, -1 - %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1) - %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 - tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) - -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) - -define void @test_ldstq_2v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldstq_2v -; CHECK: ld2 { v0.16b, v1.16b }, [x0] -; CHECK: st2 { v0.16b, v1.16b }, [x0] -entry: - %tobool22 = icmp eq i32 %count, 0 - br i1 %tobool22, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.023, -1 - %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1) - %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 - tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) - -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) - -define void @test_ldst_4v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldst_4v -; CHECK: ld4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] -; CHECK: st4 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0] -entry: - %tobool42 = icmp eq i32 %count, 0 - br i1 %tobool42, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.043, -1 - %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1) - %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 - %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 - %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 - %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 - tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) - -define void @test_ldst_3v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldst_3v -; CHECK: ld3 { v0.8b, v1.8b, v2.8b }, [x0] -; CHECK: st3 { v0.8b, v1.8b, v2.8b }, [x0] -entry: - %tobool32 = icmp eq i32 %count, 0 - br i1 %tobool32, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.033, -1 - %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1) - %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 - %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 - %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 - tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) - -define void @test_ldst_2v(i8* noalias %io, i32 %count) { -; CHECK-LABEL: test_ldst_2v -; CHECK: ld2 { v0.8b, v1.8b }, [x0] -; CHECK: st2 { v0.8b, v1.8b }, [x0] -entry: - %tobool22 = icmp eq i32 %count, 0 - br i1 %tobool22, label %while.end, label %while.body - -while.body: ; preds = %entry, %while.body - %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] - %dec = add i32 %count.addr.023, -1 - %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1) - %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 - %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 - tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1) - %tobool = icmp eq i32 %dec, 0 - br i1 %tobool, label %while.end, label %while.body - -while.end: ; preds = %while.body, %entry - ret void -} - -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) - diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll deleted file mode 100644 index 181c69c89b..0000000000 --- a/test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll +++ /dev/null @@ -1,355 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has equivalent tests to these in various files. - -;Check for a post-increment updating load. -define <4 x i16> @test_vld1_fx_update(i16** %ptr) nounwind { -; CHECK: test_vld1_fx_update -; CHECK: ld1 { v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}], #8 - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 2) - %tmp2 = getelementptr i16* %A, i32 4 - store i16* %tmp2, i16** %ptr - ret <4 x i16> %tmp1 -} - -;Check for a post-increment updating load with register increment. -define <2 x i32> @test_vld1_reg_update(i32** %ptr, i32 %inc) nounwind { -; CHECK: test_vld1_reg_update -; CHECK: ld1 { v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i32** %ptr - %tmp0 = bitcast i32* %A to i8* - %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 4) - %tmp2 = getelementptr i32* %A, i32 %inc - store i32* %tmp2, i32** %ptr - ret <2 x i32> %tmp1 -} - -define <2 x float> @test_vld2_fx_update(float** %ptr) nounwind { -; CHECK: test_vld2_fx_update -; CHECK: ld2 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], #16 - %A = load float** %ptr - %tmp0 = bitcast float* %A to i8* - %tmp1 = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 4) - %tmp2 = extractvalue { <2 x float>, <2 x float> } %tmp1, 0 - %tmp3 = getelementptr float* %A, i32 4 - store float* %tmp3, float** %ptr - ret <2 x float> %tmp2 -} - -define <16 x i8> @test_vld2_reg_update(i8** %ptr, i32 %inc) nounwind { -; CHECK: test_vld2_reg_update -; CHECK: ld2 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - %tmp0 = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1) - %tmp1 = extractvalue { <16 x i8>, <16 x i8> } %tmp0, 0 - %tmp2 = getelementptr i8* %A, i32 %inc - store i8* %tmp2, i8** %ptr - ret <16 x i8> %tmp1 -} - -define <4 x i32> @test_vld3_fx_update(i32** %ptr) nounwind { -; CHECK: test_vld3_fx_update -; CHECK: ld3 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], #48 - %A = load i32** %ptr - %tmp0 = bitcast i32* %A to i8* - %tmp1 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 4) - %tmp2 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %tmp1, 0 - %tmp3 = getelementptr i32* %A, i32 12 - store i32* %tmp3, i32** %ptr - ret <4 x i32> %tmp2 -} - -define <4 x i16> @test_vld3_reg_update(i16** %ptr, i32 %inc) nounwind { -; CHECK: test_vld3_reg_update -; CHECK: ld3 { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 2) - %tmp2 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %tmp1, 0 - %tmp3 = getelementptr i16* %A, i32 %inc - store i16* %tmp3, i16** %ptr - ret <4 x i16> %tmp2 -} - -define <8 x i16> @test_vld4_fx_update(i16** %ptr) nounwind { -; CHECK: test_vld4_fx_update -; CHECK: ld4 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], #64 - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - %tmp1 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) - %tmp2 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %tmp1, 0 - %tmp3 = getelementptr i16* %A, i32 32 - store i16* %tmp3, i16** %ptr - ret <8 x i16> %tmp2 -} - -define <8 x i8> @test_vld4_reg_update(i8** %ptr, i32 %inc) nounwind { -; CHECK: test_vld4_reg_update -; CHECK: ld4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - %tmp0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1) - %tmp1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %tmp0, 0 - %tmp2 = getelementptr i8* %A, i32 %inc - store i8* %tmp2, i8** %ptr - ret <8 x i8> %tmp1 -} - -define void @test_vst1_fx_update(float** %ptr, <2 x float> %B) nounwind { -; CHECK: test_vst1_fx_update -; CHECK: st1 { v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}], #8 - %A = load float** %ptr - %tmp0 = bitcast float* %A to i8* - call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %B, i32 4) - %tmp2 = getelementptr float* %A, i32 2 - store float* %tmp2, float** %ptr - ret void -} - -define void @test_vst1_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { -; CHECK: test_vst1_reg_update -; CHECK: st1 { v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %B, i32 2) - %tmp1 = getelementptr i16* %A, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst2_fx_update(i64** %ptr, <1 x i64> %B) nounwind { -; CHECK: test_vst2_fx_update -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [{{x[0-9]+|sp}}], #16 - %A = load i64** %ptr - %tmp0 = bitcast i64* %A to i8* - call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %B, <1 x i64> %B, i32 8) - %tmp1 = getelementptr i64* %A, i32 2 - store i64* %tmp1, i64** %ptr - ret void -} - -define void @test_vst2_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { -; CHECK: test_vst2_reg_update -; CHECK: st2 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, i32 4) - %tmp0 = getelementptr i8* %A, i32 %inc - store i8* %tmp0, i8** %ptr - ret void -} - -define void @test_vst3_fx_update(i32** %ptr, <2 x i32> %B) nounwind { -; CHECK: test_vst3_fx_update -; CHECK: st3 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [{{x[0-9]+|sp}}], #24 - %A = load i32** %ptr - %tmp0 = bitcast i32* %A to i8* - call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %B, <2 x i32> %B, <2 x i32> %B, i32 4) - %tmp1 = getelementptr i32* %A, i32 6 - store i32* %tmp1, i32** %ptr - ret void -} - -define void @test_vst3_reg_update(i16** %ptr, <8 x i16> %B, i32 %inc) nounwind { -; CHECK: test_vst3_reg_update -; CHECK: st3 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i16** %ptr - %tmp0 = bitcast i16* %A to i8* - call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %B, <8 x i16> %B, <8 x i16> %B, i32 2) - %tmp1 = getelementptr i16* %A, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst4_fx_update(float** %ptr, <4 x float> %B) nounwind { -; CHECK: test_vst4_fx_update -; CHECK: st4 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [{{x[0-9]+|sp}}], #64 - %A = load float** %ptr - %tmp0 = bitcast float* %A to i8* - call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %B, <4 x float> %B, <4 x float> %B, <4 x float> %B, i32 4) - %tmp1 = getelementptr float* %A, i32 16 - store float* %tmp1, float** %ptr - ret void -} - -define void @test_vst4_reg_update(i8** %ptr, <8 x i8> %B, i32 %inc) nounwind { -; CHECK: test_vst4_reg_update -; CHECK: st4 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [{{x[0-9]+|sp}}], x{{[0-9]+}} - %A = load i8** %ptr - call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, <8 x i8> %B, i32 1) - %tmp0 = getelementptr i8* %A, i32 %inc - store i8* %tmp0, i8** %ptr - ret void -} - - -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) - -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) - -define <16 x i8> @test_vld1x2_fx_update(i8* %a, i8** %ptr) { -; CHECK: test_vld1x2_fx_update -; CHECK: ld1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], #32 - %1 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 - %tmp1 = getelementptr i8* %a, i32 32 - store i8* %tmp1, i8** %ptr - ret <16 x i8> %2 -} - -define <8 x i16> @test_vld1x2_reg_update(i16* %a, i16** %ptr, i32 %inc) { -; CHECK: test_vld1x2_reg_update -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0 - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret <8 x i16> %3 -} - -define <2 x i64> @test_vld1x3_fx_update(i64* %a, i64** %ptr) { -; CHECK: test_vld1x3_fx_update -; CHECK: ld1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}], #48 - %1 = bitcast i64* %a to i8* - %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8) - %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0 - %tmp1 = getelementptr i64* %a, i32 6 - store i64* %tmp1, i64** %ptr - ret <2 x i64> %3 -} - -define <8 x i16> @test_vld1x3_reg_update(i16* %a, i16** %ptr, i32 %inc) { -; CHECK: test_vld1x3_reg_update -; CHECK: ld1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast i16* %a to i8* - %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2) - %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0 - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret <8 x i16> %3 -} - -define <4 x float> @test_vld1x4_fx_update(float* %a, float** %ptr) { -; CHECK: test_vld1x4_fx_update -; CHECK: ld1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], #64 - %1 = bitcast float* %a to i8* - %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4) - %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0 - %tmp1 = getelementptr float* %a, i32 16 - store float* %tmp1, float** %ptr - ret <4 x float> %3 -} - -define <8 x i8> @test_vld1x4_reg_update(i8* readonly %a, i8** %ptr, i32 %inc) #0 { -; CHECK: test_vld1x4_reg_update -; CHECK: ld1 { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %tmp1 = getelementptr i8* %a, i32 %inc - store i8* %tmp1, i8** %ptr - ret <8 x i8> %2 -} - -define void @test_vst1x2_fx_update(i8* %a, [2 x <16 x i8>] %b.coerce, i8** %ptr) #2 { -; CHECK: test_vst1x2_fx_update -; CHECK: st1 { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], #32 - %1 = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %2 = extractvalue [2 x <16 x i8>] %b.coerce, 1 - tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1) - %tmp1 = getelementptr i8* %a, i32 32 - store i8* %tmp1, i8** %ptr - ret void -} - -define void @test_vst1x2_reg_update(i16* %a, [2 x <8 x i16>] %b.coerce, i16** %ptr, i32 %inc) #2 { -; CHECK: test_vst1x2_reg_update -; CHECK: st1 { v{{[0-9]+}}.8h, v{{[0-9]+}}.8h }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [2 x <8 x i16>] %b.coerce, 0 - %2 = extractvalue [2 x <8 x i16>] %b.coerce, 1 - %3 = bitcast i16* %a to i8* - tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2) - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst1x3_fx_update(i32* %a, [3 x <2 x i32>] %b.coerce, i32** %ptr) #2 { -; CHECK: test_vst1x3_fx_update -; CHECK: st1 { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], #24 - %1 = extractvalue [3 x <2 x i32>] %b.coerce, 0 - %2 = extractvalue [3 x <2 x i32>] %b.coerce, 1 - %3 = extractvalue [3 x <2 x i32>] %b.coerce, 2 - %4 = bitcast i32* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4) - %tmp1 = getelementptr i32* %a, i32 6 - store i32* %tmp1, i32** %ptr - ret void -} - -define void @test_vst1x3_reg_update(i64* %a, [3 x <1 x i64>] %b.coerce, i64** %ptr, i32 %inc) #2 { -; CHECK: test_vst1x3_reg_update -; CHECK: st1 { v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [3 x <1 x i64>] %b.coerce, 0 - %2 = extractvalue [3 x <1 x i64>] %b.coerce, 1 - %3 = extractvalue [3 x <1 x i64>] %b.coerce, 2 - %4 = bitcast i64* %a to i8* - tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8) - %tmp1 = getelementptr i64* %a, i32 %inc - store i64* %tmp1, i64** %ptr - ret void -} - -define void @test_vst1x4_fx_update(float* %a, [4 x <4 x float>] %b.coerce, float** %ptr) #2 { -; CHECK: test_vst1x4_fx_update -; CHECK: st1 { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], #64 - %1 = extractvalue [4 x <4 x float>] %b.coerce, 0 - %2 = extractvalue [4 x <4 x float>] %b.coerce, 1 - %3 = extractvalue [4 x <4 x float>] %b.coerce, 2 - %4 = extractvalue [4 x <4 x float>] %b.coerce, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4) - %tmp1 = getelementptr float* %a, i32 16 - store float* %tmp1, float** %ptr - ret void -} - -define void @test_vst1x4_reg_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr, i32 %inc) #2 { -; CHECK: test_vst1x4_reg_update -; CHECK: st1 { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [4 x <2 x double>] %b.coerce, 0 - %2 = extractvalue [4 x <2 x double>] %b.coerce, 1 - %3 = extractvalue [4 x <2 x double>] %b.coerce, 2 - %4 = extractvalue [4 x <2 x double>] %b.coerce, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8) - %tmp1 = getelementptr double* %a, i32 %inc - store double* %tmp1, double** %ptr - ret void -} - -declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32) -declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) -declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) -declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) #3 -declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) #3 diff --git a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll deleted file mode 100644 index 75f57c5d2e..0000000000 --- a/test/CodeGen/AArch64/neon-simd-post-ldst-one.ll +++ /dev/null @@ -1,320 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has equivalents of these tests separately. - -define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) { -; CHECK-LABEL: test_vld2q_dup_fx_update -; CHECK: ld2r { v{{[0-9]+}}.16b, v{{[0-9]+}}.16b }, [x{{[0-9]+|sp}}], #2 - %1 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %a, <16 x i8> undef, <16 x i8> undef, i32 0, i32 1) - %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0 - %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer - %4 = extractvalue { <16 x i8>, <16 x i8> } %1, 1 - %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> zeroinitializer - %6 = insertvalue { [2 x <16 x i8>] } undef, <16 x i8> %3, 0, 0 - %7 = insertvalue { [2 x <16 x i8>] } %6, <16 x i8> %5, 0, 1 - %tmp1 = getelementptr i8* %a, i32 2 - store i8* %tmp1, i8** %ptr - ret { [2 x <16 x i8>] } %7 -} - -define { [2 x <4 x i32>] } @test_vld2q_dup_reg_update(i32* %a, i32** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld2q_dup_reg_update -; CHECK: ld2r { v{{[0-9]+}}.4s, v{{[0-9]+}}.4s }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast i32* %a to i8* - %2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8* %1, <4 x i32> undef, <4 x i32> undef, i32 0, i32 4) - %3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0 - %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer - %5 = extractvalue { <4 x i32>, <4 x i32> } %2, 1 - %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> zeroinitializer - %7 = insertvalue { [2 x <4 x i32>] } undef, <4 x i32> %4, 0, 0 - %8 = insertvalue { [2 x <4 x i32>] } %7, <4 x i32> %6, 0, 1 - %tmp1 = getelementptr i32* %a, i32 %inc - store i32* %tmp1, i32** %ptr - ret { [2 x <4 x i32>] } %8 -} - -define { [3 x <4 x i16>] } @test_vld3_dup_fx_update(i16* %a, i16** %ptr) { -; CHECK-LABEL: test_vld3_dup_fx_update -; CHECK: ld3r { v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h }, [x{{[0-9]+|sp}}], #6 - %1 = bitcast i16* %a to i8* - %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %1, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) - %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0 - %4 = shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer - %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1 - %6 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> zeroinitializer - %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2 - %8 = shufflevector <4 x i16> %7, <4 x i16> undef, <4 x i32> zeroinitializer - %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %4, 0, 0 - %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %6, 0, 1 - %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2 - %tmp1 = getelementptr i16* %a, i32 3 - store i16* %tmp1, i16** %ptr - ret { [3 x <4 x i16>] } %11 -} - -define { [3 x <8 x i8>] } @test_vld3_dup_reg_update(i8* %a, i8** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld3_dup_reg_update -; CHECK: ld3r { v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) - %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 - %3 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer - %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1 - %5 = shufflevector <8 x i8> %4, <8 x i8> undef, <8 x i32> zeroinitializer - %6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2 - %7 = shufflevector <8 x i8> %6, <8 x i8> undef, <8 x i32> zeroinitializer - %8 = insertvalue { [3 x <8 x i8>] } undef, <8 x i8> %3, 0, 0 - %9 = insertvalue { [3 x <8 x i8>] } %8, <8 x i8> %5, 0, 1 - %10 = insertvalue { [3 x <8 x i8>] } %9, <8 x i8> %7, 0, 2 - %tmp1 = getelementptr i8* %a, i32 %inc - store i8* %tmp1, i8** %ptr - ret { [3 x <8 x i8>] }%10 -} - -define { [4 x <2 x i32>] } @test_vld4_dup_fx_update(i32* %a, i32** %ptr) #0 { -; CHECK-LABEL: test_vld4_dup_fx_update -; CHECK: ld4r { v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s }, [x{{[0-9]+|sp}}], #16 - %1 = bitcast i32* %a to i8* - %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %1, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) - %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0 - %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <2 x i32> zeroinitializer - %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1 - %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <2 x i32> zeroinitializer - %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2 - %8 = shufflevector <2 x i32> %7, <2 x i32> undef, <2 x i32> zeroinitializer - %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3 - %10 = shufflevector <2 x i32> %9, <2 x i32> undef, <2 x i32> zeroinitializer - %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %4, 0, 0 - %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %6, 0, 1 - %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %8, 0, 2 - %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3 - %tmp1 = getelementptr i32* %a, i32 4 - store i32* %tmp1, i32** %ptr - ret { [4 x <2 x i32>] } %14 -} - -define { [4 x <2 x double>] } @test_vld4_dup_reg_update(double* %a, double** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld4_dup_reg_update -; CHECK: ld4r { v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d }, [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = bitcast double* %a to i8* - %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %1, <2 x double> undef, <2 x double> undef, <2 x double> undef, <2 x double> undef, i32 0, i32 8) - %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0 - %4 = shufflevector <2 x double> %3, <2 x double> undef, <2 x i32> zeroinitializer - %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1 - %6 = shufflevector <2 x double> %5, <2 x double> undef, <2 x i32> zeroinitializer - %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2 - %8 = shufflevector <2 x double> %7, <2 x double> undef, <2 x i32> zeroinitializer - %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3 - %10 = shufflevector <2 x double> %9, <2 x double> undef, <2 x i32> zeroinitializer - %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %4, 0, 0 - %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %6, 0, 1 - %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %8, 0, 2 - %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3 - %tmp1 = getelementptr double* %a, i32 %inc - store double* %tmp1, double** %ptr - ret { [4 x <2 x double>] } %14 -} - -define { [2 x <8 x i8>] } @test_vld2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { -; CHECK-LABEL: test_vld2_lane_fx_update -; CHECK: ld2 { v{{[0-9]+}}.b, v{{[0-9]+}}.b }[7], [x{{[0-9]+|sp}}], #2 - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) - %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0 - %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1 - %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0 - %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1 - %tmp1 = getelementptr i8* %a, i32 2 - store i8* %tmp1, i8** %ptr - ret { [2 x <8 x i8>] } %7 -} - -define { [2 x <8 x i8>] } @test_vld2_lane_reg_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld2_lane_reg_update -; CHECK: ld2 { v{{[0-9]+}}.b, v{{[0-9]+}}.b }[6], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - %3 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 6, i32 1) - %4 = extractvalue { <8 x i8>, <8 x i8> } %3, 0 - %5 = extractvalue { <8 x i8>, <8 x i8> } %3, 1 - %6 = insertvalue { [2 x <8 x i8>] } undef, <8 x i8> %4, 0, 0 - %7 = insertvalue { [2 x <8 x i8>] } %6, <8 x i8> %5, 0, 1 - %tmp1 = getelementptr i8* %a, i32 %inc - store i8* %tmp1, i8** %ptr - ret { [2 x <8 x i8>] } %7 -} - -define { [3 x <2 x float>] } @test_vld3_lane_fx_update(float* %a, [3 x <2 x float>] %b, float** %ptr) { -; CHECK-LABEL: test_vld3_lane_fx_update -; CHECK: ld3 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], #12 - %1 = extractvalue [3 x <2 x float>] %b, 0 - %2 = extractvalue [3 x <2 x float>] %b, 1 - %3 = extractvalue [3 x <2 x float>] %b, 2 - %4 = bitcast float* %a to i8* - %5 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 1, i32 4) - %6 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 0 - %7 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 1 - %8 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %5, 2 - %9 = insertvalue { [3 x <2 x float>] } undef, <2 x float> %6, 0, 0 - %10 = insertvalue { [3 x <2 x float>] } %9, <2 x float> %7, 0, 1 - %11 = insertvalue { [3 x <2 x float>] } %10, <2 x float> %8, 0, 2 - %tmp1 = getelementptr float* %a, i32 3 - store float* %tmp1, float** %ptr - ret { [3 x <2 x float>] } %11 -} - -define { [3 x <4 x i16>] } @test_vld3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld3_lane_reg_update -; CHECK: ld3 { v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h }[3], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [3 x <4 x i16>] %b, 0 - %2 = extractvalue [3 x <4 x i16>] %b, 1 - %3 = extractvalue [3 x <4 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - %5 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) - %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 0 - %7 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 1 - %8 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %5, 2 - %9 = insertvalue { [3 x <4 x i16>] } undef, <4 x i16> %6, 0, 0 - %10 = insertvalue { [3 x <4 x i16>] } %9, <4 x i16> %7, 0, 1 - %11 = insertvalue { [3 x <4 x i16>] } %10, <4 x i16> %8, 0, 2 - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret { [3 x <4 x i16>] } %11 -} - -define { [4 x <2 x i32>] } @test_vld4_lane_fx_update(i32* readonly %a, [4 x <2 x i32>] %b, i32** %ptr) { -; CHECK-LABEL: test_vld4_lane_fx_update -; CHECK: ld4 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], #16 - %1 = extractvalue [4 x <2 x i32>] %b, 0 - %2 = extractvalue [4 x <2 x i32>] %b, 1 - %3 = extractvalue [4 x <2 x i32>] %b, 2 - %4 = extractvalue [4 x <2 x i32>] %b, 3 - %5 = bitcast i32* %a to i8* - %6 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 1, i32 4) - %7 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 0 - %8 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 1 - %9 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 2 - %10 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %6, 3 - %11 = insertvalue { [4 x <2 x i32>] } undef, <2 x i32> %7, 0, 0 - %12 = insertvalue { [4 x <2 x i32>] } %11, <2 x i32> %8, 0, 1 - %13 = insertvalue { [4 x <2 x i32>] } %12, <2 x i32> %9, 0, 2 - %14 = insertvalue { [4 x <2 x i32>] } %13, <2 x i32> %10, 0, 3 - %tmp1 = getelementptr i32* %a, i32 4 - store i32* %tmp1, i32** %ptr - ret { [4 x <2 x i32>] } %14 -} - -define { [4 x <2 x double>] } @test_vld4_lane_reg_update(double* readonly %a, [4 x <2 x double>] %b, double** %ptr, i32 %inc) { -; CHECK-LABEL: test_vld4_lane_reg_update -; CHECK: ld4 { v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d }[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [4 x <2 x double>] %b, 0 - %2 = extractvalue [4 x <2 x double>] %b, 1 - %3 = extractvalue [4 x <2 x double>] %b, 2 - %4 = extractvalue [4 x <2 x double>] %b, 3 - %5 = bitcast double* %a to i8* - %6 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8) - %7 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 0 - %8 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 1 - %9 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 2 - %10 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %6, 3 - %11 = insertvalue { [4 x <2 x double>] } undef, <2 x double> %7, 0, 0 - %12 = insertvalue { [4 x <2 x double>] } %11, <2 x double> %8, 0, 1 - %13 = insertvalue { [4 x <2 x double>] } %12, <2 x double> %9, 0, 2 - %14 = insertvalue { [4 x <2 x double>] } %13, <2 x double> %10, 0, 3 - %tmp1 = getelementptr double* %a, i32 %inc - store double* %tmp1, double** %ptr - ret { [4 x <2 x double>] } %14 -} - -define void @test_vst2_lane_fx_update(i8* %a, [2 x <8 x i8>] %b, i8** %ptr) { -; CHECK-LABEL: test_vst2_lane_fx_update -; CHECK: st2 { v{{[0-9]+}}.b, v{{[0-9]+}}.b }[7], [x{{[0-9]+|sp}}], #2 - %1 = extractvalue [2 x <8 x i8>] %b, 0 - %2 = extractvalue [2 x <8 x i8>] %b, 1 - call void @llvm.arm.neon.vst2lane.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 7, i32 1) - %tmp1 = getelementptr i8* %a, i32 2 - store i8* %tmp1, i8** %ptr - ret void -} - -define void @test_vst2_lane_reg_update(i32* %a, [2 x <2 x i32>] %b.coerce, i32** %ptr, i32 %inc) { -; CHECK-LABEL: test_vst2_lane_reg_update -; CHECK: st2 { v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [2 x <2 x i32>] %b.coerce, 0 - %2 = extractvalue [2 x <2 x i32>] %b.coerce, 1 - %3 = bitcast i32* %a to i8* - tail call void @llvm.arm.neon.vst2lane.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 1, i32 4) - %tmp1 = getelementptr i32* %a, i32 %inc - store i32* %tmp1, i32** %ptr - ret void -} - -define void @test_vst3_lane_fx_update(float* %a, [3 x <4 x float>] %b, float** %ptr) { -; CHECK-LABEL: test_vst3_lane_fx_update -; CHECK: st3 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[3], [x{{[0-9]+|sp}}], #12 - %1 = extractvalue [3 x <4 x float>] %b, 0 - %2 = extractvalue [3 x <4 x float>] %b, 1 - %3 = extractvalue [3 x <4 x float>] %b, 2 - %4 = bitcast float* %a to i8* - call void @llvm.arm.neon.vst3lane.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 3, i32 4) - %tmp1 = getelementptr float* %a, i32 3 - store float* %tmp1, float** %ptr - ret void -} - -; Function Attrs: nounwind -define void @test_vst3_lane_reg_update(i16* %a, [3 x <4 x i16>] %b, i16** %ptr, i32 %inc) { -; CHECK-LABEL: test_vst3_lane_reg_update -; CHECK: st3 { v{{[0-9]+}}.h, v{{[0-9]+}}.h, v{{[0-9]+}}.h }[3], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [3 x <4 x i16>] %b, 0 - %2 = extractvalue [3 x <4 x i16>] %b, 1 - %3 = extractvalue [3 x <4 x i16>] %b, 2 - %4 = bitcast i16* %a to i8* - tail call void @llvm.arm.neon.vst3lane.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 3, i32 2) - %tmp1 = getelementptr i16* %a, i32 %inc - store i16* %tmp1, i16** %ptr - ret void -} - -define void @test_vst4_lane_fx_update(double* %a, [4 x <2 x double>] %b.coerce, double** %ptr) { -; CHECK-LABEL: test_vst4_lane_fx_update -; CHECK: st4 { v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d, v{{[0-9]+}}.d }[1], [x{{[0-9]+|sp}}], #32 - %1 = extractvalue [4 x <2 x double>] %b.coerce, 0 - %2 = extractvalue [4 x <2 x double>] %b.coerce, 1 - %3 = extractvalue [4 x <2 x double>] %b.coerce, 2 - %4 = extractvalue [4 x <2 x double>] %b.coerce, 3 - %5 = bitcast double* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 1, i32 8) - %tmp1 = getelementptr double* %a, i32 4 - store double* %tmp1, double** %ptr - ret void -} - - -define void @test_vst4_lane_reg_update(float* %a, [4 x <2 x float>] %b.coerce, float** %ptr, i32 %inc) { -; CHECK-LABEL: test_vst4_lane_reg_update -; CHECK: st4 { v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s, v{{[0-9]+}}.s }[1], [x{{[0-9]+|sp}}], x{{[0-9]+}} - %1 = extractvalue [4 x <2 x float>] %b.coerce, 0 - %2 = extractvalue [4 x <2 x float>] %b.coerce, 1 - %3 = extractvalue [4 x <2 x float>] %b.coerce, 2 - %4 = extractvalue [4 x <2 x float>] %b.coerce, 3 - %5 = bitcast float* %a to i8* - tail call void @llvm.arm.neon.vst4lane.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 1, i32 4) - %tmp1 = getelementptr float* %a, i32 %inc - store float* %tmp1, float** %ptr - ret void -} - -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) -declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32, i32) diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll deleted file mode 100644 index 5615e3c836..0000000000 --- a/test/CodeGen/AArch64/neon-simd-shift.ll +++ /dev/null @@ -1,1557 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has separate copy of parts that aren't pure intrinsic wrangling. - -define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) { -; CHECK: test_vshr_n_s8 -; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - ret <8 x i8> %vshr_n -} - -define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) { -; CHECK: test_vshr_n_s16 -; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> - ret <4 x i16> %vshr_n -} - -define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) { -; CHECK: test_vshr_n_s32 -; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3> - ret <2 x i32> %vshr_n -} - -define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { -; CHECK: test_vshrq_n_s8 -; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - ret <16 x i8> %vshr_n -} - -define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { -; CHECK: test_vshrq_n_s16 -; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - ret <8 x i16> %vshr_n -} - -define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { -; CHECK: test_vshrq_n_s32 -; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> - ret <4 x i32> %vshr_n -} - -define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) { -; CHECK: test_vshrq_n_s64 -; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3> - ret <2 x i64> %vshr_n -} - -define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) { -; CHECK: test_vshr_n_u8 -; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - ret <8 x i8> %vshr_n -} - -define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) { -; CHECK: test_vshr_n_u16 -; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3> - ret <4 x i16> %vshr_n -} - -define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) { -; CHECK: test_vshr_n_u32 -; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3> - ret <2 x i32> %vshr_n -} - -define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { -; CHECK: test_vshrq_n_u8 -; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - ret <16 x i8> %vshr_n -} - -define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { -; CHECK: test_vshrq_n_u16 -; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - ret <8 x i16> %vshr_n -} - -define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { -; CHECK: test_vshrq_n_u32 -; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3> - ret <4 x i32> %vshr_n -} - -define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) { -; CHECK: test_vshrq_n_u64 -; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3> - ret <2 x i64> %vshr_n -} - -define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsra_n_s8 -; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - %1 = add <8 x i8> %vsra_n, %a - ret <8 x i8> %1 -} - -define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsra_n_s16 -; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3> - %1 = add <4 x i16> %vsra_n, %a - ret <4 x i16> %1 -} - -define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsra_n_s32 -; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3> - %1 = add <2 x i32> %vsra_n, %a - ret <2 x i32> %1 -} - -define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsraq_n_s8 -; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - %1 = add <16 x i8> %vsra_n, %a - ret <16 x i8> %1 -} - -define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsraq_n_s16 -; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - %1 = add <8 x i16> %vsra_n, %a - ret <8 x i16> %1 -} - -define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsraq_n_s32 -; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3> - %1 = add <4 x i32> %vsra_n, %a - ret <4 x i32> %1 -} - -define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsraq_n_s64 -; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3> - %1 = add <2 x i64> %vsra_n, %a - ret <2 x i64> %1 -} - -define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsra_n_u8 -; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - %1 = add <8 x i8> %vsra_n, %a - ret <8 x i8> %1 -} - -define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsra_n_u16 -; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3> - %1 = add <4 x i16> %vsra_n, %a - ret <4 x i16> %1 -} - -define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsra_n_u32 -; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3> - %1 = add <2 x i32> %vsra_n, %a - ret <2 x i32> %1 -} - -define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsraq_n_u8 -; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> - %1 = add <16 x i8> %vsra_n, %a - ret <16 x i8> %1 -} - -define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsraq_n_u16 -; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - %1 = add <8 x i16> %vsra_n, %a - ret <8 x i16> %1 -} - -define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsraq_n_u32 -; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3> - %1 = add <4 x i32> %vsra_n, %a - ret <4 x i32> %1 -} - -define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsraq_n_u64 -; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3> - %1 = add <2 x i64> %vsra_n, %a - ret <2 x i64> %1 -} - -define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) { -; CHECK: test_vrshr_n_s8 -; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3) - ret <8 x i8> %vrshr_n -} - - -define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) { -; CHECK: test_vrshr_n_s16 -; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3) - ret <4 x i16> %vrshr_n -} - - -define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) { -; CHECK: test_vrshr_n_s32 -; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3) - ret <2 x i32> %vrshr_n -} - - -define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) { -; CHECK: test_vrshrq_n_s8 -; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3) - ret <16 x i8> %vrshr_n -} - - -define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) { -; CHECK: test_vrshrq_n_s16 -; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3) - ret <8 x i16> %vrshr_n -} - - -define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) { -; CHECK: test_vrshrq_n_s32 -; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3) - ret <4 x i32> %vrshr_n -} - - -define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) { -; CHECK: test_vrshrq_n_s64 -; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3) - ret <2 x i64> %vrshr_n -} - - -define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) { -; CHECK: test_vrshr_n_u8 -; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3) - ret <8 x i8> %vrshr_n -} - - -define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) { -; CHECK: test_vrshr_n_u16 -; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3) - ret <4 x i16> %vrshr_n -} - - -define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) { -; CHECK: test_vrshr_n_u32 -; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3) - ret <2 x i32> %vrshr_n -} - - -define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) { -; CHECK: test_vrshrq_n_u8 -; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3) - ret <16 x i8> %vrshr_n -} - - -define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) { -; CHECK: test_vrshrq_n_u16 -; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3) - ret <8 x i16> %vrshr_n -} - - -define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) { -; CHECK: test_vrshrq_n_u32 -; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3) - ret <4 x i32> %vrshr_n -} - - -define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) { -; CHECK: test_vrshrq_n_u64 -; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3) - ret <2 x i64> %vrshr_n -} - - -define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vrsra_n_s8 -; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3) - %vrsra_n = add <8 x i8> %1, %a - ret <8 x i8> %vrsra_n -} - -define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vrsra_n_s16 -; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3) - %vrsra_n = add <4 x i16> %1, %a - ret <4 x i16> %vrsra_n -} - -define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vrsra_n_s32 -; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3) - %vrsra_n = add <2 x i32> %1, %a - ret <2 x i32> %vrsra_n -} - -define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vrsraq_n_s8 -; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3) - %vrsra_n = add <16 x i8> %1, %a - ret <16 x i8> %vrsra_n -} - -define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsraq_n_s16 -; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3) - %vrsra_n = add <8 x i16> %1, %a - ret <8 x i16> %vrsra_n -} - -define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsraq_n_s32 -; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3) - %vrsra_n = add <4 x i32> %1, %a - ret <4 x i32> %vrsra_n -} - -define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsraq_n_s64 -; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3) - %vrsra_n = add <2 x i64> %1, %a - ret <2 x i64> %vrsra_n -} - -define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vrsra_n_u8 -; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3) - %vrsra_n = add <8 x i8> %1, %a - ret <8 x i8> %vrsra_n -} - -define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vrsra_n_u16 -; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3) - %vrsra_n = add <4 x i16> %1, %a - ret <4 x i16> %vrsra_n -} - -define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vrsra_n_u32 -; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3) - %vrsra_n = add <2 x i32> %1, %a - ret <2 x i32> %vrsra_n -} - -define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vrsraq_n_u8 -; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3) - %vrsra_n = add <16 x i8> %1, %a - ret <16 x i8> %vrsra_n -} - -define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vrsraq_n_u16 -; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3) - %vrsra_n = add <8 x i16> %1, %a - ret <8 x i16> %vrsra_n -} - -define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vrsraq_n_u32 -; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3) - %vrsra_n = add <4 x i32> %1, %a - ret <4 x i32> %vrsra_n -} - -define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vrsraq_n_u64 -; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3) - %vrsra_n = add <2 x i64> %1, %a - ret <2 x i64> %vrsra_n -} - -define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsri_n_s8 -; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsri_n -} - - -define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsri_n_s16 -; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) - ret <4 x i16> %vsri -} - - -define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsri_n_s32 -; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) - ret <2 x i32> %vsri -} - - -define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsriq_n_s8 -; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsri_n -} - - -define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsriq_n_s16 -; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) - ret <8 x i16> %vsri -} - - -define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsriq_n_s32 -; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) - ret <4 x i32> %vsri -} - - -define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsriq_n_s64 -; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) - ret <2 x i64> %vsri -} - -define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsri_n_p8 -; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsri_n -} - -define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsri_n_p16 -; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 - %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) - ret <4 x i16> %vsri -} - -define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsriq_n_p8 -; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsri_n -} - -define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsriq_n_p16 -; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 - %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) - ret <8 x i16> %vsri -} - -define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsli_n_s8 -; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsli_n -} - -define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsli_n_s16 -; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3) - ret <4 x i16> %vsli -} - -define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) { -; CHECK: test_vsli_n_s32 -; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3) - ret <2 x i32> %vsli -} - -define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsliq_n_s8 -; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsli_n -} - -define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsliq_n_s16 -; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) - ret <8 x i16> %vsli -} - -define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK: test_vsliq_n_s32 -; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3) - ret <4 x i32> %vsli -} - -define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK: test_vsliq_n_s64 -; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3) - ret <2 x i64> %vsli -} - -define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vsli_n_p8 -; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) - ret <8 x i8> %vsli_n -} - -define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) { -; CHECK: test_vsli_n_p16 -; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 - %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15) - ret <4 x i16> %vsli -} - -define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vsliq_n_p8 -; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) - ret <16 x i8> %vsli_n -} - -define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) { -; CHECK: test_vsliq_n_p16 -; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 - %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15) - ret <8 x i16> %vsli -} - -define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { -; CHECK: test_vqshl_n_s8 -; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) - ret <8 x i8> %vqshl -} - - -define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { -; CHECK: test_vqshl_n_s16 -; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>) - ret <4 x i16> %vqshl -} - - -define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { -; CHECK: test_vqshl_n_s32 -; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>) - ret <2 x i32> %vqshl -} - - -define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { -; CHECK: test_vqshlq_n_s8 -; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) - ret <16 x i8> %vqshl_n -} - - -define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { -; CHECK: test_vqshlq_n_s16 -; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) - ret <8 x i16> %vqshl -} - - -define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { -; CHECK: test_vqshlq_n_s32 -; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) - ret <4 x i32> %vqshl -} - - -define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { -; CHECK: test_vqshlq_n_s64 -; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>) - ret <2 x i64> %vqshl -} - - -define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { -; CHECK: test_vqshl_n_u8 -; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) - ret <8 x i8> %vqshl_n -} - - -define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { -; CHECK: test_vqshl_n_u16 -; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>) - ret <4 x i16> %vqshl -} - - -define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { -; CHECK: test_vqshl_n_u32 -; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>) - ret <2 x i32> %vqshl -} - - -define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { -; CHECK: test_vqshlq_n_u8 -; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>) - ret <16 x i8> %vqshl_n -} - - -define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { -; CHECK: test_vqshlq_n_u16 -; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>) - ret <8 x i16> %vqshl -} - - -define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { -; CHECK: test_vqshlq_n_u32 -; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>) - ret <4 x i32> %vqshl -} - - -define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { -; CHECK: test_vqshlq_n_u64 -; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>) - ret <2 x i64> %vqshl -} - -define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) { -; CHECK: test_vqshlu_n_s8 -; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3 - %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3) - ret <8 x i8> %vqshlu -} - - -define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) { -; CHECK: test_vqshlu_n_s16 -; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3 - %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3) - ret <4 x i16> %vqshlu -} - - -define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) { -; CHECK: test_vqshlu_n_s32 -; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3 - %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3) - ret <2 x i32> %vqshlu -} - - -define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) { -; CHECK: test_vqshluq_n_s8 -; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3 - %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3) - ret <16 x i8> %vqshlu -} - - -define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) { -; CHECK: test_vqshluq_n_s16 -; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3 - %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3) - ret <8 x i16> %vqshlu -} - - -define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) { -; CHECK: test_vqshluq_n_s32 -; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3 - %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3) - ret <4 x i32> %vqshlu -} - - -define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) { -; CHECK: test_vqshluq_n_s64 -; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3 - %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3) - ret <2 x i64> %vqshlu -} - - -define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vshrn_n_s16 -; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - ret <8 x i8> %vshrn_n -} - -define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vshrn_n_s32 -; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9> - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - ret <4 x i16> %vshrn_n -} - -define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vshrn_n_s64 -; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %1 = ashr <2 x i64> %a, <i64 19, i64 19> - %vshrn_n = trunc <2 x i64> %1 to <2 x i32> - ret <2 x i32> %vshrn_n -} - -define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) { -; CHECK: test_vshrn_n_u16 -; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - ret <8 x i8> %vshrn_n -} - -define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) { -; CHECK: test_vshrn_n_u32 -; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9> - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - ret <4 x i16> %vshrn_n -} - -define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) { -; CHECK: test_vshrn_n_u64 -; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %1 = lshr <2 x i64> %a, <i64 19, i64 19> - %vshrn_n = trunc <2 x i64> %1 to <2 x i32> - ret <2 x i32> %vshrn_n -} - -define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vshrn_high_n_s16 -; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - %2 = bitcast <8 x i8> %a to <1 x i64> - %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> - %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %4 -} - -define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vshrn_high_n_s32 -; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9> - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - %2 = bitcast <4 x i16> %a to <1 x i64> - %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> - %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %4 -} - -define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vshrn_high_n_s64 -; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %2 = ashr <2 x i64> %b, <i64 19, i64 19> - %vshrn_n = trunc <2 x i64> %2 to <2 x i32> - %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> - %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %4 -} - -define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vshrn_high_n_u16 -; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> - %vshrn_n = trunc <8 x i16> %1 to <8 x i8> - %2 = bitcast <8 x i8> %a to <1 x i64> - %3 = bitcast <8 x i8> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> - %4 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %4 -} - -define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vshrn_high_n_u32 -; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9> - %vshrn_n = trunc <4 x i32> %1 to <4 x i16> - %2 = bitcast <4 x i16> %a to <1 x i64> - %3 = bitcast <4 x i16> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1> - %4 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %4 -} - -define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vshrn_high_n_u64 -; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %2 = lshr <2 x i64> %b, <i64 19, i64 19> - %vshrn_n = trunc <2 x i64> %2 to <2 x i32> - %3 = bitcast <2 x i32> %vshrn_n to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1> - %4 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %4 -} - -define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) { -; CHECK: test_vqshrun_n_s16 -; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqshrun -} - - -define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) { -; CHECK: test_vqshrun_n_s32 -; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqshrun -} - -define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) { -; CHECK: test_vqshrun_n_s64 -; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqshrun -} - -define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqshrun_high_n_s16 -; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqshrun_high_n_s32 -; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqshrun_high_n_s64 -; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vrshrn_n_s16 -; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vrshrn -} - - -define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vrshrn_n_s32 -; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vrshrn -} - - -define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vrshrn_n_s64 -; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vrshrn -} - -define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vrshrn_high_n_s16 -; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vrshrn_high_n_s32 -; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vrshrn_high_n_s64 -; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) { -; CHECK: test_vqrshrun_n_s16 -; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqrshrun -} - -define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) { -; CHECK: test_vqrshrun_n_s32 -; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqrshrun -} - -define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) { -; CHECK: test_vqrshrun_n_s64 -; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqrshrun -} - -define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqrshrun_high_n_s16 -; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqrshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqrshrun_high_n_s32 -; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqrshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqrshrun_high_n_s64 -; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqrshrun to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vqshrn_n_s16 -; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqshrn -} - - -define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vqshrn_n_s32 -; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqshrn -} - - -define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vqshrn_n_s64 -; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqshrn -} - - -define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) { -; CHECK: test_vqshrn_n_u16 -; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqshrn -} - - -define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) { -; CHECK: test_vqshrn_n_u32 -; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqshrn -} - - -define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) { -; CHECK: test_vqshrn_n_u64 -; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqshrn -} - - -define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqshrn_high_n_s16 -; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqshrn_high_n_s32 -; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqshrn_high_n_s64 -; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqshrn_high_n_u16 -; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqshrn_high_n_u32 -; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqshrn_high_n_u64 -; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) { -; CHECK: test_vqrshrn_n_s16 -; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqrshrn -} - - -define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) { -; CHECK: test_vqrshrn_n_s32 -; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqrshrn -} - - -define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) { -; CHECK: test_vqrshrn_n_s64 -; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqrshrn -} - - -define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) { -; CHECK: test_vqrshrn_n_u16 -; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3) - ret <8 x i8> %vqrshrn -} - - -define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) { -; CHECK: test_vqrshrn_n_u32 -; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9) - ret <4 x i16> %vqrshrn -} - - -define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) { -; CHECK: test_vqrshrn_n_u64 -; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19 - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19) - ret <2 x i32> %vqrshrn -} - - -define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqrshrn_high_n_s16 -; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqrshrn_high_n_s32 -; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqrshrn_high_n_s64 -; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) { -; CHECK: test_vqrshrn_high_n_u16 -; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3 - %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3) - %1 = bitcast <8 x i8> %a to <1 x i64> - %2 = bitcast <8 x i8> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <16 x i8> - ret <16 x i8> %3 -} - -define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) { -; CHECK: test_vqrshrn_high_n_u32 -; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9 - %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9) - %1 = bitcast <4 x i16> %a to <1 x i64> - %2 = bitcast <4 x i16> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <8 x i16> - ret <8 x i16> %3 -} - -define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) { -; CHECK: test_vqrshrn_high_n_u64 -; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19 - %1 = bitcast <2 x i32> %a to <1 x i64> - %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19) - %2 = bitcast <2 x i32> %vqrshrn to <1 x i64> - %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1> - %3 = bitcast <2 x i64> %shuffle.i to <4 x i32> - ret <4 x i32> %3 -} - -define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) { -; CHECK: test_vcvt_n_f32_s32 -; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31) - ret <2 x float> %vcvt -} - -define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) { -; CHECK: test_vcvtq_n_f32_s32 -; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31) - ret <4 x float> %vcvt -} - -define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) { -; CHECK: test_vcvtq_n_f64_s64 -; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50) - ret <2 x double> %vcvt -} - -define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) { -; CHECK: test_vcvt_n_f32_u32 -; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31) - ret <2 x float> %vcvt -} - -define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) { -; CHECK: test_vcvtq_n_f32_u32 -; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31) - ret <4 x float> %vcvt -} - -define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) { -; CHECK: test_vcvtq_n_f64_u64 -; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50) - ret <2 x double> %vcvt -} - -define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) { -; CHECK: test_vcvt_n_s32_f32 -; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31) - ret <2 x i32> %vcvt -} - -define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) { -; CHECK: test_vcvtq_n_s32_f32 -; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31) - ret <4 x i32> %vcvt -} - -define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) { -; CHECK: test_vcvtq_n_s64_f64 -; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50) - ret <2 x i64> %vcvt -} - -define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) { -; CHECK: test_vcvt_n_u32_f32 -; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 - %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31) - ret <2 x i32> %vcvt -} - -define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) { -; CHECK: test_vcvt_n_u32_f32 -; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 - %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31) - ret <4 x i32> %vcvt -} - -define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) { -; CHECK: test_vcvtq_n_u64_f64 -; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50 - %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50) - ret <2 x i64> %vcvt -} - -declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32) - -declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32) - -declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32) - -declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32) - -declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32) - -declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) - -declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) - -declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) - -declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) - -declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) - -declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) - -declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) - -declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) - -declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) - -declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) - -declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) - -declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) - -declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32) - -declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32) - -declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32) - -declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32) - -declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) - -declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) - -declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32) - -declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) - -declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) - -declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32) - -declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) - -declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) - -declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32) - -declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) - -declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) - -declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) - -define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_n_s64_f64 -; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) - ret <1 x i64> %1 -} - -define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) { -; CHECK-LABEL: test_vcvt_n_u64_f64 -; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) - ret <1 x i64> %1 -} - -define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_n_f64_s64 -; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) - ret <1 x double> %1 -} - -define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) { -; CHECK-LABEL: test_vcvt_n_f64_u64 -; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 - %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) - ret <1 x double> %1 -} - -declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) -declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) -declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) -declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32) diff --git a/test/CodeGen/AArch64/neon-simd-tbl.ll b/test/CodeGen/AArch64/neon-simd-tbl.ll deleted file mode 100644 index 53924923f7..0000000000 --- a/test/CodeGen/AArch64/neon-simd-tbl.ll +++ /dev/null @@ -1,829 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; This test is just intrinsic pumping. arm64 has its own tbl/tbx tests. - -declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8>, <8 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) - -declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8>, <16 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) - -define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtbl1_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl11.i -} - -define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) { -; CHECK: test_vqtbl1_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) - ret <8 x i8> %vtbl1.i -} - -define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl2_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 - %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl17.i -} - -define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl2_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl2.i -} - -define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl3_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) - ret <8 x i8> %vtbl212.i -} - -define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl3_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl3.i -} - -define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl4_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) - ret <8 x i8> %vtbl216.i -} - -define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl4_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl4.i -} - -define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vqtbl1q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %vtbl1.i -} - -define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl2q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl2.i -} - -define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl3q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl3.i -} - -define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl4q_s8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl4.i -} - -define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vtbx1_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx2_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) - ret <8 x i8> %vtbx17.i -} - -define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx3_s8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24> - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx4_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) - ret <8 x i8> %vtbx216.i -} - -define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK: test_vqtbx1_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) - ret <8 x i8> %vtbx1.i -} - -define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx2_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx2.i -} - -define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx3_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx3.i -} - -define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx4_s8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx4.i -} - -define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vqtbx1q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %vtbx1.i -} - -define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx2q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx2.i -} - -define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx3q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx3.i -} - -define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx4q_s8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx4.i -} - -define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtbl1_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl11.i -} - -define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) { -; CHECK: test_vqtbl1_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) - ret <8 x i8> %vtbl1.i -} - -define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl2_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 - %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl17.i -} - -define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl2_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl2.i -} - -define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl3_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) - ret <8 x i8> %vtbl212.i -} - -define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl3_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl3.i -} - -define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl4_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) - ret <8 x i8> %vtbl216.i -} - -define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl4_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl4.i -} - -define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vqtbl1q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %vtbl1.i -} - -define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl2q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl2.i -} - -define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl3q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl3.i -} - -define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl4q_u8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl4.i -} - -define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vtbx1_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx2_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) - ret <8 x i8> %vtbx17.i -} - -define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx3_u8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24> - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx4_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) - ret <8 x i8> %vtbx216.i -} - -define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK: test_vqtbx1_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) - ret <8 x i8> %vtbx1.i -} - -define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx2_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx2.i -} - -define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx3_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx3.i -} - -define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx4_u8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx4.i -} - -define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vqtbx1q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %vtbx1.i -} - -define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx2q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx2.i -} - -define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx3q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx3.i -} - -define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx4q_u8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx4.i -} - -define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) { -; CHECK: test_vtbl1_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl11.i -} - -define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) { -; CHECK: test_vqtbl1_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %a, <8 x i8> %b) - ret <8 x i8> %vtbl1.i -} - -define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl2_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1 - %vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %b) - ret <8 x i8> %vtbl17.i -} - -define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl2_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl2.i -} - -define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl3_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b) - ret <8 x i8> %vtbl212.i -} - -define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl3_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl3.i -} - -define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vtbl4_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3 - %vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b) - ret <8 x i8> %vtbl216.i -} - -define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) { -; CHECK: test_vqtbl4_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b) - ret <8 x i8> %vtbl4.i -} - -define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) { -; CHECK: test_vqtbl1q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %vtbl1.i -} - -define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl2q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1 - %vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl2.i -} - -define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl3q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2 - %vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl3.i -} - -define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) { -; CHECK: test_vqtbl4q_p8: -; CHECK: tbl {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0 - %__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1 - %__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2 - %__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3 - %vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b) - ret <16 x i8> %vtbl4.i -} - -define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { -; CHECK: test_vtbx1_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx2_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1 - %vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c) - ret <8 x i8> %vtbx17.i -} - -define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx3_p8: -; CHECK: tbl {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2 - %vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c) - %0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24> - %1 = sext <8 x i1> %0 to <8 x i8> - %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i) - ret <8 x i8> %vbsl.i -} - -define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vtbx4_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3 - %vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c) - ret <8 x i8> %vtbx216.i -} - -define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) { -; CHECK: test_vqtbx1_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) - ret <8 x i8> %vtbx1.i -} - -define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx2_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx2.i -} - -define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx3_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx3.i -} - -define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) { -; CHECK: test_vqtbx4_p8: -; CHECK: tbx {{v[0-9]+}}.8b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.8b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c) - ret <8 x i8> %vtbx4.i -} - -define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -; CHECK: test_vqtbx1q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) - ret <16 x i8> %vtbx1.i -} - -define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx2q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1 - %vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx2.i -} - -define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx3q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2 - %vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx3.i -} - -define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) { -; CHECK: test_vqtbx4q_p8: -; CHECK: tbx {{v[0-9]+}}.16b, { {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b }, {{v[0-9]+}}.16b -entry: - %__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0 - %__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1 - %__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2 - %__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3 - %vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c) - ret <16 x i8> %vtbx4.i -} - diff --git a/test/CodeGen/AArch64/neon-simd-vget.ll b/test/CodeGen/AArch64/neon-simd-vget.ll deleted file mode 100644 index 93d5e2ad34..0000000000 --- a/test/CodeGen/AArch64/neon-simd-vget.ll +++ /dev/null @@ -1,226 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s -; arm64 has its own copy: aarch64-neon-simd-vget.ll - -define <8 x i8> @test_vget_high_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_high_s8: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_high_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_s16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_high_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_high_s32: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_high_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_high_s64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> - ret <1 x i64> %shuffle.i -} - -define <8 x i8> @test_vget_high_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_high_u8: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_high_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_u16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_high_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_high_u32: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_high_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_high_u64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> - ret <1 x i64> %shuffle.i -} - -define <1 x i64> @test_vget_high_p64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_high_p64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1> - ret <1 x i64> %shuffle.i -} - -define <4 x i16> @test_vget_high_f16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_f16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - ret <4 x i16> %shuffle.i -} - -define <2 x float> @test_vget_high_f32(<4 x float> %a) { -; CHECK-LABEL: test_vget_high_f32: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3> - ret <2 x float> %shuffle.i -} - -define <8 x i8> @test_vget_high_p8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_high_p8: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_high_p16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_high_p16: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> - ret <4 x i16> %shuffle.i -} - -define <1 x double> @test_vget_high_f64(<2 x double> %a) { -; CHECK-LABEL: test_vget_high_f64: -; CHECK: dup d0, {{v[0-9]+}}.d[1] -entry: - %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1> - ret <1 x double> %shuffle.i -} - -define <8 x i8> @test_vget_low_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_low_s8: -; CHECK: ret -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_low_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_s16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_low_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_low_s32: -; CHECK: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_low_s64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_low_s64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer - ret <1 x i64> %shuffle.i -} - -define <8 x i8> @test_vget_low_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_low_u8: -; CHECK: ret -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_low_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_u16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i16> %shuffle.i -} - -define <2 x i32> @test_vget_low_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vget_low_u32: -; CHECK: ret -entry: - %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - ret <2 x i32> %shuffle.i -} - -define <1 x i64> @test_vget_low_u64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_low_u64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer - ret <1 x i64> %shuffle.i -} - -define <1 x i64> @test_vget_low_p64(<2 x i64> %a) { -; CHECK-LABEL: test_vget_low_p64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer - ret <1 x i64> %shuffle.i -} - -define <4 x i16> @test_vget_low_f16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_f16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i16> %shuffle.i -} - -define <2 x float> @test_vget_low_f32(<4 x float> %a) { -; CHECK-LABEL: test_vget_low_f32: -; CHECK: ret -entry: - %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> - ret <2 x float> %shuffle.i -} - -define <8 x i8> @test_vget_low_p8(<16 x i8> %a) { -; CHECK-LABEL: test_vget_low_p8: -; CHECK: ret -entry: - %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i8> %shuffle.i -} - -define <4 x i16> @test_vget_low_p16(<8 x i16> %a) { -; CHECK-LABEL: test_vget_low_p16: -; CHECK: ret -entry: - %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - ret <4 x i16> %shuffle.i -} - -define <1 x double> @test_vget_low_f64(<2 x double> %a) { -; CHECK-LABEL: test_vget_low_f64: -; CHECK: ret -entry: - %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer - ret <1 x double> %shuffle.i -} diff --git a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll b/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll deleted file mode 100644 index 142b0a8bd5..0000000000 --- a/test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s -; not relevant for arm64: <1 x iN> isn't legal - -; This file tests the spill of FPR8/FPR16. The volatile loads/stores force the -; allocator to keep the value live until it's needed. - -%bigtype_v1i8 = type [20 x <1 x i8>] - -define void @spill_fpr8(%bigtype_v1i8* %addr) { -; CHECK-LABEL: spill_fpr8: -; CHECK: 1-byte Folded Spill -; CHECK: 1-byte Folded Reload - %val1 = load volatile %bigtype_v1i8* %addr - %val2 = load volatile %bigtype_v1i8* %addr - store volatile %bigtype_v1i8 %val1, %bigtype_v1i8* %addr - store volatile %bigtype_v1i8 %val2, %bigtype_v1i8* %addr - ret void -} - -%bigtype_v1i16 = type [20 x <1 x i16>] - -define void @spill_fpr16(%bigtype_v1i16* %addr) { -; CHECK-LABEL: spill_fpr16: -; CHECK: 2-byte Folded Spill -; CHECK: 2-byte Folded Reload - %val1 = load volatile %bigtype_v1i16* %addr - %val2 = load volatile %bigtype_v1i16* %addr - store volatile %bigtype_v1i16 %val1, %bigtype_v1i16* %addr - store volatile %bigtype_v1i16 %val2, %bigtype_v1i16* %addr - ret void -} diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll index dbaccacdf5..f15cd24e5d 100644 --- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll +++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s ; A vector TruncStore can not be selected. diff --git a/test/CodeGen/AArch64/neon-v1i1-setcc.ll b/test/CodeGen/AArch64/neon-v1i1-setcc.ll deleted file mode 100644 index 114e44ac8b..0000000000 --- a/test/CodeGen/AArch64/neon-v1i1-setcc.ll +++ /dev/null @@ -1,69 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; arm64 has a separate copy as aarch64-neon-v1i1-setcc.ll - -; This file test the DAG node like "v1i1 SETCC v1i64, v1i64". As the v1i1 type -; is illegal in AArch64 backend, the legalizer tries to scalarize this node. -; As the v1i64 operands of SETCC are legal types, they will not be scalarized. -; Currently the type legalizer will have an assertion failure as it assumes all -; operands of SETCC have been legalized. -; FIXME: If the algorithm of type scalarization is improved and can legaize -; "v1i1 SETCC" correctly, these test cases are not needed. - -define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) { -; CHECK-LABEL: test_sext_extr_cmp_0: -; CHECK: cmge d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = icmp sge <1 x i64> %v1, %v2 - %2 = extractelement <1 x i1> %1, i32 0 - %vget_lane = sext i1 %2 to i64 - ret i64 %vget_lane -} - -define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) { -; CHECK-LABEL: test_sext_extr_cmp_1: -; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} - %1 = fcmp oeq <1 x double> %v1, %v2 - %2 = extractelement <1 x i1> %1, i32 0 - %vget_lane = sext i1 %2 to i64 - ret i64 %vget_lane -} - -define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_select_v1i1_0: -; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %1 = icmp eq <1 x i64> %v1, %v2 - %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3 - ret <1 x i64> %res -} - -define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) { -; CHECK-LABEL: test_select_v1i1_1: -; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %1 = fcmp oeq <1 x double> %v1, %v2 - %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3 - ret <1 x i64> %res -} - -define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) { -; CHECK-LABEL: test_select_v1i1_2: -; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} -; CHECK: bsl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b - %1 = icmp eq <1 x i64> %v1, %v2 - %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3 - ret <1 x double> %res -} - -define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) { -; CHECK-LABEL: test_br_extr_cmp: -; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}} - %1 = icmp eq <1 x i64> %v1, %v2 - %2 = extractelement <1 x i1> %1, i32 0 - br i1 %2, label %if.end, label %if.then - -if.then: - ret i32 0; - -if.end: - ret i32 1; -} diff --git a/test/CodeGen/AArch64/neon-vector-list-spill.ll b/test/CodeGen/AArch64/neon-vector-list-spill.ll deleted file mode 100644 index 5df0aacb38..0000000000 --- a/test/CodeGen/AArch64/neon-vector-list-spill.ll +++ /dev/null @@ -1,176 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -; arm64 has separate copy as aarch64-neon-vector-list-spill.ll - -; FIXME: We should not generate ld/st for such register spill/fill, because the -; test case seems very simple and the register pressure is not high. If the -; spill/fill algorithm is optimized, this test case may not be triggered. And -; then we can delete it. -define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.DPairReg: -; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0 - %res = extractelement <2 x i32> %vld.extract, i32 1 - ret i32 %res -} - -define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.DTripleReg: -; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 - %res = extractelement <4 x i16> %vld.extract, i32 1 - ret i16 %res -} - -define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.DQuadReg: -; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 - %res = extractelement <4 x i16> %vld.extract, i32 0 - ret i16 %res -} - -define i32 @spill.QPairReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.QPairReg: -; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 - %res = extractelement <4 x i32> %vld.extract, i32 1 - ret i32 %res -} - -define float @spill.QTripleReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.QTripleReg: -; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 - %res = extractelement <4 x float> %vld3.extract, i32 1 - ret float %res -} - -define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) { -; CHECK-LABEL: spill.QQuadReg: -; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -entry: - %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4) - %cmp = icmp eq i32 %arg2, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: - tail call void @foo() - br label %if.end - -if.end: - %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0 - %res = extractelement <16 x i8> %vld.extract, i32 1 - ret i8 %res -} - -declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) - -declare void @foo() - -; FIXME: We should not generate ld/st for such register spill/fill, because the -; test case seems very simple and the register pressure is not high. If the -; spill/fill algorithm is optimized, this test case may not be triggered. And -; then we can delete it. -; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo -define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { - tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) - tail call void @foo() - %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1> - %1 = bitcast <2 x i64> %sv to <8 x i16> - %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> - %3 = mul <8 x i16> %2, %2 - ret <8 x i16> %3 -} - -; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo -define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { - tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) - tail call void @foo() - %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1> - %1 = bitcast <2 x i64> %sv to <8 x i16> - %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> - %3 = mul <8 x i16> %2, %2 - ret <8 x i16> %3 -} - -; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo -define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) { - tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8) - tail call void @foo() - %sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1> - %1 = bitcast <2 x i64> %sv to <8 x i16> - %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> - %3 = mul <8 x i16> %2, %2 - ret <8 x i16> %3 -} - -declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index 399d1c1123..d2697910e6 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -1,5 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s ; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll index 20e1b30d74..de29b1baa8 100644 --- a/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/test/CodeGen/AArch64/ragreedy-csr.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -regalloc=greedy -regalloc-csr-first-time-cost=15 | FileCheck %s ; This testing case is reduced from 197.parser prune_match function. diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll index 7f3ba7276b..58e0542d84 100644 --- a/test/CodeGen/AArch64/regress-bitcast-formals.ll +++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-apple-ios7.0 -verify-machineinstrs < %s | FileCheck %s ; CallingConv.td requires a bitcast for vector arguments. Make sure we're diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll index a7352d6815..313cdb1bf0 100644 --- a/test/CodeGen/AArch64/regress-f128csel-flags.ll +++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s ; We used to not mark NZCV as being used in the continuation basic-block diff --git a/test/CodeGen/AArch64/regress-fp128-livein.ll b/test/CodeGen/AArch64/regress-fp128-livein.ll index 5c2142aeee..141c0d862f 100644 --- a/test/CodeGen/AArch64/regress-fp128-livein.ll +++ b/test/CodeGen/AArch64/regress-fp128-livein.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s ; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB, diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll index 4a6ad55b67..e32ac8458f 100644 --- a/test/CodeGen/AArch64/regress-tail-livereg.ll +++ b/test/CodeGen/AArch64/regress-tail-livereg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s @var = global void()* zeroinitializer diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll index 1f8ad4503c..55c3bcdcdd 100644 --- a/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s --check-prefix CHECK-AARCH64 ; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix CHECK-ARM64 ; When generating DAG selection tables, TableGen used to only flag an @@ -13,7 +12,6 @@ declare void @bar(i8*) define i64 @test_chains() { -; CHECK-AARCH64-LABEL: test_chains: ; CHECK-ARM64-LABEL: test_chains: %locvar = alloca i8 @@ -26,10 +24,6 @@ define i64 @test_chains() { %inc.3 = add i64 %inc.2, 1 %inc.4 = trunc i64 %inc.3 to i8 store i8 %inc.4, i8* %locvar -; CHECK-AARCH64: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]] -; CHECK-AARCH64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 -; CHECK-AARCH64: strb {{w[0-9]+}}, [sp, [[LOCADDR]]] -; CHECK-AARCH64: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]] ; CHECK-ARM64: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]] ; CHECK-ARM64: add {{w[0-9]+}}, {{w[0-9]+}}, #1 @@ -39,6 +33,5 @@ define i64 @test_chains() { %ret.1 = load i8* %locvar %ret.2 = zext i8 %ret.1 to i64 ret i64 %ret.2 -; CHECK-AARCH64: ret ; CHECK-ARM64: ret } diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll index cfd94e1503..cc42b0c9df 100644 --- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll +++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s ; RUN: llc -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s @var = global i32 0 diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll deleted file mode 100644 index 8620ce14e9..0000000000 --- a/test/CodeGen/AArch64/regress-wzr-allocatable.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 - -; Skipping for arm64, there's no evidence it would ever have hit the same -; problem. - -; When WZR wasn't marked as reserved, this function tried to allocate -; it at O0 and then generated an internal fault (mostly incidentally) -; when it discovered that it was already in use for a multiplication. - -; I'm not really convinced this is a good test since it could easily -; stop testing what it does now with no-one any the wiser. However, I -; can't think of a better way to force the allocator to use WZR -; specifically. - -define void @test() nounwind { -entry: - br label %for.cond - -for.cond: ; preds = %for.body, %entry - br i1 undef, label %for.body, label %for.end - -for.body: ; preds = %for.cond - br label %for.cond - -for.end: ; preds = %for.cond - br label %for.cond6 - -for.cond6: ; preds = %for.body9, %for.end - br i1 undef, label %for.body9, label %while.cond30 - -for.body9: ; preds = %for.cond6 - store i16 0, i16* undef, align 2 - %0 = load i32* undef, align 4 - %1 = load i32* undef, align 4 - %mul15 = mul i32 %0, %1 - %add16 = add i32 %mul15, 32768 - %div = udiv i32 %add16, 65535 - %add17 = add i32 %div, 1 - store i32 %add17, i32* undef, align 4 - br label %for.cond6 - -while.cond30: ; preds = %for.cond6 - ret void -} diff --git a/test/CodeGen/AArch64/returnaddr.ll b/test/CodeGen/AArch64/returnaddr.ll index 3f7edcbaa8..b136f044ca 100644 --- a/test/CodeGen/AArch64/returnaddr.ll +++ b/test/CodeGen/AArch64/returnaddr.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i8* @rt0(i32 %x) nounwind readnone { diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll index 21c2688ca7..f06c8ecd28 100644 --- a/test/CodeGen/AArch64/setcc-takes-i32.ll +++ b/test/CodeGen/AArch64/setcc-takes-i32.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -o - %s | FileCheck %s ; Most important point here is that the promotion of the i1 works diff --git a/test/CodeGen/AArch64/sext_inreg.ll b/test/CodeGen/AArch64/sext_inreg.ll deleted file mode 100644 index 7873c6462d..0000000000 --- a/test/CodeGen/AArch64/sext_inreg.ll +++ /dev/null @@ -1,202 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - -; arm64: This test contains much that is unique and valuable. Unfortunately the -; bits that are unique aren't valuable and the bits that are valuable aren't -; unique. (weird ABI types vs bog-standard shifting & extensions). - -; For formal arguments, we have the following vector type promotion, -; v2i8 is promoted to v2i32(f64) -; v2i16 is promoted to v2i32(f64) -; v4i8 is promoted to v4i16(f64) -; v8i1 is promoted to v8i16(f128) - -define <2 x i8> @test_sext_inreg_v2i8i16(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i16 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %1 = sext <2 x i8> %v1 to <2 x i16> - %2 = sext <2 x i8> %v2 to <2 x i16> - %3 = shufflevector <2 x i16> %1, <2 x i16> %2, <2 x i32> <i32 0, i32 2> - %4 = trunc <2 x i16> %3 to <2 x i8> - ret <2 x i8> %4 -} - -define <2 x i8> @test_sext_inreg_v2i8i16_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i16_2 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %a1 = shl <2 x i32> %v1, <i32 24, i32 24> - %a2 = ashr <2 x i32> %a1, <i32 24, i32 24> - %b1 = shl <2 x i32> %v2, <i32 24, i32 24> - %b2 = ashr <2 x i32> %b1, <i32 24, i32 24> - %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2> - %d = trunc <2 x i32> %c to <2 x i8> - ret <2 x i8> %d -} - -define <2 x i8> @test_sext_inreg_v2i8i32(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i32 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %1 = sext <2 x i8> %v1 to <2 x i32> - %2 = sext <2 x i8> %v2 to <2 x i32> - %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2> - %4 = trunc <2 x i32> %3 to <2 x i8> - ret <2 x i8> %4 -} - -define <2 x i8> @test_sext_inreg_v2i8i64(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i8i64 -; CHECK: ushll v1.2d, v1.2s, #0 -; CHECK: ushll v0.2d, v0.2s, #0 -; CHECK: shl v0.2d, v0.2d, #56 -; CHECK: sshr v0.2d, v0.2d, #56 -; CHECK: shl v1.2d, v1.2d, #56 -; CHECK: sshr v1.2d, v1.2d, #56 - %1 = sext <2 x i8> %v1 to <2 x i64> - %2 = sext <2 x i8> %v2 to <2 x i64> - %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> - %4 = trunc <2 x i64> %3 to <2 x i8> - ret <2 x i8> %4 -} - -define <4 x i8> @test_sext_inreg_v4i8i16(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i8i16 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %1 = sext <4 x i8> %v1 to <4 x i16> - %2 = sext <4 x i8> %v2 to <4 x i16> - %3 = shufflevector <4 x i16> %1, <4 x i16> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> - %4 = trunc <4 x i16> %3 to <4 x i8> - ret <4 x i8> %4 -} - -define <4 x i8> @test_sext_inreg_v4i8i16_2(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i8i16_2 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h - %a1 = shl <4 x i16> %v1, <i16 8, i16 8, i16 8, i16 8> - %a2 = ashr <4 x i16> %a1, <i16 8, i16 8, i16 8, i16 8> - %b1 = shl <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8> - %b2 = ashr <4 x i16> %b1, <i16 8, i16 8, i16 8, i16 8> - %c = shufflevector <4 x i16> %a2, <4 x i16> %b2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> - %d = trunc <4 x i16> %c to <4 x i8> - ret <4 x i8> %d -} - -define <4 x i8> @test_sext_inreg_v4i8i32(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i8i32 -; CHECK: ushll v1.4s, v1.4h, #0 -; CHECK: ushll v0.4s, v0.4h, #0 -; CHECK: shl v0.4s, v0.4s, #24 -; CHECK: sshr v0.4s, v0.4s, #24 -; CHECK: shl v1.4s, v1.4s, #24 -; CHECK: sshr v1.4s, v1.4s, #24 - %1 = sext <4 x i8> %v1 to <4 x i32> - %2 = sext <4 x i8> %v2 to <4 x i32> - %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> - %4 = trunc <4 x i32> %3 to <4 x i8> - ret <4 x i8> %4 -} - -define <8 x i8> @test_sext_inreg_v8i8i16(<8 x i8> %v1, <8 x i8> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v8i8i16 -; CHECK: sshll v0.8h, v0.8b, #0 -; CHECK: sshll v1.8h, v1.8b, #0 - %1 = sext <8 x i8> %v1 to <8 x i16> - %2 = sext <8 x i8> %v2 to <8 x i16> - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> - %4 = trunc <8 x i16> %3 to <8 x i8> - ret <8 x i8> %4 -} - -define <8 x i1> @test_sext_inreg_v8i1i16(<8 x i1> %v1, <8 x i1> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v8i1i16 -; CHECK: ushll v1.8h, v1.8b, #0 -; CHECK: ushll v0.8h, v0.8b, #0 -; CHECK: shl v0.8h, v0.8h, #15 -; CHECK: sshr v0.8h, v0.8h, #15 -; CHECK: shl v1.8h, v1.8h, #15 -; CHECK: sshr v1.8h, v1.8h, #15 - %1 = sext <8 x i1> %v1 to <8 x i16> - %2 = sext <8 x i1> %v2 to <8 x i16> - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> - %4 = trunc <8 x i16> %3 to <8 x i1> - ret <8 x i1> %4 -} - -define <2 x i16> @test_sext_inreg_v2i16i32(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i16i32 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s - %1 = sext <2 x i16> %v1 to <2 x i32> - %2 = sext <2 x i16> %v2 to <2 x i32> - %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2> - %4 = trunc <2 x i32> %3 to <2 x i16> - ret <2 x i16> %4 -} - -define <2 x i16> @test_sext_inreg_v2i16i32_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i16i32_2 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s - %a1 = shl <2 x i32> %v1, <i32 16, i32 16> - %a2 = ashr <2 x i32> %a1, <i32 16, i32 16> - %b1 = shl <2 x i32> %v2, <i32 16, i32 16> - %b2 = ashr <2 x i32> %b1, <i32 16, i32 16> - %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2> - %d = trunc <2 x i32> %c to <2 x i16> - ret <2 x i16> %d -} - -define <2 x i16> @test_sext_inreg_v2i16i64(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i16i64 -; CHECK: ushll v1.2d, v1.2s, #0 -; CHECK: ushll v0.2d, v0.2s, #0 -; CHECK: shl v0.2d, v0.2d, #48 -; CHECK: sshr v0.2d, v0.2d, #48 -; CHECK: shl v1.2d, v1.2d, #48 -; CHECK: sshr v1.2d, v1.2d, #48 - %1 = sext <2 x i16> %v1 to <2 x i64> - %2 = sext <2 x i16> %v2 to <2 x i64> - %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> - %4 = trunc <2 x i64> %3 to <2 x i16> - ret <2 x i16> %4 -} - -define <4 x i16> @test_sext_inreg_v4i16i32(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v4i16i32 -; CHECK: sshll v0.4s, v0.4h, #0 -; CHECK: sshll v1.4s, v1.4h, #0 - %1 = sext <4 x i16> %v1 to <4 x i32> - %2 = sext <4 x i16> %v2 to <4 x i32> - %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6> - %4 = trunc <4 x i32> %3 to <4 x i16> - ret <4 x i16> %4 -} - -define <2 x i32> @test_sext_inreg_v2i32i64(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone { -; CHECK-LABEL: test_sext_inreg_v2i32i64 -; CHECK: sshll v0.2d, v0.2s, #0 -; CHECK: sshll v1.2d, v1.2s, #0 - %1 = sext <2 x i32> %v1 to <2 x i64> - %2 = sext <2 x i32> %v2 to <2 x i64> - %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> - %4 = trunc <2 x i64> %3 to <2 x i32> - ret <2 x i32> %4 -} - diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll index a08f8cbd70..85245718af 100644 --- a/test/CodeGen/AArch64/sibling-call.ll +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -arm64-load-store-opt=0 | FileCheck %s declare void @callee_stack0() diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll index 1498eb5362..5ba1d8d0a8 100644 --- a/test/CodeGen/AArch64/sincos-expansion.ll +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s define float @test_sincos_f32(float %f) { diff --git a/test/CodeGen/AArch64/sincospow-vector-expansion.ll b/test/CodeGen/AArch64/sincospow-vector-expansion.ll index baa73a3c71..38c8bb2d5e 100644 --- a/test/CodeGen/AArch64/sincospow-vector-expansion.ll +++ b/test/CodeGen/AArch64/sincospow-vector-expansion.ll @@ -1,4 +1,3 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s ; RUN: llc -o - %s -verify-machineinstrs -mtriple=arm64-linux-gnu -mattr=+neon | FileCheck %s diff --git a/test/CodeGen/AArch64/stackpointer.ll b/test/CodeGen/AArch64/stackpointer.ll deleted file mode 100644 index 1f20692c8c..0000000000 --- a/test/CodeGen/AArch64/stackpointer.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnueabi | FileCheck %s -; arm64 has a separate copy of this test - -define i64 @get_stack() nounwind { -entry: -; CHECK-LABEL: get_stack: -; CHECK: mov x0, sp - %sp = call i64 @llvm.read_register.i64(metadata !0) - ret i64 %sp -} - -define void @set_stack(i64 %val) nounwind { -entry: -; CHECK-LABEL: set_stack: -; CHECK: mov sp, x0 - call void @llvm.write_register.i64(metadata !0, i64 %val) - ret void -} - -declare i64 @llvm.read_register.i64(metadata) nounwind -declare void @llvm.write_register.i64(metadata, i64) nounwind - -; register unsigned long current_stack_pointer asm("sp"); -; CHECK-NOT: .asciz "sp" -!0 = metadata !{metadata !"sp\00"} diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index da05848dcc..b3841fac68 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -tailcallopt | FileCheck --check-prefix=CHECK-ARM64 %s declare fastcc void @callee_stack0() diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll deleted file mode 100644 index 80ed2181c4..0000000000 --- a/test/CodeGen/AArch64/tls-dynamic-together.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; arm64 has its own copy of this file, copied during implementation. - -; If the .tlsdesccall and blr parts are emitted completely separately (even with -; glue) then LLVM will separate them quite happily (with a spill at O0, hence -; the option). This is definitely wrong, so we make sure they are emitted -; together. - -@general_dynamic_var = external thread_local global i32 - -define i32 @test_generaldynamic() { -; CHECK-LABEL: test_generaldynamic: - - %val = load i32* @general_dynamic_var - ret i32 %val - -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr {{x[0-9]+}} -} diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll deleted file mode 100644 index 0fb84c823b..0000000000 --- a/test/CodeGen/AArch64/tls-dynamics.ll +++ /dev/null @@ -1,121 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s -; arm64 has its own tls-dynamics.ll, copied from this one during implementation. -@general_dynamic_var = external thread_local global i32 - -define i32 @test_generaldynamic() { -; CHECK-LABEL: test_generaldynamic: - - %val = load i32* @general_dynamic_var - ret i32 %val - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 -; CHECK: ldr w0, [x[[TP]], x0] - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -define i32* @test_generaldynamic_addr() { -; CHECK-LABEL: test_generaldynamic_addr: - - ret i32* @general_dynamic_var - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:general_dynamic_var] -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 -; CHECK: add x0, [[TP]], x0 - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -@local_dynamic_var = external thread_local(localdynamic) global i32 - -define i32 @test_localdynamic() { -; CHECK-LABEL: test_localdynamic: - - %val = load i32* @local_dynamic_var - ret i32 %val - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: ldr w0, [x0, [[DTP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -define i32* @test_localdynamic_addr() { -; CHECK-LABEL: test_localdynamic_addr: - - ret i32* @local_dynamic_var - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var -; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var - -; CHECK: add x0, x0, [[DTP_OFFSET]] - -; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_ADD_LO12_NC -; CHECK-RELOC-DAG: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_CALL - -} - -; The entire point of the local-dynamic access model is to have a single call to -; the expensive resolver. Make sure we achieve that goal. - -@local_dynamic_var2 = external thread_local(localdynamic) global i32 - -define i32 @test_localdynamic_deduplicate() { -; CHECK-LABEL: test_localdynamic_deduplicate: - - %val = load i32* @local_dynamic_var - %val2 = load i32* @local_dynamic_var2 - - %sum = add i32 %val, %val2 - ret i32 %sum - -; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ -; CHECK-DAG: add x0, x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_ -; CHECK-DAG: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], {{#?}}:tlsdesc_lo12:_TLS_MODULE_BASE_] -; CHECK: .tlsdesccall _TLS_MODULE_BASE_ -; CHECK-NEXT: blr [[CALLEE]] - -; CHECK-NOT: _TLS_MODULE_BASE_ - -; CHECK: ret -} diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll deleted file mode 100644 index 61600380c2..0000000000 --- a/test/CodeGen/AArch64/tls-execs.ll +++ /dev/null @@ -1,64 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s -; arm64 has its own copy of tls-execs.ll, copied from this one during implementation. - -@initial_exec_var = external thread_local(initialexec) global i32 - -define i32 @test_initial_exec() { -; CHECK-LABEL: test_initial_exec: - %val = load i32* @initial_exec_var - -; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var -; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] -; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 -; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC - - ret i32 %val -} - -define i32* @test_initial_exec_addr() { -; CHECK-LABEL: test_initial_exec_addr: - ret i32* @initial_exec_var - -; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var -; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] -; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 -; CHECK: add x0, [[TP]], [[TP_OFFSET]] - -; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC - -} - -@local_exec_var = thread_local(initialexec) global i32 0 - -define i32 @test_local_exec() { -; CHECK-LABEL: test_local_exec: - %val = load i32* @local_exec_var - -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [A,A,0xa0'A',0x92'A'] -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 -; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC - - ret i32 %val -} - -define i32* @test_local_exec_addr() { -; CHECK-LABEL: test_local_exec_addr: - ret i32* @local_exec_var - -; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var -; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var -; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 -; CHECK: add x0, [[TP]], [[TP_OFFSET]] - -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 -; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC -} diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll index b6e2b19fb8..8a2fe26803 100644 --- a/test/CodeGen/AArch64/tst-br.ll +++ b/test/CodeGen/AArch64/tst-br.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s ; We've got the usual issues with LLVM reordering blocks here. The diff --git a/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll b/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll deleted file mode 100644 index 60cc6e40b3..0000000000 --- a/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll +++ /dev/null @@ -1,172 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s -; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s --check-prefix=BE-STRICT-ALIGN - -;; Check element-aligned 128-bit vector load/store - integer -define <16 x i8> @qwordint (<16 x i8>* %head.v16i8, <8 x i16>* %head.v8i16, <4 x i32>* %head.v4i32, <2 x i64>* %head.v2i64, - <16 x i8>* %tail.v16i8, <8 x i16>* %tail.v8i16, <4 x i32>* %tail.v4i32, <2 x i64>* %tail.v2i64) { -; CHECK-LABEL: qwordint -; CHECK: ld1 { v0.16b }, [x0] -; CHECK: ld1 { v1.8h }, [x1] -; CHECK: ld1 { v2.4s }, [x2] -; CHECK: ld1 { v3.2d }, [x3] -; CHECK: st1 { v0.16b }, [x4] -; CHECK: st1 { v1.8h }, [x5] -; CHECK: st1 { v2.4s }, [x6] -; CHECK: st1 { v3.2d }, [x7] -; BE-STRICT-ALIGN-LABEL: qwordint -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: str -entry: - %val.v16i8 = load <16 x i8>* %head.v16i8, align 1 - %val.v8i16 = load <8 x i16>* %head.v8i16, align 2 - %val.v4i32 = load <4 x i32>* %head.v4i32, align 4 - %val.v2i64 = load <2 x i64>* %head.v2i64, align 8 - store <16 x i8> %val.v16i8, <16 x i8>* %tail.v16i8, align 1 - store <8 x i16> %val.v8i16, <8 x i16>* %tail.v8i16, align 2 - store <4 x i32> %val.v4i32, <4 x i32>* %tail.v4i32, align 4 - store <2 x i64> %val.v2i64, <2 x i64>* %tail.v2i64, align 8 - ret <16 x i8> %val.v16i8 -} - -;; Check element-aligned 128-bit vector load/store - floating point -define <4 x float> @qwordfloat (<4 x float>* %head.v4f32, <2 x double>* %head.v2f64, - <4 x float>* %tail.v4f32, <2 x double>* %tail.v2f64) { -; CHECK-LABEL: qwordfloat -; CHECK: ld1 { v0.4s }, [x0] -; CHECK: ld1 { v1.2d }, [x1] -; CHECK: st1 { v0.4s }, [x2] -; CHECK: st1 { v1.2d }, [x3] -; BE-STRICT-ALIGN-LABEL: qwordfloat -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: str -entry: - %val.v4f32 = load <4 x float>* %head.v4f32, align 4 - %val.v2f64 = load <2 x double>* %head.v2f64, align 8 - store <4 x float> %val.v4f32, <4 x float>* %tail.v4f32, align 4 - store <2 x double> %val.v2f64, <2 x double>* %tail.v2f64, align 8 - ret <4 x float> %val.v4f32 -} - -;; Check element-aligned 64-bit vector load/store - integer -define <8 x i8> @dwordint (<8 x i8>* %head.v8i8, <4 x i16>* %head.v4i16, <2 x i32>* %head.v2i32, <1 x i64>* %head.v1i64, - <8 x i8>* %tail.v8i8, <4 x i16>* %tail.v4i16, <2 x i32>* %tail.v2i32, <1 x i64>* %tail.v1i64) { -; CHECK-LABEL: dwordint -; CHECK: ld1 { v0.8b }, [x0] -; CHECK: ld1 { v1.4h }, [x1] -; CHECK: ld1 { v2.2s }, [x2] -; CHECK: ld1 { v3.1d }, [x3] -; CHECK: st1 { v0.8b }, [x4] -; CHECK: st1 { v1.4h }, [x5] -; CHECK: st1 { v2.2s }, [x6] -; CHECK: st1 { v3.1d }, [x7] -; BE-STRICT-ALIGN-LABEL: dwordint -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ld1 { v1.1d }, [x3] -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: st1 { v1.1d }, [x7] -entry: - %val.v8i8 = load <8 x i8>* %head.v8i8, align 1 - %val.v4i16 = load <4 x i16>* %head.v4i16, align 2 - %val.v2i32 = load <2 x i32>* %head.v2i32, align 4 - %val.v1i64 = load <1 x i64>* %head.v1i64, align 8 - store <8 x i8> %val.v8i8, <8 x i8>* %tail.v8i8 , align 1 - store <4 x i16> %val.v4i16, <4 x i16>* %tail.v4i16, align 2 - store <2 x i32> %val.v2i32, <2 x i32>* %tail.v2i32, align 4 - store <1 x i64> %val.v1i64, <1 x i64>* %tail.v1i64, align 8 - ret <8 x i8> %val.v8i8 -} - -;; Check element-aligned 64-bit vector load/store - floating point -define <2 x float> @dwordfloat (<2 x float>* %head.v2f32, <1 x double>* %head.v1f64, - <2 x float>* %tail.v2f32, <1 x double>* %tail.v1f64) { -; CHECK-LABEL: dwordfloat -; CHECK: ld1 { v0.2s }, [x0] -; CHECK: ld1 { v1.1d }, [x1] -; CHECK: st1 { v0.2s }, [x2] -; CHECK: st1 { v1.1d }, [x3] -; BE-STRICT-ALIGN-LABEL: dwordfloat -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: ld1 { v1.1d }, [x1] -; BE-STRICT-ALIGN: str -; BE-STRICT-ALIGN: st1 { v1.1d }, [x3] -entry: - %val.v2f32 = load <2 x float>* %head.v2f32, align 4 - %val.v1f64 = load <1 x double>* %head.v1f64, align 8 - store <2 x float> %val.v2f32, <2 x float>* %tail.v2f32, align 4 - store <1 x double> %val.v1f64, <1 x double>* %tail.v1f64, align 8 - ret <2 x float> %val.v2f32 -} - -;; Check load/store of 128-bit vectors with less-than 16-byte alignment -define <2 x i64> @align2vi64 (<2 x i64>* %head.byte, <2 x i64>* %head.half, <2 x i64>* %head.word, <2 x i64>* %head.dword, - <2 x i64>* %tail.byte, <2 x i64>* %tail.half, <2 x i64>* %tail.word, <2 x i64>* %tail.dword) { -; CHECK-LABEL: align2vi64 -; CHECK: ld1 { v0.2d }, [x0] -; CHECK: ld1 { v1.2d }, [x1] -; CHECK: ld1 { v2.2d }, [x2] -; CHECK: ld1 { v3.2d }, [x3] -; CHECK: st1 { v0.2d }, [x4] -; CHECK: st1 { v1.2d }, [x5] -; CHECK: st1 { v2.2d }, [x6] -; CHECK: st1 { v3.2d }, [x7] -; BE-STRICT-ALIGN-LABEL: align2vi64 -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -entry: - %val.byte = load <2 x i64>* %head.byte, align 1 - %val.half = load <2 x i64>* %head.half, align 2 - %val.word = load <2 x i64>* %head.word, align 4 - %val.dword = load <2 x i64>* %head.dword, align 8 - store <2 x i64> %val.byte, <2 x i64>* %tail.byte, align 1 - store <2 x i64> %val.half, <2 x i64>* %tail.half, align 2 - store <2 x i64> %val.word, <2 x i64>* %tail.word, align 4 - store <2 x i64> %val.dword, <2 x i64>* %tail.dword, align 8 - ret <2 x i64> %val.byte - } - -;; Check load/store of 64-bit vectors with less-than 8-byte alignment -define <2 x float> @align2vf32 (<2 x float>* %head.byte, <2 x float>* %head.half, <2 x float>* %head.word, <2 x float>* %head.dword, - <2 x float>* %tail.byte, <2 x float>* %tail.half, <2 x float>* %tail.word, <2 x float>* %tail.dword) { -; CHECK-LABEL: align2vf32 -; CHECK: ld1 { v0.2s }, [x0] -; CHECK: ld1 { v1.2s }, [x1] -; CHECK: ld1 { v2.2s }, [x2] -; CHECK: st1 { v0.2s }, [x4] -; CHECK: st1 { v1.2s }, [x5] -; CHECK: st1 { v2.2s }, [x6] -; BE-STRICT-ALIGN-LABEL: align2vf32 -; BE-STRICT-ALIGN: ldrb -; BE-STRICT-ALIGN: ldrh -; BE-STRICT-ALIGN: ldr -; BE-STRICT-ALIGN: strb -; BE-STRICT-ALIGN: strh -; BE-STRICT-ALIGN: str -entry: - %val.byte = load <2 x float>* %head.byte, align 1 - %val.half = load <2 x float>* %head.half, align 2 - %val.word = load <2 x float>* %head.word, align 4 - store <2 x float> %val.byte, <2 x float>* %tail.byte, align 1 - store <2 x float> %val.half, <2 x float>* %tail.half, align 2 - store <2 x float> %val.word, <2 x float>* %tail.word, align 4 - ret <2 x float> %val.byte -} diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll deleted file mode 100644 index 7b85227cbd..0000000000 --- a/test/CodeGen/AArch64/variadic.ll +++ /dev/null @@ -1,241 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s -; arm64 has its own copy of this file, ported during implementation (variadic-aapcs.ll) - -%va_list = type {i8*, i8*, i8*, i32, i32} - -@var = global %va_list zeroinitializer - -declare void @llvm.va_start(i8*) - -define void @test_simple(i32 %n, ...) { -; CHECK-LABEL: test_simple: -; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #112] -; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] -; CHECK: str x7, [x[[GPRBASE]], #48] - -; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK-NOFP: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] -; CHECK-NOFP: str x7, [x[[GPRBASE]], #48] -; CHECK-NOFP-NOT: str q7, -; CHECK-NOFP: str x1, [sp, #[[GPRFROMSP]]] - -; Omit the middle ones - -; CHECK: str q0, [sp] -; CHECK: str x1, [sp, #[[GPRFROMSP]]] -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var - -; CHECK-NOFP-NOT: str q0, [sp] -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: movn [[GR_OFFS:w[0-9]+]], #55 -; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 -; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #55 -; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 -; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - - ret void -} - -define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) { -; CHECK-LABEL: test_fewargs: -; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #96] -; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] -; CHECK: str x7, [x[[GPRBASE]], #32] - -; CHECK-NOFP: sub sp, sp, #[[STACKSIZE:[0-9]+]] -; CHECK-NOFP-NOT: str q7, -; CHECK-NOFP: mov x[[GPRBASE:[0-9]+]], sp -; CHECK-NOFP: str x7, [x[[GPRBASE]], #24] - -; Omit the middle ones - -; CHECK: str q1, [sp] -; CHECK: str x3, [sp, #[[GPRFROMSP]]] - -; CHECK-NOFP-NOT: str q1, [sp] -; CHECK-NOFP: str x4, [sp] - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: movn [[VR_OFFS:w[0-9]+]], #111 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: movn [[GR_OFFS:w[0-9]+]], #39 -; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40 -; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: movn [[GR_OFFS:w[0-9]+]], #31 -; CHECK-NOFP: str [[GR_OFFS]], [x[[VA_LIST]], #24] -; CHECK-NOFP: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #32 -; CHECK-NOFP: str [[GR_TOP]], [x[[VA_LIST]], #8] -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - - ret void -} - -define void @test_nospare([8 x i64], [8 x float], ...) { -; CHECK-LABEL: test_nospare: - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK-NOT: sub sp, sp -; CHECK: mov [[STACK:x[0-9]+]], sp -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP-NOT: sub sp, sp -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #64 -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - ret void -} - -; If there are non-variadic arguments on the stack (here two i64s) then the -; __stack field should point just past them. -define void @test_offsetstack([10 x i64], [3 x float], ...) { -; CHECK-LABEL: test_offsetstack: -; CHECK: sub sp, sp, #80 -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #64] - -; CHECK-NOT: str x{{[0-9]+}}, - -; CHECK-NOFP-NOT: str q7, -; CHECK-NOT: str x7, - -; Omit the middle ones - -; CHECK: str q3, [sp] - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: movn [[VR_OFFS:w[0-9]+]], #79 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: str wzr, [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; CHECK: add [[STACK:x[0-9]+]], sp, #96 -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #40 -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24] - ret void -} - -declare void @llvm.va_end(i8*) - -define void @test_va_end() nounwind { -; CHECK-LABEL: test_va_end: -; CHECK-NEXT: BB#0 -; CHECK-NOFP: BB#0 - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_end(i8* %addr) - - ret void -; CHECK-NEXT: ret -; CHECK-NOFP-NEXT: ret -} - -declare void @llvm.va_copy(i8* %dest, i8* %src) - -@second_list = global %va_list zeroinitializer - -define void @test_va_copy() { -; CHECK-LABEL: test_va_copy: - %srcaddr = bitcast %va_list* @var to i8* - %dstaddr = bitcast %va_list* @second_list to i8* - call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr) - -; Check beginning and end again: - -; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list -; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] -; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] -; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] -; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24] - -; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list -; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24] -; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list] -; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24] - - ret void -; CHECK: ret -; CHECK-NOFP: ret -} - -%struct.s_3i = type { i32, i32, i32 } - -; This checks that, if the last named argument is not a multiple of 8 bytes, -; and is allocated on the stack, that __va_list.__stack is initialised to the -; first 8-byte aligned location above it. -define void @test_va_odd_struct_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, [1 x i64], %struct.s_3i* byval nocapture readnone align 4 %h, ...) { -; CHECK-LABEL: test_va_odd_struct_on_stack: - -; CHECK: sub sp, sp, #128 -; CHECK: mov x[[FPRBASE:[0-9]+]], sp -; CHECK: str q7, [x[[FPRBASE]], #112] - -; CHECK-NOT: str x{{[0-9]+}}, - -; CHECK-NOFP-NOT: str q7, -; CHECK-NOT: str x7, - -; Omit the middle ones - -; CHECK: str q0, [sp] - - %addr = bitcast %va_list* @var to i8* - call void @llvm.va_start(i8* %addr) -; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 -; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] -; CHECK: str wzr, [x[[VA_LIST]], #24] -; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 -; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] -; This constant would be #140 if it was not 8-byte aligned -; CHECK: add [[STACK:x[0-9]+]], sp, #144 -; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] - -; CHECK-NOFP: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var -; This constant would be #12 if it was not 8-byte aligned -; CHECK-NOFP: add [[STACK:x[0-9]+]], sp, #16 -; CHECK-NOFP: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #28] -; CHECK-NOFP: str wzr, [x[[VA_LIST]], #24] - ret void -} diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll index c4073cba08..44072c67d9 100644 --- a/test/CodeGen/AArch64/zero-reg.ll +++ b/test/CodeGen/AArch64/zero-reg.ll @@ -1,4 +1,3 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-linux-gnu | FileCheck %s @var32 = global i32 0 diff --git a/test/DebugInfo/AArch64/cfi-frame.ll b/test/DebugInfo/AArch64/cfi-frame.ll deleted file mode 100644 index 7290ddf357..0000000000 --- a/test/DebugInfo/AArch64/cfi-frame.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-WITH-FP - -@bigspace = global [8 x i64] zeroinitializer - -declare void @use_addr(i8*) - -define void @test_frame([8 x i64] %val) { -; CHECK: test_frame: -; CHECK: .cfi_startproc - - %var = alloca i8, i32 1000000 -; CHECK: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]] -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]] - -; Make sure the prologue is reasonably efficient -; CHECK-NEXT: stp x29, x30, [sp, -; CHECK-NEXT: stp x25, x26, [sp, -; CHECK-NEXT: stp x23, x24, [sp, -; CHECK-NEXT: stp x21, x22, [sp, -; CHECK-NEXT: stp x19, x20, [sp, -; CHECK-NEXT: sub sp, sp, #160 -; CHECK-NEXT: sub sp, sp, #244, lsl #12 -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_def_cfa sp, 1000080 -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_offset x30, -8 -; CHECK-NEXT: .Ltmp -; CHECK-NEXT: .cfi_offset x29, -16 -; [...] -; CHECK: .cfi_offset x19, -80 - -; CHECK: bl use_addr - call void @use_addr(i8* %var) - - store [8 x i64] %val, [8 x i64]* @bigspace - ret void -; CHECK: ret -; CHECK: .cfi_endproc -} - -; CHECK-WITH-FP: test_frame: - -; CHECK-WITH-FP: sub sp, sp, #[[SP_INIT_ADJ:[0-9]+]] -; CHECK-WITH-FP-NEXT: .Ltmp -; CHECK-WITH-FP-NEXT: .cfi_def_cfa sp, [[SP_INIT_ADJ]] - -; CHECK-WITH-FP: stp x29, x30, [sp, [[OFFSET:#[0-9]+]]] -; CHECK-WITH-FP-NEXT: add x29, sp, [[OFFSET]] -; CHECK-WITH-FP-NEXT: .Ltmp -; CHECK-WITH-FP-NEXT: .cfi_def_cfa x29, 16 - - ; We shouldn't emit any kind of update for the second stack adjustment if the - ; FP is in use. -; CHECK-WITH-FP-NOT: .cfi_def_cfa_offset - -; CHECK-WITH-FP: bl use_addr diff --git a/test/DebugInfo/AArch64/lit.local.cfg b/test/DebugInfo/AArch64/lit.local.cfg index 9a66a00189..a75a42b6f7 100644 --- a/test/DebugInfo/AArch64/lit.local.cfg +++ b/test/DebugInfo/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if not 'AArch64' in targets: +if not 'ARM64' in targets: config.unsupported = True diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll deleted file mode 100644 index 9f432d9f2c..0000000000 --- a/test/DebugInfo/AArch64/variable-loc.ll +++ /dev/null @@ -1,94 +0,0 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim -filetype=obj < %s \ -; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=DEBUG %s - -; This is a regression test making sure the location of variables is correct in -; debugging information, even if they're addressed via the frame pointer. - -; In case it needs, regenerating, the following suffices: -; int printf(const char *, ...); -; void populate_array(int *, int); -; int sum_array(int *, int); - -; int main() { -; int main_arr[100], val; -; populate_array(main_arr, 100); -; val = sum_array(main_arr, 100); -; printf("Total is %d\n", val); -; return 0; -; } - - ; First make sure main_arr is where we expect it: sp + 4 == x29 - 412: -; CHECK: main: -; CHECK: sub sp, sp, #432 -; CHECK: stp x29, x30, [sp, #416] -; CHECK: add x29, sp, #416 -; CHECK: add {{x[0-9]+}}, sp, #4 - -; DEBUG: DW_TAG_variable -; DEBUG-NEXT: DW_AT_name {{.*}} "main_arr" -; Rather hard-coded, but 0x91 => DW_OP_fbreg and 0xe47c is LEB128 encoded -412. -; DEBUG: DW_AT_location {{.*}}(<0x3> 91 e4 7c ) - -target datalayout = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-f128:128:128-n32:64-S128" -target triple = "aarch64-none-linux-gnu" - -@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 1 - -declare void @populate_array(i32*, i32) nounwind - -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone - -declare i32 @sum_array(i32*, i32) nounwind - -define i32 @main() nounwind { -entry: - %retval = alloca i32, align 4 - %main_arr = alloca [100 x i32], align 4 - %val = alloca i32, align 4 - store i32 0, i32* %retval - call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22 - call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24 - %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25 - call void @populate_array(i32* %arraydecay, i32 100), !dbg !25 - %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26 - %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26 - store i32 %call, i32* %val, align 4, !dbg !26 - %0 = load i32* %val, align 4, !dbg !27 - %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27 - ret i32 0, !dbg !28 -} - -declare i32 @printf(i8*, ...) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!30} - -!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99] -!1 = metadata !{} -!3 = metadata !{metadata !5, metadata !11, metadata !14} -!5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array] -!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!8 = metadata !{null, metadata !9, metadata !10} -!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array] -!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!13 = metadata !{metadata !10, metadata !9, metadata !10} -!14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] -!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!16 = metadata !{metadata !10} -!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19] -!18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c] -!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] -!20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99] -!22 = metadata !{i32 19, i32 7, metadata !18, null} -!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20] -!24 = metadata !{i32 20, i32 7, metadata !18, null} -!25 = metadata !{i32 22, i32 3, metadata !18, null} -!26 = metadata !{i32 23, i32 9, metadata !18, null} -!27 = metadata !{i32 24, i32 3, metadata !18, null} -!28 = metadata !{i32 26, i32 3, metadata !18, null} -!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"} -!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s index 6c7fbf5b87..03b930d539 100644 --- a/test/MC/AArch64/adrp-relocation.s +++ b/test/MC/AArch64/adrp-relocation.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s // RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj -o - %s| llvm-readobj -r - | FileCheck %s .text // These should produce an ADRP/ADD pair to calculate the address of diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s index 213dc00f0a..c6cb6b01f8 100644 --- a/test/MC/AArch64/basic-a64-diagnostics.s +++ b/test/MC/AArch64/basic-a64-diagnostics.s @@ -1,5 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-AARCH64 < %t %s // RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ERROR-ARM64 < %t %s diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s index 9a4ec81aae..72156bc9c5 100644 --- a/test/MC/AArch64/basic-a64-instructions.s +++ b/test/MC/AArch64/basic-a64-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 // RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding -mattr=+fp-armv8 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64 .globl _func @@ -128,7 +127,6 @@ _func: // CHECK: adds w19, w17, w1, uxtx // encoding: [0x33,0x62,0x21,0x2b] // CHECK: adds w2, w5, w1, sxtb #1 // encoding: [0xa2,0x84,0x21,0x2b] // CHECK: adds w26, wsp, w19, sxth // encoding: [0xfa,0xa3,0x33,0x2b] -// CHECK-AARCH64: adds wzr, w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] // CHECK-ARM64: cmn w2, w3, sxtw // encoding: [0x5f,0xc0,0x23,0x2b] // CHECK: adds w2, w3, w5, sxtx // encoding: [0x62,0xe0,0x25,0x2b] @@ -257,7 +255,6 @@ _func: // CHECK: sub sp, x3, x7, lsl #4 // encoding: [0x7f,0x70,0x27,0xcb] // CHECK: add w2, wsp, w3, lsl #1 // encoding: [0xe2,0x47,0x23,0x0b] // CHECK: cmp wsp, w9 // encoding: [0xff,0x43,0x29,0x6b] -// CHECK-AARCH64: adds wzr, wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] // CHECK-ARM64: cmn wsp, w3, lsl #4 // encoding: [0xff,0x53,0x23,0x2b] // CHECK: subs x3, sp, x9, lsl #2 // encoding: [0xe3,0x6b,0x29,0xeb] @@ -352,8 +349,6 @@ _func: // A relocation check (default to lo12, which is the only sane relocation anyway really) add x0, x4, #:lo12:var -// CHECK-AARCH64: add x0, x4, #:lo12:var // encoding: [0x80'A',A,A,0x91'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:var, kind: fixup_a64_add_lo12 // CHECK-ARM64: add x0, x4, :lo12:var // encoding: [0x80,0bAAAAAA00,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :lo12:var, kind: fixup_arm64_add_imm12 @@ -489,7 +484,6 @@ _func: sub w4, w6, wzr // CHECK: sub w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x4b] // CHECK: sub wzr, w3, w5 // encoding: [0x7f,0x00,0x05,0x4b] -// CHECK-AARCH64: sub w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x4b] // CHECK-ARM64: neg w20, w4 // encoding: [0xf4,0x03,0x04,0x4b] // CHECK: sub w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x4b] @@ -520,7 +514,6 @@ _func: sub x4, x6, xzr // CHECK: sub x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xcb] // CHECK: sub xzr, x3, x5 // encoding: [0x7f,0x00,0x05,0xcb] -// CHECK-AARCH64: sub x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xcb] // CHECK-ARM64: neg x20, x4 // encoding: [0xf4,0x03,0x04,0xcb] // CHECK: sub x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xcb] @@ -551,7 +544,6 @@ _func: subs w4, w6, wzr // CHECK: subs w3, w5, w7 // encoding: [0xa3,0x00,0x07,0x6b] // CHECK: {{subs wzr,|cmp}} w3, w5 // encoding: [0x7f,0x00,0x05,0x6b] -// CHECK-AARCH64: subs w20, wzr, w4 // encoding: [0xf4,0x03,0x04,0x6b] // CHECK-ARM64: negs w20, w4 // encoding: [0xf4,0x03,0x04,0x6b] // CHECK: subs w4, w6, wzr // encoding: [0xc4,0x00,0x1f,0x6b] @@ -582,7 +574,6 @@ _func: subs x4, x6, xzr // CHECK: subs x3, x5, x7 // encoding: [0xa3,0x00,0x07,0xeb] // CHECK: {{subs xzr,|cmp}} x3, x5 // encoding: [0x7f,0x00,0x05,0xeb] -// CHECK-AARCH64: subs x20, xzr, x4 // encoding: [0xf4,0x03,0x04,0xeb] // CHECK-ARM64: negs x20, x4 // encoding: [0xf4,0x03,0x04,0xeb] // CHECK: subs x4, x6, xzr // encoding: [0xc4,0x00,0x1f,0xeb] @@ -722,9 +713,6 @@ _func: neg w29, w30 neg w30, wzr neg wzr, w0 -// CHECK-AARCH64: sub w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x4b] -// CHECK-AARCH64: sub w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x4b] -// CHECK-AARCH64: sub wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] // CHECK-ARM64: neg w29, w30 // encoding: [0xfd,0x03,0x1e,0x4b] // CHECK-ARM64: neg w30, wzr // encoding: [0xfe,0x03,0x1f,0x4b] // CHECK-ARM64: neg wzr, w0 // encoding: [0xff,0x03,0x00,0x4b] @@ -732,9 +720,6 @@ _func: neg w28, w27, lsl #0 neg w26, w25, lsl #29 neg w24, w23, lsl #31 -// CHECK-AARCH64: sub w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x4b] -// CHECK-AARCH64: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] -// CHECK-AARCH64: neg w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x4b] // CHECK-ARM64: neg w28, w27 // encoding: [0xfc,0x03,0x1b,0x4b] // CHECK-ARM64: neg w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x4b] @@ -757,9 +742,6 @@ _func: neg x29, x30 neg x30, xzr neg xzr, x0 -// CHECK-AARCH64: sub x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xcb] -// CHECK-AARCH64: sub x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xcb] -// CHECK-AARCH64: sub xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] // CHECK-ARM64: neg x29, x30 // encoding: [0xfd,0x03,0x1e,0xcb] // CHECK-ARM64: neg x30, xzr // encoding: [0xfe,0x03,0x1f,0xcb] // CHECK-ARM64: neg xzr, x0 // encoding: [0xff,0x03,0x00,0xcb] @@ -767,9 +749,6 @@ _func: neg x28, x27, lsl #0 neg x26, x25, lsl #29 neg x24, x23, lsl #31 -// CHECK-AARCH64: sub x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xcb] -// CHECK-AARCH64: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] -// CHECK-AARCH64: neg x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xcb] // CHECK-ARM64: neg x28, x27 // encoding: [0xfc,0x03,0x1b,0xcb] // CHECK-ARM64: neg x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xcb] @@ -792,9 +771,6 @@ _func: negs w29, w30 negs w30, wzr negs wzr, w0 -// CHECK-AARCH64: subs w29, wzr, w30 // encoding: [0xfd,0x03,0x1e,0x6b] -// CHECK-AARCH64: subs w30, wzr, wzr // encoding: [0xfe,0x03,0x1f,0x6b] -// CHECK-AARCH64: subs wzr, wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] // CHECK-ARM64: negs w29, w30 // encoding: [0xfd,0x03,0x1e,0x6b] // CHECK-ARM64: negs w30, wzr // encoding: [0xfe,0x03,0x1f,0x6b] // CHECK-ARM64: cmp wzr, w0 // encoding: [0xff,0x03,0x00,0x6b] @@ -802,9 +778,6 @@ _func: negs w28, w27, lsl #0 negs w26, w25, lsl #29 negs w24, w23, lsl #31 -// CHECK-AARCH64: subs w28, wzr, w27 // encoding: [0xfc,0x03,0x1b,0x6b] -// CHECK-AARCH64: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] -// CHECK-AARCH64: negs w24, w23, lsl #31 // encoding: [0xf8,0x7f,0x17,0x6b] // CHECK-ARM64: negs w28, w27 // encoding: [0xfc,0x03,0x1b,0x6b] // CHECK-ARM64: negs w26, w25, lsl #29 // encoding: [0xfa,0x77,0x19,0x6b] @@ -827,9 +800,6 @@ _func: negs x29, x30 negs x30, xzr negs xzr, x0 -// CHECK-AARCH64: subs x29, xzr, x30 // encoding: [0xfd,0x03,0x1e,0xeb] -// CHECK-AARCH64: subs x30, xzr, xzr // encoding: [0xfe,0x03,0x1f,0xeb] -// CHECK-AARCH64: subs xzr, xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] // CHECK-ARM64: negs x29, x30 // encoding: [0xfd,0x03,0x1e,0xeb] // CHECK-ARM64: negs x30, xzr // encoding: [0xfe,0x03,0x1f,0xeb] // CHECK-ARM64: cmp xzr, x0 // encoding: [0xff,0x03,0x00,0xeb] @@ -837,9 +807,6 @@ _func: negs x28, x27, lsl #0 negs x26, x25, lsl #29 negs x24, x23, lsl #31 -// CHECK-AARCH64: subs x28, xzr, x27 // encoding: [0xfc,0x03,0x1b,0xeb] -// CHECK-AARCH64: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] -// CHECK-AARCH64: negs x24, x23, lsl #31 // encoding: [0xf8,0x7f,0x17,0xeb] // CHECK-ARM64: negs x28, x27 // encoding: [0xfc,0x03,0x1b,0xeb] // CHECK-ARM64: negs x26, x25, lsl #29 // encoding: [0xfa,0x77,0x19,0xeb] @@ -970,10 +937,6 @@ _func: sbfm x3, x4, #63, #63 sbfm wzr, wzr, #31, #31 sbfm w12, w9, #0, #0 -// CHECK-AARCH64: sbfm x1, x2, #3, #4 // encoding: [0x41,0x10,0x43,0x93] -// CHECK-AARCH64: sbfm x3, x4, #63, #63 // encoding: [0x83,0xfc,0x7f,0x93] -// CHECK-AARCH64: sbfm wzr, wzr, #31, #31 // encoding: [0xff,0x7f,0x1f,0x13] -// CHECK-AARCH64: sbfm w12, w9, #0, #0 // encoding: [0x2c,0x01,0x00,0x13] // CHECK-ARM64: sbfx x1, x2, #3, #2 // encoding: [0x41,0x10,0x43,0x93] // CHECK-ARM64: asr x3, x4, #63 // encoding: [0x83,0xfc,0x7f,0x93] @@ -984,10 +947,6 @@ _func: ubfm xzr, x4, #0, #0 ubfm x4, xzr, #63, #5 ubfm x5, x6, #12, #63 -// CHECK-AARCH64: ubfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xd3] -// CHECK-AARCH64: ubfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xd3] -// CHECK-AARCH64: ubfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xd3] -// CHECK-AARCH64: ubfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xd3] // CHECK-ARM64: ubfiz x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xd3] // CHECK-ARM64: ubfx xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xd3] // CHECK-ARM64: ubfiz x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xd3] @@ -997,10 +956,6 @@ _func: bfm xzr, x4, #0, #0 bfm x4, xzr, #63, #5 bfm x5, x6, #12, #63 -// CHECK-AARCH64: bfm x4, x5, #12, #10 // encoding: [0xa4,0x28,0x4c,0xb3] -// CHECK-AARCH64: bfm xzr, x4, #0, #0 // encoding: [0x9f,0x00,0x40,0xb3] -// CHECK-AARCH64: bfm x4, xzr, #63, #5 // encoding: [0xe4,0x17,0x7f,0xb3] -// CHECK-AARCH64: bfm x5, x6, #12, #63 // encoding: [0xc5,0xfc,0x4c,0xb3] // CHECK-ARM64: bfi x4, x5, #52, #11 // encoding: [0xa4,0x28,0x4c,0xb3] // CHECK-ARM64: bfxil xzr, x4, #0, #1 // encoding: [0x9f,0x00,0x40,0xb3] // CHECK-ARM64: bfi x4, xzr, #1, #6 // encoding: [0xe4,0x17,0x7f,0xb3] @@ -1063,10 +1018,8 @@ _func: sbfiz xzr, xzr, #10, #11 // CHECK: {{sbfiz|sbfx}} w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] // CHECK: sbfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0x93] -// CHECK-AARCH64: sbfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93] // CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] // CHECK: sbfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0x93] -// CHECK-AARCH64: sbfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13] // CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] // CHECK: sbfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x13] // CHECK: sbfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x13] @@ -1081,17 +1034,11 @@ _func: sbfx w13, w14, #29, #3 sbfx xzr, xzr, #10, #11 // CHECK: sbfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x13] -// CHECK-AARCH64: sbfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0x93] // CHECK-ARM64: asr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0x93] -// CHECK-AARCH64: sbfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0x93] // CHECK-ARM64: asr x19, x20, #0 // encoding: [0x93,0xfe,0x40,0x93] -// CHECK-AARCH64: sbfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0x93] // CHECK-ARM64: asr x9, x10, #5 // encoding: [0x49,0xfd,0x45,0x93] -// CHECK-AARCH64: sbfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x13] // CHECK-ARM64: asr w9, w10, #0 // encoding: [0x49,0x7d,0x00,0x13] -// CHECK-AARCH64: sbfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x13] // CHECK-ARM64: asr w11, w12, #31 // encoding: [0x8b,0x7d,0x1f,0x13] -// CHECK-AARCH64: sbfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x13] // CHECK-ARM64: asr w13, w14, #29 // encoding: [0xcd,0x7d,0x1d,0x13] // CHECK: sbfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0x93] @@ -1103,14 +1050,6 @@ _func: bfi w11, w12, #31, #1 bfi w13, w14, #29, #3 bfi xzr, xzr, #10, #11 -// CHECK-AARCH64: bfi w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] -// CHECK-AARCH64: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] -// CHECK-AARCH64: bfi x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xb3] -// CHECK-AARCH64: bfi x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xb3] -// CHECK-AARCH64: bfi w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x33] -// CHECK-AARCH64: bfi w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x33] -// CHECK-AARCH64: bfi w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x33] -// CHECK-AARCH64: bfi xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xb3] // CHECK-ARM64: bfxil w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x33] // CHECK-ARM64: bfi x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xb3] @@ -1146,14 +1085,6 @@ _func: ubfiz w11, w12, #31, #1 ubfiz w13, w14, #29, #3 ubfiz xzr, xzr, #10, #11 -// CHECK-AARCH64: ubfiz w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-AARCH64: ubfiz x2, x3, #63, #1 // encoding: [0x62,0x00,0x41,0xd3] -// CHECK-AARCH64: ubfiz x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-AARCH64: ubfiz x9, x10, #5, #59 // encoding: [0x49,0xe9,0x7b,0xd3] -// CHECK-AARCH64: ubfiz w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-AARCH64: ubfiz w11, w12, #31, #1 // encoding: [0x8b,0x01,0x01,0x53] -// CHECK-AARCH64: ubfiz w13, w14, #29, #3 // encoding: [0xcd,0x09,0x03,0x53] -// CHECK-AARCH64: ubfiz xzr, xzr, #10, #11 // encoding: [0xff,0x2b,0x76,0xd3] // CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] // CHECK-ARM64: lsl x2, x3, #63 // encoding: [0x62,0x00,0x41,0xd3] @@ -1172,14 +1103,6 @@ _func: ubfx w11, w12, #31, #1 ubfx w13, w14, #29, #3 ubfx xzr, xzr, #10, #11 -// CHECK-AARCH64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] -// CHECK-AARCH64: ubfx x2, x3, #63, #1 // encoding: [0x62,0xfc,0x7f,0xd3] -// CHECK-AARCH64: ubfx x19, x20, #0, #64 // encoding: [0x93,0xfe,0x40,0xd3] -// CHECK-AARCH64: ubfx x9, x10, #5, #59 // encoding: [0x49,0xfd,0x45,0xd3] -// CHECK-AARCH64: ubfx w9, w10, #0, #32 // encoding: [0x49,0x7d,0x00,0x53] -// CHECK-AARCH64: ubfx w11, w12, #31, #1 // encoding: [0x8b,0x7d,0x1f,0x53] -// CHECK-AARCH64: ubfx w13, w14, #29, #3 // encoding: [0xcd,0x7d,0x1d,0x53] -// CHECK-AARCH64: ubfx xzr, xzr, #10, #11 // encoding: [0xff,0x53,0x4a,0xd3] // CHECK-ARM64: ubfx w9, w10, #0, #1 // encoding: [0x49,0x01,0x00,0x53] // CHECK-ARM64: lsr x2, x3, #63 // encoding: [0x62,0xfc,0x7f,0xd3] @@ -1197,14 +1120,6 @@ _func: cbz x5, lbl cbnz x2, lbl cbnz x26, lbl -// CHECK-AARCH64: cbz w5, lbl // encoding: [0x05'A',A,A,0x34'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbz x5, lbl // encoding: [0x05'A',A,A,0xb4'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbnz x2, lbl // encoding: [0x02'A',A,A,0xb5'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbnz x26, lbl // encoding: [0x1a'A',A,A,0xb5'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr // CHECK-ARM64: cbz w5, lbl // encoding: [0bAAA00101,A,A,0x34] // CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 // CHECK-ARM64: cbz x5, lbl // encoding: [0bAAA00101,A,A,0xb4] @@ -1216,10 +1131,6 @@ _func: cbz wzr, lbl cbnz xzr, lbl -// CHECK-AARCH64: cbz wzr, lbl // encoding: [0x1f'A',A,A,0x34'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: cbnz xzr, lbl // encoding: [0x1f'A',A,A,0xb5'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr // CHECK-ARM64: cbz wzr, lbl // encoding: [0bAAA11111,A,A,0x34] // CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 @@ -1256,40 +1167,6 @@ _func: b.gt lbl b.le lbl b.al lbl -// CHECK-AARCH64: b.eq lbl // encoding: [A,A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ne lbl // encoding: [0x01'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.mi lbl // encoding: [0x04'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.pl lbl // encoding: [0x05'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vs lbl // encoding: [0x06'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vc lbl // encoding: [0x07'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hi lbl // encoding: [0x08'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ls lbl // encoding: [0x09'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.le lbl // encoding: [0x0d'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.al lbl // encoding: [0x0e'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr // CHECK-ARM64: b.eq lbl // encoding: [0bAAA00000,A,A,0x54] // CHECK-ARM64: // fixup A - offset: 0, value: lbl, kind: fixup_arm64_pcrel_branch19 @@ -1344,40 +1221,6 @@ _func: bgt lbl ble lbl bal lbl -// CHECK-AARCH64: b.eq lbl // encoding: [A,A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ne lbl // encoding: [0x01'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hs lbl // encoding: [0x02'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lo lbl // encoding: [0x03'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.mi lbl // encoding: [0x04'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.pl lbl // encoding: [0x05'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vs lbl // encoding: [0x06'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.vc lbl // encoding: [0x07'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.hi lbl // encoding: [0x08'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ls lbl // encoding: [0x09'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.ge lbl // encoding: [0x0a'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.lt lbl // encoding: [0x0b'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.gt lbl // encoding: [0x0c'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.le lbl // encoding: [0x0d'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr -// CHECK-AARCH64: b.al lbl // encoding: [0x0e'A',A,A,0x54'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: lbl, kind: fixup_a64_condbr b.eq #0 b.lt #-4 @@ -2342,12 +2185,6 @@ _func: ldr w3, here ldr x29, there ldrsw xzr, everywhere -// CHECK-AARCH64: ldr w3, here // encoding: [0x03'A',A,A,0x18'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: here, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldr x29, there // encoding: [0x1d'A',A,A,0x58'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: there, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldrsw xzr, everywhere // encoding: [0x1f'A',A,A,0x98'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: everywhere, kind: fixup_a64_ld_prel // CHECK-ARM64: ldr w3, here // encoding: [0bAAA00011,A,A,0x18] // CHECK-ARM64: // fixup A - offset: 0, value: here, kind: fixup_arm64_ldr_pcrel_imm19 @@ -2359,12 +2196,6 @@ _func: ldr s0, who_knows ldr d0, i_dont ldr q0, there_must_be_a_better_way -// CHECK-AARCH64: ldr s0, who_knows // encoding: [A,A,A,0x1c'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: who_knows, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldr d0, i_dont // encoding: [A,A,A,0x5c'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: i_dont, kind: fixup_a64_ld_prel -// CHECK-AARCH64: ldr q0, there_must_be_a_better_way // encoding: [A,A,A,0x9c'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: there_must_be_a_better_way, kind: fixup_a64_ld_prel // CHECK-ARM64: ldr s0, who_knows // encoding: [0bAAA00000,A,A,0x1c] // CHECK-ARM64: // fixup A - offset: 0, value: who_knows, kind: fixup_arm64_ldr_pcrel_imm19 @@ -2380,10 +2211,6 @@ _func: prfm pldl1strm, nowhere prfm #22, somewhere -// CHECK-AARCH64: prfm pldl1strm, nowhere // encoding: [0x01'A',A,A,0xd8'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_ld_prel -// CHECK-AARCH64: prfm #22, somewhere // encoding: [0x16'A',A,A,0xd8'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_ld_prel // CHECK-ARM64: prfm pldl1strm, nowhere // encoding: [0bAAA00001,A,A,0xd8] // CHECK-ARM64: // fixup A - offset: 0, value: nowhere, kind: fixup_arm64_ldr_pcrel_imm19 @@ -2603,18 +2430,6 @@ _func: ldrsw x15, [x5, #:lo12:sym] ldr x15, [x5, #:lo12:sym] ldr q3, [x2, #:lo12:sym] -// CHECK-AARCH64: str x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,A,0xf9'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12 -// CHECK-AARCH64: ldrb w15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0x39'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst8_lo12 -// CHECK-AARCH64: ldrsh x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0x79'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst16_lo12 -// CHECK-AARCH64: ldrsw x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x80'A',0xb9'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst32_lo12 -// CHECK-AARCH64: ldr x15, [x5, #:lo12:sym] // encoding: [0xaf'A',A,0x40'A',0xf9'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst64_lo12 -// CHECK-AARCH64: ldr q3, [x2, #:lo12:sym] // encoding: [0x43'A',A,0xc0'A',0x3d'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_a64_ldst128_lo12 // CHECK-ARM64: str x15, [x5, :lo12:sym] // encoding: [0xaf,0bAAAAAA00,0b00AAAAAA,0xf9] // CHECK-ARM64: // fixup A - offset: 0, value: :lo12:sym, kind: fixup_arm64_ldst_imm12_scale8 @@ -3507,10 +3322,6 @@ _func: movz x2, #:abs_g0:sym movk w3, #:abs_g0_nc:sym -// CHECK-AARCH64: movz x2, #:abs_g0:sym // encoding: [0x02'A',A,0x80'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_a64_movw_uabs_g0 -// CHECK-AARCH64: movk w3, #:abs_g0_nc:sym // encoding: [0x03'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_a64_movw_uabs_g0_nc // CHECK-ARM64: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_arm64_movw @@ -3519,10 +3330,6 @@ _func: movz x4, #:abs_g1:sym movk w5, #:abs_g1_nc:sym -// CHECK-AARCH64: movz x4, #:abs_g1:sym // encoding: [0x04'A',A,0xa0'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_a64_movw_uabs_g1 -// CHECK-AARCH64: movk w5, #:abs_g1_nc:sym // encoding: [0x05'A',A,0xa0'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_nc:sym, kind: fixup_a64_movw_uabs_g1_nc // CHECK-ARM64: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_arm64_movw @@ -3531,10 +3338,6 @@ _func: movz x6, #:abs_g2:sym movk x7, #:abs_g2_nc:sym -// CHECK-AARCH64: movz x6, #:abs_g2:sym // encoding: [0x06'A',A,0xc0'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_a64_movw_uabs_g2 -// CHECK-AARCH64: movk x7, #:abs_g2_nc:sym // encoding: [0x07'A',A,0xc0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2_nc:sym, kind: fixup_a64_movw_uabs_g2_nc // CHECK-ARM64: movz x6, #:abs_g2:sym // encoding: [0bAAA00110,A,0b110AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2:sym, kind: fixup_arm64_movw @@ -3543,10 +3346,6 @@ _func: movz x8, #:abs_g3:sym movk x9, #:abs_g3:sym -// CHECK-AARCH64: movz x8, #:abs_g3:sym // encoding: [0x08'A',A,0xe0'A',0xd2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3 -// CHECK-AARCH64: movk x9, #:abs_g3:sym // encoding: [0x09'A',A,0xe0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_a64_movw_uabs_g3 // CHECK-ARM64: movz x8, #:abs_g3:sym // encoding: [0bAAA01000,A,0b111AAAAA,0xd2] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g3:sym, kind: fixup_arm64_movw @@ -3558,14 +3357,6 @@ _func: movz x19, #:abs_g0_s:sym movn w10, #:abs_g0_s:sym movz w25, #:abs_g0_s:sym -// CHECK-AARCH64: movn x30, #:abs_g0_s:sym // encoding: [0x1e'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 -// CHECK-AARCH64: movz x19, #:abs_g0_s:sym // encoding: [0x13'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 -// CHECK-AARCH64: movn w10, #:abs_g0_s:sym // encoding: [0x0a'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 -// CHECK-AARCH64: movz w25, #:abs_g0_s:sym // encoding: [0x19'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_a64_movw_sabs_g0 // CHECK-ARM64: movn x30, #:abs_g0_s:sym // encoding: [0bAAA11110,A,0b100AAAAA,0x92] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_arm64_movw @@ -3580,14 +3371,6 @@ _func: movz x19, #:abs_g1_s:sym movn w10, #:abs_g1_s:sym movz w25, #:abs_g1_s:sym -// CHECK-AARCH64: movn x30, #:abs_g1_s:sym // encoding: [0x1e'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 -// CHECK-AARCH64: movz x19, #:abs_g1_s:sym // encoding: [0x13'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 -// CHECK-AARCH64: movn w10, #:abs_g1_s:sym // encoding: [0x0a'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 -// CHECK-AARCH64: movz w25, #:abs_g1_s:sym // encoding: [0x19'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_a64_movw_sabs_g1 // CHECK-ARM64: movn x30, #:abs_g1_s:sym // encoding: [0bAAA11110,A,0b101AAAAA,0x92] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g1_s:sym, kind: fixup_arm64_movw @@ -3600,10 +3383,6 @@ _func: movn x30, #:abs_g2_s:sym movz x19, #:abs_g2_s:sym -// CHECK-AARCH64: movn x30, #:abs_g2_s:sym // encoding: [0x1e'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2 -// CHECK-AARCH64: movz x19, #:abs_g2_s:sym // encoding: [0x13'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_a64_movw_sabs_g2 // CHECK-ARM64: movn x30, #:abs_g2_s:sym // encoding: [0bAAA11110,A,0b110AAAAA,0x92] // CHECK-ARM64-NEXT: // fixup A - offset: 0, value: :abs_g2_s:sym, kind: fixup_arm64_movw @@ -3616,10 +3395,6 @@ _func: adr x2, loc adr xzr, loc - // CHECK-AARCH64: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] - // CHECK-AARCH64: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel - // CHECK-AARCH64: adr xzr, loc // encoding: [0x1f'A',A,A,0x10'A'] - // CHECK-AARCH64: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel // CHECK-ARM64: adr x2, loc // encoding: [0x02'A',A,A,0x10'A'] // CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 @@ -3627,8 +3402,6 @@ _func: // CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adr_imm21 adrp x29, loc - // CHECK-AARCH64: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] - // CHECK-AARCH64: // fixup A - offset: 0, value: loc, kind: fixup_a64_adr_prel_page // CHECK-ARM64: adrp x29, loc // encoding: [0x1d'A',A,A,0x90'A'] // CHECK-ARM64: // fixup A - offset: 0, value: loc, kind: fixup_arm64_pcrel_adrp_imm21 @@ -5008,12 +4781,6 @@ _func: tbz x5, #0, somewhere tbz xzr, #63, elsewhere tbnz x5, #45, nowhere -// CHECK-AARCH64: tbz x5, #0, somewhere // encoding: [0x05'A',A,A,0x36'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbz xzr, #63, elsewhere // encoding: [0x1f'A',A,0xf8'A',0xb6'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbnz x5, #45, nowhere // encoding: [0x05'A',A,0x68'A',0xb7'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr // CHECK-ARM64: tbz w5, #0, somewhere // encoding: [0bAAA00101,A,0b00000AAA,0x36] // CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch14 @@ -5026,12 +4793,6 @@ _func: tbnz w3, #2, there tbnz wzr, #31, nowhere tbz w5, #12, anywhere -// CHECK-AARCH64: tbnz w3, #2, there // encoding: [0x03'A',A,0x10'A',0x37'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: there, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbnz wzr, #31, nowhere // encoding: [0x1f'A',A,0xf8'A',0x37'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: nowhere, kind: fixup_a64_tstbr -// CHECK-AARCH64: tbz w5, #12, anywhere // encoding: [0x05'A',A,0x60'A',0x36'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: anywhere, kind: fixup_a64_tstbr // CHECK-ARM64: tbnz w3, #2, there // encoding: [0bAAA00011,A,0b00010AAA,0x37] // CHECK-ARM64: // fixup A - offset: 0, value: there, kind: fixup_arm64_pcrel_branch14 @@ -5046,10 +4807,6 @@ _func: b somewhere bl elsewhere -// CHECK-AARCH64: b somewhere // encoding: [A,A,A,0x14'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: somewhere, kind: fixup_a64_uncondbr -// CHECK-AARCH64: bl elsewhere // encoding: [A,A,A,0x94'A'] -// CHECK-AARCH64: // fixup A - offset: 0, value: elsewhere, kind: fixup_a64_call // CHECK-ARM64: b somewhere // encoding: [A,A,A,0b000101AA] // CHECK-ARM64: // fixup A - offset: 0, value: somewhere, kind: fixup_arm64_pcrel_branch26 diff --git a/test/MC/AArch64/basic-pic.s b/test/MC/AArch64/basic-pic.s index c3317f35d3..6bb6aaa7de 100644 --- a/test/MC/AArch64/basic-pic.s +++ b/test/MC/AArch64/basic-pic.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o -| llvm-objdump -r - | FileCheck %s // CHECK: RELOCATION RECORDS FOR [.rela.text] diff --git a/test/MC/AArch64/elf-extern.s b/test/MC/AArch64/elf-extern.s index 23cb4bd46c..3d84bde052 100644 --- a/test/MC/AArch64/elf-extern.s +++ b/test/MC/AArch64/elf-extern.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // External symbols are a different concept to global variables but should still diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s index 2d134ff586..b69926efbc 100644 --- a/test/MC/AArch64/elf-objdump.s +++ b/test/MC/AArch64/elf-objdump.s @@ -1,5 +1,4 @@ // 64 bit little endian -// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d - // RUN: llvm-mc -filetype=obj -triple arm64-none-linux-gnu %s -o - | llvm-objdump -d - // We just want to see if llvm-objdump works at all. diff --git a/test/MC/AArch64/elf-reloc-addend.s b/test/MC/AArch64/elf-reloc-addend.s deleted file mode 100644 index 8d575fb8b9..0000000000 --- a/test/MC/AArch64/elf-reloc-addend.s +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=aarch64-linux-gnu -r - | FileCheck %s - -// RUN: llvm-mc -triple=arm64-linux-gnu -filetype=obj -o - %s | llvm-objdump -triple=aarch64-linux-gnu -r - | FileCheck %s - - add x0, x4, #:lo12:sym -// CHECK: 0 R_AARCH64_ADD_ABS_LO12_NC sym - add x3, x5, #:lo12:sym+1 -// CHECK: 4 R_AARCH64_ADD_ABS_LO12_NC sym+1 - add x3, x5, #:lo12:sym-1 -// CHECK: 8 R_AARCH64_ADD_ABS_LO12_NC sym-1 diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s index a64249e8b8..cc5c3f7f25 100644 --- a/test/MC/AArch64/elf-reloc-addsubimm.s +++ b/test/MC/AArch64/elf-reloc-addsubimm.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s index 55ba5f8b74..3554ef3ae4 100644 --- a/test/MC/AArch64/elf-reloc-ldrlit.s +++ b/test/MC/AArch64/elf-reloc-ldrlit.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s index faf2c459a6..196f65fd29 100644 --- a/test/MC/AArch64/elf-reloc-ldstunsimm.s +++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+fp-armv8 -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s index 29f89443c3..dc7dbb0c15 100644 --- a/test/MC/AArch64/elf-reloc-movw.s +++ b/test/MC/AArch64/elf-reloc-movw.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s index ee9b207369..652011318c 100644 --- a/test/MC/AArch64/elf-reloc-pcreladdressing.s +++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s @@ -1,7 +1,4 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ +// RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s adr x2, some_label diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s index 370b9ee126..9cbe3a53fb 100644 --- a/test/MC/AArch64/elf-reloc-tstb.s +++ b/test/MC/AArch64/elf-reloc-tstb.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s index 69b0a2fcb6..8f3915afab 100644 --- a/test/MC/AArch64/elf-reloc-uncondbrimm.s +++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s @@ -1,6 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj %s -o - | \ // RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ %s diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s index c884312354..6f4f5ee66c 100644 --- a/test/MC/AArch64/gicv3-regs-diagnostics.s +++ b/test/MC/AArch64/gicv3-regs-diagnostics.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s index 470fc4667f..b9eac1a569 100644 --- a/test/MC/AArch64/gicv3-regs.s +++ b/test/MC/AArch64/gicv3-regs.s @@ -1,4 +1,3 @@ - // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, icc_iar1_el1 diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s index c12ebf4636..33d5bf519f 100644 --- a/test/MC/AArch64/inline-asm-modifiers.s +++ b/test/MC/AArch64/inline-asm-modifiers.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj -mattr=+fp-armv8 < %s | llvm-objdump -r - | FileCheck %s .file "<stdin>" diff --git a/test/MC/AArch64/jump-table.s b/test/MC/AArch64/jump-table.s index 3fe9bc58cd..439ecd90de 100644 --- a/test/MC/AArch64/jump-table.s +++ b/test/MC/AArch64/jump-table.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc < %s -triple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s // RUN: llvm-mc < %s -triple=arm64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s .file "<stdin>" diff --git a/test/MC/AArch64/lit.local.cfg b/test/MC/AArch64/lit.local.cfg index 8378712e9c..17a6b7ab03 100644 --- a/test/MC/AArch64/lit.local.cfg +++ b/test/MC/AArch64/lit.local.cfg @@ -1,3 +1,3 @@ targets = set(config.root.targets_to_build.split()) -if 'AArch64' not in targets or 'ARM64' not in targets: - config.unsupported = True
\ No newline at end of file +if 'ARM64' not in targets: + config.unsupported = True diff --git a/test/MC/AArch64/mapping-across-sections.s b/test/MC/AArch64/mapping-across-sections.s index 14336382be..00b324cb82 100644 --- a/test/MC/AArch64/mapping-across-sections.s +++ b/test/MC/AArch64/mapping-across-sections.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text diff --git a/test/MC/AArch64/mapping-within-section.s b/test/MC/AArch64/mapping-within-section.s index b80721ac65..f515cb9a5c 100644 --- a/test/MC/AArch64/mapping-within-section.s +++ b/test/MC/AArch64/mapping-within-section.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -t - | FileCheck %s .text diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s index 567f228921..04841d0164 100644 --- a/test/MC/AArch64/neon-2velem.s +++ b/test/MC/AArch64/neon-2velem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-3vdiff.s b/test/MC/AArch64/neon-3vdiff.s index 476f7c6abe..3ffc38fc69 100644 --- a/test/MC/AArch64/neon-3vdiff.s +++ b/test/MC/AArch64/neon-3vdiff.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+crypto -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s index 8833b3bbe9..e796483414 100644 --- a/test/MC/AArch64/neon-aba-abd.s +++ b/test/MC/AArch64/neon-aba-abd.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s index 1a5446b3a4..60b766d8c8 100644 --- a/test/MC/AArch64/neon-across.s +++ b/test/MC/AArch64/neon-across.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s index 83d443edb7..0b9e4d3146 100644 --- a/test/MC/AArch64/neon-add-pairwise.s +++ b/test/MC/AArch64/neon-add-pairwise.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s index ad169a8ff2..7d11d70bb9 100644 --- a/test/MC/AArch64/neon-add-sub-instructions.s +++ b/test/MC/AArch64/neon-add-sub-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-bitwise-instructions.s b/test/MC/AArch64/neon-bitwise-instructions.s index 949d1b14ff..ec192aa2d8 100644 --- a/test/MC/AArch64/neon-bitwise-instructions.s +++ b/test/MC/AArch64/neon-bitwise-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s index dfc3ae7151..4d3daf066e 100644 --- a/test/MC/AArch64/neon-compare-instructions.s +++ b/test/MC/AArch64/neon-compare-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-crypto.s b/test/MC/AArch64/neon-crypto.s index 3f36ba9e2a..ed1bf88826 100644 --- a/test/MC/AArch64/neon-crypto.s +++ b/test/MC/AArch64/neon-crypto.s @@ -1,5 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s -// RUN: not llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -mattr=+crypto -show-encoding < %s | FileCheck %s // RUN: not llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck -check-prefix=CHECK-NO-CRYPTO-ARM64 %s diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 10fdde4602..46ae311f5f 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -1,5 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-AARCH64-ERROR < %t %s // RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=+neon < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR --check-prefix=CHECK-ARM64-ERROR < %t %s @@ -590,12 +588,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fcmgt v0.2d, v31.2s, v16.2s // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected floating-point constant #0.0 or invalid register type -// CHECK-AARCH64-ERROR: fcmgt v4.4s, v7.4s, v15.4h -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected floating-point constant #0.0 or invalid register type -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, v2.16b -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ARM64-ERROR: fcmgt v4.4s, v7.4s, v15.4h @@ -691,12 +683,6 @@ // CHECK-ERROR: fcmeq v0.16b, v1.16b, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmeq v0.8b, v1.4h, #1.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmeq v0.8b, v1.4h, #1 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmeq v0.8b, v1.4h, #1.0 @@ -722,12 +708,6 @@ // CHECK-ERROR: fcmge v3.8b, v8.2s, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.8h, v15.2d, #-1.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.8h, v15.2d, #2 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmle v17.8h, v15.2d, #-1.0 @@ -752,12 +732,6 @@ // CHECK-ERROR: fcmgt v4.4s, v7.4h, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #255.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #255 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #255.0 @@ -782,12 +756,6 @@ // CHECK-ERROR: fcmge v3.8b, v8.2s, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.2d, v15.2d, #15.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmle v17.2d, v15.2d, #15 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmle v17.2d, v15.2d, #15.0 @@ -812,12 +780,6 @@ // CHECK-ERROR: fcmgt v4.4s, v7.4h, #0.0 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #16.0 -// CHECK-AARCH64-ERROR: ^ -// CHECK-AARCH64-ERROR: error: only #0.0 is acceptable as immediate -// CHECK-AARCH64-ERROR: fcmlt v29.2d, v5.2d, #2 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: expected floating-point constant #0.0 // CHECK-ARM64-ERROR: fcmlt v29.2d, v5.2d, #16.0 @@ -1337,9 +1299,6 @@ shl v0.4s, v21.4s, #32 shl v0.2d, v1.2d, #64 -// CHECK-AARCH64-ERROR: error: expected comma before next operand -// CHECK-AARCH64-ERROR: shl v0.4s, v15,2s, #3 -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ARM64-ERROR: shl v0.4s, v15,2s, #3 @@ -2673,9 +2632,6 @@ pmull2 v0.4s, v1.8h v2.8h pmull2 v0.2d, v1.4s, v2.4s -// CHECK-AARCH64-ERROR: error: expected comma before next operand -// CHECK-AARCH64-ERROR: pmull2 v0.4s, v1.8h v2.8h -// CHECK-AARCH64-ERROR: ^ // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ARM64-ERROR: pmull2 v0.4s, v1.8h v2.8h @@ -3003,22 +2959,18 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mla v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mla v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3041,22 +2993,18 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2d, v1.2d, v16.d[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: mls v0.2h, v1.2h, v2.h[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mls v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3082,27 +3030,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3122,27 +3064,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v3.4s, v8.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmls v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3163,7 +3099,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3173,18 +3108,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3194,11 +3126,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3219,7 +3149,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3229,18 +3158,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3250,11 +3176,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3275,7 +3199,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3285,18 +3208,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal v0.2s, v1.2s, v2.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3306,11 +3226,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3331,7 +3249,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3341,18 +3258,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3362,11 +3276,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3387,7 +3299,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3397,18 +3308,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3418,11 +3326,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3443,7 +3349,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3453,18 +3358,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl v0.2s, v1.2s, v2.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.4h, v1.8h, v1.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.4s, v1.8h, v1.h[8] // CHECK-ERROR: ^ @@ -3474,11 +3376,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl2 v0.2s, v1.4s, v1.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v1.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3493,35 +3393,27 @@ mul v0.4s, v1.4s, v22.s[4] mul v0.2d, v1.2d, v2.d[1] -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: mul v0.4h, v1.4h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: invalid operand for instruction // CHECK-ERROR: mul v0.8h, v1.8h, v16.h[8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: mul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3540,27 +3432,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3576,27 +3462,21 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v2.d[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx v0.2d, v1.2d, v22.d[2] // CHECK-ERROR: ^ @@ -3617,7 +3497,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3627,18 +3506,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3648,11 +3524,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: smull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: smull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3673,7 +3547,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3683,18 +3556,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3704,11 +3574,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: umull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: umull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3729,7 +3597,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.4h, v1.4h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.4s, v1.4h, v2.h[8] // CHECK-ERROR: ^ @@ -3739,18 +3606,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull v0.2s, v1.2s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull v0.2d, v1.2s, v22.s[4] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.4h, v1.8h, v2.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.4s, v1.8h, v2.h[8] // CHECK-ERROR: ^ @@ -3760,11 +3624,9 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull2 v0.2s, v1.4s, v2.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull2 v0.2d, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3779,33 +3641,27 @@ sqdmulh v0.4s, v1.4s, v22.s[4] sqdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -3823,33 +3679,27 @@ sqrdmulh v0.4s, v1.4s, v22.s[4] sqrdmulh v0.2d, v1.2d, v22.d[1] -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.4h, v1.4h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v2.h[8] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh v0.8h, v1.8h, v16.h[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.2s, v1.2s, v22.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v2.s[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh v0.4s, v1.4s, v22.s[4] // CHECK-ERROR: ^ @@ -4068,15 +3918,12 @@ ld1 {v4}, [x0] ld1 {v32.16b}, [x0] ld1 {v15.8h}, [x32] -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1 {v32.16b}, [x0] // CHECK-ERROR: ^ @@ -4091,14 +3938,12 @@ ld1 {v1.8h-v1.8h}, [x0] ld1 {v15.8h-v17.4h}, [x15] ld1 {v0.8b-v2.8b, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: '{' expected // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ERROR: ld1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ @@ -4108,7 +3953,6 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4121,18 +3965,14 @@ ld2 {v15.4h, v16.4h, v17.4h}, [x32] ld2 {v15.8h-v16.4h}, [x15] ld2 {v0.2d-v2.2d}, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ @@ -4145,19 +3985,15 @@ ld3 {v0.8b, v2.8b, v3.8b}, [x0] ld3 {v15.8h-v17.4h}, [x15] ld3 {v31.4s-v2.4s}, [sp] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4170,18 +4006,15 @@ ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] ld4 {v15.8h-v18.4h}, [x15] ld4 {v31.2s-v1.2s}, [x31] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ @@ -4193,15 +4026,12 @@ st1 {v4}, [x0] st1 {v32.16b}, [x0] st1 {v15.8h}, [x32] -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: st1 {x3}, [x2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v4}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: st1 {v32.16b}, [x0] // CHECK-ERROR: ^ @@ -4216,14 +4046,12 @@ st1 {v1.8h-v1.8h}, [x0] st1 {v15.8h-v17.4h}, [x15] st1 {v0.8b-v2.8b, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: '{' expected // CHECK-ARM64-ERROR: error: unexpected token in argument list // CHECK-ERROR: st1 v0.8b, v1.8b}, [x0] // CHECK-ERROR: ^ @@ -4233,7 +4061,6 @@ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st1 {v1.8h-v1.8h}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st1 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4246,18 +4073,15 @@ st2 {v15.4h, v16.4h, v17.4h}, [x30] st2 {v15.8h-v16.4h}, [x15] st2 {v0.2d-v2.2d}, [x0] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v15.4h, v16.4h, v17.4h}, [x30] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h-v16.4h}, [x15] // CHECK-ERROR: ^ @@ -4270,19 +4094,15 @@ st3 {v0.8b, v2.8b, v3.8b}, [x0] st3 {v15.8h-v17.4h}, [x15] st3 {v31.4s-v2.4s}, [sp] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h, v16.8h, v17.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v15.8h-v17.4h}, [x15] // CHECK-ERROR: ^ @@ -4295,18 +4115,15 @@ st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] st4 {v15.8h-v18.4h}, [x15] st4 {v31.2s-v1.2s}, [x31] -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: registers must be sequential // CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors // CHECK-ERROR: st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h-v18.4h}, [x15] // CHECK-ERROR: ^ @@ -4324,7 +4141,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: ld1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ @@ -4341,7 +4157,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4352,7 +4167,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v0.16b}, [x0], #8 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: invalid vector kind qualifier // CHECK-ERROR: st1 {v0.8h, v1.16h}, [x0], x1 // CHECK-ERROR: ^ @@ -4369,7 +4183,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st3 {v5.2s, v6.2s, v7.2s}, [x1], #48 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v31.2d, v0.2d, v1.2d, v2.1d}, [x3], x1 // CHECK-ERROR: ^ @@ -4382,18 +4195,15 @@ ld2r {v31.4s, v0.2s}, [sp] ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] -// CHECK-AARCH64-ERROR: error: expected vector type register // CHECK-ARM64-ERROR: error: vector register expected // CHECK-ERROR: ld1r {x1}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2r {v31.4s, v0.2s}, [sp] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld3r {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: invalid space between two vectors // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4r {v31.2s, v0.2s, v1.2d, v2.2s}, [sp] // CHECK-ERROR: ^ @@ -4406,19 +4216,15 @@ ld2 {v15.h, v16.h}[8], [x15] ld3 {v31.s, v0.s, v1.s}[-1], [sp] ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-AARCH64-ERROR:: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld1 {v0.b}[16], [x0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld2 {v15.h, v16.h}[8], [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected lane number // CHECK-ARM64-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: ld3 {v31.s, v0.s, v1.s}[-1], [sp] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ld4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4427,18 +4233,15 @@ st2 {v31.s, v0.s}[3], [8] st3 {v15.h, v16.h, v17.h}[-1], [x15] st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] -// CHECK-AARCH64-ERROR:: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: st1 {v0.d}[16], [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v31.s, v0.s}[3], [8] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected lane number // CHECK-ARM64-ERROR: error: vector lane must be an integer in range // CHECK-ERROR: st3 {v15.h, v16.h, v17.h}[-1], [x15] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: st4 {v0.d, v1.d, v2.d, v3.d}[2], [x0] // CHECK-ERROR: ^ @@ -4478,7 +4281,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld2 {v15.h, v16.h}[0], [x15], #3 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected the same vector layout // CHECK-ARM64-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v31.s, v0.s, v1.d}[0], [sp], x9 // CHECK-ERROR: ^ @@ -4513,19 +4315,15 @@ ins v20.s[1], s30 ins v1.d[0], d7 -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v2.b[16], w1 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v7.h[8], w14 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v20.s[5], w30 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: ins v1.d[2], x7 // CHECK-ERROR: ^ @@ -4553,23 +4351,18 @@ smov x14, v6.d[1] smov x20, v9.d[0] -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x1, v0.b[16] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x14, v6.h[8] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR smov x20, v9.s[5] // CHECK-ERROR ^ @@ -4597,19 +4390,15 @@ umov s20, v9.s[2] umov d7, v18.d[1] -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w1, v0.b[16] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w14, v6.h[8] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov w20, v9.s[5] // CHECK-ERROR ^ -// CHECK-AARCH64-ERROR error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR umov x7, v18.d[3] // CHECK-ERROR ^ @@ -5026,7 +4815,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s17, h27, s12 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: too few operands for instruction // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal d19, s24, d12 // CHECK-ERROR: ^ @@ -5041,7 +4829,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl s14, h12, s25 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: too few operands for instruction // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d12, s23, d13 // CHECK-ERROR: ^ @@ -5056,7 +4843,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s12, h22, s12 // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: too few operands for instruction // CHECK-ARM64-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull d15, s22, d12 // CHECK-ERROR: ^ @@ -7099,7 +6885,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmul h0, h1, v1.s[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmul s2, s29, v10.s[4] // CHECK-ERROR: ^ @@ -7119,7 +6904,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmulx h0, h1, v1.d[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmulx d2, d29, v10.d[3] // CHECK-ERROR: ^ @@ -7139,7 +6923,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmla d30, s11, v1.d[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: fmla s16, s22, v16.s[5] // CHECK-ERROR: ^ @@ -7159,7 +6942,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: fmls h7, h17, v26.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: expected lane number // CHECK-ARM64-ERROR: error: vector lane must be an integer in range [0, 1] // CHECK-ERROR: fmls d16, d22, v16.d[-1] // CHECK-ERROR: ^ @@ -7182,7 +6964,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlal s8, s9, v14.s[1] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlal d4, s5, v1.s[5] // CHECK-ERROR: ^ @@ -7208,7 +6989,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmlsl d1, h1, v13.s[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmlsl d1, s1, v13.s[4] // CHECK-ERROR: ^ @@ -7236,7 +7016,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmull s1, s1, v4.s[0] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmull s12, h17, v9.h[9] // CHECK-ERROR: ^ @@ -7262,7 +7041,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqdmulh s25, s26, v27.h[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqdmulh s25, s26, v27.s[4] // CHECK-ERROR: ^ @@ -7288,7 +7066,6 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: sqrdmulh s5, h6, v7.s[2] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: sqrdmulh h31, h30, v14.h[9] // CHECK-ERROR: ^ @@ -7321,19 +7098,15 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: dup d0, v17.s[3] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup d0, v17.d[4] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup s0, v1.s[7] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup h0, v31.h[16] // CHECK-ERROR: ^ -// CHECK-AARCH64-ERROR: error: lane number incompatible with layout // CHECK-ARM64-ERROR: vector lane must be an integer in range // CHECK-ERROR: dup b1, v3.b[16] // CHECK-ERROR: ^ diff --git a/test/MC/AArch64/neon-extract.s b/test/MC/AArch64/neon-extract.s index fbfc048de9..1daa46d096 100644 --- a/test/MC/AArch64/neon-extract.s +++ b/test/MC/AArch64/neon-extract.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s index bb739fa185..799b85ff42 100644 --- a/test/MC/AArch64/neon-facge-facgt.s +++ b/test/MC/AArch64/neon-facge-facgt.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s index ec3b64bfa5..56bc47154a 100644 --- a/test/MC/AArch64/neon-frsqrt-frecp.s +++ b/test/MC/AArch64/neon-frsqrt-frecp.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-halving-add-sub.s b/test/MC/AArch64/neon-halving-add-sub.s index 8e36b20386..19b56ced3e 100644 --- a/test/MC/AArch64/neon-halving-add-sub.s +++ b/test/MC/AArch64/neon-halving-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s index 4421be4ed0..e48f975358 100644 --- a/test/MC/AArch64/neon-max-min-pairwise.s +++ b/test/MC/AArch64/neon-max-min-pairwise.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s index 3700f75539..8cc4ac86e6 100644 --- a/test/MC/AArch64/neon-max-min.s +++ b/test/MC/AArch64/neon-max-min.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s index b82706862e..5c8b7d8788 100644 --- a/test/MC/AArch64/neon-mla-mls-instructions.s +++ b/test/MC/AArch64/neon-mla-mls-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mov.s b/test/MC/AArch64/neon-mov.s index 8c420f1c01..6231ffe49c 100644 --- a/test/MC/AArch64/neon-mov.s +++ b/test/MC/AArch64/neon-mov.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-mul-div-instructions.s b/test/MC/AArch64/neon-mul-div-instructions.s index 6a39ad8e2e..2601d50f13 100644 --- a/test/MC/AArch64/neon-mul-div-instructions.s +++ b/test/MC/AArch64/neon-mul-div-instructions.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-perm.s b/test/MC/AArch64/neon-perm.s index 641415ee1e..4b28dd01db 100644 --- a/test/MC/AArch64/neon-perm.s +++ b/test/MC/AArch64/neon-perm.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-halving-add.s b/test/MC/AArch64/neon-rounding-halving-add.s index 7e81b1a65c..55c9f921da 100644 --- a/test/MC/AArch64/neon-rounding-halving-add.s +++ b/test/MC/AArch64/neon-rounding-halving-add.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-rounding-shift.s b/test/MC/AArch64/neon-rounding-shift.s index 5f72bafea4..38924e7c4b 100644 --- a/test/MC/AArch64/neon-rounding-shift.s +++ b/test/MC/AArch64/neon-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-add-sub.s b/test/MC/AArch64/neon-saturating-add-sub.s index 1d2916a48d..d39997901f 100644 --- a/test/MC/AArch64/neon-saturating-add-sub.s +++ b/test/MC/AArch64/neon-saturating-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-rounding-shift.s b/test/MC/AArch64/neon-saturating-rounding-shift.s index bc5c1c0a21..702b9d2c60 100644 --- a/test/MC/AArch64/neon-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-saturating-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-saturating-shift.s b/test/MC/AArch64/neon-saturating-shift.s index d35e1f3d0f..d03172b178 100644 --- a/test/MC/AArch64/neon-saturating-shift.s +++ b/test/MC/AArch64/neon-saturating-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s index c529cfc752..897c93506e 100644 --- a/test/MC/AArch64/neon-scalar-abs.s +++ b/test/MC/AArch64/neon-scalar-abs.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-add-sub.s b/test/MC/AArch64/neon-scalar-add-sub.s index fea1fc8ee8..955c30716b 100644 --- a/test/MC/AArch64/neon-scalar-add-sub.s +++ b/test/MC/AArch64/neon-scalar-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s index 7d5c6d04fd..d4f3682dc2 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s index 78c51594d1..d22aa9b15b 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s index 007568cceb..dadb8db993 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mla.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- diff --git a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s index 727bc670e1..90eeb5e64c 100644 --- a/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s +++ b/test/MC/AArch64/neon-scalar-by-elem-saturating-mul.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //----------------------------------------------------------------------------- diff --git a/test/MC/AArch64/neon-scalar-compare.s b/test/MC/AArch64/neon-scalar-compare.s index 1cd04fd111..16ba92e079 100644 --- a/test/MC/AArch64/neon-scalar-compare.s +++ b/test/MC/AArch64/neon-scalar-compare.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s index dc8e3165b6..047495276f 100644 --- a/test/MC/AArch64/neon-scalar-cvt.s +++ b/test/MC/AArch64/neon-scalar-cvt.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s index 81bdb7c4f8..ba4f3c2ad7 100644 --- a/test/MC/AArch64/neon-scalar-dup.s +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-extract-narrow.s b/test/MC/AArch64/neon-scalar-extract-narrow.s index 7e4ff85de7..e6167930d1 100644 --- a/test/MC/AArch64/neon-scalar-extract-narrow.s +++ b/test/MC/AArch64/neon-scalar-extract-narrow.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s index 054f923322..cb9e7a7a66 100644 --- a/test/MC/AArch64/neon-scalar-fp-compare.s +++ b/test/MC/AArch64/neon-scalar-fp-compare.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s index 968793fea8..21be537cbb 100644 --- a/test/MC/AArch64/neon-scalar-mul.s +++ b/test/MC/AArch64/neon-scalar-mul.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-neg.s b/test/MC/AArch64/neon-scalar-neg.s index ac61f9b78a..e902c2307a 100644 --- a/test/MC/AArch64/neon-scalar-neg.s +++ b/test/MC/AArch64/neon-scalar-neg.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s index 9dc6d069cd..dde26b557b 100644 --- a/test/MC/AArch64/neon-scalar-recip.s +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s index bf5eb5304b..cb7564ac68 100644 --- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s +++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //---------------------------------------------------------------------- diff --git a/test/MC/AArch64/neon-scalar-rounding-shift.s b/test/MC/AArch64/neon-scalar-rounding-shift.s index 2d65495891..2594c2f2ac 100644 --- a/test/MC/AArch64/neon-scalar-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s diff --git a/test/MC/AArch64/neon-scalar-saturating-add-sub.s b/test/MC/AArch64/neon-scalar-saturating-add-sub.s index 3cdfd6204d..d5cd838a92 100644 --- a/test/MC/AArch64/neon-scalar-saturating-add-sub.s +++ b/test/MC/AArch64/neon-scalar-saturating-add-sub.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s index 17bf222661..83bd59f50c 100644 --- a/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-rounding-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-saturating-shift.s b/test/MC/AArch64/neon-scalar-saturating-shift.s index 3eddabd616..679f1f4052 100644 --- a/test/MC/AArch64/neon-scalar-saturating-shift.s +++ b/test/MC/AArch64/neon-scalar-saturating-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-scalar-shift-imm.s b/test/MC/AArch64/neon-scalar-shift-imm.s index a0847d207a..47a8dec212 100644 --- a/test/MC/AArch64/neon-scalar-shift-imm.s +++ b/test/MC/AArch64/neon-scalar-shift-imm.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-scalar-shift.s b/test/MC/AArch64/neon-scalar-shift.s index 54b42f5eab..98aa51a63d 100644 --- a/test/MC/AArch64/neon-scalar-shift.s +++ b/test/MC/AArch64/neon-scalar-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s //------------------------------------------------------------------------------ diff --git a/test/MC/AArch64/neon-shift-left-long.s b/test/MC/AArch64/neon-shift-left-long.s index 679af09ea4..8720468310 100644 --- a/test/MC/AArch64/neon-shift-left-long.s +++ b/test/MC/AArch64/neon-shift-left-long.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-shift.s b/test/MC/AArch64/neon-shift.s index d5b730c070..dcff992a78 100644 --- a/test/MC/AArch64/neon-shift.s +++ b/test/MC/AArch64/neon-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-copy.s b/test/MC/AArch64/neon-simd-copy.s index dc8b060b35..917f7cb524 100644 --- a/test/MC/AArch64/neon-simd-copy.s +++ b/test/MC/AArch64/neon-simd-copy.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-ldst-multi-elem.s index 85e7c28e39..b8b3e72ff7 100644 --- a/test/MC/AArch64/neon-simd-ldst-multi-elem.s +++ b/test/MC/AArch64/neon-simd-ldst-multi-elem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-ldst-one-elem.s b/test/MC/AArch64/neon-simd-ldst-one-elem.s index 63b7bca398..4febf6d8fe 100644 --- a/test/MC/AArch64/neon-simd-ldst-one-elem.s +++ b/test/MC/AArch64/neon-simd-ldst-one-elem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s index 4486dddce4..6d1aafdd77 100644 --- a/test/MC/AArch64/neon-simd-misc.s +++ b/test/MC/AArch64/neon-simd-misc.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s index b8cc266cfc..c57a122f35 100644 --- a/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s +++ b/test/MC/AArch64/neon-simd-post-ldst-multi-elem.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s index 46a75009dc..1c1ad7489d 100644 --- a/test/MC/AArch64/neon-simd-shift.s +++ b/test/MC/AArch64/neon-simd-shift.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-sxtl.s b/test/MC/AArch64/neon-sxtl.s index 2efdb4dcbb..363796ee33 100644 --- a/test/MC/AArch64/neon-sxtl.s +++ b/test/MC/AArch64/neon-sxtl.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-tbl.s b/test/MC/AArch64/neon-tbl.s index e8d77c75c3..bb39fa9f22 100644 --- a/test/MC/AArch64/neon-tbl.s +++ b/test/MC/AArch64/neon-tbl.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/neon-uxtl.s b/test/MC/AArch64/neon-uxtl.s index 502166b281..46c56625c0 100644 --- a/test/MC/AArch64/neon-uxtl.s +++ b/test/MC/AArch64/neon-uxtl.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s // Check that the assembler can handle the documented syntax for AArch64 diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s index 3c953e3764..470a74d5b3 100644 --- a/test/MC/AArch64/noneon-diagnostics.s +++ b/test/MC/AArch64/noneon-diagnostics.s @@ -1,6 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-neon < %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s - // RUN: not llvm-mc -triple arm64-none-linux-gnu -mattr=-neon < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ARM64-ERROR < %t %s diff --git a/test/MC/AArch64/optional-hash.s b/test/MC/AArch64/optional-hash.s index a332cb0912..7ae1aa4904 100644 --- a/test/MC/AArch64/optional-hash.s +++ b/test/MC/AArch64/optional-hash.s @@ -1,7 +1,4 @@ // PR18929 -// RUN: llvm-mc < %s -triple=aarch64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ -// RUN: | llvm-objdump --disassemble -arch=aarch64 -mattr=+fp-armv8,+neon - | FileCheck %s - // RUN: llvm-mc < %s -triple=arm64-linux-gnueabi -mattr=+fp-armv8,+neon -filetype=obj -o - \ // RUN: | llvm-objdump --disassemble -arch=arm64 -mattr=+fp-armv8,+neon - | FileCheck %s diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s index 5b2e988759..ae7b20cefd 100644 --- a/test/MC/AArch64/tls-relocs.s +++ b/test/MC/AArch64/tls-relocs.s @@ -1,7 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=CHECK-AARCH64 -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s -o - | \ -// RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s - // RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=CHECK-ARM64 // RUN: llvm-mc -triple=arm64-none-linux-gnu -filetype=obj < %s -o - | \ // RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s @@ -11,14 +7,6 @@ movn x2, #:dtprel_g2:var movz x3, #:dtprel_g2:var movn x4, #:dtprel_g2:var -// CHECK-AARCH64: movz x1, #:dtprel_g2:var // encoding: [0x01'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 -// CHECK-AARCH64: movn x2, #:dtprel_g2:var // encoding: [0x02'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 -// CHECK-AARCH64: movz x3, #:dtprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 -// CHECK-AARCH64: movn x4, #:dtprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2 // CHECK-ARM64: movz x1, #:dtprel_g2:var // encoding: [0bAAA00001,A,0b110AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_arm64_movw @@ -41,14 +29,6 @@ movn x6, #:dtprel_g1:var movz w7, #:dtprel_g1:var movn w8, #:dtprel_g1:var -// CHECK-AARCH64: movz x5, #:dtprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 -// CHECK-AARCH64: movn x6, #:dtprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 -// CHECK-AARCH64: movz w7, #:dtprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 -// CHECK-AARCH64: movn w8, #:dtprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1 // CHECK-ARM64: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_arm64_movw @@ -67,10 +47,6 @@ movk x9, #:dtprel_g1_nc:var movk w10, #:dtprel_g1_nc:var -// CHECK-AARCH64: movk x9, #:dtprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc -// CHECK-AARCH64: movk w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc // CHECK-ARM64: movk x9, #:dtprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_arm64_movw @@ -85,13 +61,6 @@ movn x12, #:dtprel_g0:var movz w13, #:dtprel_g0:var movn w14, #:dtprel_g0:var -// CHECK-AARCH64: movz x11, #:dtprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0 -// CHECK-AARCH64: movn x12, #:dtprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0 -// CHECK-AARCH64: movz w13, #:dtprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0 -// CHECK-AARCH64: movn w14, #:dtprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A'] // CHECK-ARM64: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_arm64_movw @@ -110,10 +79,6 @@ movk x15, #:dtprel_g0_nc:var movk w16, #:dtprel_g0_nc:var -// CHECK-AARCH64: movk x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc -// CHECK-AARCH64: movk w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc // CHECK-ARM64: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_arm64_movw @@ -126,10 +91,6 @@ add x17, x18, #:dtprel_hi12:var, lsl #12 add w19, w20, #:dtprel_hi12:var, lsl #12 -// CHECK-AARCH64: add x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12 -// CHECK-AARCH64: add w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12 // CHECK-ARM64: add x17, x18, :dtprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_arm64_add_imm12 @@ -142,10 +103,6 @@ add x21, x22, #:dtprel_lo12:var add w23, w24, #:dtprel_lo12:var -// CHECK-AARCH64: add x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12 -// CHECK-AARCH64: add w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12 // CHECK-ARM64: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_add_imm12 @@ -158,10 +115,6 @@ add x25, x26, #:dtprel_lo12_nc:var add w27, w28, #:dtprel_lo12_nc:var -// CHECK-AARCH64: add x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc -// CHECK-AARCH64: add w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc // CHECK-ARM64: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_arm64_add_imm12 @@ -174,10 +127,6 @@ ldrb w29, [x30, #:dtprel_lo12:var] ldrsb x29, [x28, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: ldrb w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12 -// CHECK-AARCH64: ldrsb x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc // CHECK-ARM64: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 @@ -190,10 +139,6 @@ strh w27, [x26, #:dtprel_lo12:var] ldrsh x25, [x24, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: strh w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12 -// CHECK-AARCH64: ldrsh x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_n // CHECK-ARM64: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 @@ -206,10 +151,6 @@ ldr w23, [x22, #:dtprel_lo12:var] ldrsw x21, [x20, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: ldr w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12 -// CHECK-AARCH64: ldrsw x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_n // CHECK-ARM64: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 @@ -222,10 +163,6 @@ ldr x19, [x18, #:dtprel_lo12:var] str x17, [x16, #:dtprel_lo12_nc:var] -// CHECK-AARCH64: ldr x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12 -// CHECK-AARCH64: str x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc // CHECK-ARM64: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] // CHECK-ARM64: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 @@ -239,10 +176,6 @@ // TLS initial-exec forms movz x15, #:gottprel_g1:var movz w14, #:gottprel_g1:var -// CHECK-AARCH64: movz x15, #:gottprel_g1:var // encoding: [0x0f'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1 -// CHECK-AARCH64: movz w14, #:gottprel_g1:var // encoding: [0x0e'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1 // CHECK-ARM64: movz x15, #:gottprel_g1:var // encoding: [0bAAA01111,A,0b101AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_arm64_movw @@ -255,10 +188,6 @@ movk x13, #:gottprel_g0_nc:var movk w12, #:gottprel_g0_nc:var -// CHECK-AARCH64: movk x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc -// CHECK-AARCH64: movk w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc // CHECK-ARM64: movk x13, #:gottprel_g0_nc:var // encoding: [0bAAA01101,A,0b100AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_arm64_movw @@ -272,12 +201,6 @@ adrp x11, :gottprel:var ldr x10, [x0, #:gottprel_lo12:var] ldr x9, :gottprel:var -// CHECK-AARCH64: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page -// CHECK-AARCH64: ldr x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc -// CHECK-AARCH64: ldr x9, :gottprel:var // encoding: [0x09'A',A,A,0x58'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19 // CHECK-ARM64: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] // CHECK-ARM64: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_arm64_pcrel_adrp_imm21 @@ -294,10 +217,6 @@ // TLS local-exec forms movz x3, #:tprel_g2:var movn x4, #:tprel_g2:var -// CHECK-AARCH64: movz x3, #:tprel_g2:var // encoding: [0x03'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2 -// CHECK-AARCH64: movn x4, #:tprel_g2:var // encoding: [0x04'A',A,0xc0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2 // CHECK-ARM64: movz x3, #:tprel_g2:var // encoding: [0bAAA00011,A,0b110AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_arm64_movw @@ -312,14 +231,6 @@ movn x6, #:tprel_g1:var movz w7, #:tprel_g1:var movn w8, #:tprel_g1:var -// CHECK-AARCH64: movz x5, #:tprel_g1:var // encoding: [0x05'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 -// CHECK-AARCH64: movn x6, #:tprel_g1:var // encoding: [0x06'A',A,0xa0'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 -// CHECK-AARCH64: movz w7, #:tprel_g1:var // encoding: [0x07'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 -// CHECK-AARCH64: movn w8, #:tprel_g1:var // encoding: [0x08'A',A,0xa0'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1 // CHECK-ARM64: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_arm64_movw @@ -338,10 +249,6 @@ movk x9, #:tprel_g1_nc:var movk w10, #:tprel_g1_nc:var -// CHECK-AARCH64: movk x9, #:tprel_g1_nc:var // encoding: [0x09'A',A,0xa0'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc -// CHECK-AARCH64: movk w10, #:tprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc // CHECK-ARM64: movk x9, #:tprel_g1_nc:var // encoding: [0bAAA01001,A,0b101AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_arm64_movw @@ -356,14 +263,6 @@ movn x12, #:tprel_g0:var movz w13, #:tprel_g0:var movn w14, #:tprel_g0:var -// CHECK-AARCH64: movz x11, #:tprel_g0:var // encoding: [0x0b'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 -// CHECK-AARCH64: movn x12, #:tprel_g0:var // encoding: [0x0c'A',A,0x80'A',0x92'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 -// CHECK-AARCH64: movz w13, #:tprel_g0:var // encoding: [0x0d'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 -// CHECK-AARCH64: movn w14, #:tprel_g0:var // encoding: [0x0e'A',A,0x80'A',0x12'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0 // CHECK-ARM64: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_arm64_movw @@ -382,10 +281,6 @@ movk x15, #:tprel_g0_nc:var movk w16, #:tprel_g0_nc:var -// CHECK-AARCH64: movk x15, #:tprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc -// CHECK-AARCH64: movk w16, #:tprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc // CHECK-ARM64: movk x15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_arm64_movw @@ -398,10 +293,6 @@ add x17, x18, #:tprel_hi12:var, lsl #12 add w19, w20, #:tprel_hi12:var, lsl #12 -// CHECK-AARCH64: add x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12 -// CHECK-AARCH64: add w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12 // CHECK-ARM64: add x17, x18, :tprel_hi12:var, lsl #12 // encoding: [0x51,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_arm64_add_imm12 @@ -414,10 +305,6 @@ add x21, x22, #:tprel_lo12:var add w23, w24, #:tprel_lo12:var -// CHECK-AARCH64: add x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12 -// CHECK-AARCH64: add w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12 // CHECK-ARM64: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_add_imm12 @@ -430,10 +317,6 @@ add x25, x26, #:tprel_lo12_nc:var add w27, w28, #:tprel_lo12_nc:var -// CHECK-AARCH64: add x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc -// CHECK-AARCH64: add w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc // CHECK-ARM64: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_arm64_add_imm12 @@ -446,10 +329,6 @@ ldrb w29, [x30, #:tprel_lo12:var] ldrsb x29, [x28, #:tprel_lo12_nc:var] -// CHECK-AARCH64: ldrb w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12 -// CHECK-AARCH64: ldrsb x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc // CHECK-ARM64: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale1 @@ -462,10 +341,6 @@ strh w27, [x26, #:tprel_lo12:var] ldrsh x25, [x24, #:tprel_lo12_nc:var] -// CHECK-AARCH64: strh w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12 -// CHECK-AARCH64: ldrsh x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_n // CHECK-ARM64: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale2 @@ -478,10 +353,6 @@ ldr w23, [x22, #:tprel_lo12:var] ldrsw x21, [x20, #:tprel_lo12_nc:var] -// CHECK-AARCH64: ldr w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12 -// CHECK-AARCH64: ldrsw x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_n // CHECK-ARM64: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale4 @@ -493,10 +364,6 @@ ldr x19, [x18, #:tprel_lo12:var] str x17, [x16, #:tprel_lo12_nc:var] -// CHECK-AARCH64: ldr x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12 -// CHECK-AARCH64: str x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc // CHECK-ARM64: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] // CHECK-ARM64: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_arm64_ldst_imm12_scale8 @@ -513,15 +380,6 @@ .tlsdesccall var blr x3 -// CHECK-AARCH64: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page -// CHECK-AARCH64: ldr x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc -// CHECK-AARCH64: add x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A'] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc -// CHECK-AARCH64: .tlsdesccall var // encoding: [] -// CHECK-AARCH64-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call -// CHECK-AARCH64: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] // CHECK-ARM64: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] // CHECK-ARM64: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_arm64_pcrel_adrp_imm21 diff --git a/test/MC/AArch64/trace-regs-diagnostics.s b/test/MC/AArch64/trace-regs-diagnostics.s index 04f9d27735..fa57817dd3 100644 --- a/test/MC/AArch64/trace-regs-diagnostics.s +++ b/test/MC/AArch64/trace-regs-diagnostics.s @@ -1,4 +1,3 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s // RUN: not llvm-mc -triple arm64-none-linux-gnu < %s 2>&1 | FileCheck %s // Write-only mrs x12, trcoslar diff --git a/test/MC/AArch64/trace-regs.s b/test/MC/AArch64/trace-regs.s index b763e67c91..be25f08947 100644 --- a/test/MC/AArch64/trace-regs.s +++ b/test/MC/AArch64/trace-regs.s @@ -1,4 +1,3 @@ -// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s // RUN: llvm-mc -triple=arm64-none-linux-gnu -show-encoding < %s | FileCheck %s mrs x8, trcstatr diff --git a/test/MC/Disassembler/AArch64/lit.local.cfg b/test/MC/Disassembler/AArch64/lit.local.cfg index c6f83453ac..6536974148 100644 --- a/test/MC/Disassembler/AArch64/lit.local.cfg +++ b/test/MC/Disassembler/AArch64/lit.local.cfg @@ -1,4 +1,4 @@ targets = set(config.root.targets_to_build.split()) -if 'AArch64' not in targets or 'ARM64' not in targets: +if 'ARM64' not in targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg index a24a72819c..f1d1f88cf3 100644 --- a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg +++ b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg @@ -1,6 +1,6 @@ config.suffixes = ['.ll'] targets = set(config.root.targets_to_build.split()) -if not 'AArch64' in targets: +if not 'ARM64' in targets: config.unsupported = True |