author    Tim Northover <tnorthover@apple.com>    2014-04-01 10:37:09 +0000
committer Tim Northover <tnorthover@apple.com>    2014-04-01 10:37:09 +0000
commit    acfb67961848bc2127da4a2b179c3ea762f4a9b2 (patch)
tree      b9a80a7f98b2da3cde0f2546315903caf4003963
parent    233c567ea9f3bfe1510b2566cdb2559a4668c400 (diff)
ARM64: add patterns for more lane-wise ld1/st1 operations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205294 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Target/ARM64/ARM64InstrFormats.td   26
-rw-r--r--  lib/Target/ARM64/ARM64InstrInfo.td     113
-rw-r--r--  test/CodeGen/ARM64/ld1.ll              140
-rw-r--r--  test/CodeGen/ARM64/st1.ll              128
4 files changed, 268 insertions(+), 139 deletions(-)
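
What the new patterns buy: before this commit, only the 128-bit integer vector cases (v16i8/v8i16/v4i32/v2i64) of insert/extract-with-memory selected to lane-wise ld1/st1; the Pat classes added below extend that to the floating-point element types and to the 64-bit ("D register") vectors. A minimal IR sketch, mirroring the ld1_4s_float test added below (the function name is illustrative):

; Inserting a scalar loaded from memory into a vector lane now matches
; the Ld1Lane128Pat instances added in ARM64InstrInfo.td and selects to
; "ld1.s { v0 }[0], [x0]" rather than a scalar FP load plus an INS.
define <4 x float> @ld1_lane_sketch(<4 x float> %v, float* %p) {
  %s = load float* %p          ; typed-pointer load syntax, as in these tests
  %r = insertelement <4 x float> %v, float %s, i32 0
  ret <4 x float> %r
}
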
diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td
index 440bf4f3a1..cf8c5037f6 100644
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ b/lib/Target/ARM64/ARM64InstrFormats.td
@@ -7971,8 +7971,7 @@ multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size,string asm,
}
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
- RegisterOperand listtype,
- RegisterOperand GPR64pi> {
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm,
(outs listtype:$dst),
(ins listtype:$Vt, VectorIndexD:$idx,
@@ -7985,12 +7984,10 @@ multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
- RegisterOperand listtype, list<dag> pattern,
- RegisterOperand GPR64pi> {
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
def i8 : SIMDLdStSingleB<0, R, opcode, asm,
(outs), (ins listtype:$Vt, VectorIndexB:$idx,
- am_simdnoindex:$vaddr),
- pattern>;
+ am_simdnoindex:$vaddr), []>;
def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm,
(outs), (ins listtype:$Vt, VectorIndexB:$idx,
@@ -7998,12 +7995,10 @@ multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
- RegisterOperand listtype, list<dag> pattern,
- RegisterOperand GPR64pi> {
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
def i16 : SIMDLdStSingleH<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexH:$idx,
- am_simdnoindex:$vaddr),
- pattern>;
+ am_simdnoindex:$vaddr), []>;
def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexH:$idx,
@@ -8011,12 +8006,10 @@ multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm,
- RegisterOperand listtype, list<dag> pattern,
- RegisterOperand GPR64pi> {
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
def i32 : SIMDLdStSingleS<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexS:$idx,
- am_simdnoindex:$vaddr),
- pattern>;
+ am_simdnoindex:$vaddr), []>;
def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexS:$idx,
@@ -8024,11 +8017,10 @@ multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm,
- RegisterOperand listtype, list<dag> pattern,
- RegisterOperand GPR64pi> {
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
def i64 : SIMDLdStSingleD<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexD:$idx,
- am_simdnoindex:$vaddr), pattern>;
+ am_simdnoindex:$vaddr), []>;
def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexD:$idx,
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index 9fc4e7a2ea..c9a714b8dd 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -4087,18 +4087,32 @@ def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
(LD1Rv1d am_simdnoindex:$vaddr)>;
-def : Pat<(vector_insert (v16i8 VecListOne128:$Rd),
- (i32 (extloadi8 am_simdnoindex:$vaddr)), VectorIndexB:$idx),
- (LD1i8 VecListOne128:$Rd, VectorIndexB:$idx, am_simdnoindex:$vaddr)>;
-def : Pat<(vector_insert (v8i16 VecListOne128:$Rd),
- (i32 (extloadi16 am_simdnoindex:$vaddr)), VectorIndexH:$idx),
- (LD1i16 VecListOne128:$Rd, VectorIndexH:$idx, am_simdnoindex:$vaddr)>;
-def : Pat<(vector_insert (v4i32 VecListOne128:$Rd),
- (i32 (load am_simdnoindex:$vaddr)), VectorIndexS:$idx),
- (LD1i32 VecListOne128:$Rd, VectorIndexS:$idx, am_simdnoindex:$vaddr)>;
-def : Pat<(vector_insert (v2i64 VecListOne128:$Rd),
- (i64 (load am_simdnoindex:$vaddr)), VectorIndexD:$idx),
- (LD1i64 VecListOne128:$Rd, VectorIndexD:$idx, am_simdnoindex:$vaddr)>;
+class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1>
+ : Pat<(vector_insert (VTy VecListOne128:$Rd),
+ (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
+ (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>;
+
+def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
+def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
+def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
+def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
+def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
+
+class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1>
+ : Pat<(vector_insert (VTy VecListOne64:$Rd),
+ (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
+ (EXTRACT_SUBREG
+ (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
+ VecIndex:$idx, am_simdnoindex:$vaddr),
+ dsub)>;
+
+def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
+def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
+def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
@@ -4107,38 +4121,53 @@ defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
// Stores
-let AddedComplexity = 8 in {
-defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb,
- [(truncstorei8
- (i32 (vector_extract (v16i8 VecListOneb:$Vt), VectorIndexB:$idx)),
- am_simdnoindex:$vaddr)], GPR64pi1>;
-defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh,
- [(truncstorei16
- (i32 (vector_extract (v8i16 VecListOneh:$Vt), VectorIndexH:$idx)),
- am_simdnoindex:$vaddr)], GPR64pi2>;
-defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes,
- [(store
- (i32 (vector_extract (v4i32 VecListOnes:$Vt), VectorIndexS:$idx)),
- am_simdnoindex:$vaddr)], GPR64pi4>;
-defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned,
- [(store
- (i64 (vector_extract (v2i64 VecListOned:$Vt), VectorIndexD:$idx)),
- am_simdnoindex:$vaddr)], GPR64pi8>;
-}
+defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
+defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
+defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
+defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
+
+let AddedComplexity = 8 in
+class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1>
+ : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ am_simdnoindex:$vaddr),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>;
+
+def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
+def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
+def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
+def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
+def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
+
+let AddedComplexity = 8 in
+class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1>
+ : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ am_simdnoindex:$vaddr),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, am_simdnoindex:$vaddr)>;
+
+def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
+def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
+def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
let mayStore = 1, neverHasSideEffects = 1 in {
-defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, [], GPR64pi2>;
-defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, [], GPR64pi4>;
-defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, [], GPR64pi8>;
-defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, [], GPR64pi16>;
-defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, [], GPR64pi3>;
-defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, [], GPR64pi6>;
-defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, [], GPR64pi12>;
-defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, [], GPR64pi24>;
-defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, [], GPR64pi4>;
-defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, [], GPR64pi8>;
-defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, [], GPR64pi16>;
-defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, [], GPR64pi32>;
+defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
+defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
+defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
+defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
+defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
+defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
+defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
+defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
+defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
+defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
+defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
+defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
}
defm ST1 : SIMDLdSt1SingleAliases<"st1">;
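
For reference, a hand expansion of one of the new 64-bit load patterns (a sketch, using only the definitions above): the instantiation "def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;" is equivalent to writing

// There is no D-register form of the lane-indexed LD1, so the 64-bit
// operand is widened to a Q register with SUBREG_TO_REG, the 128-bit
// LD1i32 performs the lane load, and EXTRACT_SUBREG on dsub narrows
// the result back to 64 bits.
def : Pat<(vector_insert (v2f32 VecListOne64:$Rd),
                         (f32 (load am_simdnoindex:$vaddr)), VectorIndexS:$idx),
          (EXTRACT_SUBREG
            (LD1i32 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                    VectorIndexS:$idx, am_simdnoindex:$vaddr),
            dsub)>;
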
diff --git a/test/CodeGen/ARM64/ld1.ll b/test/CodeGen/ARM64/ld1.ll
index f2fd55ce2c..61836a10a8 100644
--- a/test/CodeGen/ARM64/ld1.ll
+++ b/test/CodeGen/ARM64/ld1.ll
@@ -5,7 +5,7 @@
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
-; CHECK: ld2_8b
+; CHECK-LABEL: ld2_8b
; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
; and from the argument of the function also defined by ABI (i.e., x0)
; CHECK ld2.8b { v0, v1 }, [x0]
@@ -15,7 +15,7 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
}
define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
-; CHECK: ld3_8b
+; CHECK-LABEL: ld3_8b
; Make sure we are using the operands defined by the ABI
; CHECK ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -24,7 +24,7 @@ define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
}
define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
-; CHECK: ld4_8b
+; CHECK-LABEL: ld4_8b
; Make sure we are using the operands defined by the ABI
; CHECK ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -41,7 +41,7 @@ declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) nounwind r
%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
-; CHECK: ld2_16b
+; CHECK-LABEL: ld2_16b
; Make sure we are using the operands defined by the ABI
; CHECK ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -50,7 +50,7 @@ define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
}
define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
-; CHECK: ld3_16b
+; CHECK-LABEL: ld3_16b
; Make sure we are using the operands defined by the ABI
; CHECK ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -59,7 +59,7 @@ define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
}
define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
-; CHECK: ld4_16b
+; CHECK-LABEL: ld4_16b
; Make sure we are using the operands defined by the ABI
; CHECK ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -76,7 +76,7 @@ declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) nounwind
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
-; CHECK: ld2_4h
+; CHECK-LABEL: ld2_4h
; Make sure we are using the operands defined by the ABI
; CHECK ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -85,7 +85,7 @@ define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
}
define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
-; CHECK: ld3_4h
+; CHECK-LABEL: ld3_4h
; Make sure we are using the operands defined by the ABI
; CHECK ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -94,7 +94,7 @@ define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
}
define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
-; CHECK: ld4_4h
+; CHECK-LABEL: ld4_4h
; Make sure we are using the operands defined by the ABI
; CHECK ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -111,7 +111,7 @@ declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwi
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
-; CHECK: ld2_8h
+; CHECK-LABEL: ld2_8h
; Make sure we are using the operands defined by the ABI
; CHECK ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -120,7 +120,7 @@ define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
}
define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
-; CHECK: ld3_8h
+; CHECK-LABEL: ld3_8h
; Make sure we are using the operands defined by the ABI
; CHECK ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -129,7 +129,7 @@ define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
}
define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
-; CHECK: ld4_8h
+; CHECK-LABEL: ld4_8h
; Make sure we are using the operands defined by the ABI
; CHECK ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -146,7 +146,7 @@ declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwi
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
-; CHECK: ld2_2s
+; CHECK-LABEL: ld2_2s
; Make sure we are using the operands defined by the ABI
; CHECK ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -155,7 +155,7 @@ define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
}
define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
-; CHECK: ld3_2s
+; CHECK-LABEL: ld3_2s
; Make sure we are using the operands defined by the ABI
; CHECK ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -164,7 +164,7 @@ define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
}
define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
-; CHECK: ld4_2s
+; CHECK-LABEL: ld4_2s
; Make sure we are using the operands defined by the ABI
; CHECK ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -181,7 +181,7 @@ declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwi
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
-; CHECK: ld2_4s
+; CHECK-LABEL: ld2_4s
; Make sure we are using the operands defined by the ABI
; CHECK ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -190,7 +190,7 @@ define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
}
define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
-; CHECK: ld3_4s
+; CHECK-LABEL: ld3_4s
; Make sure we are using the operands defined by the ABI
; CHECK ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -199,7 +199,7 @@ define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
}
define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
-; CHECK: ld4_4s
+; CHECK-LABEL: ld4_4s
; Make sure we are using the operands defined by the ABI
; CHECK ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -216,7 +216,7 @@ declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwi
%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
-; CHECK: ld2_2d
+; CHECK-LABEL: ld2_2d
; Make sure we are using the operands defined by the ABI
; CHECK ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -225,7 +225,7 @@ define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
}
define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
-; CHECK: ld3_2d
+; CHECK-LABEL: ld3_2d
; Make sure we are using the operands defined by the ABI
; CHECK ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -234,7 +234,7 @@ define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
}
define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
-; CHECK: ld4_2d
+; CHECK-LABEL: ld4_2d
; Make sure we are using the operands defined by the ABI
; CHECK ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -252,7 +252,7 @@ declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwi
define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
-; CHECK: ld2_1di64
+; CHECK-LABEL: ld2_1di64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -261,7 +261,7 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
}
define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
-; CHECK: ld3_1di64
+; CHECK-LABEL: ld3_1di64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -270,7 +270,7 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
}
define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
-; CHECK: ld4_1di64
+; CHECK-LABEL: ld4_1di64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -289,7 +289,7 @@ declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwi
define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
-; CHECK: ld2_1df64
+; CHECK-LABEL: ld2_1df64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT ret
@@ -298,7 +298,7 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
}
define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
-; CHECK: ld3_1df64
+; CHECK-LABEL: ld3_1df64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@@ -307,7 +307,7 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
}
define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
-; CHECK: ld4_1df64
+; CHECK-LABEL: ld4_1df64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@@ -800,7 +800,7 @@ declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounw
declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
-; CHECK: ld1_16b
+; CHECK-LABEL: ld1_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT ret
@@ -810,7 +810,7 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
}
define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
-; CHECK: ld1_8h
+; CHECK-LABEL: ld1_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT ret
@@ -820,7 +820,7 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
}
define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
-; CHECK: ld1_4s
+; CHECK-LABEL: ld1_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT ret
@@ -829,8 +829,18 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
ret <4 x i32> %tmp2
}
+define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
+; CHECK-LABEL: ld1_4s_float:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT ret
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
+ ret <4 x float> %tmp2
+}
+
define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
-; CHECK: ld1_2d
+; CHECK-LABEL: ld1_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT ret
@@ -839,8 +849,18 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
ret <2 x i64> %tmp2
}
+define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
+; CHECK-LABEL: ld1_2d_double:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.d { v0 }[0], [x0]
+; CHECK-NEXT ret
+ %tmp1 = load double* %bar
+ %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
+ ret <2 x double> %tmp2
+}
+
define <1 x i64> @ld1_1d(<1 x i64>* %p) {
-; CHECK: ld1_1d
+; CHECK-LABEL: ld1_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ldr [[REG:d[0-9]+]], [x0]
; CHECK-NEXT: ret
@@ -848,6 +868,46 @@ define <1 x i64> @ld1_1d(<1 x i64>* %p) {
ret <1 x i64> %tmp
}
+define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
+; CHECK-LABEL: ld1_8b
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.b { v0 }[0], [x0]
+; CHECK-NEXT ret
+ %tmp1 = load i8* %bar
+ %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
+; CHECK-LABEL: ld1_4h
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.h { v0 }[0], [x0]
+; CHECK-NEXT ret
+ %tmp1 = load i16* %bar
+ %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
+; CHECK-LABEL: ld1_2s:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT ret
+ %tmp1 = load i32* %bar
+ %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
+; CHECK-LABEL: ld1_2s_float:
+; Make sure we are using the operands defined by the ABI
+; CHECK: ld1.s { v0 }[0], [x0]
+; CHECK-NEXT ret
+ %tmp1 = load float* %bar
+ %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
+ ret <2 x float> %tmp2
+}
+
; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
@@ -882,7 +942,7 @@ entry:
; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
define <4 x float> @ld1r_4s_float(float* nocapture %x) {
entry:
-; CHECK: ld1r_4s_float
+; CHECK-LABEL: ld1r_4s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT ret
@@ -896,7 +956,7 @@ entry:
define <2 x float> @ld1r_2s_float(float* nocapture %x) {
entry:
-; CHECK: ld1r_2s_float
+; CHECK-LABEL: ld1r_2s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT ret
@@ -908,7 +968,7 @@ entry:
define <2 x double> @ld1r_2d_double(double* nocapture %x) {
entry:
-; CHECK: ld1r_2d_double
+; CHECK-LABEL: ld1r_2d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT ret
@@ -920,7 +980,7 @@ entry:
define <1 x double> @ld1r_1d_double(double* nocapture %x) {
entry:
-; CHECK: ld1r_1d_double
+; CHECK-LABEL: ld1r_1d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT ret
@@ -931,7 +991,7 @@ entry:
define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
entry:
-; CHECK: ld1r_4s_float_shuff
+; CHECK-LABEL: ld1r_4s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT ret
@@ -943,7 +1003,7 @@ entry:
define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
entry:
-; CHECK: ld1r_2s_float_shuff
+; CHECK-LABEL: ld1r_2s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT ret
@@ -955,7 +1015,7 @@ entry:
define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
entry:
-; CHECK: ld1r_2d_double_shuff
+; CHECK-LABEL: ld1r_2d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT ret
@@ -967,7 +1027,7 @@ entry:
define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
entry:
-; CHECK: ld1r_1d_double_shuff
+; CHECK-LABEL: ld1r_1d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT ret
diff --git a/test/CodeGen/ARM64/st1.ll b/test/CodeGen/ARM64/st1.ll
index 3c0d3ecc04..b9aafc60e7 100644
--- a/test/CodeGen/ARM64/st1.ll
+++ b/test/CodeGen/ARM64/st1.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
define void @st1lane_16b(<16 x i8> %A, i8* %D) {
-; CHECK: st1lane_16b
+; CHECK-LABEL: st1lane_16b
; CHECK: st1.b
%tmp = extractelement <16 x i8> %A, i32 1
store i8 %tmp, i8* %D
@@ -9,7 +9,7 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) {
}
define void @st1lane_8h(<8 x i16> %A, i16* %D) {
-; CHECK: st1lane_8h
+; CHECK-LABEL: st1lane_8h
; CHECK: st1.h
%tmp = extractelement <8 x i16> %A, i32 1
store i16 %tmp, i16* %D
@@ -17,44 +17,92 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) {
}
define void @st1lane_4s(<4 x i32> %A, i32* %D) {
-; CHECK: st1lane_4s
+; CHECK-LABEL: st1lane_4s
; CHECK: st1.s
%tmp = extractelement <4 x i32> %A, i32 1
store i32 %tmp, i32* %D
ret void
}
+define void @st1lane_4s_float(<4 x float> %A, float* %D) {
+; CHECK-LABEL: st1lane_4s_float
+; CHECK: st1.s
+ %tmp = extractelement <4 x float> %A, i32 1
+ store float %tmp, float* %D
+ ret void
+}
+
define void @st1lane_2d(<2 x i64> %A, i64* %D) {
-; CHECK: st1lane_2d
+; CHECK-LABEL: st1lane_2d
; CHECK: st1.d
%tmp = extractelement <2 x i64> %A, i32 1
store i64 %tmp, i64* %D
ret void
}
+define void @st1lane_2d_double(<2 x double> %A, double* %D) {
+; CHECK-LABEL: st1lane_2d_double
+; CHECK: st1.d
+ %tmp = extractelement <2 x double> %A, i32 1
+ store double %tmp, double* %D
+ ret void
+}
+
+define void @st1lane_8b(<8 x i8> %A, i8* %D) {
+; CHECK-LABEL: st1lane_8b
+; CHECK: st1.b
+ %tmp = extractelement <8 x i8> %A, i32 1
+ store i8 %tmp, i8* %D
+ ret void
+}
+
+define void @st1lane_4h(<4 x i16> %A, i16* %D) {
+; CHECK-LABEL: st1lane_4h
+; CHECK: st1.h
+ %tmp = extractelement <4 x i16> %A, i32 1
+ store i16 %tmp, i16* %D
+ ret void
+}
+
+define void @st1lane_2s(<2 x i32> %A, i32* %D) {
+; CHECK-LABEL: st1lane_2s
+; CHECK: st1.s
+ %tmp = extractelement <2 x i32> %A, i32 1
+ store i32 %tmp, i32* %D
+ ret void
+}
+
+define void @st1lane_2s_float(<2 x float> %A, float* %D) {
+; CHECK-LABEL: st1lane_2s_float
+; CHECK: st1.s
+ %tmp = extractelement <2 x float> %A, i32 1
+ store float %tmp, float* %D
+ ret void
+}
+
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
-; CHECK: st2lane_16b
+; CHECK-LABEL: st2lane_16b
; CHECK: st2.b
call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
ret void
}
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
-; CHECK: st2lane_8h
+; CHECK-LABEL: st2lane_8h
; CHECK: st2.h
call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
ret void
}
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
-; CHECK: st2lane_4s
+; CHECK-LABEL: st2lane_4s
; CHECK: st2.s
call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
ret void
}
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
-; CHECK: st2lane_2d
+; CHECK-LABEL: st2lane_2d
; CHECK: st2.d
call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
ret void
@@ -66,28 +114,28 @@ declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32
declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
-; CHECK: st3lane_16b
+; CHECK-LABEL: st3lane_16b
; CHECK: st3.b
call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
ret void
}
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
-; CHECK: st3lane_8h
+; CHECK-LABEL: st3lane_8h
; CHECK: st3.h
call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
ret void
}
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
-; CHECK: st3lane_4s
+; CHECK-LABEL: st3lane_4s
; CHECK: st3.s
call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
ret void
}
define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
-; CHECK: st3lane_2d
+; CHECK-LABEL: st3lane_2d
; CHECK: st3.d
call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
ret void
@@ -99,28 +147,28 @@ declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32
declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
-; CHECK: st4lane_16b
+; CHECK-LABEL: st4lane_16b
; CHECK: st4.b
call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
ret void
}
define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
-; CHECK: st4lane_8h
+; CHECK-LABEL: st4lane_8h
; CHECK: st4.h
call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
ret void
}
define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
-; CHECK: st4lane_4s
+; CHECK-LABEL: st4lane_4s
; CHECK: st4.s
call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
ret void
}
define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
-; CHECK: st4lane_2d
+; CHECK-LABEL: st4lane_2d
; CHECK: st4.d
call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
ret void
@@ -133,21 +181,21 @@ declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64
define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
-; CHECK: st2_8b
+; CHECK-LABEL: st2_8b
; CHECK st2.8b
call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
ret void
}
define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
-; CHECK: st3_8b
+; CHECK-LABEL: st3_8b
; CHECK st3.8b
call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
ret void
}
define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
-; CHECK: st4_8b
+; CHECK-LABEL: st4_8b
; CHECK st4.8b
call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
ret void
@@ -158,21 +206,21 @@ declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) n
declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
-; CHECK: st2_16b
+; CHECK-LABEL: st2_16b
; CHECK st2.16b
call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
ret void
}
define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
-; CHECK: st3_16b
+; CHECK-LABEL: st3_16b
; CHECK st3.16b
call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
ret void
}
define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
-; CHECK: st4_16b
+; CHECK-LABEL: st4_16b
; CHECK st4.16b
call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
ret void
@@ -183,21 +231,21 @@ declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8
declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
-; CHECK: st2_4h
+; CHECK-LABEL: st2_4h
; CHECK st2.4h
call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
ret void
}
define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
-; CHECK: st3_4h
+; CHECK-LABEL: st3_4h
; CHECK st3.4h
call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
ret void
}
define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
-; CHECK: st4_4h
+; CHECK-LABEL: st4_4h
; CHECK st4.4h
call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
ret void
@@ -208,21 +256,21 @@ declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i
declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
-; CHECK: st2_8h
+; CHECK-LABEL: st2_8h
; CHECK st2.8h
call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
ret void
}
define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
-; CHECK: st3_8h
+; CHECK-LABEL: st3_8h
; CHECK st3.8h
call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
ret void
}
define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
-; CHECK: st4_8h
+; CHECK-LABEL: st4_8h
; CHECK st4.8h
call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
ret void
@@ -233,21 +281,21 @@ declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i
declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
-; CHECK: st2_2s
+; CHECK-LABEL: st2_2s
; CHECK st2.2s
call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
ret void
}
define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
-; CHECK: st3_2s
+; CHECK-LABEL: st3_2s
; CHECK st3.2s
call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
ret void
}
define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
-; CHECK: st4_2s
+; CHECK-LABEL: st4_2s
; CHECK st4.2s
call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
ret void
@@ -258,21 +306,21 @@ declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i
declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
-; CHECK: st2_4s
+; CHECK-LABEL: st2_4s
; CHECK st2.4s
call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
ret void
}
define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
-; CHECK: st3_4s
+; CHECK-LABEL: st3_4s
; CHECK st3.4s
call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
ret void
}
define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
-; CHECK: st4_4s
+; CHECK-LABEL: st4_4s
; CHECK st4.4s
call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
ret void
@@ -283,21 +331,21 @@ declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i
declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
-; CHECK: st2_1d
+; CHECK-LABEL: st2_1d
; CHECK st1.2d
call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
ret void
}
define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
-; CHECK: st3_1d
+; CHECK-LABEL: st3_1d
; CHECK st1.3d
call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
ret void
}
define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
-; CHECK: st4_1d
+; CHECK-LABEL: st4_1d
; CHECK st1.4d
call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
ret void
@@ -308,21 +356,21 @@ declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i
declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
-; CHECK: st2_2d
+; CHECK-LABEL: st2_2d
; CHECK st2.2d
call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
ret void
}
define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
-; CHECK: st3_2d
+; CHECK-LABEL: st3_2d
; CHECK st2.3d
call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
ret void
}
define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
-; CHECK: st4_2d
+; CHECK-LABEL: st4_2d
; CHECK st2.4d
call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
ret void