summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/Target/AArch64/AArch64InstrNEON.td | 34
-rw-r--r-- test/CodeGen/AArch64/neon-scalar-ext.ll | 3
2 files changed, 23 insertions, 14 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 1180485b72..3056343abb 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -6233,23 +6233,21 @@ multiclass NeonI_ext<string prefix, SDNode ExtOp> {
(v8i16 (!cast<Instruction>(prefix # "_8B")
(v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
sub_16)>;
-
- // v1i8 -> v1i32
- def : Pat<(v1i32 (ExtOp (v1i8 FPR8:$Rn))),
- (EXTRACT_SUBREG
- (v4i32 (!cast<Instruction>(prefix # "_4H")
- (v4i16 (SUBREG_TO_REG (i64 0),
- (v1i16 (EXTRACT_SUBREG
- (v8i16 (!cast<Instruction>(prefix # "_8B")
- (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
- sub_16)),
- sub_16)), 0)),
- sub_32)>;
}
defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
+// zext v1i8 -> v1i32 (DUP lane 0 to a B reg, view as v1i64, take sub_32)
+def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
+ (v1i32 (EXTRACT_SUBREG
+ (v1i64 (SUBREG_TO_REG (i64 0),
+ (v1i8 (DUPbv_B
+ (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
+ 0)),
+ sub_8)),
+ sub_32))>;
+
// zext v1i8 -> v1i64
def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
(v1i64 (SUBREG_TO_REG (i64 0),
@@ -6266,6 +6264,18 @@ def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
0)),
sub_16))>;
+// sext v1i8 -> v1i32 (SSHLL by 0 twice: v8i8->v8i16, v4i16->v4i32; sub_32)
+def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
+ (EXTRACT_SUBREG
+ (v4i32 (SSHLLvvi_4H
+ (v4i16 (SUBREG_TO_REG (i64 0),
+ (v1i16 (EXTRACT_SUBREG
+ (v8i16 (SSHLLvvi_8B
+ (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
+ sub_16)),
+ sub_16)), 0)),
+ sub_32)>;
+
// sext v1i8 -> v1i64
def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
(EXTRACT_SUBREG
diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll
index 4e574237e8..51dea06f42 100644
--- a/test/CodeGen/AArch64/neon-scalar-ext.ll
+++ b/test/CodeGen/AArch64/neon-scalar-ext.ll
@@ -29,8 +29,7 @@ define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone {
define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_zext_v1i8_v1i32:
-; CHECK: ushll v0.8h, v0.8b, #0
-; CHECK: ushll v0.4s, v0.4h, #0
+; CHECK: dup b0, v0.b[0]
%1 = extractelement <8 x i8> %v, i32 0
%2 = insertelement <1 x i8> undef, i8 %1, i32 0
%3 = zext <1 x i8> %2 to <1 x i32>