diff options
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 34 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-scalar-ext.ll | 3 |
2 files changed, 23 insertions, 14 deletions
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 1180485b72..3056343abb 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -6233,23 +6233,21 @@ multiclass NeonI_ext<string prefix, SDNode ExtOp> { (v8i16 (!cast<Instruction>(prefix # "_8B") (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), sub_16)>; - - // v1i8 -> v1i32 - def : Pat<(v1i32 (ExtOp (v1i8 FPR8:$Rn))), - (EXTRACT_SUBREG - (v4i32 (!cast<Instruction>(prefix # "_4H") - (v4i16 (SUBREG_TO_REG (i64 0), - (v1i16 (EXTRACT_SUBREG - (v8i16 (!cast<Instruction>(prefix # "_8B") - (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), - sub_16)), - sub_16)), 0)), - sub_32)>; } defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>; defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>; +// zext v1i8 -> v1i32 +def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))), + (v1i32 (EXTRACT_SUBREG + (v1i64 (SUBREG_TO_REG (i64 0), + (v1i8 (DUPbv_B + (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), + 0)), + sub_8)), + sub_32))>; + // zext v1i8 -> v1i64 def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))), (v1i64 (SUBREG_TO_REG (i64 0), @@ -6266,6 +6264,18 @@ def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))), 0)), sub_16))>; +// sext v1i8 -> v1i32 +def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))), + (EXTRACT_SUBREG + (v4i32 (SSHLLvvi_4H + (v4i16 (SUBREG_TO_REG (i64 0), + (v1i16 (EXTRACT_SUBREG + (v8i16 (SSHLLvvi_8B + (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)), + sub_16)), + sub_16)), 0)), + sub_32)>; + // sext v1i8 -> v1i64 def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))), (EXTRACT_SUBREG diff --git a/test/CodeGen/AArch64/neon-scalar-ext.ll b/test/CodeGen/AArch64/neon-scalar-ext.ll index 4e574237e8..51dea06f42 100644 --- a/test/CodeGen/AArch64/neon-scalar-ext.ll +++ b/test/CodeGen/AArch64/neon-scalar-ext.ll @@ -29,8 +29,7 @@ define <1 x i16> @test_zext_v1i8_v1i16(<8 x i8> %v) nounwind readnone { define <1 x i32> @test_zext_v1i8_v1i32(<8 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_zext_v1i8_v1i32: -; CHECK: ushll v0.8h, v0.8b, #0 -; CHECK: ushll v0.4s, v0.4h, #0 +; CHECK: dup b0, v0.b[0] %1 = extractelement <8 x i8> %v, i32 0 %2 = insertelement <1 x i8> undef, i8 %1, i32 0 %3 = zext <1 x i8> %2 to <1 x i32> |