summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJyotsna Verma <jverma@codeaurora.org>2013-04-23 21:17:40 +0000
committerJyotsna Verma <jverma@codeaurora.org>2013-04-23 21:17:40 +0000
commit42ba77db537274c797d74ddfa80902e58901529a (patch)
treec9c21e1068e9c1ee910fabba2f590f44293b92fb
parent197c833ee142e9e7c78fa0b667f16cdb09e6f6b5 (diff)
downloadllvm-42ba77db537274c797d74ddfa80902e58901529a.tar.gz
llvm-42ba77db537274c797d74ddfa80902e58901529a.tar.bz2
llvm-42ba77db537274c797d74ddfa80902e58901529a.tar.xz
Hexagon: Use multiclass for combine and STri[bhwd]_shl_V4 instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180145 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td161
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td164
-rw-r--r--test/CodeGen/Hexagon/always-ext.ll45
3 files changed, 194 insertions, 176 deletions
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 6e2637b508..053f8c465c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -83,21 +83,30 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
//===----------------------------------------------------------------------===//
// ALU32/ALU (Instructions with register-register form)
//===----------------------------------------------------------------------===//
-multiclass ALU32_Pbase<string mnemonic, bit isNot,
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+ SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
+
+def HexagonWrapperCombineRR :
+ SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
let isPredicatedNew = isPredNew in
- def NAME : ALU32_rr<(outs IntRegs:$dst),
+ def NAME : ALU32_rr<(outs RC:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, $src3)",
[]>;
}
-multiclass ALU32_Pred<string mnemonic, bit PredNot> {
+multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
let isPredicatedFalse = PredNot in {
- defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
+ defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
- defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>;
}
}
@@ -112,8 +121,8 @@ multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
(i32 IntRegs:$src2)))]>;
let neverHasSideEffects = 1, isPredicated = 1 in {
- defm Pt : ALU32_Pred<mnemonic, 0>;
- defm NotPt : ALU32_Pred<mnemonic, 1>;
+ defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>;
+ defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>;
}
}
}
@@ -127,6 +136,37 @@ let isCommutable = 1 in {
defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+class T_Combine : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2))))]>;
+
+multiclass Combine_base {
+ let BaseOpcode = "combine" in {
+ def NAME : T_Combine;
+ let neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>;
+ defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>;
+ }
+ }
+}
+
+defm COMBINE_rr : Combine_base, PredNewRel;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+ ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
+
//===----------------------------------------------------------------------===//
// ALU32/ALU (ADD with register-immediate form)
//===----------------------------------------------------------------------===//
@@ -344,52 +384,6 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
// ALU32/PERM +
//===----------------------------------------------------------------------===//
-// Combine.
-
-def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-
-def HexagonWrapperCombineII :
- SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineRR :
- SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
-
-// Combines the two integer registers SRC1 and SRC2 into a double register.
-let isPredicable = 1 in
-def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
- IntRegs:$src2),
- "$dst = combine($src1, $src2)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
- (i32 IntRegs:$src2))))]>;
-
-// Rd=combine(Rt.[HL], Rs.[HL])
-class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1,
- IntRegs:$src2),
- "$dst = combine($src1."# A #", $src2."# B #")", []>;
-
-let isPredicable = 1 in {
- def COMBINE_hh : COMBINE_halves<"H", "H">;
- def COMBINE_hl : COMBINE_halves<"H", "L">;
- def COMBINE_lh : COMBINE_halves<"L", "H">;
- def COMBINE_ll : COMBINE_halves<"L", "L">;
-}
-
-def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
- (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
- (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
-
-// Combines the two immediates SRC1 and SRC2 into a double register.
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
- ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
- "$dst = combine(#$src1, #$src2)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
-
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
-def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
-
// Mux.
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
DoubleRegs:$src2,
@@ -492,32 +486,6 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
// ALU32/PRED +
//===----------------------------------------------------------------------===//
-// Conditional combine.
-let neverHasSideEffects = 1, isPredicated = 1 in {
-def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst = combine($src2, $src3)",
- []>;
-
-let isPredicatedFalse = 1 in
-def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst = combine($src2, $src3)",
- []>;
-
-let isPredicatedNew = 1 in
-def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst = combine($src2, $src3)",
- []>;
-
-let isPredicatedNew = 1, isPredicatedFalse = 1 in
-def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst = combine($src2, $src3)",
- []>;
-}
-
// Compare.
defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
@@ -2162,6 +2130,13 @@ def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
(i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
+ (i64 (COMBINE_rr (TFRI 0),
+ (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>,
+ Requires<[NoV4T]>;
+
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
@@ -2492,6 +2467,13 @@ def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
+let AddedComplexity = 100 in
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 10 in
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>;
@@ -2508,6 +2490,27 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
(i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
+let AddedComplexity = 100 in
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zextloadi32 (i32 (add IntRegs:$src2,
+ s11_2ExtPred:$offset2)))))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (LDriw_indexed IntRegs:$src2,
+ s11_2ExtPred:$offset2)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (LDriw ADDRriS11_2:$srcLow)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zext (i32 IntRegs:$srcLow))))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ IntRegs:$srcLow))>;
+
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 6c3dfd9052..a77606be75 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -588,17 +588,59 @@ def : Pat<(store (i64 DoubleRegs:$src4),
u2ImmPred:$src3, DoubleRegs:$src4)>;
}
-// memd(Ru<<#u2+#U6)=Rtt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
-validSubTargets = HasV4SubT in
-def STrid_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
- "memd($src1<<#$src2+#$src3) = $src4",
- [(store (i64 DoubleRegs:$src4),
+let isExtended = 1, opExtendable = 2 in
+class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> :
+ STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4),
+ mnemonic#"($src1<<#$src2+##$src3) = $src4",
+ [(stOp (VT RC:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
+let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in
+class T_ST_LongOff_nv <string mnemonic> :
+ NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ mnemonic#"($src1<<#$src2+##$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> {
+ let BaseOpcode = BaseOp#"_shl" in {
+ let isNVStorable = 1 in
+ def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>;
+
+ def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>;
+ }
+}
+
+let AddedComplexity = 10, validSubTargets = HasV4SubT in {
+ def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>;
+ defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel;
+ defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel;
+ defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel;
+}
+
+let AddedComplexity = 40 in
+multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+ (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1,
+ (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+ (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
+
+defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>;
+defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>;
+defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>;
+defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
+
// memd(Rx++#s4:3)=Rtt
// memd(Rx++#s4:3:circ(Mu))=Rtt
// memd(Rx++I:circ(Mu))=Rtt
@@ -676,17 +718,6 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
(STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
-// memb(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STrib_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memb($src1<<#$src2+#$src3) = $src4",
- [(truncstorei8 (i32 IntRegs:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
// memb(Rx++#s4:0:circ(Mu))=Rt
// memb(Rx++I:circ(Mu))=Rt
// memb(Rx++Mu)=Rt
@@ -707,17 +738,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// TODO: needs to be implemented.
// memh(Ru<<#u2+#U6)=Rt.H
-// memh(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STrih_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memh($src1<<#$src2+#$src3) = $src4",
- [(truncstorei16 (i32 IntRegs:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Rt.H
// memh(Rx++#s4:1:circ(Mu))=Rt
// memh(Rx++I:circ(Mu))=Rt.H
@@ -754,17 +774,6 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
(STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
-// memw(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STriw_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memw($src1<<#$src2+#$src3) = $src4",
- [(store (i32 IntRegs:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2)=Rt
// memw(Rx++#s4:2:circ(Mu))=Rt
// memw(Rx++I:circ(Mu))=Rt
@@ -883,15 +892,6 @@ mayStore = 1 in {
defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
}
-// memb(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STrib_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memb($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
//===----------------------------------------------------------------------===//
// Post increment store
// mem[bhwd](Rx++#s4:[0123])=Nt.new
@@ -949,29 +949,11 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
// memb(Rx++I:circ(Mu))=Nt.new
// memb(Rx++Mu)=Nt.new
// memb(Rx++Mu:brev)=Nt.new
-// memh(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STrih_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memh($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
// memh(Rx++Mu)=Nt.new
// memh(Rx++Mu:brev)=Nt.new
-// memw(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STriw_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memw($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2:circ(Mu))=Nt.new
// memw(Rx++I:circ(Mu))=Nt.new
// memw(Rx++Mu)=Nt.new
@@ -3071,19 +3053,21 @@ def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
// Load - Indirect with long offset: These instructions take global address
// as an operand
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 3, AddedComplexity = 40,
+validSubTargets = HasV4SubT in
def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
"$dst=memd($src1<<#$src2+##$offset)",
[(set (i64 DoubleRegs:$dst),
(load (add (shl IntRegs:$src1, u2ImmPred:$src2),
(HexagonCONST32 tglobaladdr:$offset))))]>,
Requires<[HasV4T]>;
-let AddedComplexity = 10 in
+let AddedComplexity = 40 in
multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in
def _lo_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
!strconcat("$dst = ",
!strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
[(set IntRegs:$dst,
@@ -3094,37 +3078,23 @@ multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>;
defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>;
defm LDriw_ind : LD_indirect_lo<"memw", load>;
-// Store - Indirect with long offset: These instructions take global address
-// as an operand
-let AddedComplexity = 10 in
-def STrid_ind_lo_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
- DoubleRegs:$src4),
- "memd($src1<<#$src2+#$src3) = $src4",
- [(store (i64 DoubleRegs:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3)))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10 in
-multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
- def _lo_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
- IntRegs:$src4),
- !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
- [(OpNode (i32 IntRegs:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3)))]>,
- Requires<[HasV4T]>;
-}
+let AddedComplexity = 40 in
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+ (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
+ (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
+ Requires<[HasV4T]>;
-defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
-defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
-defm STriw_ind : ST_indirect_lo<"memw", store>;
+let AddedComplexity = 40 in
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+ (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
+ (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
+ Requires<[HasV4T]>;
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
diff --git a/test/CodeGen/Hexagon/always-ext.ll b/test/CodeGen/Hexagon/always-ext.ll
new file mode 100644
index 0000000000..9c8d708ba8
--- /dev/null
+++ b/test/CodeGen/Hexagon/always-ext.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that we don't generate an invalid packet with too many instructions
+; due to a store that has a must-extend operand.
+
+; CHECK: CuSuiteAdd.exit.us
+; CHECK: {
+; CHECK-NOT: call abort
+; CHECK: memw(##0)
+; CHECK: memw(r{{[0-9+]}}<<#2+##4)
+; CHECK: }
+
+%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111 = type { i8*, void (%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*)*, i32, i32, i8*, [23 x i32]* }
+%struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112 = type { i32, [1024 x %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*], i32 }
+
+@__func__.CuSuiteAdd = external unnamed_addr constant [11 x i8], align 8
+@.str24 = external unnamed_addr constant [140 x i8], align 8
+
+declare void @_Assert()
+
+define void @CuSuiteAddSuite() nounwind {
+entry:
+ br i1 undef, label %for.body.us, label %for.end
+
+for.body.us: ; preds = %entry
+ %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
+ %1 = load i32* undef, align 4
+ %cmp.i.us = icmp slt i32 %1, 1024
+ br i1 %cmp.i.us, label %CuSuiteAdd.exit.us, label %cond.false6.i.us
+
+cond.false6.i.us: ; preds = %for.body.us
+ tail call void @_Assert() nounwind
+ unreachable
+
+CuSuiteAdd.exit.us: ; preds = %for.body.us
+ %arrayidx.i.us = getelementptr inbounds %struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112* null, i32 0, i32 1, i32 %1
+ store %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111* %0, %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** %arrayidx.i.us, align 4
+ call void @llvm.trap()
+ unreachable
+
+for.end: ; preds = %entry
+ ret void
+}
+
+declare void @llvm.trap() noreturn nounwind