summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Evan Cheng <evan.cheng@apple.com> 2007-10-26 01:56:11 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2007-10-26 01:56:11 +0000
commit: 2bd122c4d934a70e031dc0ca5171719bac66c2c9 (patch)
tree: 2aa0e737c3f01cc1f447def3d95d9ec7833f99bc
parent: 081fdf238ba17747487d35da1283b8323482b4d2 (diff)
download: llvm-2bd122c4d934a70e031dc0ca5171719bac66c2c9.tar.gz
llvm-2bd122c4d934a70e031dc0ca5171719bac66c2c9.tar.bz2
llvm-2bd122c4d934a70e031dc0ca5171719bac66c2c9.tar.xz
Loosen up iv reuse to allow reuse of the same stride but a larger type when truncating from the larger type to smaller type is free.
e.g. Turns this loop:

LBB1_1: # entry.bb_crit_edge
        xorl %ecx, %ecx
        xorw %dx, %dx
        movw %dx, %si
LBB1_2: # bb
        movl L_X$non_lazy_ptr, %edi
        movw %si, (%edi)
        movl L_Y$non_lazy_ptr, %edi
        movw %dx, (%edi)
        addw $4, %dx
        incw %si
        incl %ecx
        cmpl %eax, %ecx
        jne LBB1_2 # bb

into

LBB1_1: # entry.bb_crit_edge
        xorl %ecx, %ecx
        xorw %dx, %dx
LBB1_2: # bb
        movl L_X$non_lazy_ptr, %esi
        movw %cx, (%esi)
        movl L_Y$non_lazy_ptr, %esi
        movw %dx, (%esi)
        addw $4, %dx
        incl %ecx
        cmpl %eax, %ecx
        jne LBB1_2 # bb

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43375 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/Target/TargetLowering.h 9
-rw-r--r-- lib/Target/X86/README.txt 32
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 7
-rw-r--r-- lib/Target/X86/X86ISelLowering.h 5
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp 38
-rw-r--r-- test/CodeGen/X86/loop-strength-reduce5.ll 23
6 files changed, 77 insertions, 37 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index d55a8af722..e092145ebe 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -104,7 +104,7 @@ public:
/// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
/// srl/add/sra.
bool isPow2DivCheap() const { return Pow2DivIsCheap; }
-
+
/// getSetCCResultTy - Return the ValueType of the result of setcc operations.
///
MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; }
@@ -994,6 +994,13 @@ public:
/// TODO: Handle pre/postinc as well.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const;
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2, e.g. on x86 it's free to truncate an i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ return false; // Base implementation: no truncate is reported as free.
+ }
+
//===--------------------------------------------------------------------===//
// Div utility functions
//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index ccd15be4e6..41b38d84c8 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -339,20 +339,18 @@ void foo(int N) {
for (i = 0; i < N; i++) { X = i; Y = i*4; }
}
-LBB1_1: #bb.preheader
- xorl %ecx, %ecx
- xorw %dx, %dx
-LBB1_2: #bb
- movl L_X$non_lazy_ptr, %esi
- movw %dx, (%esi)
- movw %dx, %si
- shlw $2, %si
- movl L_Y$non_lazy_ptr, %edi
- movw %si, (%edi)
- incl %ecx
- incw %dx
- cmpl %eax, %ecx
- jne LBB1_2 #bb
+LBB1_1: # entry.bb_crit_edge
+ xorl %ecx, %ecx
+ xorw %dx, %dx
+LBB1_2: # bb
+ movl L_X$non_lazy_ptr, %esi
+ movw %cx, (%esi)
+ movl L_Y$non_lazy_ptr, %esi
+ movw %dx, (%esi)
+ addw $4, %dx
+ incl %ecx
+ cmpl %eax, %ecx
+ jne LBB1_2 # bb
vs.
@@ -367,11 +365,7 @@ L4:
cmpl %edx, %edi
jne L4
-There are 3 issues:
-
-1. Lack of post regalloc LICM.
-2. LSR unable to reused IV for a different type (i16 vs. i32) even though
- the cast would be free.
+This is due to the lack of post regalloc LICM.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4d528abb6b..172aa5338b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5122,6 +5122,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
}
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger()) // Only integer-to-integer truncates qualify.
+ return false;
+ return Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits(); // Free only when Ty1 is strictly wider than Ty2.
+}
+
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7123adaad2..b68de5a675 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -359,6 +359,11 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2, e.g. on x86 it's free to truncate an i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fbe3171f81..d81ea2b799 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -178,7 +178,7 @@ private:
bool FindIVForUser(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
- unsigned CheckForIVReuse(bool, const SCEVHandle&,
+ unsigned CheckForIVReuse(bool, bool, const SCEVHandle&,
IVExpr&, const Type*,
const std::vector<BasedUser>& UsersToProcess);
bool ValidStride(bool, int64_t,
@@ -980,15 +980,17 @@ bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
/// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
/// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
- const Type *NewTy) {
- if (Ty == NewTy)
+bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
+ const Type *Ty2) {
+ if (Ty1 == Ty2) // Identical types never need a conversion.
return false;
- return (!Ty->canLosslesslyBitCastTo(NewTy) &&
- !(isa<PointerType>(NewTy) &&
- Ty->canLosslesslyBitCastTo(UIntPtrTy)) &&
- !(isa<PointerType>(Ty) &&
- NewTy->canLosslesslyBitCastTo(UIntPtrTy)));
+ if (TLI && TLI->isTruncateFree(Ty1, Ty2)) // A truncate the target reports as free is treated as a nop.
+ return false;
+ return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
+ !(isa<PointerType>(Ty2) &&
+ Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
+ !(isa<PointerType>(Ty1) &&
+ Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
}
/// CheckForIVReuse - Returns the multiple if the stride is the multiple
@@ -997,20 +999,23 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty,
/// this stride to be rewritten as prev iv * factor. It returns 0 if no
/// reuse is possible.
unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+ bool AllUsesAreAddresses,
const SCEVHandle &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
- if (SInt == 1) return 0;
-
for (std::map<SCEVHandle, IVsOfOneStride>::iterator SI= IVsByStride.begin(),
SE = IVsByStride.end(); SI != SE; ++SI) {
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SInt != -SSInt &&
+ if (SI->first != Stride &&
(unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
int64_t Scale = SInt / SSInt;
+ // When scale is 1, we don't need to worry about whether the
+ // multiplication can be folded into the addressing mode.
+ if (!AllUsesAreAddresses && Scale != 1)
+ continue;
// Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it
@@ -1021,7 +1026,7 @@ unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion.
if (isZero(II->Base) &&
- !RequiresTypeConversion(II->Base->getType(),Ty)) {
+ !RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Scale;
}
@@ -1183,10 +1188,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
SE->getIntegerSCEV(0, Type::Int32Ty),
0, 0);
unsigned RewriteFactor = 0;
- if (AllUsesAreAddresses)
- RewriteFactor = CheckForIVReuse(HaveCommonExprs, Stride, ReuseIV,
- CommonExprs->getType(),
- UsersToProcess);
+ RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+ Stride, ReuseIV, CommonExprs->getType(),
+ UsersToProcess);
if (RewriteFactor != 0) {
DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << " :\n";
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
new file mode 100644
index 0000000000..6e037e2aca
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep inc | count 1
+
+@X = weak global i16 0 ; <i16*> [#uses=1]
+@Y = weak global i16 0 ; <i16*> [#uses=1]
+
+define void @foo(i32 %N) {
+entry:
+ %tmp1019 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
+ br i1 %tmp1019, label %bb, label %return ; enter the loop only when N > 0
+
+bb: ; preds = %bb, %entry
+ %i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+ %tmp1 = trunc i32 %i.014.0 to i16 ; <i16> [#uses=2]
+ volatile store i16 %tmp1, i16* @X, align 2 ; X = i truncated to i16
+ %tmp34 = shl i16 %tmp1, 2 ; <i16> [#uses=1]
+ volatile store i16 %tmp34, i16* @Y, align 2 ; Y = truncated i shifted left by 2
+ %indvar.next = add i32 %i.014.0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}