author		Tom Stellard <thomas.stellard@amd.com>	2014-04-08 21:18:17 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2014-04-08 21:18:17 +0000
commit		dfe482bddddf3a03cdb1f5130e0af9e502b19d85 (patch)
tree		5f1c441d3f2ce16866d4f485bcbb182fde6cd442
parent		76bf269759e5e18566e2cb3af119c1f509eba110 (diff)
download	llvm-dfe482bddddf3a03cdb1f5130e0af9e502b19d85.tar.gz
		llvm-dfe482bddddf3a03cdb1f5130e0af9e502b19d85.tar.bz2
		llvm-dfe482bddddf3a03cdb1f5130e0af9e502b19d85.tar.xz
Merging r203719:
------------------------------------------------------------------------
r203719 | mzolotukhin | 2014-03-12 17:31:05 -0400 (Wed, 12 Mar 2014) | 4 lines

PR17473: Don't normalize an expression during postinc transformation
unless it's invertible.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@205797 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--	lib/Analysis/IVUsers.cpp			23
-rw-r--r--	test/Transforms/LoopStrengthReduce/pr17473.ll	67
2 files changed, 88 insertions, 2 deletions
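
The guard added in this change follows a round-trip pattern: normalize the
expression for post-increment use, then denormalize it again and keep the user
only if the round trip reproduces the original SCEV. A minimal sketch of that
pattern in plain C++ (the template, names, and comparison below are
illustrative assumptions, not the LLVM TransformForPostIncUse API):

    #include <optional>

    // Illustrative only: Normalize/Denormalize stand in for the two
    // TransformForPostIncUse directions used in the patch below.
    template <typename Expr, typename Fwd, typename Inv>
    std::optional<Expr> normalizeIfInvertible(const Expr &Original,
                                              Fwd Normalize, Inv Denormalize) {
      Expr Normalized = Normalize(Original);
      if (Normalized == Original)
        return Normalized;           // nothing changed; trivially invertible
      if (Denormalize(Normalized) != Original)
        return std::nullopt;         // not invertible; caller discards the user
      return Normalized;
    }
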
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index b33e2cb999..5a06cdce30 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -187,15 +187,34 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (AddUserToIVUsers) {
// Okay, we found a user that we cannot reduce.
- IVUses.push_back(new IVStrideUse(this, User, I));
- IVStrideUse &NewUse = IVUses.back();
+ IVStrideUse &NewUse = AddUser(User, I);
// Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
// The regular return value here is discarded; instead of recording
// it, we just recompute it when we need it.
+ const SCEV *OriginalISE = ISE;
ISE = TransformForPostIncUse(NormalizeAutodetect,
ISE, User, I,
NewUse.PostIncLoops,
*SE, *DT);
+
+ // PostIncNormalization effectively simplifies the expression under
+ // pre-increment assumptions. Those assumptions (no wrapping) might not
+ // hold for the post-inc value. Catch such cases by making sure the
+ // transformation is invertible.
+ if (OriginalISE != ISE) {
+ const SCEV *DenormalizedISE =
+ TransformForPostIncUse(Denormalize, ISE, User, I,
+ NewUse.PostIncLoops, *SE, *DT);
+
+ // If we normalized the expression, but denormalization doesn't give the
+ // original one, discard this user.
+ if (OriginalISE != DenormalizedISE) {
+ DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+ << *ISE << '\n');
+ IVUses.pop_back();
+ return false;
+ }
+ }
DEBUG(if (SE->getSCEV(I) != ISE)
dbgs() << " NORMALIZED TO: " << *ISE << '\n');
}
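
The new testcase below exercises exactly the situation the comment in the hunk
above describes: an i8 induction variable whose post-increment value wraps, so
the pre-increment no-wrap assumption does not hold. A hedged C++ illustration
of the expected behaviour (plain code mirroring the IR names, not part of the
test itself):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t iv = 0;    // %inc9: counts 0, 1, ..., 127
      int8_t post = 0;  // %inc:  post-increment value
      int conv = 0;     // %conv: sext i8 %inc9 to i32
      do {
        conv = iv;                           // sign-extend the current value
        post = static_cast<int8_t>(iv + 1);  // i8 add wraps 127 -> -128
        iv = post;
      } while (post > -1);
      // Correct result: conv == 127, post sign-extends to -128, sum == -1
      // (printed as ffffffff). Losing the sign extension would yield 0xff.
      std::printf("%x\n", conv + static_cast<int>(post));
      return 0;
    }
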
diff --git a/test/Transforms/LoopStrengthReduce/pr17473.ll b/test/Transforms/LoopStrengthReduce/pr17473.ll
new file mode 100644
index 0000000000..4204abc7ca
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr17473.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; LSR shouldn't normalize IV if it can't be denormalized to the original
+; expression. In this testcase, the normalized expression was denormalized to
+; an expression different from the original, and we were losing sign extension.
+
+; CHECK: [[TMP:%[a-z]+]] = trunc i32 {{.*}} to i8
+; CHECK: {{%[a-z0-9]+}} = sext i8 [[TMP]] to i32
+
+@j = common global i32 0, align 4
+@c = common global i32 0, align 4
+@g = common global i32 0, align 4
+@h = common global i8 0, align 1
+@d = common global i32 0, align 4
+@i = common global i32 0, align 4
+@e = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%x\0A\00", align 1
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; Function Attrs: nounwind optsize ssp uwtable
+define i32 @main() #0 {
+entry:
+ store i8 0, i8* @h, align 1
+ %0 = load i32* @j, align 4
+ %tobool.i = icmp eq i32 %0, 0
+ %1 = load i32* @d, align 4
+ %cmp3 = icmp sgt i32 %1, -1
+ %.lobit = lshr i32 %1, 31
+ %.lobit.not = xor i32 %.lobit, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %fn3.exit
+ %inc9 = phi i8 [ 0, %entry ], [ %inc, %fn3.exit ]
+ %conv = sext i8 %inc9 to i32
+ br i1 %tobool.i, label %fn3.exit, label %land.rhs.i
+
+land.rhs.i: ; preds = %for.body
+ store i32 0, i32* @c, align 4
+ br label %fn3.exit
+
+fn3.exit: ; preds = %for.body, %land.rhs.i
+ %inc = add i8 %inc9, 1
+ %cmp = icmp sgt i8 %inc, -1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %fn3.exit
+ %.lobit.not. = select i1 %cmp3, i32 %.lobit.not, i32 0
+ store i32 %conv, i32* @g, align 4
+ store i32 %.lobit.not., i32* @i, align 4
+ store i8 %inc, i8* @h, align 1
+ %conv7 = sext i8 %inc to i32
+ %add = add nsw i32 %conv7, %conv
+ store i32 %add, i32* @e, align 4
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %add) #2
+ ret i32 0
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+attributes #0 = { nounwind optsize ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }