summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nadav Rotem <nrotem@apple.com> 2013-04-23 17:12:42 +0000
committer Nadav Rotem <nrotem@apple.com> 2013-04-23 17:12:42 +0000
commit a7d9a6ee63bec70fecea79b85a30108ed3e8fabd (patch)
tree 29d83f77b12af220401f880a583be82a0623fc84
parent 3d7b39e7d4d3cef9f859f5965fbf959e251ee3ee (diff)
download llvm-a7d9a6ee63bec70fecea79b85a30108ed3e8fabd.tar.gz
llvm-a7d9a6ee63bec70fecea79b85a30108ed3e8fabd.tar.bz2
llvm-a7d9a6ee63bec70fecea79b85a30108ed3e8fabd.tar.xz
LoopVectorizer: Fix PR15830. When scalarizing and unrolling stores, make sure that the order in which the elements are scalarized is the same as the original order.
This fixes a miscompilation in FreeBSD's regex library.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180121 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 8
-rw-r--r-- test/Transforms/LoopVectorize/bsd_regex.ll 36
2 files changed, 40 insertions, 4 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 162587c565..0988a4032f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1085,10 +1085,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
- // For each scalar that we create:
- for (unsigned Width = 0; Width < VF; ++Width) {
- // For each vector unroll 'part':
- for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each scalar that we create:
+ for (unsigned Width = 0; Width < VF; ++Width) {
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll
new file mode 100644
index 0000000000..a2aef1c368
--- /dev/null
+++ b/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s
+
+;PR 15830.
+
+;CHECK: foo
+; When scalarizing stores we need to preserve the original order.
+; Make sure that we are extracting in the correct order (0101, and not 0011).
+;CHECK: extractelement <2 x i64> {{.*}}, i32 0
+;CHECK: extractelement <2 x i64> {{.*}}, i32 1
+;CHECK: extractelement <2 x i64> {{.*}}, i32 0
+;CHECK: extractelement <2 x i64> {{.*}}, i32 1
+;CHECK: store
+;CHECK: store
+;CHECK: store
+;CHECK: store
+;CHECK: ret
+
+define i32 @foo(i32* nocapture %A) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 2
+ %arrayidx = getelementptr inbounds i32* %A, i64 %0
+ store i32 4, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret i32 undef
+}
+
+