From c4a246996dc37c02958fdbe968c25a5d03a51908 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Mon, 8 Jul 2013 21:20:22 +0000
Subject: R600: Fix R600ControlFlowFinalizer not considering VTX_READ 128 bit
 dst reg

Patch by: Vincent Lejeune

https://bugs.freedesktop.org/show_bug.cgi?id=64877

NOTE: This is a candidate for the 3.3 branch.


Merged from r182600
Author: Tom Stellard <thomas.stellard@amd.com>
Date:   Thu May 23 18:26:42 2013 +0000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@185868 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/R600ControlFlowFinalizer.cpp | 11 +++++++++--
 test/CodeGen/R600/vtx-schedule.ll            | 22 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 test/CodeGen/R600/vtx-schedule.ll

diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index ffe3414413..7d3a0f52de 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -116,8 +116,15 @@ private:
       const MachineOperand &MO = *I;
       if (!MO.isReg())
         continue;
-      if (MO.isDef())
-        DstMI = MO.getReg();
+      if (MO.isDef()) {
+        unsigned Reg = MO.getReg();
+        if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+          DstMI = Reg;
+        else
+          DstMI = TRI.getMatchingSuperReg(Reg,
+              TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)),
+              &AMDGPU::R600_Reg128RegClass);
+      }
       if (MO.isUse()) {
         unsigned Reg = MO.getReg();
         if (AMDGPU::R600_Reg128RegClass.contains(Reg))
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/R600/vtx-schedule.ll
new file mode 100644
index 0000000000..a0c79e36d3
--- /dev/null
+++ b/test/CodeGen/R600/vtx-schedule.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test is for a scheduler bug where VTX_READ instructions that used
+; the result of another VTX_READ instruction were being grouped in the
+; same fetch clause.
+
+; CHECK: @test
+; CHECK: Fetch clause
+; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 40
+; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 44
+; CHECK: Fetch clause
+; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
+; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
+define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in0, i32 addrspace(1)* nocapture %in1) {
+entry:
+  %0 = load i32 addrspace(1)* %in0, align 4
+  %1 = load i32 addrspace(1)* %in1, align 4
+  %cmp.i = icmp slt i32 %0, %1
+  %cond.i = select i1 %cmp.i, i32 %0, i32 %1
+  store i32 %cond.i, i32 addrspace(1)* %out, align 4
+  ret void
+}
-- 
cgit v1.2.3


From 7dfcb84fc16b3bf6b2379713b53090757f0a45f9 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Mon, 8 Jul 2013 21:20:34 +0000
Subject: Fix for a regression caused by the LoopVectorizer when vectorizing
 loops with memory accesses to non-zero address spaces. It simply dropped the
 AS info. Fixes PR16306.

Merged from r184103
Author: Pekka Jaaskelainen <pekka.jaaskelainen@tut.fi>
Date:   Mon Jun 17 18:49:06 2013 +0000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@185869 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp         |  6 +--
 .../LoopVectorize/multiple-address-spaces.ll       | 47 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 test/Transforms/LoopVectorize/multiple-address-spaces.ll

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 08d372512d..11ee99ddf1 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -964,7 +964,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
   Type *DataTy = VectorType::get(ScalarDataTy, VF);
   Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
   unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
-
+  unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
   unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
   unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
 
@@ -1039,7 +1039,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
         PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
       }
 
-      Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+      Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
       Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
     }
   }
@@ -1055,7 +1055,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
       PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
     }
 
-    Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+    Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
     Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
     cast<LoadInst>(LI)->setAlignment(Alignment);
     Entry[Part] = Reverse ? reverseVector(LI) :  LI;
diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
new file mode 100644
index 0000000000..70ced7c406
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+; From a simple program with two address spaces:
+; char Y[4*10000] __attribute__((address_space(1)));
+; char X[4*10000];
+; int main() {
+;    for (int i = 0; i < 4*10000; ++i)
+;        X[i] = Y[i] + 1;
+;    return 0;
+;}
+
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@Y = common addrspace(1) global [40000 x i8] zeroinitializer, align 16
+@X = common global [40000 x i8] zeroinitializer, align 16
+
+;CHECK: @main
+;CHECK: bitcast i8 addrspace(1)* %{{.*}} to <4 x i8> addrspace(1)*
+;CHECK: bitcast i8* %{{.*}} to <4 x i8>*
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [40000 x i8] addrspace(1)* @Y, i64 0, i64 %indvars.iv
+  %0 = load i8 addrspace(1)* %arrayidx, align 1, !tbaa !0
+  %add = add i8 %0, 1
+  %arrayidx3 = getelementptr inbounds [40000 x i8]* @X, i64 0, i64 %indvars.iv
+  store i8 %add, i8* %arrayidx3, align 1, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 40000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
-- 
cgit v1.2.3


From 08745de15d80abdb08b1bdd33516974d4205cb80 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Fri, 19 Jul 2013 14:51:22 +0000
Subject: AArch64: add CMake dependency to fix very parallel builds

Merged from r182190
Author: Tim Northover <t.p.northover@gmail.com>
Date:   Sat May 18 08:17:47 2013 +0000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@186677 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/Utils/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/Target/AArch64/Utils/CMakeLists.txt b/lib/Target/AArch64/Utils/CMakeLists.txt
index 2c28348d7d..2348e44f85 100644
--- a/lib/Target/AArch64/Utils/CMakeLists.txt
+++ b/lib/Target/AArch64/Utils/CMakeLists.txt
@@ -3,3 +3,5 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMAArch64Utils
   AArch64BaseInfo.cpp
   )
+
+add_dependencies(LLVMAArch64Utils AArch64CommonTableGen)
-- 
cgit v1.2.3


From f667db3652e1fd198ce4e3aec4cebf080a124552 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Fri, 19 Jul 2013 14:51:27 +0000
Subject: PR15662: Optimized debug info produces out of order function
 parameters

When a function is inlined we lazily construct the variables
representing the function's parameters. After that, we add any remaining
unused parameters.

If the function doesn't use all the parameters, or uses them out of
order, then the DWARF would produce them in that order, producing a
parameter order that doesn't match the source.

This fix causes us to always keep the arg variables at the start of the
variable list & in the original order from the source.

Merged from r183297
Author: David Blaikie <dblaikie@gmail.com>
Date:   Wed Jun 5 05:39:59 2013 +0000

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_33@186678 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 34 ++++++++++++++--
 test/DebugInfo/inlined-arguments.ll   | 73 +++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 3 deletions(-)
 create mode 100644 test/DebugInfo/inlined-arguments.ll

diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 73bba6989f..1e706ccf1e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1624,9 +1624,37 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 }
 
 void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
-//  SmallVector<DbgVariable *, 8> &Vars = ScopeVariables.lookup(LS);
-  ScopeVariables[LS].push_back(Var);
-//  Vars.push_back(Var);
+  SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
+  DIVariable DV = Var->getVariable();
+  if (DV.getTag() == dwarf::DW_TAG_arg_variable) {
+    DISubprogram Ctxt(DV.getContext());
+    DIArray Variables = Ctxt.getVariables();
+    // If the variable is a parameter (arg_variable) and this is an optimized
+    // build (the subprogram has a 'variables' list) make sure we keep the
+    // parameters in order. Otherwise we would produce an incorrect function
+    // type with parameters out of order if function parameters were used out of
+    // order or unused (see the call to addScopeVariable in endFunction where
+    // the remaining unused variables (including parameters) are added).
+    if (unsigned NumVariables = Variables.getNumElements()) {
+      // Keep the parameters at the start of the variables list. Search through
+      // current variable list (Vars) and the full function variable list in
+      // lock-step looking for this parameter in the full list to find the
+      // insertion point.
+      SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin();
+      unsigned j = 0;
+      while (I != Vars.end() && j != NumVariables &&
+             Variables.getElement(j) != DV &&
+             (*I)->getVariable().getTag() == dwarf::DW_TAG_arg_variable) {
+        if (Variables.getElement(j) == (*I)->getVariable())
+          ++I;
+        ++j;
+      }
+      Vars.insert(I, Var);
+      return;
+    }
+  }
+
+  Vars.push_back(Var);
 }
 
 // Gather and emit post-function debug information.
diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll
new file mode 100644
index 0000000000..d3ece10063
--- /dev/null
+++ b/test/DebugInfo/inlined-arguments.ll
@@ -0,0 +1,73 @@
+; RUN: llc -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated from clang -O -g with the following source
+;
+; void f1(int x, int y);
+; void f3(int line);
+; void f2() {
+;   f1(1, 2);
+; }
+; void f1(int x, int y) {
+;   f3(y);
+; }
+
+; CHECK: DW_AT_name{{.*}}"f1"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name{{.*}}"x"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_name{{.*}}"y"
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 {
+  tail call void @llvm.dbg.value(metadata !15, i64 0, metadata !16), !dbg !18
+  tail call void @llvm.dbg.value(metadata !19, i64 0, metadata !20), !dbg !18
+  tail call void @_Z2f3i(i32 2), !dbg !21
+  ret void, !dbg !22
+}
+
+; Function Attrs: uwtable
+define void @_Z2f1ii(i32 %x, i32 %y) #0 {
+  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !13), !dbg !23
+  tail call void @llvm.dbg.value(metadata !{i32 %y}, i64 0, metadata !14), !dbg !23
+  tail call void @_Z2f3i(i32 %y), !dbg !24
+  ret void, !dbg !25
+}
+
+declare void @_Z2f3i(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"exp.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !8}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f2", metadata !"f2", metadata !"_Z2f2v", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z2f2v, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f2]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp]
+!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{null}
+!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"_Z2f1ii", i32 6, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32)* @_Z2f1ii, null, null, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
+!9 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = metadata !{null, metadata !11, metadata !11}
+!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13, metadata !14}
+!13 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !5, i32 16777222, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 6]
+!14 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 33554438, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [y] [line 6]
+!15 = metadata !{i32 undef}
+!16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !5, i32 16777222, metadata !11, i32 0, metadata !17} ; [ DW_TAG_arg_variable ] [x] [line 6]
+!17 = metadata !{i32 4, i32 0, metadata !4, null}
+!18 = metadata !{i32 6, i32 0, metadata !8, metadata !17}
+!19 = metadata !{i32 2}
+!20 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 33554438, metadata !11, i32 0, metadata !17} ; [ DW_TAG_arg_variable ] [y] [line 6]
+!21 = metadata !{i32 7, i32 0, metadata !8, metadata !17}
+!22 = metadata !{i32 5, i32 0, metadata !4, null}
+!23 = metadata !{i32 6, i32 0, metadata !8, null}
+!24 = metadata !{i32 7, i32 0, metadata !8, null}
+!25 = metadata !{i32 8, i32 0, metadata !8, null} ; [ DW_TAG_imported_declaration ]
-- 
cgit v1.2.3