summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Stepan Dyatkovskiy <stpworld@narod.ru> 2013-04-05 05:52:14 +0000
committer: Stepan Dyatkovskiy <stpworld@narod.ru> 2013-04-05 05:52:14 +0000
commit: 89becbb97423fb608a4dd85ec10c3fde4398d956 (patch)
tree: e9329b5ebf6b5871c8cc2aa2ccf6479e7c7a4b51 /test
parent: 1abaf907b6aff6e468cb838fa40e0ec6cc5ece24 (diff)
download: llvm-89becbb97423fb608a4dd85ec10c3fde4398d956.tar.gz
llvm-89becbb97423fb608a4dd85ec10c3fde4398d956.tar.bz2
llvm-89becbb97423fb608a4dd85ec10c3fde4398d956.tar.xz
Fix for PR14824: "Optimization arm_ldst_opt inserts newly generated instruction vldmia at incorrect position".
This patch introduces memory-operand tracking in ARMLoadStoreOpt::LoadStoreMultipleOpti. For each register, it preserves the order of the load operations as it was before the optimization pass. It is a deeper version of the fix proposed by Hao: http://llvm.org/bugs/show_bug.cgi?id=14824#c4 In addition, it tracks conflicts between different register classes (e.g. D2 and S5). For more details see: Bug description: http://llvm.org/bugs/show_bug.cgi?id=14824 LLVM Commits discussion: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130311/167936.html http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130318/168688.html http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130325/169376.html http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130401/170238.html git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178851 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll 110
1 files changed, 110 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll
new file mode 100644
index 0000000000..2561686c1f
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s
+; This test case was provided by Jiangning Liu.
+;CHECK-NOT: vldmia
+
+define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind { ; PR14824 regression input: loads <8 x i64> vectors from both sources, permutes their 64-bit lanes, and stores to %dest[6] and %dest[7]
+entry:
+ %s0 = load <8 x i64> * %source, align 64 ; source[0]; only feeds the extract chain below, whose result is not used again in this function
+ %s1 = load <8 x i64> * %secondSource, align 64 ; secondSource[0]; %s1 is not referenced again in this function
+ %s2 = bitcast <8 x i64> %s0 to i512 ; view the whole vector as one i512 so a 64-bit piece can be taken with lshr + trunc
+ %data.i.i.48.extract.shift = lshr i512 %s2, 384
+ %data.i.i.48.extract.trunc = trunc i512 %data.i.i.48.extract.shift to i64
+ %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6 ; &source[6] (each element is a whole <8 x i64>)
+ %s120 = load <8 x i64> * %arrayidx64, align 64
+ %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6 ; &secondSource[6]
+ %s121 = load <8 x i64> * %arrayidx67, align 64
+ %s122 = bitcast <8 x i64> %s120 to i512 ; source[6] as i512; 64-bit pieces are extracted below at bit offsets 384, 256, 128, 448 and 192
+ %data.i.i677.48.extract.shift = lshr i512 %s122, 384
+ %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
+ %s123 = insertelement <8 x i64> undef, i64 %data.i.i677.48.extract.trunc, i32 0 ; start rebuilding a vector (ends as %s130) from the extracted pieces
+ %data.i.i677.32.extract.shift = lshr i512 %s122, 256
+ %data.i.i677.32.extract.trunc = trunc i512 %data.i.i677.32.extract.shift to i64
+ %s124 = insertelement <8 x i64> %s123, i64 %data.i.i677.32.extract.trunc, i32 1
+ %data.i.i677.16.extract.shift = lshr i512 %s122, 128
+ %data.i.i677.16.extract.trunc = trunc i512 %data.i.i677.16.extract.shift to i64
+ %s125 = insertelement <8 x i64> %s124, i64 %data.i.i677.16.extract.trunc, i32 2
+ %data.i.i677.56.extract.shift = lshr i512 %s122, 448
+ %data.i.i677.56.extract.trunc = trunc i512 %data.i.i677.56.extract.shift to i64
+ %s126 = insertelement <8 x i64> %s125, i64 %data.i.i677.56.extract.trunc, i32 3
+ %data.i.i677.24.extract.shift = lshr i512 %s122, 192
+ %data.i.i677.24.extract.trunc = trunc i512 %data.i.i677.24.extract.shift to i64
+ %s127 = insertelement <8 x i64> %s126, i64 %data.i.i677.24.extract.trunc, i32 4
+ %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5 ; lanes 5..7 reuse the values already placed in lanes 1..3
+ %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6
+ %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
+ %s131 = bitcast <8 x i64> %s121 to i512 ; same extraction and rebuild sequence, now for secondSource[6] (ends as %s139)
+ %data.i1.i676.48.extract.shift = lshr i512 %s131, 384
+ %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
+ %s132 = insertelement <8 x i64> undef, i64 %data.i1.i676.48.extract.trunc, i32 0
+ %data.i1.i676.32.extract.shift = lshr i512 %s131, 256
+ %data.i1.i676.32.extract.trunc = trunc i512 %data.i1.i676.32.extract.shift to i64
+ %s133 = insertelement <8 x i64> %s132, i64 %data.i1.i676.32.extract.trunc, i32 1
+ %data.i1.i676.16.extract.shift = lshr i512 %s131, 128
+ %data.i1.i676.16.extract.trunc = trunc i512 %data.i1.i676.16.extract.shift to i64
+ %s134 = insertelement <8 x i64> %s133, i64 %data.i1.i676.16.extract.trunc, i32 2
+ %data.i1.i676.56.extract.shift = lshr i512 %s131, 448
+ %data.i1.i676.56.extract.trunc = trunc i512 %data.i1.i676.56.extract.shift to i64
+ %s135 = insertelement <8 x i64> %s134, i64 %data.i1.i676.56.extract.trunc, i32 3
+ %data.i1.i676.24.extract.shift = lshr i512 %s131, 192
+ %data.i1.i676.24.extract.trunc = trunc i512 %data.i1.i676.24.extract.shift to i64
+ %s136 = insertelement <8 x i64> %s135, i64 %data.i1.i676.24.extract.trunc, i32 4
+ %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5
+ %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6
+ %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7
+ %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef> ; mask indices 0-7 select from the first operand, 8-15 from the second; lanes 0..3 from %s139, lane 4 from %s130
+ %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> ; fill lane 5 from %s139
+ %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef> ; fill lane 6 from %s139
+ %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> ; fill lane 7 from %s130
+ %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6
+ store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64 ; dest[6] = combined vector
+ %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7 ; second round: same pattern on element 7 with a different lane selection
+ %s140 = load <8 x i64> * %arrayidx75, align 64
+ %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7
+ %s141 = load <8 x i64> * %arrayidx78, align 64
+ %s142 = bitcast <8 x i64> %s140 to i512 ; source[7] as i512; pieces are taken at bit offsets 256, 128, 64, 384 and 0
+ %data.i.i650.32.extract.shift = lshr i512 %s142, 256
+ %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64
+ %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0
+ %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1 ; same value as lane 0
+ %data.i.i650.16.extract.shift = lshr i512 %s142, 128
+ %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64
+ %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2
+ %data.i.i650.8.extract.shift = lshr i512 %s142, 64
+ %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64
+ %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3
+ %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4 ; same value as lane 3
+ %data.i.i650.48.extract.shift = lshr i512 %s142, 384
+ %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64
+ %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5
+ %s149 = insertelement <8 x i64> %s148, i64 %data.i.i650.16.extract.trunc, i32 6
+ %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64 ; lowest 64 bits, so no shift is needed
+ %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7
+ %s151 = bitcast <8 x i64> %s141 to i512 ; same extraction and rebuild sequence for secondSource[7] (ends as %s159)
+ %data.i1.i649.32.extract.shift = lshr i512 %s151, 256
+ %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
+ %s152 = insertelement <8 x i64> undef, i64 %data.i1.i649.32.extract.trunc, i32 0
+ %s153 = insertelement <8 x i64> %s152, i64 %data.i1.i649.32.extract.trunc, i32 1
+ %data.i1.i649.16.extract.shift = lshr i512 %s151, 128
+ %data.i1.i649.16.extract.trunc = trunc i512 %data.i1.i649.16.extract.shift to i64
+ %s154 = insertelement <8 x i64> %s153, i64 %data.i1.i649.16.extract.trunc, i32 2
+ %data.i1.i649.8.extract.shift = lshr i512 %s151, 64
+ %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64
+ %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3
+ %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4
+ %data.i1.i649.48.extract.shift = lshr i512 %s151, 384
+ %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64
+ %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5
+ %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6
+ %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64
+ %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7
+ %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> <i32 0, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 from %s159, lane 1 from %s150; later shuffles fill one more lane each
+ %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 10, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
+ %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
+ %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> ; lanes 5..7 come from %s159
+ %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
+ %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
+ %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7
+ store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64 ; dest[7] = combined vector
+ ret void
+}