summaryrefslogtreecommitdiff
path: root/test/Transforms/MemCpyOpt
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2011-06-18 06:05:24 +0000
committerChris Lattner <sabre@nondot.org>2011-06-18 06:05:24 +0000
commitb85e4eba85a38698f3b3332f82554bf8442547e2 (patch)
treeae680321c7e03ee37d612c42282038950d37ea13 /test/Transforms/MemCpyOpt
parent6be41eb7f00319f5ffa1a5435dcd1e81b3ce932d (diff)
downloadllvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.gz
llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.bz2
llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.xz
rip out a ton of intrinsic modernization logic from AutoUpgrade.cpp, which is
for pre-2.9 bitcode files. We keep x86 unaligned loads, movnt, crc32, and the target indep prefetch change. As usual, updating the testsuite is a PITA. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133337 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/MemCpyOpt')
-rw-r--r--test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll44
-rw-r--r--test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll26
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll54
-rw-r--r--test/Transforms/MemCpyOpt/memmove.ll14
-rw-r--r--test/Transforms/MemCpyOpt/sret.ll40
5 files changed, 93 insertions, 85 deletions
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 9f1e280467..b95ad91a36 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -4,31 +4,33 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
-define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+%0 = type { x86_fp80, x86_fp80 }
+
+define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind {
entry:
- %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
- store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
- %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
- store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
- ret void
+ %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+ %agg.result.15 = getelementptr %0* %agg.result, i32 0, i32 1
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+ ret void
}
-declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind
-define fastcc void @badly_optimized() nounwind {
+define fastcc void @badly_optimized() nounwind {
entry:
- %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
- %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
- %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
- %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; <i8*> [#uses=1]
- %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
- %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; <x86_fp80> [#uses=0]
- ret void
+ %z = alloca %0
+ %tmp = alloca %0
+ %memtmp = alloca %0, align 8
+ call fastcc void @initialize(%0* noalias sret %memtmp)
+ %tmp1 = bitcast %0* %tmp to i8*
+ %memtmp2 = bitcast %0* %memtmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
+ %z3 = bitcast %0* %z to i8*
+ %tmp4 = bitcast %0* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
+ %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 418761e936..24cf576a08 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,20 +1,22 @@
; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
- %a = type { i32 }
- %b = type { float }
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+%a = type { i32 }
+%b = type { float }
+
declare void @g(%a*)
define float @f() {
entry:
- %a_var = alloca %a
- %b_var = alloca %b
- call void @g(%a *%a_var)
- %a_i8 = bitcast %a* %a_var to i8*
- %b_i8 = bitcast %b* %b_var to i8*
- call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4)
- %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
- %tmp2 = load float* %tmp1
- ret float %tmp2
+ %a_var = alloca %a
+ %b_var = alloca %b
+ call void @g(%a* %a_var)
+ %a_i8 = bitcast %a* %a_var to i8*
+ %b_i8 = bitcast %b* %b_var to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+ %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
+ %tmp2 = load float* %tmp1
+ ret float %tmp2
}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index fb979135e0..12519ef76c 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -3,17 +3,21 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
+%0 = type { x86_fp80, x86_fp80 }
+%1 = type { i32, i32 }
+
define void @test1({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind {
entry:
- %tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
- call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind
- %tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2]
- %memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
- %agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
+ %tmp2 = alloca %0
+ %memtmp = alloca %0, align 16
+ %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1
+ call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
+ %tmp219 = bitcast %0* %tmp2 to i8*
+ %memtmp20 = bitcast %0* %memtmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
+ %agg.result21 = bitcast %0* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
+ ret void
; Check that one of the memcpy's are removed.
;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
@@ -23,22 +27,19 @@ entry:
; CHECK: call void @llvm.memcpy
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
- ret void
}
declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
; The intermediate alloca and one of the memcpy's should be eliminated, the
; other should be related with a memmove.
define void @test2(i8* %P, i8* %Q) nounwind {
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 16
- %R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
- call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
- call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
- ret void
+ %memtmp = alloca %0, align 16
+ %R = bitcast %0* %memtmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
+ ret void
; CHECK: @test2
; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
@@ -51,12 +52,12 @@ define void @test2(i8* %P, i8* %Q) nounwind {
@x = external global { x86_fp80, x86_fp80 }
define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
- %x.0 = alloca { x86_fp80, x86_fp80 }
- %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
- call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
- %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
- call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
- ret void
+ %x.0 = alloca %0
+ %x.01 = bitcast %0* %x.0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
+ %agg.result2 = bitcast %0* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
+ ret void
; CHECK: @test3
; CHECK-NEXT: %agg.result2 = bitcast
; CHECK-NEXT: call void @llvm.memcpy
@@ -66,10 +67,10 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
; PR8644
define void @test4(i8 *%P) {
- %A = alloca {i32, i32}
- %a = bitcast {i32, i32}* %A to i8*
+ %A = alloca %1
+ %a = bitcast %1* %A to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
- call void @test4a(i8* byval align 1 %a)
+ call void @test4a(i8* byval align 1 %a)
ret void
; CHECK: @test4
; CHECK-NEXT: call void @test4a(
@@ -127,4 +128,5 @@ entry:
declare i32 @g(%struct.p* byval align 8)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 8d3fbd2b6d..7f1667a455 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,7 +4,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
-declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
define i8* @test1(i8* nocapture %src) nounwind {
entry:
@@ -13,8 +13,8 @@ entry:
%malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32))
%call3 = bitcast i8* %malloccall to [13 x i8]*
- %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
- tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
+ %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
ret i8* %call3.sub
}
declare noalias i8* @malloc(i32)
@@ -24,8 +24,8 @@ define void @test2(i8* %P) nounwind {
entry:
; CHECK: @test2
; CHECK: call void @llvm.memcpy
- %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
- tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
+ %add.ptr = getelementptr i8* %P, i64 16
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
ret void
}
@@ -34,7 +34,7 @@ define void @test3(i8* %P) nounwind {
entry:
; CHECK: @test3
; CHECK: call void @llvm.memmove
- %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
- tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
+ %add.ptr = getelementptr i8* %P, i64 16
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
ret void
}
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index ddfd0fd1fc..8eac7da798 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -3,26 +3,28 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 8 %z) nounwind {
+%0 = type { x86_fp80, x86_fp80 }
+
+define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind {
entry:
- %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
- %tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]
- %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
- %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
- %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
- %tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
- %tmp8 = load x86_fp80* %tmp7, align 16 ; <x86_fp80> [#uses=1]
- store x86_fp80 %tmp3, x86_fp80* %real, align 16
- store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
- call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind
- %memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
- %agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
- ret void
+ %iz = alloca %0
+ %memtmp = alloca %0, align 16
+ %tmp1 = getelementptr %0* %z, i32 0, i32 1
+ %tmp2 = load x86_fp80* %tmp1, align 16
+ %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
+ %tmp4 = getelementptr %0* %iz, i32 0, i32 1
+ %real = getelementptr %0* %iz, i32 0, i32 0
+ %tmp7 = getelementptr %0* %z, i32 0, i32 0
+ %tmp8 = load x86_fp80* %tmp7, align 16
+ store x86_fp80 %tmp3, x86_fp80* %real, align 16
+ store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+ call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
+ %memtmp14 = bitcast %0* %memtmp to i8*
+ %agg.result15 = bitcast %0* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
+ ret void
}
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind
+declare void @ccoshl(%0* noalias sret, %0* byval) nounwind
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind