Rip out a ton of intrinsic modernization logic from AutoUpgrade.cpp, which is for pre-2.9 bitcode files.

We keep x86 unaligned loads, movnt, crc32, and the target-independent prefetch change. As usual, updating the testsuite is a PITA.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133337 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2011-06-18 06:05:24 +0000
committer: Chris Lattner <sabre@nondot.org> 2011-06-18 06:05:24 +0000
commit: b85e4eba85a38698f3b3332f82554bf8442547e2 (patch)
tree: ae680321c7e03ee37d612c42282038950d37ea13 /test/Transforms/MemCpyOpt
parent: 6be41eb7f00319f5ffa1a5435dcd1e81b3ce932d (diff)
download: llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.gz
llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.bz2
llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.xz
5 files changed, 93 insertions, 85 deletions
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 9f1e280467..b95ad91a36 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -4,31 +4,33 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
 
-define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret  %agg.result) nounwind  {
+%0 = type { x86_fp80, x86_fp80 }
+
+define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind {
 entry:
-	%agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
-	%agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
-	ret void
+  %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+  %agg.result.15 = getelementptr %0* %agg.result, i32 0, i32 1
+  store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+  ret void
 }
 
-declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind
 
-define fastcc void @badly_optimized() nounwind  {
+define fastcc void @badly_optimized() nounwind {
 entry:
-	%z = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 8		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret  %memtmp )
-	%tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*		; <i8*> [#uses=1]
-	%memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
-	%z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8*		; <i8*> [#uses=1]
-	%tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
-	%tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z )		; <x86_fp80> [#uses=0]
-	ret void
+  %z = alloca %0
+  %tmp = alloca %0
+  %memtmp = alloca %0, align 8
+  call fastcc void @initialize(%0* noalias sret %memtmp)
+  %tmp1 = bitcast %0* %tmp to i8*
+  %memtmp2 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
+  %z3 = bitcast %0* %z to i8*
+  %tmp4 = bitcast %0* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
+  %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
+  ret void
 }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 418761e936..24cf576a08 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,20 +1,22 @@
 ; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-	%a = type { i32 }
-	%b = type { float }
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+%a = type { i32 }
+%b = type { float }
+
 declare void @g(%a*)
 
 define float @f() {
 entry:
-	%a_var = alloca %a
-	%b_var = alloca %b
-	call void @g(%a *%a_var)
-	%a_i8 = bitcast %a* %a_var to i8*
-	%b_i8 = bitcast %b* %b_var to i8*
-	call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4)
-	%tmp1 = getelementptr %b* %b_var, i32 0, i32 0
-	%tmp2 = load float* %tmp1
-	ret float %tmp2
+  %a_var = alloca %a
+  %b_var = alloca %b
+  call void @g(%a* %a_var)
+  %a_i8 = bitcast %a* %a_var to i8*
+  %b_i8 = bitcast %b* %b_var to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+  %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
+  %tmp2 = load float* %tmp1
+  ret float %tmp2
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index fb979135e0..12519ef76c 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -3,17 +3,21 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
+%0 = type { x86_fp80, x86_fp80 }
+%1 = type { i32, i32 }
+
 define void @test1({ x86_fp80, x86_fp80 }* sret  %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind  {
 entry:
-	%tmp2 = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=1]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1		; <x86_fp80> [#uses=1]
-	call void @ccoshl( { x86_fp80, x86_fp80 }* sret  %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind 
-	%tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8*		; <i8*> [#uses=2]
-	%memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
-	%agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
+  %tmp2 = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1
+  call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
+  %tmp219 = bitcast %0* %tmp2 to i8*
+  %memtmp20 = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
+  %agg.result21 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
+  ret void
 
 ; Check that one of the memcpy's are removed.
 ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here.
@@ -23,22 +27,19 @@ entry:
 ; CHECK: call void @llvm.memcpy
 ; CHECK-NOT: llvm.memcpy
 ; CHECK: ret void
-	ret void
 }
 
 declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind 
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
 
 ; The intermediate alloca and one of the memcpy's should be eliminated, the
 ; other should be related with a memmove.
 define void @test2(i8* %P, i8* %Q) nounwind  {
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16
-	%R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
-	call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
-	call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
-        ret void
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
+  ret void
         
 ; CHECK: @test2
 ; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
@@ -51,12 +52,12 @@ define void @test2(i8* %P, i8* %Q) nounwind  {
 @x = external global { x86_fp80, x86_fp80 }
 
 define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
-	%x.0 = alloca { x86_fp80, x86_fp80 }
-	%x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
-	call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
-	%agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
-	call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
-	ret void
+  %x.0 = alloca %0
+  %x.01 = bitcast %0* %x.0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
+  %agg.result2 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
+  ret void
 ; CHECK: @test3
 ; CHECK-NEXT: %agg.result2 = bitcast 
 ; CHECK-NEXT: call void @llvm.memcpy
@@ -66,10 +67,10 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
 
 ; PR8644
 define void @test4(i8 *%P) {
-  %A = alloca {i32, i32}
-  %a = bitcast {i32, i32}* %A to i8*
+  %A = alloca %1
+  %a = bitcast %1* %A to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
-  call void @test4a(i8* byval align 1 %a) 
+  call void @test4a(i8* byval align 1 %a)
   ret void
 ; CHECK: @test4
 ; CHECK-NEXT: call void @test4a(
@@ -127,4 +128,5 @@ entry:
 
 declare i32 @g(%struct.p* byval align 8)
 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 8d3fbd2b6d..7f1667a455 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
 
-declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define i8* @test1(i8* nocapture %src) nounwind {
 entry:
@@ -13,8 +13,8 @@ entry:
 
   %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32))
   %call3 = bitcast i8* %malloccall to [13 x i8]*
-  %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
-  tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
+  %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
   ret i8* %call3.sub
 }
 declare noalias i8* @malloc(i32)
@@ -24,8 +24,8 @@ define void @test2(i8* %P) nounwind {
 entry:
 ; CHECK: @test2
 ; CHECK: call void @llvm.memcpy
-  %add.ptr = getelementptr i8* %P, i64 16         ; <i8*> [#uses=1]
-  tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
+  %add.ptr = getelementptr i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
   ret void
 }
 
@@ -34,7 +34,7 @@ define void @test3(i8* %P) nounwind {
 entry:
 ; CHECK: @test3
 ; CHECK: call void @llvm.memmove
-  %add.ptr = getelementptr i8* %P, i64 16         ; <i8*> [#uses=1]
-  tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
+  %add.ptr = getelementptr i8* %P, i64 16
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
   ret void
 }
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index ddfd0fd1fc..8eac7da798 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -3,26 +3,28 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  align 8 %z) nounwind  {
+%0 = type { x86_fp80, x86_fp80 }
+
+define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind {
 entry:
-	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
-	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
-	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
-	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
-	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp8 = load x86_fp80* %tmp7, align 16		; <x86_fp80> [#uses=1]
-	store x86_fp80 %tmp3, x86_fp80* %real, align 16
-	store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
-	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind 
-	%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
-	%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
-	call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
-	ret void
+  %iz = alloca %0
+  %memtmp = alloca %0, align 16
+  %tmp1 = getelementptr %0* %z, i32 0, i32 1
+  %tmp2 = load x86_fp80* %tmp1, align 16
+  %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
+  %tmp4 = getelementptr %0* %iz, i32 0, i32 1
+  %real = getelementptr %0* %iz, i32 0, i32 0
+  %tmp7 = getelementptr %0* %z, i32 0, i32 0
+  %tmp8 = load x86_fp80* %tmp7, align 16
+  store x86_fp80 %tmp3, x86_fp80* %real, align 16
+  store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+  call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
+  %memtmp14 = bitcast %0* %memtmp to i8*
+  %agg.result15 = bitcast %0* %agg.result to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
+  ret void
 }
 
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind 
+declare void @ccoshl(%0* noalias sret, %0* byval) nounwind
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
author	Chris Lattner <sabre@nondot.org>	2011-06-18 06:05:24 +0000
committer	Chris Lattner <sabre@nondot.org>	2011-06-18 06:05:24 +0000
commit	b85e4eba85a38698f3b3332f82554bf8442547e2 (patch)
tree	ae680321c7e03ee37d612c42282038950d37ea13 /test/Transforms/MemCpyOpt
parent	6be41eb7f00319f5ffa1a5435dcd1e81b3ce932d (diff)
download	llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.gz llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.bz2 llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.xz