diff options
author | Chris Lattner <sabre@nondot.org> | 2011-06-18 06:05:24 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-06-18 06:05:24 +0000 |
commit | b85e4eba85a38698f3b3332f82554bf8442547e2 (patch) | |
tree | ae680321c7e03ee37d612c42282038950d37ea13 /test/Transforms/MemCpyOpt | |
parent | 6be41eb7f00319f5ffa1a5435dcd1e81b3ce932d (diff) | |
download | llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.gz llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.bz2 llvm-b85e4eba85a38698f3b3332f82554bf8442547e2.tar.xz |
rip out a ton of intrinsic modernization logic from AutoUpgrade.cpp, which is
for pre-2.9 bitcode files. We keep x86 unaligned loads, movnt, crc32, and the
target indep prefetch change.
As usual, updating the testsuite is a PITA.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133337 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/MemCpyOpt')
-rw-r--r-- | test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 44 | ||||
-rw-r--r-- | test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 26 | ||||
-rw-r--r-- | test/Transforms/MemCpyOpt/memcpy.ll | 54 | ||||
-rw-r--r-- | test/Transforms/MemCpyOpt/memmove.ll | 14 | ||||
-rw-r--r-- | test/Transforms/MemCpyOpt/sret.ll | 40 |
5 files changed, 93 insertions, 85 deletions
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll index 9f1e280467..b95ad91a36 100644 --- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -4,31 +4,33 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu" -define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { +%0 = type { x86_fp80, x86_fp80 } + +define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind { entry: - %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; <x86_fp80*> [#uses=1] - store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 - %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; <x86_fp80*> [#uses=1] - store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15 - ret void + %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0 + store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03 + %agg.result.15 = getelementptr %0* %agg.result, i32 0, i32 1 + store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15 + ret void } -declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind +declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind -define fastcc void @badly_optimized() nounwind { +define fastcc void @badly_optimized() nounwind { entry: - %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp ) - %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1] - %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 ) - %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; <i8*> [#uses=1] - %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 ) - %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; <x86_fp80> [#uses=0] - ret void + %z = alloca %0 + %tmp = alloca %0 + %memtmp = alloca %0, align 8 + call fastcc void @initialize(%0* noalias sret %memtmp) + %tmp1 = bitcast %0* %tmp to i8* + %memtmp2 = bitcast %0* %memtmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false) + %z3 = bitcast %0* %z to i8* + %tmp4 = bitcast %0* %tmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false) + %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z) + ret void } -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll index 418761e936..24cf576a08 100644 --- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,20 +1,22 @@ ; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.} target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" - %a = type { i32 } - %b = type { float } -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind +%a = type { i32 } +%b = type { float } + declare void @g(%a*) define float @f() { entry: - %a_var = alloca %a - %b_var = alloca %b - call void @g(%a *%a_var) - %a_i8 = bitcast %a* %a_var to i8* - %b_i8 = bitcast %b* %b_var to i8* - call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4) - %tmp1 = getelementptr %b* %b_var, i32 0, i32 0 - %tmp2 = load float* %tmp1 - ret float %tmp2 + %a_var = alloca %a + %b_var = alloca %b + call void @g(%a* %a_var) + %a_i8 = bitcast %a* %a_var to i8* + %b_i8 = bitcast %b* %b_var to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false) + %tmp1 = getelementptr %b* %b_var, i32 0, i32 0 + %tmp2 = load float* %tmp1 + ret float %tmp2 } + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll index fb979135e0..12519ef76c 100644 --- a/test/Transforms/MemCpyOpt/memcpy.ll +++ b/test/Transforms/MemCpyOpt/memcpy.ll @@ -3,17 +3,21 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" +%0 = type { x86_fp80, x86_fp80 } +%1 = type { i32, i32 } + define void @test1({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { entry: - %tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1] - %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1] - call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind - %tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2] - %memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 ) - %agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 ) + %tmp2 = alloca %0 + %memtmp = alloca %0, align 16 + %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1 + call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind + %tmp219 = bitcast %0* %tmp2 to i8* + %memtmp20 = bitcast %0* %memtmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false) + %agg.result21 = bitcast %0* %agg.result to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false) + ret void ; Check that one of the memcpy's are removed. ;; FIXME: PR 8643 We should be able to eliminate the last memcpy here. @@ -23,22 +27,19 @@ entry: ; CHECK: call void @llvm.memcpy ; CHECK-NOT: llvm.memcpy ; CHECK: ret void - ret void } declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind - ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be related with a memmove. define void @test2(i8* %P, i8* %Q) nounwind { - %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 - %R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* - call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 ) - call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 ) - ret void + %memtmp = alloca %0, align 16 + %R = bitcast %0* %memtmp to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false) + ret void ; CHECK: @test2 ; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P @@ -51,12 +52,12 @@ define void @test2(i8* %P, i8* %Q) nounwind { @x = external global { x86_fp80, x86_fp80 } define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { - %x.0 = alloca { x86_fp80, x86_fp80 } - %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8* - call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 ) - %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* - call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 ) - ret void + %x.0 = alloca %0 + %x.01 = bitcast %0* %x.0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false) + %agg.result2 = bitcast %0* %agg.result to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false) + ret void ; CHECK: @test3 ; CHECK-NEXT: %agg.result2 = bitcast ; CHECK-NEXT: call void @llvm.memcpy @@ -66,10 +67,10 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind { ; PR8644 define void @test4(i8 *%P) { - %A = alloca {i32, i32} - %a = bitcast {i32, i32}* %A to i8* + %A = alloca %1 + %a = bitcast %1* %A to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false) - call void @test4a(i8* byval align 1 %a) + call void @test4a(i8* byval align 1 %a) ret void ; CHECK: @test4 ; CHECK-NEXT: call void @test4a( @@ -127,4 +128,5 @@ entry: declare i32 @g(%struct.p* byval align 8) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll index 8d3fbd2b6d..7f1667a455 100644 --- a/test/Transforms/MemCpyOpt/memmove.ll +++ b/test/Transforms/MemCpyOpt/memmove.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin9.0" -declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind define i8* @test1(i8* nocapture %src) nounwind { entry: @@ -13,8 +13,8 @@ entry: %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32)) %call3 = bitcast i8* %malloccall to [13 x i8]* - %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2] - tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1) + %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false) ret i8* %call3.sub } declare noalias i8* @malloc(i32) @@ -24,8 +24,8 @@ define void @test2(i8* %P) nounwind { entry: ; CHECK: @test2 ; CHECK: call void @llvm.memcpy - %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1] - tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1) + %add.ptr = getelementptr i8* %P, i64 16 + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false) ret void } @@ -34,7 +34,7 @@ define void @test3(i8* %P) nounwind { entry: ; CHECK: @test3 ; CHECK: call void @llvm.memmove - %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1] - tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1) + %add.ptr = getelementptr i8* %P, i64 16 + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false) ret void } diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll index ddfd0fd1fc..8eac7da798 100644 --- a/test/Transforms/MemCpyOpt/sret.ll +++ b/test/Transforms/MemCpyOpt/sret.ll @@ -3,26 +3,28 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" -define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 8 %z) nounwind { +%0 = type { x86_fp80, x86_fp80 } + +define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind { entry: - %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3] - %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2] - %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1] - %tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1] - %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1] - %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1] - %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1] - %tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1] - %tmp8 = load x86_fp80* %tmp7, align 16 ; <x86_fp80> [#uses=1] - store x86_fp80 %tmp3, x86_fp80* %real, align 16 - store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16 - call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind - %memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1] - %agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 ) - ret void + %iz = alloca %0 + %memtmp = alloca %0, align 16 + %tmp1 = getelementptr %0* %z, i32 0, i32 1 + %tmp2 = load x86_fp80* %tmp1, align 16 + %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 + %tmp4 = getelementptr %0* %iz, i32 0, i32 1 + %real = getelementptr %0* %iz, i32 0, i32 0 + %tmp7 = getelementptr %0* %z, i32 0, i32 0 + %tmp8 = load x86_fp80* %tmp7, align 16 + store x86_fp80 %tmp3, x86_fp80* %real, align 16 + store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16 + call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind + %memtmp14 = bitcast %0* %memtmp to i8* + %agg.result15 = bitcast %0* %agg.result to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false) + ret void } -declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind +declare void @ccoshl(%0* noalias sret, %0* byval) nounwind -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind |