diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-11-11 22:10:24 +0000 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-11-11 22:10:24 +0000 |
commit | 70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a (patch) | |
tree | 0529d234345ee92bad998240c04c98c877ef0bee /test | |
parent | 6c7a7c6474ea60c40e2dbb15f5b6cf0265098ace (diff) | |
download | llvm-70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a.tar.gz llvm-70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a.tar.bz2 llvm-70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a.tar.xz |
R600: Use function inputs to represent data stored in gpr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194425 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
25 files changed, 246 insertions, 280 deletions
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 75f24588c1..6b683769fe 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -4,54 +4,54 @@ ;This test ensures that R600 backend can handle ifcvt properly ;and do not generate ALU clauses with more than 128 instructions. -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = call float @llvm.R600.load.input(i32 2) - %3 = call float @llvm.R600.load.input(i32 3) - %4 = call float @llvm.R600.load.input(i32 4) - %5 = call float @llvm.R600.load.input(i32 36) - %6 = call float @llvm.R600.load.input(i32 32) + %0 = extractelement <4 x float> %reg0, i32 0 + %1 = extractelement <4 x float> %reg0, i32 1 + %2 = extractelement <4 x float> %reg0, i32 2 + %3 = extractelement <4 x float> %reg0, i32 3 + %4 = extractelement <4 x float> %reg1, i32 0 + %5 = extractelement <4 x float> %reg9, i32 0 + %6 = extractelement <4 x float> %reg8, i32 0 %7 = fcmp ugt float %6, 0.000000e+00 %8 = select i1 %7, float %4, float %5 - %9 = call float @llvm.R600.load.input(i32 5) - %10 = call float @llvm.R600.load.input(i32 37) - %11 = call float @llvm.R600.load.input(i32 32) + %9 = extractelement <4 x float> %reg1, i32 1 + %10 = extractelement <4 x float> %reg9, i32 1 + %11 = extractelement <4 x float> %reg8, i32 0 %12 = fcmp ugt float %11, 0.000000e+00 %13 = select i1 %12, float %9, float %10 - %14 = call float @llvm.R600.load.input(i32 6) - %15 = call float @llvm.R600.load.input(i32 38) - %16 = call float @llvm.R600.load.input(i32 32) + %14 = extractelement <4 x float> %reg1, i32 2 + %15 = extractelement <4 x float> %reg9, i32 2 + %16 = extractelement <4 x float> %reg8, i32 0 %17 = fcmp ugt float %16, 0.000000e+00 %18 = select i1 %17, float %14, float %15 - %19 = call float @llvm.R600.load.input(i32 7) - %20 = call float @llvm.R600.load.input(i32 39) - %21 = call float @llvm.R600.load.input(i32 32) - %22 = call float @llvm.R600.load.input(i32 8) - %23 = call float @llvm.R600.load.input(i32 9) - %24 = call float @llvm.R600.load.input(i32 10) - %25 = call float @llvm.R600.load.input(i32 11) - %26 = call float @llvm.R600.load.input(i32 12) - %27 = call float @llvm.R600.load.input(i32 13) - %28 = call float @llvm.R600.load.input(i32 14) - %29 = call float @llvm.R600.load.input(i32 15) - %30 = call float @llvm.R600.load.input(i32 16) - %31 = call float @llvm.R600.load.input(i32 17) - %32 = call float @llvm.R600.load.input(i32 18) - %33 = call float @llvm.R600.load.input(i32 19) - %34 = call float @llvm.R600.load.input(i32 20) - %35 = call float @llvm.R600.load.input(i32 21) - %36 = call float @llvm.R600.load.input(i32 22) - %37 = call float @llvm.R600.load.input(i32 23) - %38 = call float @llvm.R600.load.input(i32 24) - %39 = call float @llvm.R600.load.input(i32 25) - %40 = call float @llvm.R600.load.input(i32 26) - %41 = call float @llvm.R600.load.input(i32 27) - %42 = call float @llvm.R600.load.input(i32 28) - %43 = call float @llvm.R600.load.input(i32 29) - %44 = call float @llvm.R600.load.input(i32 30) - %45 = call float @llvm.R600.load.input(i32 31) + %19 = extractelement <4 x float> %reg1, i32 3 + %20 = extractelement <4 x float> %reg9, i32 3 + %21 = extractelement <4 x float> %reg8, i32 0 + %22 = extractelement <4 x float> %reg2, i32 0 + %23 = extractelement <4 x float> %reg2, i32 1 + %24 = extractelement <4 x float> %reg2, i32 2 + %25 = extractelement <4 x float> %reg2, i32 3 + %26 = extractelement <4 x float> %reg3, i32 0 + %27 = extractelement <4 x float> %reg3, i32 1 + %28 = extractelement <4 x float> %reg3, i32 2 + %29 = extractelement <4 x float> %reg3, i32 3 + %30 = extractelement <4 x float> %reg4, i32 0 + %31 = extractelement <4 x float> %reg4, i32 1 + %32 = extractelement <4 x float> %reg4, i32 2 + %33 = extractelement <4 x float> %reg4, i32 3 + %34 = extractelement <4 x float> %reg5, i32 0 + %35 = extractelement <4 x float> %reg5, i32 1 + %36 = extractelement <4 x float> %reg5, i32 2 + %37 = extractelement <4 x float> %reg5, i32 3 + %38 = extractelement <4 x float> %reg6, i32 0 + %39 = extractelement <4 x float> %reg6, i32 1 + %40 = extractelement <4 x float> %reg6, i32 2 + %41 = extractelement <4 x float> %reg6, i32 3 + %42 = extractelement <4 x float> %reg7, i32 0 + %43 = extractelement <4 x float> %reg7, i32 1 + %44 = extractelement <4 x float> %reg7, i32 2 + %45 = extractelement <4 x float> %reg7, i32 3 %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %47 = extractelement <4 x float> %46, i32 0 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) @@ -1147,9 +1147,6 @@ ENDIF178: ; preds = %ENDIF175, %IF179 } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll index 8dcd450deb..99f0d99b35 100644 --- a/test/CodeGen/R600/complex-folding.ll +++ b/test/CodeGen/R600/complex-folding.ll @@ -2,9 +2,9 @@ ; CHECK: @main ; CHECK-NOT: MOV -define void @main() { +define void @main(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) + %0 = extractelement <4 x float> %reg0, i32 0 %1 = call float @fabs(float %0) %2 = fptoui float %1 to i32 %3 = bitcast i32 %2 to float @@ -13,6 +13,7 @@ entry: ret void } -declare float @llvm.R600.load.input(i32) readnone declare float @fabs(float ) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
\ No newline at end of file +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 877d69a65b..67e86c41fd 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -2,15 +2,15 @@ ;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @floor(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @floor(float) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll index 75e65d8133..935e35123f 100644 --- a/test/CodeGen/R600/fmad.ll +++ b/test/CodeGen/R600/fmad.ll @@ -2,18 +2,18 @@ ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = call float @llvm.R600.load.input(i32 2) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = extractelement <4 x float> %reg0, i32 2 %r3 = fmul float %r0, %r1 - %r4 = fadd float %r3, %r2 - call void @llvm.AMDGPU.store.output(float %r4, i32 0) + %r4 = fadd float %r3, %r2 + %vec = insertelement <4 x float> undef, float %r4, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @fabs(float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index be25c9ce8d..d7127f485c 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -2,15 +2,16 @@ ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp oge float %r0, %r1 %r3 = select i1 %r2, float %r0, float %r1 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll index 5e34b7c890..defa8c0963 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/R600/fmin.ll @@ -2,15 +2,16 @@ ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp uge float %r0, %r1 %r3 = select i1 %r2, float %r1, float %r0 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll index cc0732b3ff..83b56a5029 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -2,16 +2,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.AMDGPU.mul(float ,float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" }
\ No newline at end of file diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index 8fb4559f89..aaf2305dd0 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.cos.f32(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.cos.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 0f51cf46f5..b587d2b2ae 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -4,16 +4,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.pow.f32( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.pow.f32(float ,float ) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index e94c2ba56b..9eb998315f 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.sin.f32( float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.sin.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll index aff2a6e18f..ca86d0e369 100644 --- a/test/CodeGen/R600/load-input-fold.ll +++ b/test/CodeGen/R600/load-input-fold.ll @@ -1,20 +1,20 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 %12 = load <4 x float> addrspace(8)* null %13 = extractelement <4 x float> %12, i32 0 %14 = fmul float %0, %13 @@ -96,9 +96,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll index c31b7c06bb..65a6d2b5fc 100644 --- a/test/CodeGen/R600/max-literals.ll +++ b/test/CodeGen/R600/max-literals.ll @@ -3,13 +3,13 @@ ; CHECK: @main ; CHECK: ADD * -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -32,13 +32,13 @@ main_body: ; CHECK: @main ; CHECK-NOT: ADD * -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -59,7 +59,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll index 03fc204559..e5615b9972 100644 --- a/test/CodeGen/R600/pv-packing.ll +++ b/test/CodeGen/R600/pv-packing.ll @@ -3,17 +3,17 @@ ;CHECK: DOT4 T{{[0-9]\.X}} ;CHECK: MULADD_IEEE * T{{[0-9]\.W}} -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 8) - %4 = call float @llvm.R600.load.input(i32 9) - %5 = call float @llvm.R600.load.input(i32 10) - %6 = call float @llvm.R600.load.input(i32 12) - %7 = call float @llvm.R600.load.input(i32 13) - %8 = call float @llvm.R600.load.input(i32 14) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg2, i32 0 + %4 = extractelement <4 x float> %reg2, i32 1 + %5 = extractelement <4 x float> %reg2, i32 2 + %6 = extractelement <4 x float> %reg3, i32 0 + %7 = extractelement <4 x float> %reg3, i32 1 + %8 = extractelement <4 x float> %reg3, i32 2 %9 = load <4 x float> addrspace(8)* null %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9) @@ -36,9 +36,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 @@ -46,5 +43,3 @@ declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly } diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 6d9396cb7d..5a930b2926 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -3,36 +3,36 @@ ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) ;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) - %12 = call float @llvm.R600.load.input(i32 16) - %13 = call float @llvm.R600.load.input(i32 17) - %14 = call float @llvm.R600.load.input(i32 18) - %15 = call float @llvm.R600.load.input(i32 19) - %16 = call float @llvm.R600.load.input(i32 20) - %17 = call float @llvm.R600.load.input(i32 21) - %18 = call float @llvm.R600.load.input(i32 22) - %19 = call float @llvm.R600.load.input(i32 23) - %20 = call float @llvm.R600.load.input(i32 24) - %21 = call float @llvm.R600.load.input(i32 25) - %22 = call float @llvm.R600.load.input(i32 26) - %23 = call float @llvm.R600.load.input(i32 27) - %24 = call float @llvm.R600.load.input(i32 28) - %25 = call float @llvm.R600.load.input(i32 29) - %26 = call float @llvm.R600.load.input(i32 30) - %27 = call float @llvm.R600.load.input(i32 31) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 + %12 = extractelement <4 x float> %reg4, i32 0 + %13 = extractelement <4 x float> %reg4, i32 1 + %14 = extractelement <4 x float> %reg4, i32 2 + %15 = extractelement <4 x float> %reg4, i32 3 + %16 = extractelement <4 x float> %reg5, i32 0 + %17 = extractelement <4 x float> %reg5, i32 1 + %18 = extractelement <4 x float> %reg5, i32 2 + %19 = extractelement <4 x float> %reg5, i32 3 + %20 = extractelement <4 x float> %reg6, i32 0 + %21 = extractelement <4 x float> %reg6, i32 1 + %22 = extractelement <4 x float> %reg6, i32 2 + %23 = extractelement <4 x float> %reg6, i32 3 + %24 = extractelement <4 x float> %reg7, i32 0 + %25 = extractelement <4 x float> %reg7, i32 1 + %26 = extractelement <4 x float> %reg7, i32 2 + %27 = extractelement <4 x float> %reg7, i32 3 %28 = load <4 x float> addrspace(8)* null %29 = extractelement <4 x float> %28, i32 0 %30 = fmul float %0, %29 @@ -219,9 +219,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll index 6ef3c3124b..b760c882f4 100644 --- a/test/CodeGen/R600/r600-encoding.ll +++ b/test/CodeGen/R600/r600-encoding.ll @@ -10,15 +10,16 @@ ; R600-CHECK: @test ; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] -define void @test() { +define void @test(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = fmul float %0, %1 - call void @llvm.AMDGPU.store.output(float %2, i32 0) + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = fmul float %r0, %r1 + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll index 78c703b74e..73bc0635ab 100644 --- a/test/CodeGen/R600/r600-export-fix.ll +++ b/test/CodeGen/R600/r600-export-fix.ll @@ -10,12 +10,12 @@ ;CHECK: EXPORT T{{[0-9]}}.0000 -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %5 = extractelement <4 x float> %4, i32 0 %6 = fmul float %5, %0 @@ -137,10 +137,6 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll index 895ad5e1c8..6dee3ef89c 100644 --- a/test/CodeGen/R600/r600cfg.ll +++ b/test/CodeGen/R600/r600cfg.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = bitcast float %0 to i32 %5 = icmp eq i32 %4, 0 %6 = sext i1 %5 to i32 @@ -113,12 +113,8 @@ ENDIF48: ; preds = %LOOP47 br label %LOOP47 } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll index 2783929670..b4ac47afce 100644 --- a/test/CodeGen/R600/reciprocal.ll +++ b/test/CodeGen/R600/reciprocal.ll @@ -2,15 +2,14 @@ ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = fdiv float 1.0, %r0 - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) - -declare float @llvm.AMDGPU.rcp(float ) readnone +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll index 474d6ba902..c3fd923e45 100644 --- a/test/CodeGen/R600/rv7x0_count3.ll +++ b/test/CodeGen/R600/rv7x0_count3.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s -; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80] +; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80] -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %1 = call float @llvm.R600.load.input(i32 4) - %2 = call float @llvm.R600.load.input(i32 5) - %3 = call float @llvm.R600.load.input(i32 6) - %4 = call float @llvm.R600.load.input(i32 7) +define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { + %1 = extractelement <4 x float> %reg1, i32 0 + %2 = extractelement <4 x float> %reg1, i32 1 + %3 = extractelement <4 x float> %reg1, i32 2 + %4 = extractelement <4 x float> %reg1, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -36,9 +36,6 @@ define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #1 = { readnone } + +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll index 2a66094309..11e8f5176f 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 { main_body: - %0 = call float @llvm.R600.interp.input(i32 0, i32 0) - %1 = call float @llvm.R600.interp.input(i32 1, i32 0) - %2 = call float @llvm.R600.interp.input(i32 2, i32 0) - %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %1, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -74,10 +74,9 @@ ELSE17: ; preds = %ELSE br label %ENDIF } -declare float @llvm.R600.interp.input(i32, i32) #0 - declare float @llvm.AMDIL.clamp.(float, float, float) #0 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { readnone } +attributes #1 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll index 44b7c2f680..33b20d3673 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %0, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -127,8 +127,6 @@ ENDIF19: ; preds = %ENDIF16 br label %LOOP } -declare float @llvm.R600.load.input(i32) #0 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #0 = { readnone } +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll index c49b5f4bd1..0484fc9a85 100644 --- a/test/CodeGen/R600/shared-op-cycle.ll +++ b/test/CodeGen/R600/shared-op-cycle.ll @@ -4,10 +4,10 @@ ; CHECK: MULADD_IEEE * ; CHECK-NOT: MULADD_IEEE * -define void @main() { - %w0 = call float @llvm.R600.load.input(i32 3) - %w1 = call float @llvm.R600.load.input(i32 7) - %w2 = call float @llvm.R600.load.input(i32 11) +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { + %w0 = extractelement <4 x float> %reg0, i32 3 + %w1 = extractelement <4 x float> %reg1, i32 3 + %w2 = extractelement <4 x float> %reg2, i32 3 %sq0 = fmul float %w0, %w0 %r0 = fadd float %sq0, 2.0 %sq1 = fmul float %w1, %w1 @@ -24,15 +24,9 @@ define void @main() { } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly }
\ No newline at end of file +attributes #1 = { readnone }
\ No newline at end of file diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll index 9a58f667f0..16c3f19193 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/R600/swizzle-export.ll @@ -6,12 +6,12 @@ ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX ;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* null %5 = extractelement <4 x float> %4, i32 1 %6 = load <4 x float> addrspace(8)* null @@ -96,12 +96,12 @@ main_body: ; EG-CHECK: T{{[0-9]+}}.XY__ ; EG-CHECK: T{{[0-9]+}}.YXZ0 -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = fadd float %0, 2.5 + %3 = fmul float %1, 3.5 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %5 = extractelement <4 x float> %4, i32 0 %6 = call float @llvm.cos.f32(float %5) @@ -109,8 +109,8 @@ main_body: %8 = extractelement <4 x float> %7, i32 0 %9 = load <4 x float> addrspace(8)* null %10 = extractelement <4 x float> %9, i32 1 - %11 = insertelement <4 x float> undef, float %0, i32 0 - %12 = insertelement <4 x float> %11, float %1, i32 1 + %11 = insertelement <4 x float> undef, float %2, i32 0 + %12 = insertelement <4 x float> %11, float %3, i32 1 call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1) %13 = insertelement <4 x float> undef, float %6, i32 0 %14 = insertelement <4 x float> %13, float %8, i32 1 @@ -120,14 +120,10 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: nounwind readonly -declare float @llvm.cos.f32(float) #2 +declare float @llvm.cos.f32(float) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { nounwind readonly } +attributes #1 = { nounwind readonly } diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll index 5979609ce4..cbb9c50974 100644 --- a/test/CodeGen/R600/tex-clause-antidep.ll +++ b/test/CodeGen/R600/tex-clause-antidep.ll @@ -3,11 +3,11 @@ ;CHECK: TEX ;CHECK-NEXT: ALU -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -19,6 +19,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" }
\ No newline at end of file diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll index 5d0ecef306..789538af58 100644 --- a/test/CodeGen/R600/texture-input-merge.ll +++ b/test/CodeGen/R600/texture-input-merge.ll @@ -2,11 +2,11 @@ ;CHECK-NOT: MOV -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = fmul float %1, 3.0 %6 = fmul float %2, 3.0 %7 = fmul float %3, 3.0 @@ -25,6 +25,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" }
\ No newline at end of file |