summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author Vincent Lejeune <vljn@ovi.com> 2013-11-11 22:10:24 +0000
committer Vincent Lejeune <vljn@ovi.com> 2013-11-11 22:10:24 +0000
commit 70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a (patch)
tree 0529d234345ee92bad998240c04c98c877ef0bee /test
parent 6c7a7c6474ea60c40e2dbb15f5b6cf0265098ace (diff)
download llvm-70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a.tar.gz
llvm-70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a.tar.bz2
llvm-70a7d5ddb4f00bbb61afe7b536c6f599f771ab9a.tar.xz
R600: Use function inputs to represent data stored in gpr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194425 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- test/CodeGen/R600/big_alu.ll 85
-rw-r--r-- test/CodeGen/R600/complex-folding.ll 9
-rw-r--r-- test/CodeGen/R600/floor.ll 14
-rw-r--r-- test/CodeGen/R600/fmad.ll 20
-rw-r--r-- test/CodeGen/R600/fmax.ll 13
-rw-r--r-- test/CodeGen/R600/fmin.ll 13
-rw-r--r-- test/CodeGen/R600/llvm.AMDGPU.mul.ll 16
-rw-r--r-- test/CodeGen/R600/llvm.cos.ll 12
-rw-r--r-- test/CodeGen/R600/llvm.pow.ll 16
-rw-r--r-- test/CodeGen/R600/llvm.sin.ll 12
-rw-r--r-- test/CodeGen/R600/load-input-fold.ll 29
-rw-r--r-- test/CodeGen/R600/max-literals.ll 25
-rw-r--r-- test/CodeGen/R600/pv-packing.ll 25
-rw-r--r-- test/CodeGen/R600/pv.ll 61
-rw-r--r-- test/CodeGen/R600/r600-encoding.ll 15
-rw-r--r-- test/CodeGen/R600/r600-export-fix.ll 14
-rw-r--r-- test/CodeGen/R600/r600cfg.ll 14
-rw-r--r-- test/CodeGen/R600/reciprocal.ll 13
-rw-r--r-- test/CodeGen/R600/rv7x0_count3.ll 19
-rw-r--r-- test/CodeGen/R600/schedule-fs-loop-nested-if.ll 13
-rw-r--r-- test/CodeGen/R600/schedule-vs-if-nested-loop.ll 14
-rw-r--r-- test/CodeGen/R600/shared-op-cycle.ll 16
-rw-r--r-- test/CodeGen/R600/swizzle-export.ll 32
-rw-r--r-- test/CodeGen/R600/tex-clause-antidep.ll 13
-rw-r--r-- test/CodeGen/R600/texture-input-merge.ll 13
25 files changed, 246 insertions, 280 deletions
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 75f24588c1..6b683769fe 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -4,54 +4,54 @@
;This test ensures that R600 backend can handle ifcvt properly
;and do not generate ALU clauses with more than 128 instructions.
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 0)
- %1 = call float @llvm.R600.load.input(i32 1)
- %2 = call float @llvm.R600.load.input(i32 2)
- %3 = call float @llvm.R600.load.input(i32 3)
- %4 = call float @llvm.R600.load.input(i32 4)
- %5 = call float @llvm.R600.load.input(i32 36)
- %6 = call float @llvm.R600.load.input(i32 32)
+ %0 = extractelement <4 x float> %reg0, i32 0
+ %1 = extractelement <4 x float> %reg0, i32 1
+ %2 = extractelement <4 x float> %reg0, i32 2
+ %3 = extractelement <4 x float> %reg0, i32 3
+ %4 = extractelement <4 x float> %reg1, i32 0
+ %5 = extractelement <4 x float> %reg9, i32 0
+ %6 = extractelement <4 x float> %reg8, i32 0
%7 = fcmp ugt float %6, 0.000000e+00
%8 = select i1 %7, float %4, float %5
- %9 = call float @llvm.R600.load.input(i32 5)
- %10 = call float @llvm.R600.load.input(i32 37)
- %11 = call float @llvm.R600.load.input(i32 32)
+ %9 = extractelement <4 x float> %reg1, i32 1
+ %10 = extractelement <4 x float> %reg9, i32 1
+ %11 = extractelement <4 x float> %reg8, i32 0
%12 = fcmp ugt float %11, 0.000000e+00
%13 = select i1 %12, float %9, float %10
- %14 = call float @llvm.R600.load.input(i32 6)
- %15 = call float @llvm.R600.load.input(i32 38)
- %16 = call float @llvm.R600.load.input(i32 32)
+ %14 = extractelement <4 x float> %reg1, i32 2
+ %15 = extractelement <4 x float> %reg9, i32 2
+ %16 = extractelement <4 x float> %reg8, i32 0
%17 = fcmp ugt float %16, 0.000000e+00
%18 = select i1 %17, float %14, float %15
- %19 = call float @llvm.R600.load.input(i32 7)
- %20 = call float @llvm.R600.load.input(i32 39)
- %21 = call float @llvm.R600.load.input(i32 32)
- %22 = call float @llvm.R600.load.input(i32 8)
- %23 = call float @llvm.R600.load.input(i32 9)
- %24 = call float @llvm.R600.load.input(i32 10)
- %25 = call float @llvm.R600.load.input(i32 11)
- %26 = call float @llvm.R600.load.input(i32 12)
- %27 = call float @llvm.R600.load.input(i32 13)
- %28 = call float @llvm.R600.load.input(i32 14)
- %29 = call float @llvm.R600.load.input(i32 15)
- %30 = call float @llvm.R600.load.input(i32 16)
- %31 = call float @llvm.R600.load.input(i32 17)
- %32 = call float @llvm.R600.load.input(i32 18)
- %33 = call float @llvm.R600.load.input(i32 19)
- %34 = call float @llvm.R600.load.input(i32 20)
- %35 = call float @llvm.R600.load.input(i32 21)
- %36 = call float @llvm.R600.load.input(i32 22)
- %37 = call float @llvm.R600.load.input(i32 23)
- %38 = call float @llvm.R600.load.input(i32 24)
- %39 = call float @llvm.R600.load.input(i32 25)
- %40 = call float @llvm.R600.load.input(i32 26)
- %41 = call float @llvm.R600.load.input(i32 27)
- %42 = call float @llvm.R600.load.input(i32 28)
- %43 = call float @llvm.R600.load.input(i32 29)
- %44 = call float @llvm.R600.load.input(i32 30)
- %45 = call float @llvm.R600.load.input(i32 31)
+ %19 = extractelement <4 x float> %reg1, i32 3
+ %20 = extractelement <4 x float> %reg9, i32 3
+ %21 = extractelement <4 x float> %reg8, i32 0
+ %22 = extractelement <4 x float> %reg2, i32 0
+ %23 = extractelement <4 x float> %reg2, i32 1
+ %24 = extractelement <4 x float> %reg2, i32 2
+ %25 = extractelement <4 x float> %reg2, i32 3
+ %26 = extractelement <4 x float> %reg3, i32 0
+ %27 = extractelement <4 x float> %reg3, i32 1
+ %28 = extractelement <4 x float> %reg3, i32 2
+ %29 = extractelement <4 x float> %reg3, i32 3
+ %30 = extractelement <4 x float> %reg4, i32 0
+ %31 = extractelement <4 x float> %reg4, i32 1
+ %32 = extractelement <4 x float> %reg4, i32 2
+ %33 = extractelement <4 x float> %reg4, i32 3
+ %34 = extractelement <4 x float> %reg5, i32 0
+ %35 = extractelement <4 x float> %reg5, i32 1
+ %36 = extractelement <4 x float> %reg5, i32 2
+ %37 = extractelement <4 x float> %reg5, i32 3
+ %38 = extractelement <4 x float> %reg6, i32 0
+ %39 = extractelement <4 x float> %reg6, i32 1
+ %40 = extractelement <4 x float> %reg6, i32 2
+ %41 = extractelement <4 x float> %reg6, i32 3
+ %42 = extractelement <4 x float> %reg7, i32 0
+ %43 = extractelement <4 x float> %reg7, i32 1
+ %44 = extractelement <4 x float> %reg7, i32 2
+ %45 = extractelement <4 x float> %reg7, i32 3
%46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%47 = extractelement <4 x float> %46, i32 0
%48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
@@ -1147,9 +1147,6 @@ ENDIF178: ; preds = %ENDIF175, %IF179
}
; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
; Function Attrs: readnone
diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll
index 8dcd450deb..99f0d99b35 100644
--- a/test/CodeGen/R600/complex-folding.ll
+++ b/test/CodeGen/R600/complex-folding.ll
@@ -2,9 +2,9 @@
; CHECK: @main
; CHECK-NOT: MOV
-define void @main() {
+define void @main(<4 x float> inreg %reg0) #0 {
entry:
- %0 = call float @llvm.R600.load.input(i32 0)
+ %0 = extractelement <4 x float> %reg0, i32 0
%1 = call float @fabs(float %0)
%2 = fptoui float %1 to i32
%3 = bitcast i32 %2 to float
@@ -13,6 +13,7 @@ entry:
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
declare float @fabs(float ) readnone
-declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
\ No newline at end of file
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll
index 877d69a65b..67e86c41fd 100644
--- a/test/CodeGen/R600/floor.ll
+++ b/test/CodeGen/R600/floor.ll
@@ -2,15 +2,15 @@
;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @floor(float %r0)
- call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ %vec = insertelement <4 x float> undef, float %r1, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
declare float @floor(float) readonly
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
index 75e65d8133..935e35123f 100644
--- a/test/CodeGen/R600/fmad.ll
+++ b/test/CodeGen/R600/fmad.ll
@@ -2,18 +2,18 @@
;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
- %r1 = call float @llvm.R600.load.input(i32 1)
- %r2 = call float @llvm.R600.load.input(i32 2)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
+ %r2 = extractelement <4 x float> %reg0, i32 2
%r3 = fmul float %r0, %r1
- %r4 = fadd float %r3, %r2
- call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+ %r4 = fadd float %r3, %r2
+ %vec = insertelement <4 x float> undef, float %r4, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
declare float @fabs(float ) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
index be25c9ce8d..d7127f485c 100644
--- a/test/CodeGen/R600/fmax.ll
+++ b/test/CodeGen/R600/fmax.ll
@@ -2,15 +2,16 @@
;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
- %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
%r2 = fcmp oge float %r0, %r1
%r3 = select i1 %r2, float %r0, float %r1
- call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+ %vec = insertelement <4 x float> undef, float %r3, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll
index 5e34b7c890..defa8c0963 100644
--- a/test/CodeGen/R600/fmin.ll
+++ b/test/CodeGen/R600/fmin.ll
@@ -2,15 +2,16 @@
;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
- %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
%r2 = fcmp uge float %r0, %r1
%r3 = select i1 %r2, float %r1, float %r0
- call void @llvm.AMDGPU.store.output(float %r3, i32 0)
+ %vec = insertelement <4 x float> undef, float %r3, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
index cc0732b3ff..83b56a5029 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
@@ -2,16 +2,16 @@
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
- %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
%r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
- call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ %vec = insertelement <4 x float> undef, float %r2, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
declare float @llvm.AMDGPU.mul(float ,float ) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index 8fb4559f89..aaf2305dd0 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -5,15 +5,15 @@
;CHECK: ADD *
;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @llvm.cos.f32(float %r0)
- call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ %vec = insertelement <4 x float> undef, float %r1, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @llvm.cos.f32(float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
index 0f51cf46f5..b587d2b2ae 100644
--- a/test/CodeGen/R600/llvm.pow.ll
+++ b/test/CodeGen/R600/llvm.pow.ll
@@ -4,16 +4,16 @@
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
- %r1 = call float @llvm.R600.load.input(i32 1)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
%r2 = call float @llvm.pow.f32( float %r0, float %r1)
- call void @llvm.AMDGPU.store.output(float %r2, i32 0)
+ %vec = insertelement <4 x float> undef, float %r2, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
-
declare float @llvm.pow.f32(float ,float ) readonly
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index e94c2ba56b..9eb998315f 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -5,15 +5,15 @@
;CHECK: ADD *
;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @llvm.sin.f32( float %r0)
- call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ %vec = insertelement <4 x float> undef, float %r1, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @llvm.sin.f32(float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-declare float @llvm.R600.load.input(i32) readnone
-
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll
index aff2a6e18f..ca86d0e369 100644
--- a/test/CodeGen/R600/load-input-fold.ll
+++ b/test/CodeGen/R600/load-input-fold.ll
@@ -1,20 +1,20 @@
;RUN: llc < %s -march=r600 -mcpu=cayman
;REQUIRES: asserts
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
- %4 = call float @llvm.R600.load.input(i32 8)
- %5 = call float @llvm.R600.load.input(i32 9)
- %6 = call float @llvm.R600.load.input(i32 10)
- %7 = call float @llvm.R600.load.input(i32 11)
- %8 = call float @llvm.R600.load.input(i32 12)
- %9 = call float @llvm.R600.load.input(i32 13)
- %10 = call float @llvm.R600.load.input(i32 14)
- %11 = call float @llvm.R600.load.input(i32 15)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
+ %4 = extractelement <4 x float> %reg2, i32 0
+ %5 = extractelement <4 x float> %reg2, i32 1
+ %6 = extractelement <4 x float> %reg2, i32 2
+ %7 = extractelement <4 x float> %reg2, i32 3
+ %8 = extractelement <4 x float> %reg3, i32 0
+ %9 = extractelement <4 x float> %reg3, i32 1
+ %10 = extractelement <4 x float> %reg3, i32 2
+ %11 = extractelement <4 x float> %reg3, i32 3
%12 = load <4 x float> addrspace(8)* null
%13 = extractelement <4 x float> %12, i32 0
%14 = fmul float %0, %13
@@ -96,9 +96,6 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
; Function Attrs: readonly
diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll
index c31b7c06bb..65a6d2b5fc 100644
--- a/test/CodeGen/R600/max-literals.ll
+++ b/test/CodeGen/R600/max-literals.ll
@@ -3,13 +3,13 @@
; CHECK: @main
; CHECK: ADD *
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
- %4 = call float @llvm.R600.load.input(i32 8)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
+ %4 = extractelement <4 x float> %reg2, i32 0
%5 = fadd float %0, 2.0
%6 = fadd float %1, 3.0
%7 = fadd float %2, 4.0
@@ -32,13 +32,13 @@ main_body:
; CHECK: @main
; CHECK-NOT: ADD *
-define void @main2() #0 {
+define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
- %4 = call float @llvm.R600.load.input(i32 8)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
+ %4 = extractelement <4 x float> %reg2, i32 0
%5 = fadd float %0, 2.0
%6 = fadd float %1, 3.0
%7 = fadd float %2, 4.0
@@ -59,7 +59,6 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll
index 03fc204559..e5615b9972 100644
--- a/test/CodeGen/R600/pv-packing.ll
+++ b/test/CodeGen/R600/pv-packing.ll
@@ -3,17 +3,17 @@
;CHECK: DOT4 T{{[0-9]\.X}}
;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 8)
- %4 = call float @llvm.R600.load.input(i32 9)
- %5 = call float @llvm.R600.load.input(i32 10)
- %6 = call float @llvm.R600.load.input(i32 12)
- %7 = call float @llvm.R600.load.input(i32 13)
- %8 = call float @llvm.R600.load.input(i32 14)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg2, i32 0
+ %4 = extractelement <4 x float> %reg2, i32 1
+ %5 = extractelement <4 x float> %reg2, i32 2
+ %6 = extractelement <4 x float> %reg3, i32 0
+ %7 = extractelement <4 x float> %reg3, i32 1
+ %8 = extractelement <4 x float> %reg3, i32 2
%9 = load <4 x float> addrspace(8)* null
%10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
@@ -36,9 +36,6 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
@@ -46,5 +43,3 @@ declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
-attributes #2 = { readonly }
-attributes #3 = { nounwind readonly }
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 6d9396cb7d..5a930b2926 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -3,36 +3,36 @@
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
- %4 = call float @llvm.R600.load.input(i32 8)
- %5 = call float @llvm.R600.load.input(i32 9)
- %6 = call float @llvm.R600.load.input(i32 10)
- %7 = call float @llvm.R600.load.input(i32 11)
- %8 = call float @llvm.R600.load.input(i32 12)
- %9 = call float @llvm.R600.load.input(i32 13)
- %10 = call float @llvm.R600.load.input(i32 14)
- %11 = call float @llvm.R600.load.input(i32 15)
- %12 = call float @llvm.R600.load.input(i32 16)
- %13 = call float @llvm.R600.load.input(i32 17)
- %14 = call float @llvm.R600.load.input(i32 18)
- %15 = call float @llvm.R600.load.input(i32 19)
- %16 = call float @llvm.R600.load.input(i32 20)
- %17 = call float @llvm.R600.load.input(i32 21)
- %18 = call float @llvm.R600.load.input(i32 22)
- %19 = call float @llvm.R600.load.input(i32 23)
- %20 = call float @llvm.R600.load.input(i32 24)
- %21 = call float @llvm.R600.load.input(i32 25)
- %22 = call float @llvm.R600.load.input(i32 26)
- %23 = call float @llvm.R600.load.input(i32 27)
- %24 = call float @llvm.R600.load.input(i32 28)
- %25 = call float @llvm.R600.load.input(i32 29)
- %26 = call float @llvm.R600.load.input(i32 30)
- %27 = call float @llvm.R600.load.input(i32 31)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
+ %4 = extractelement <4 x float> %reg2, i32 0
+ %5 = extractelement <4 x float> %reg2, i32 1
+ %6 = extractelement <4 x float> %reg2, i32 2
+ %7 = extractelement <4 x float> %reg2, i32 3
+ %8 = extractelement <4 x float> %reg3, i32 0
+ %9 = extractelement <4 x float> %reg3, i32 1
+ %10 = extractelement <4 x float> %reg3, i32 2
+ %11 = extractelement <4 x float> %reg3, i32 3
+ %12 = extractelement <4 x float> %reg4, i32 0
+ %13 = extractelement <4 x float> %reg4, i32 1
+ %14 = extractelement <4 x float> %reg4, i32 2
+ %15 = extractelement <4 x float> %reg4, i32 3
+ %16 = extractelement <4 x float> %reg5, i32 0
+ %17 = extractelement <4 x float> %reg5, i32 1
+ %18 = extractelement <4 x float> %reg5, i32 2
+ %19 = extractelement <4 x float> %reg5, i32 3
+ %20 = extractelement <4 x float> %reg6, i32 0
+ %21 = extractelement <4 x float> %reg6, i32 1
+ %22 = extractelement <4 x float> %reg6, i32 2
+ %23 = extractelement <4 x float> %reg6, i32 3
+ %24 = extractelement <4 x float> %reg7, i32 0
+ %25 = extractelement <4 x float> %reg7, i32 1
+ %26 = extractelement <4 x float> %reg7, i32 2
+ %27 = extractelement <4 x float> %reg7, i32 3
%28 = load <4 x float> addrspace(8)* null
%29 = extractelement <4 x float> %28, i32 0
%30 = fmul float %0, %29
@@ -219,9 +219,6 @@ main_body:
}
; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
; Function Attrs: readonly
diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll
index 6ef3c3124b..b760c882f4 100644
--- a/test/CodeGen/R600/r600-encoding.ll
+++ b/test/CodeGen/R600/r600-encoding.ll
@@ -10,15 +10,16 @@
; R600-CHECK: @test
; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
-define void @test() {
+define void @test(<4 x float> inreg %reg0) #0 {
entry:
- %0 = call float @llvm.R600.load.input(i32 0)
- %1 = call float @llvm.R600.load.input(i32 1)
- %2 = fmul float %0, %1
- call void @llvm.AMDGPU.store.output(float %2, i32 0)
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
+ %r2 = fmul float %r0, %r1
+ %vec = insertelement <4 x float> undef, float %r2, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-declare void @llvm.AMDGPU.store.output(float, i32)
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll
index 78c703b74e..73bc0635ab 100644
--- a/test/CodeGen/R600/r600-export-fix.ll
+++ b/test/CodeGen/R600/r600-export-fix.ll
@@ -10,12 +10,12 @@
;CHECK: EXPORT T{{[0-9]}}.0000
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%5 = extractelement <4 x float> %4, i32 0
%6 = fmul float %5, %0
@@ -137,10 +137,6 @@ main_body:
ret void
}
-; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
index 895ad5e1c8..6dee3ef89c 100644
--- a/test/CodeGen/R600/r600cfg.ll
+++ b/test/CodeGen/R600/r600cfg.ll
@@ -1,12 +1,12 @@
;RUN: llc < %s -march=r600 -mcpu=redwood
;REQUIRES: asserts
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
%4 = bitcast float %0 to i32
%5 = icmp eq i32 %4, 0
%6 = sext i1 %5 to i32
@@ -113,12 +113,8 @@ ENDIF48: ; preds = %LOOP47
br label %LOOP47
}
-; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll
index 2783929670..b4ac47afce 100644
--- a/test/CodeGen/R600/reciprocal.ll
+++ b/test/CodeGen/R600/reciprocal.ll
@@ -2,15 +2,14 @@
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define void @test() {
- %r0 = call float @llvm.R600.load.input(i32 0)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %r0 = extractelement <4 x float> %reg0, i32 0
%r1 = fdiv float 1.0, %r0
- call void @llvm.AMDGPU.store.output(float %r1, i32 0)
+ %vec = insertelement <4 x float> undef, float %r1, i32 0
+ call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-declare void @llvm.AMDGPU.store.output(float, i32)
-
-declare float @llvm.AMDGPU.rcp(float ) readnone
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll
index 474d6ba902..c3fd923e45 100644
--- a/test/CodeGen/R600/rv7x0_count3.ll
+++ b/test/CodeGen/R600/rv7x0_count3.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
-; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
+; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %1 = call float @llvm.R600.load.input(i32 4)
- %2 = call float @llvm.R600.load.input(i32 5)
- %3 = call float @llvm.R600.load.input(i32 6)
- %4 = call float @llvm.R600.load.input(i32 7)
+define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
+ %1 = extractelement <4 x float> %reg1, i32 0
+ %2 = extractelement <4 x float> %reg1, i32 1
+ %3 = extractelement <4 x float> %reg1, i32 2
+ %4 = extractelement <4 x float> %reg1, i32 3
%5 = insertelement <4 x float> undef, float %1, i32 0
%6 = insertelement <4 x float> %5, float %2, i32 1
%7 = insertelement <4 x float> %6, float %3, i32 2
@@ -36,9 +36,6 @@ define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in)
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
-; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #1 = { readnone }
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
index 2a66094309..11e8f5176f 100644
--- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
+++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll
@@ -1,12 +1,12 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
;REQUIRES: asserts
-define void @main() {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
main_body:
- %0 = call float @llvm.R600.interp.input(i32 0, i32 0)
- %1 = call float @llvm.R600.interp.input(i32 1, i32 0)
- %2 = call float @llvm.R600.interp.input(i32 2, i32 0)
- %3 = call float @llvm.R600.interp.input(i32 3, i32 0)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
%4 = fcmp ult float %1, 0.000000e+00
%5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
%6 = fsub float -0.000000e+00, %5
@@ -74,10 +74,9 @@ ELSE17: ; preds = %ELSE
br label %ENDIF
}
-declare float @llvm.R600.interp.input(i32, i32) #0
-
declare float @llvm.AMDIL.clamp.(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }
+attributes #1 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
index 44b7c2f680..33b20d3673 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -1,12 +1,12 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
;REQUIRES: asserts
-define void @main() {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
%4 = fcmp ult float %0, 0.000000e+00
%5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
%6 = fsub float -0.000000e+00, %5
@@ -127,8 +127,6 @@ ENDIF19: ; preds = %ENDIF16
br label %LOOP
}
-declare float @llvm.R600.load.input(i32) #0
-
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { readnone }
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll
index c49b5f4bd1..0484fc9a85 100644
--- a/test/CodeGen/R600/shared-op-cycle.ll
+++ b/test/CodeGen/R600/shared-op-cycle.ll
@@ -4,10 +4,10 @@
; CHECK: MULADD_IEEE *
; CHECK-NOT: MULADD_IEEE *
-define void @main() {
- %w0 = call float @llvm.R600.load.input(i32 3)
- %w1 = call float @llvm.R600.load.input(i32 7)
- %w2 = call float @llvm.R600.load.input(i32 11)
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
+ %w0 = extractelement <4 x float> %reg0, i32 3
+ %w1 = extractelement <4 x float> %reg1, i32 3
+ %w2 = extractelement <4 x float> %reg2, i32 3
%sq0 = fmul float %w0, %w0
%r0 = fadd float %sq0, 2.0
%sq1 = fmul float %w1, %w1
@@ -24,15 +24,9 @@ define void @main() {
}
; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
-; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
-
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
-attributes #2 = { readonly }
-attributes #3 = { nounwind readonly } \ No newline at end of file
+attributes #1 = { readnone } \ No newline at end of file
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
index 9a58f667f0..16c3f19193 100644
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -6,12 +6,12 @@
;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW
-define void @main() #0 {
+define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = extractelement <4 x float> %reg1, i32 2
+ %3 = extractelement <4 x float> %reg1, i32 3
%4 = load <4 x float> addrspace(8)* null
%5 = extractelement <4 x float> %4, i32 1
%6 = load <4 x float> addrspace(8)* null
@@ -96,12 +96,12 @@ main_body:
; EG-CHECK: T{{[0-9]+}}.XY__
; EG-CHECK: T{{[0-9]+}}.YXZ0
-define void @main2() #0 {
+define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
main_body:
- %0 = call float @llvm.R600.load.input(i32 4)
- %1 = call float @llvm.R600.load.input(i32 5)
- %2 = call float @llvm.R600.load.input(i32 6)
- %3 = call float @llvm.R600.load.input(i32 7)
+ %0 = extractelement <4 x float> %reg1, i32 0
+ %1 = extractelement <4 x float> %reg1, i32 1
+ %2 = fadd float %0, 2.5
+ %3 = fmul float %1, 3.5
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 0
%6 = call float @llvm.cos.f32(float %5)
@@ -109,8 +109,8 @@ main_body:
%8 = extractelement <4 x float> %7, i32 0
%9 = load <4 x float> addrspace(8)* null
%10 = extractelement <4 x float> %9, i32 1
- %11 = insertelement <4 x float> undef, float %0, i32 0
- %12 = insertelement <4 x float> %11, float %1, i32 1
+ %11 = insertelement <4 x float> undef, float %2, i32 0
+ %12 = insertelement <4 x float> %11, float %3, i32 1
call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
%13 = insertelement <4 x float> undef, float %6, i32 0
%14 = insertelement <4 x float> %13, float %8, i32 1
@@ -120,14 +120,10 @@ main_body:
ret void
}
-; Function Attrs: readnone
-declare float @llvm.R600.load.input(i32) #1
-
; Function Attrs: nounwind readonly
-declare float @llvm.cos.f32(float) #2
+declare float @llvm.cos.f32(float) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="1" }
-attributes #1 = { readnone }
-attributes #2 = { nounwind readonly }
+attributes #1 = { nounwind readonly }
diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll
index 5979609ce4..cbb9c50974 100644
--- a/test/CodeGen/R600/tex-clause-antidep.ll
+++ b/test/CodeGen/R600/tex-clause-antidep.ll
@@ -3,11 +3,11 @@
;CHECK: TEX
;CHECK-NEXT: ALU
-define void @test() {
- %1 = call float @llvm.R600.load.input(i32 0)
- %2 = call float @llvm.R600.load.input(i32 1)
- %3 = call float @llvm.R600.load.input(i32 2)
- %4 = call float @llvm.R600.load.input(i32 3)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %1 = extractelement <4 x float> %reg0, i32 0
+ %2 = extractelement <4 x float> %reg0, i32 1
+ %3 = extractelement <4 x float> %reg0, i32 2
+ %4 = extractelement <4 x float> %reg0, i32 3
%5 = insertelement <4 x float> undef, float %1, i32 0
%6 = insertelement <4 x float> %5, float %2, i32 1
%7 = insertelement <4 x float> %6, float %3, i32 2
@@ -19,6 +19,7 @@ define void @test() {
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" } \ No newline at end of file
diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll
index 5d0ecef306..789538af58 100644
--- a/test/CodeGen/R600/texture-input-merge.ll
+++ b/test/CodeGen/R600/texture-input-merge.ll
@@ -2,11 +2,11 @@
;CHECK-NOT: MOV
-define void @test() {
- %1 = call float @llvm.R600.load.input(i32 0)
- %2 = call float @llvm.R600.load.input(i32 1)
- %3 = call float @llvm.R600.load.input(i32 2)
- %4 = call float @llvm.R600.load.input(i32 3)
+define void @test(<4 x float> inreg %reg0) #0 {
+ %1 = extractelement <4 x float> %reg0, i32 0
+ %2 = extractelement <4 x float> %reg0, i32 1
+ %3 = extractelement <4 x float> %reg0, i32 2
+ %4 = extractelement <4 x float> %reg0, i32 3
%5 = fmul float %1, 3.0
%6 = fmul float %2, 3.0
%7 = fmul float %3, 3.0
@@ -25,6 +25,7 @@ define void @test() {
ret void
}
-declare float @llvm.R600.load.input(i32) readnone
declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" } \ No newline at end of file