author     Matt Arsenault <Matthew.Arsenault@amd.com>  2014-06-19 01:19:19 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2014-06-19 01:19:19 +0000
commit     d9b35435b89015d154b0e20f4d4796d936237f84 (patch)
tree       c1fbe95a3f550edf4da38ffc64009b0063781697 /test
parent     dd8406a6b7f0cbf9082c4bdb2cc8c3b3d5da6eec (diff)
R600/SI: Add intrinsics for various math instructions.
These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211247 91177308-0d34-0410-b5e6-96231b3b80d8
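For reference, a minimal sketch of how one of the new builtins is called from IR. The declaration is taken verbatim from the tests in this patch; the kernel name @use_rcp and the output store are illustrative only, not part of the change:

declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone

; Reciprocal of %x via the new builtin; on SI this is expected to
; select to V_RCP_F32_e32 (see llvm.AMDGPU.rcp.ll below).
define void @use_rcp(float addrspace(1)* %out, float %x) nounwind {
  %r = call float @llvm.AMDGPU.rcp.f32(float %x) nounwind readnone
  store float %r, float addrspace(1)* %out, align 4
  ret void
}

The new InstCombine test below additionally checks that rcp of the exactly representable constants 1.0 and 0.5 is constant-folded, while other operands are left for the target instruction.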
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/R600/big_alu.ll                     12
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll       27
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll        27
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.div_scale.ll       23
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.rcp.ll             58
-rw-r--r--  test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll      29
-rw-r--r--  test/CodeGen/R600/pv.ll                           4
-rw-r--r--  test/CodeGen/R600/sgpr-copy.ll                    4
-rw-r--r--  test/CodeGen/R600/si-sgpr-spill.ll               18
-rw-r--r--  test/Transforms/InstCombine/r600-intrinsics.ll   47
10 files changed, 230 insertions, 19 deletions
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 6b683769fe..511e8ef629 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -101,7 +101,7 @@ IF137: ; preds = %main_body
%88 = insertelement <4 x float> %87, float %32, i32 2
%89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
%90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89)
- %91 = call float @llvm.AMDGPU.rsq(float %90)
+ %91 = call float @llvm.AMDGPU.rsq.f32(float %90)
%92 = fmul float %30, %91
%93 = fmul float %31, %91
%94 = fmul float %32, %91
@@ -344,7 +344,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%325 = insertelement <4 x float> %324, float %318, i32 2
%326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3
%327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326)
- %328 = call float @llvm.AMDGPU.rsq(float %327)
+ %328 = call float @llvm.AMDGPU.rsq.f32(float %327)
%329 = fmul float %314, %328
%330 = fmul float %316, %328
%331 = fmul float %318, %328
@@ -377,7 +377,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%358 = insertelement <4 x float> %357, float %45, i32 2
%359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3
%360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359)
- %361 = call float @llvm.AMDGPU.rsq(float %360)
+ %361 = call float @llvm.AMDGPU.rsq.f32(float %360)
%362 = fmul float %45, %361
%363 = call float @fabs(float %362)
%364 = fmul float %176, 0x3FECCCCCC0000000
@@ -403,7 +403,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%384 = insertelement <4 x float> %383, float %45, i32 2
%385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3
%386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385)
- %387 = call float @llvm.AMDGPU.rsq(float %386)
+ %387 = call float @llvm.AMDGPU.rsq.f32(float %386)
%388 = fmul float %45, %387
%389 = call float @fabs(float %388)
%390 = fmul float %176, 0x3FF51EB860000000
@@ -1041,7 +1041,7 @@ IF179: ; preds = %ENDIF175
%896 = insertelement <4 x float> %895, float %45, i32 2
%897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3
%898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897)
- %899 = call float @llvm.AMDGPU.rsq(float %898)
+ %899 = call float @llvm.AMDGPU.rsq.f32(float %898)
%900 = fmul float %45, %899
%901 = call float @fabs(float %900)
%902 = fmul float %176, 0x3FECCCCCC0000000
@@ -1150,7 +1150,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
+declare float @llvm.AMDGPU.rsq.f32(float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
new file mode 100644
index 0000000000..c8c73573e0
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
+
+; SI-LABEL: @test_div_fixup_f32:
+; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+ %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @test_div_fixup_f64:
+; SI: V_DIV_FIXUP_F64
+define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
+ %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
new file mode 100644
index 0000000000..4f1e827c2c
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone
+
+; SI-LABEL: @test_div_fmas_f32:
+; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+ %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @test_div_fmas_f64:
+; SI: V_DIV_FMAS_F64
+define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
+ %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
new file mode 100644
index 0000000000..1bcbe2f859
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.div.scale.f32(float, float) nounwind readnone
+declare double @llvm.AMDGPU.div.scale.f64(double, double) nounwind readnone
+
+; SI-LABEL: @test_div_scale_f32:
+define void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
+ %a = load float addrspace(1)* %aptr, align 4
+ %b = load float addrspace(1)* %bptr, align 4
+ %result = call float @llvm.AMDGPU.div.scale.f32(float %a, float %b) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @test_div_scale_f64:
+define void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr) nounwind {
+ %a = load double addrspace(1)* %aptr, align 8
+ %b = load double addrspace(1)* %bptr, align 8
+ %result = call double @llvm.AMDGPU.div.scale.f64(double %a, double %b) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
new file mode 100644
index 0000000000..ca5260dc5b
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+; FUNC-LABEL: @rcp_f32
+; SI: V_RCP_F32_e32
+define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
+ %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @rcp_f64
+; SI: V_RCP_F64_e32
+define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
+ %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @rcp_pat_f32
+; SI: V_RCP_F32_e32
+define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+ %rcp = fdiv float 1.0, %src
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @rcp_pat_f64
+; SI: V_RCP_F64_e32
+define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
+ %rcp = fdiv double 1.0, %src
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @rsq_rcp_pat_f32
+; SI: V_RSQ_F32_e32
+define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
+ %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
+ %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
+ store float %rcp, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @rsq_rcp_pat_f64
+; SI: V_RSQ_F64_e32
+define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
+ %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
+ %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
+ store double %rcp, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
new file mode 100644
index 0000000000..1c736d447e
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
+
+; SI-LABEL: @test_trig_preop_f64:
+; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]]
+; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+ %a = load double addrspace(1)* %aptr, align 8
+ %b = load i32 addrspace(1)* %bptr, align 4
+ %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; SI-LABEL: @test_trig_preop_f64_imm_segment:
+; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
+ %a = load double addrspace(1)* %aptr, align 8
+ %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
+ store double %result, double addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index f322bc71c6..55eb56d3fb 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -103,7 +103,7 @@ main_body:
%95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
%96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
%97 = call float @fabs(float %96)
- %98 = call float @llvm.AMDGPU.rsq(float %97)
+ %98 = call float @llvm.AMDGPU.rsq.f32(float %97)
%99 = fmul float %4, %98
%100 = fmul float %5, %98
%101 = fmul float %6, %98
@@ -225,7 +225,7 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
declare float @fabs(float) #2
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #1
+declare float @llvm.AMDGPU.rsq.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #1
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index c581d86b99..c7d5bf9064 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -70,7 +70,7 @@ main_body:
%55 = fadd float %54, %53
%56 = fmul float %45, %45
%57 = fadd float %55, %56
- %58 = call float @llvm.AMDGPU.rsq(float %57)
+ %58 = call float @llvm.AMDGPU.rsq.f32(float %57)
%59 = fmul float %43, %58
%60 = fmul float %44, %58
%61 = fmul float %45, %58
@@ -212,7 +212,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #3
+declare float @llvm.AMDGPU.rsq.f32(float) #3
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #3
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
index b34a757d9b..53a096513b 100644
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -203,7 +203,7 @@ main_body:
%198 = fadd float %197, %196
%199 = fmul float %97, %97
%200 = fadd float %198, %199
- %201 = call float @llvm.AMDGPU.rsq(float %200)
+ %201 = call float @llvm.AMDGPU.rsq.f32(float %200)
%202 = fmul float %95, %201
%203 = fmul float %96, %201
%204 = fmul float %202, %29
@@ -384,7 +384,7 @@ IF67: ; preds = %LOOP65
%355 = fadd float %354, %353
%356 = fmul float %352, %352
%357 = fadd float %355, %356
- %358 = call float @llvm.AMDGPU.rsq(float %357)
+ %358 = call float @llvm.AMDGPU.rsq.f32(float %357)
%359 = fmul float %350, %358
%360 = fmul float %351, %358
%361 = fmul float %352, %358
@@ -512,7 +512,7 @@ IF67: ; preds = %LOOP65
%483 = fadd float %482, %481
%484 = fmul float %109, %109
%485 = fadd float %483, %484
- %486 = call float @llvm.AMDGPU.rsq(float %485)
+ %486 = call float @llvm.AMDGPU.rsq.f32(float %485)
%487 = fmul float %107, %486
%488 = fmul float %108, %486
%489 = fmul float %109, %486
@@ -541,7 +541,7 @@ IF67: ; preds = %LOOP65
%512 = fadd float %511, %510
%513 = fmul float %97, %97
%514 = fadd float %512, %513
- %515 = call float @llvm.AMDGPU.rsq(float %514)
+ %515 = call float @llvm.AMDGPU.rsq.f32(float %514)
%516 = fmul float %95, %515
%517 = fmul float %96, %515
%518 = fmul float %97, %515
@@ -658,7 +658,7 @@ declare i32 @llvm.SI.tid() #2
declare float @ceil(float) #3
; Function Attrs: readnone
-declare float @llvm.AMDGPU.rsq(float) #2
+declare float @llvm.AMDGPU.rsq.f32(float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
@@ -887,7 +887,7 @@ main_body:
%212 = fadd float %211, %210
%213 = fmul float %209, %209
%214 = fadd float %212, %213
- %215 = call float @llvm.AMDGPU.rsq(float %214)
+ %215 = call float @llvm.AMDGPU.rsq.f32(float %214)
%216 = fmul float %205, %215
%217 = fmul float %207, %215
%218 = fmul float %209, %215
@@ -1123,7 +1123,7 @@ IF189: ; preds = %LOOP
%434 = fsub float -0.000000e+00, %433
%435 = fadd float 0x3FF00068E0000000, %434
%436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00)
- %437 = call float @llvm.AMDGPU.rsq(float %436)
+ %437 = call float @llvm.AMDGPU.rsq.f32(float %436)
%438 = fmul float %437, %436
%439 = fsub float -0.000000e+00, %436
%440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00)
@@ -1147,7 +1147,7 @@ IF189: ; preds = %LOOP
%458 = fadd float %457, %456
%459 = fmul float %455, %455
%460 = fadd float %458, %459
- %461 = call float @llvm.AMDGPU.rsq(float %460)
+ %461 = call float @llvm.AMDGPU.rsq.f32(float %460)
%462 = fmul float %451, %461
%463 = fmul float %453, %461
%464 = fmul float %455, %461
@@ -1257,7 +1257,7 @@ ENDIF197: ; preds = %IF189, %IF198
%559 = fadd float %558, %557
%560 = fmul float %556, %556
%561 = fadd float %559, %560
- %562 = call float @llvm.AMDGPU.rsq(float %561)
+ %562 = call float @llvm.AMDGPU.rsq.f32(float %561)
%563 = fmul float %562, %561
%564 = fsub float -0.000000e+00, %561
%565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00)
diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll
new file mode 100644
index 0000000000..1db6b0d28b
--- /dev/null
+++ b/test/Transforms/InstCombine/r600-intrinsics.ll
@@ -0,0 +1,47 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
+declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_1
+; CHECK-NEXT: ret float 1.000000e+00
+define float @test_constant_fold_rcp_f32_1() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_1
+; CHECK-NEXT: ret double 1.000000e+00
+define double @test_constant_fold_rcp_f64_1() nounwind {
+ %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_half
+; CHECK-NEXT: ret float 2.000000e+00
+define float @test_constant_fold_rcp_f32_half() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_half
+; CHECK-NEXT: ret double 2.000000e+00
+define double @test_constant_fold_rcp_f64_half() nounwind {
+ %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43
+; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
+define float @test_constant_fold_rcp_f32_43() nounwind {
+ %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_43
+; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
+define double @test_constant_fold_rcp_f64_43() nounwind {
+ %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
+ ret double %val
+}
+