diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-04-18 05:19:26 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-04-18 05:19:26 +0000 |
commit | 746734df1accfeb5f336de211655c304d58cb7b3 (patch) | |
tree | a8dd5416388f375301241808d8c2833c4b4d2e82 /test | |
parent | 532a5ffe4c264030d6e350641947fe69fe7babad (diff) | |
download | llvm-746734df1accfeb5f336de211655c304d58cb7b3.tar.gz llvm-746734df1accfeb5f336de211655c304d58cb7b3.tar.bz2 llvm-746734df1accfeb5f336de211655c304d58cb7b3.tar.xz |
R600/SI: Try to use scalar BFE.
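Use scalar BFE with a constant offset and width when possible.
This is complicated by the fact that the scalar version packs
the two operands of the vector version (offset and width) into one:
the offset occupies the low bits and the width starts at bit 16, so
the vector operands "0, 1" become the single scalar operand 0x10000.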
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206558 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/R600/sext-in-reg.ll | 51 |
1 file changed, 40 insertions, 11 deletions
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll index a47da2b25a..b722959aad 100644 --- a/test/CodeGen/R600/sext-in-reg.ll +++ b/test/CodeGen/R600/sext-in-reg.ll @@ -1,12 +1,13 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc < %s -march=r600 -mcpu=cypress | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone ; FUNC-LABEL: @sext_in_reg_i1_i32 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], -; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], [[ARG]], 0, 1 +; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000 +; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]] ; SI: BUFFER_STORE_DWORD [[EXTRACT]], ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] @@ -148,8 +149,8 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun ; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments. 
; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64 -; XSI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 8 -; XSI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, +; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288 +; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31 ; XSI: BUFFER_STORE_DWORD ; XEG: BFE_INT ; XEG: ASHR @@ -204,8 +205,8 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out ; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 ; SI: BUFFER_STORE_DWORDX2 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] @@ -221,10 +222,10 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> % } ; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 -; SI: V_BFE_I32 {{v[0-9]+}}, {{s[0-9]+}}, 0, 1 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 +; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000 ; SI: BUFFER_STORE_DWORDX4 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]] @@ -320,6 +321,34 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind { ret void } +; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8 +define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind { + %loada = load <4 x i32> addrspace(1)* %a, align 16 + %loadb = load <4 
x i32> addrspace(1)* %b, align 16 + %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload + %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24> + %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24> + store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 +; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16 +define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind { + %loada = load <4 x i32> addrspace(1)* %a, align 16 + %loadb = load <4 x i32> addrspace(1)* %b, align 16 + %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload + %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16> + %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16> + store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8 + ret void +} + ; FIXME: The BFE should really be eliminated. I think it should happen ; when computeMaskedBitsForTargetNode is implemented for imax. |