diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-05-22 18:00:24 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-05-22 18:00:24 +0000 |
commit | cb0402e9a4361c747aacfc5b9afc5946055bce3a (patch) | |
tree | 6bbf96d61d682f54b9acb1367f7ad5752c149862 | |
parent | 21851f9adb7d538df6d350c7adcd22ff44b31cf7 (diff) | |
download | llvm-cb0402e9a4361c747aacfc5b9afc5946055bce3a.tar.gz llvm-cb0402e9a4361c747aacfc5b9afc5946055bce3a.tar.bz2 llvm-cb0402e9a4361c747aacfc5b9afc5946055bce3a.tar.xz |
R600: Expand mul24 for GPUs without it
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209458 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/R600/AMDGPUInstructions.td | 29 | ||||
-rw-r--r-- | lib/Target/R600/CaymanInstructions.td | 2 | ||||
-rw-r--r-- | lib/Target/R600/EvergreenInstructions.td | 3 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 4 | ||||
-rw-r--r-- | test/CodeGen/R600/llvm.AMDGPU.imul24.ll | 3 | ||||
-rw-r--r-- | test/CodeGen/R600/llvm.AMDGPU.umul24.ll | 10 |
6 files changed, 35 insertions, 16 deletions
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 8a9ab54657..80bdf5b86c 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -433,16 +433,29 @@ class UMad24Pat<Instruction Inst> : Pat < (Inst $src0, $src1, $src2) >; -class IMad24ExpandPat<Instruction MulInst, Instruction AddInst> : Pat < - (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), - (AddInst (MulInst $src0, $src1), $src2) ->; +multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> { + def _expand_imad24 : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; -class UMad24ExpandPat<Instruction MulInst, Instruction AddInst> : Pat < - (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), - (AddInst (MulInst $src0, $src1), $src2) ->; + def _expand_imul24 : Pat < + (AMDGPUmul_i24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} +multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> { + def _expand_umad24 : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_umul24 : Pat < + (AMDGPUmul_u24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} include "R600Instructions.td" include "R700Instructions.td" diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index 58424a6172..26303452c1 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -49,7 +49,7 @@ def COS_cm : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; -def : UMad24ExpandPat<MULLO_UINT_cm, ADD_INT>; +defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 77416671a9..20654419a8 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -75,8 +75,7 @@ def COS_eg : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; -def : IMad24ExpandPat<MULLO_INT_eg, ADD_INT>; -def : UMad24ExpandPat<MULLO_UINT_eg, ADD_INT>; +defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>; //===----------------------------------------------------------------------===// // Memory read/write instructions diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 0c804ffe5d..590fde2029 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1627,8 +1627,8 @@ def : DwordAddrPat <i32, R600_Reg32>; let Predicates = [isR600] in { // Intrinsic patterns -def : IMad24ExpandPat<MULLO_INT_r600, ADD_INT>; -def : UMad24ExpandPat<MULLO_UINT_r600, ADD_INT>; +defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>; +defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>; } // End isR600 def getLDSNoRetOp : InstrMapping { diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll index 33a1b8204e..8ee3520dae 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll @@ -1,14 +1,15 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone ; FUNC-LABEL: @test_imul24 ; SI: V_MUL_I32_I24 ; CM: MUL_INT24 +; R600: MULLO_INT define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone store i32 %mul, i32 addrspace(1)* %out, align 4 ret void } - diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll index 21f824a65f..72a36029fb 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll @@ -1,11 +1,17 @@ ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; XUN: llc -march=r600 -mcpu=r770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone -; SI-LABEL: @test_umul24 +; FUNC-LABEL: @test_umul24 +; SI: V_MUL_U32_U24 +; R600: MUL_UINT24 +; R600: MULLO_UINT define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind { %mul = call i32 @llvm.AMDGPU.umul24(i32 %src0, i32 %src1) nounwind readnone store i32 %mul, i32 addrspace(1)* %out, align 4 ret void } - |