summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-04-29 23:12:48 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-04-29 23:12:48 +0000
commit2a90e446c0e4110ba741166254c88e346799bc81 (patch)
tree6fb2e2896a3605c9a13a9f668f1aa233ad6347e8 /lib
parent19a970b2dafdfaca4263557db69a70e072e0a130 (diff)
downloadllvm-2a90e446c0e4110ba741166254c88e346799bc81.tar.gz
llvm-2a90e446c0e4110ba741166254c88e346799bc81.tar.bz2
llvm-2a90e446c0e4110ba741166254c88e346799bc81.tar.xz
R600/SI: Only select SALU instructions in the entry or exit block
SALU instructions ignore control flow, so it is not always safe to use them within branches. This is a partial solution to this problem until we can come up with something better.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207590 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp15
-rw-r--r--lib/Target/R600/SIInstructions.td129
2 files changed, 117 insertions, 27 deletions
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index ca981c3802..ba705db69d 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -16,9 +16,11 @@
#include "AMDGPURegisterInfo.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
@@ -70,6 +72,11 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ /// \returns True if the current basic block being selected is at control
+ /// flow depth 0, meaning that the current block dominates the
+ /// exit block.
+ bool isCFDepth0() const;
+
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
@@ -565,6 +572,14 @@ bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
return false;
}
+bool AMDGPUDAGToDAGISel::isCFDepth0() const {
+ // FIXME: Figure out a way to use DominatorTree analysis here. Until then, only the first and last basic blocks of the IR function are reported as depth 0.
+ const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
+ const Function *Fn = FuncInfo->Fn;
+ return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
+}
+
+
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 80897f2f41..a8aefc2287 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -32,14 +32,16 @@ def isSI : Predicate<"Subtarget.getGeneration() "
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
-def WAIT_FLAG : InstFlag<"printWaitFlag">;
+def isCFDepth0 : Predicate<"isCFDepth0()">;
-let Predicates = [isSI] in {
+def WAIT_FLAG : InstFlag<"printWaitFlag">;
//===----------------------------------------------------------------------===//
// SMRD Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [isSI, isCFDepth0] in {
+
let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
@@ -76,10 +78,14 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+} // let Predicates = [isSI, isCFDepth0]
+
//===----------------------------------------------------------------------===//
// SOP1 Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [isSI, isCFDepth0] in {
+
let neverHasSideEffects = 1 in {
let isMoveImm = 1 in {
@@ -152,10 +158,14 @@ def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+} // let Predicates = [isSI, isCFDepth0]
+
//===----------------------------------------------------------------------===//
// SOP2 Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [isSI, isCFDepth0] in {
+
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
@@ -209,11 +219,6 @@ def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
-def : Pat <
- (i1 (and i1:$src0, i1:$src1)),
- (S_AND_B64 $src0, $src1)
->;
-
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
@@ -222,17 +227,12 @@ def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
[(set i64:$dst, (or i64:$src0, i64:$src1))]
>;
-def : Pat <
- (i1 (or i1:$src0, i1:$src1)),
- (S_OR_B64 $src0, $src1)
->;
-
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
- [(set i1:$dst, (xor i1:$src0, i1:$src1))]
+ [(set i64:$dst, (xor i64:$src0, i64:$src1))]
>;
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
@@ -279,10 +279,14 @@ def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+} // let Predicates = [isSI, isCFDepth0]
+
//===----------------------------------------------------------------------===//
// SOPC Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [isSI, isCFDepth0] in {
+
def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
@@ -301,10 +305,14 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+} // let Predicates = [isSI, isCFDepth0]
+
//===----------------------------------------------------------------------===//
// SOPK Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [isSI, isCFDepth0] in {
+
def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
@@ -353,10 +361,14 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
//def EXP : EXP_ <0x00000000, "EXP", []>;
+} // let Predicates = [isSI, isCFDepth0]
+
//===----------------------------------------------------------------------===//
// SOPP Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [isSI] in {
+
//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
let isTerminator = 1 in {
@@ -449,6 +461,10 @@ let Uses = [EXEC] in {
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
} // End hasSideEffects
+} // let Predicates = [isSI]
+
+let Predicates = [isSI] in {
+
//===----------------------------------------------------------------------===//
// VOPC Instructions
//===----------------------------------------------------------------------===//
@@ -1173,27 +1189,43 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
+ [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
+ [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
+ [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
+ [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
+
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
-defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
-defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
-defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+ [(set i32:$dst, (and i32:$src0, i32:$src1))]>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+ [(set i32:$dst, (or i32:$src0, i32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+ [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+>;
} // End isCommutable = 1
@@ -1209,14 +1241,18 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
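// NOTE(review): the instructions below now carry selection patterns, so the
// vector forms can be selected directly; presumably this covers blocks where
// the isCFDepth0-guarded scalar patterns do not apply -- confirm.
// The scalar versions will be replaced with vector when needed later.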
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
+ [(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
+ [(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
"V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32",
+ [(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32",
+ [(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>;
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
"V_SUBB_U32">;
} // End Uses = [VCC]
@@ -1592,6 +1628,45 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
+//===----------------------------------------------------------------------===//
+// SOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i1 (and i1:$src0, i1:$src1)),
+ (S_AND_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (i1 (or i1:$src0, i1:$src1)),
+ (S_OR_B64 $src0, $src1)
+>;
+
+def : Pat <
+ (i1 (xor i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (or i64:$src0, i64:$src1),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0),
+ (EXTRACT_SUBREG i64:$src1, sub0)), sub0),
+ (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1),
+ (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
+>;
+
+class SextInReg <ValueType vt, int ShiftAmt> : Pat <
+ (sext_inreg i32:$src0, vt),
+ (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
+>;
+
+def : SextInReg <i8, 24>;
+def : SextInReg <i16, 16>;
/********** ======================= **********/
/********** Image sampling patterns **********/