summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-04-30 15:31:33 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-04-30 15:31:33 +0000
commitbd24b33e5782997dfa26b0debf934dd364756982 (patch)
tree6506851df5b86a758703c68df4ee1ba7e36b0319 /lib
parent1d8e31fc7a2139df95ef9115a9d51c51fde2ae86 (diff)
downloadllvm-bd24b33e5782997dfa26b0debf934dd364756982.tar.gz
llvm-bd24b33e5782997dfa26b0debf934dd364756982.tar.bz2
llvm-bd24b33e5782997dfa26b0debf934dd364756982.tar.xz
R600/SI: Use VALU instructions for copying i1 values
We can't use SALU instructions for this since they ignore the EXEC mask and are always executed. This fixes several OpenCV tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207661 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/R600/AMDGPU.h4
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp1
-rw-r--r--lib/Target/R600/CMakeLists.txt1
-rw-r--r--lib/Target/R600/SIFixSGPRCopies.cpp3
-rw-r--r--lib/Target/R600/SIISelLowering.cpp2
-rw-r--r--lib/Target/R600/SIInstructions.td11
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp4
-rw-r--r--lib/Target/R600/SILowerI1Copies.cpp130
-rw-r--r--lib/Target/R600/SIRegisterInfo.td2
9 files changed, 149 insertions, 9 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 3e1848b5f8..5d0cf81c40 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+void initializeSILowerI1CopiesPass(PassRegistry &);
+extern char &SILowerI1CopiesID;
+
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index f0935401ec..6b68c2abe3 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ addPass(createSILowerI1CopiesPass());
return false;
}
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 93a5117975..3c6fa5a7ae 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen
SIInstrInfo.cpp
SIISelLowering.cpp
SILowerControlFlow.cpp
+ SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
SITypeRewriter.cpp
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index a9a7c5ce0f..f6b8b783d0 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -185,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- DstRC == &AMDGPU::M0RegRegClass)
+ DstRC == &AMDGPU::M0RegRegClass ||
+ MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 8c686c9150..e688048507 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
- addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 00f9be61e2..27e7abe1a3 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1398,6 +1398,12 @@ def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
let isCodeGenOnly = 1, isPseudo = 1 in {
+def V_MOV_I1 : InstSI <
+ (outs VReg_1:$dst),
+ (ins i1imm:$src),
+ "", [(set i1:$dst, (imm:$src))]
+>;
+
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
(ins i32imm:$src),
@@ -1981,11 +1987,6 @@ def : Pat <
>;
def : Pat <
- (i1 imm:$imm),
- (S_MOV_B64 imm:$imm)
->;
-
-def : Pat <
(i64 InlineImm<i64>:$imm),
(S_MOV_B64 InlineImm<i64>:$imm)
>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index d1d925dd23..6601f2a980 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -67,7 +67,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
- const TargetRegisterInfo *TRI;
+ const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp
new file mode 100644
index 0000000000..766380ead5
--- /dev/null
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -0,0 +1,130 @@
+//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// i1 values are usually inserted by the CFG Structurize pass and they are
+/// unique in that they can be copied from VALU to SALU registers.
+/// This is not possible for any other value type. Since there are no
+/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#define DEBUG_TYPE "si-i1-copies"
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerI1Copies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SILowerI1Copies() : MachineFunctionPass(ID) {
+ initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) override;
+
+ virtual const char *getPassName() const override {
+ return "SI Lower il Copies";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower il Copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower il Copies", false, false)
+
+char SILowerI1Copies::ID = 0;
+
+char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
+
+FunctionPass *llvm::createSILowerI1CopiesPass() {
+ return new SILowerI1Copies();
+}
+
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ MF.getTarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
+ MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
+ continue;
+ }
+
+ if (MI.getOpcode() != AMDGPU::COPY ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
+ continue;
+
+
+ const TargetRegisterClass *DstRC =
+ MRI.getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterClass *SrcRC =
+ MRI.getRegClass(MI.getOperand(1).getReg());
+
+ if (DstRC == &AMDGPU::VReg_1RegClass &&
+ TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
+ .addOperand(MI.getOperand(0))
+ .addImm(0)
+ .addImm(-1)
+ .addOperand(MI.getOperand(1))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MI.eraseFromParent();
+ } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
+ SrcRC == &AMDGPU::VReg_1RegClass) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
+ .addOperand(MI.getOperand(0))
+ .addImm(0)
+ .addOperand(MI.getOperand(1))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MI.eraseFromParent();
+ }
+
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 6d6d8b9bd8..f1f01deaf3 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -189,6 +189,8 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
+
//===----------------------------------------------------------------------===//
// [SV]Src_(32|64) register classes, can have either an immediate or an register
//===----------------------------------------------------------------------===//