summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Lejeune <vljn@ovi.com>2013-03-05 15:04:49 +0000
committerVincent Lejeune <vljn@ovi.com>2013-03-05 15:04:49 +0000
commitcae6801b7df7c006d2956d5b6012b902cf6eb14d (patch)
treeb4625bc519ac99170ec027260e689973605b1229
parent64ca84d9b2eac2b0520b0f0fedaac5ef3a41daf6 (diff)
downloadllvm-cae6801b7df7c006d2956d5b6012b902cf6eb14d.tar.gz
llvm-cae6801b7df7c006d2956d5b6012b902cf6eb14d.tar.bz2
llvm-cae6801b7df7c006d2956d5b6012b902cf6eb14d.tar.xz
R600: Turn BUILD_VECTOR into Reg_Sequence
Reviewed-by: Tom Stellard <thomas.stellard at amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176487 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/R600/AMDILISelDAGToDAG.cpp29
-rw-r--r--lib/Target/R600/CMakeLists.txt1
-rw-r--r--test/CodeGen/R600/fdiv.v4f32.ll6
3 files changed, 32 insertions, 4 deletions
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index e77b9dca49..0c7880d232 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -162,6 +162,35 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
switch (Opc) {
default: break;
+ case ISD::BUILD_VECTOR: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128 bits reg copy when going through TwoAddressInstructions
+ // pass. We want to avoid 128 bits copies as much as possible because they
+ // can't be bundled by our scheduler.
+ SDValue RegSeqArgs[9] = {
+ CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+ bool IsRegSeq = true;
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ }
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs, 2 * N->getNumOperands() + 1);
+ }
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 00f8b106c7..6c695a08d9 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -37,7 +37,6 @@ add_llvm_target(R600CodeGen
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
- R600LowerConstCopy.cpp
R600MachineFunctionInfo.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll
index 459fd119d2..79e677f541 100644
--- a/test/CodeGen/R600/fdiv.v4f32.ll
+++ b/test/CodeGen/R600/fdiv.v4f32.ll
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1