-rw-r--r--   lib/Target/R600/SIInstrInfo.cpp   | 76
-rw-r--r--   lib/Target/R600/SIInstrInfo.h     |  5
-rw-r--r--   lib/Target/R600/SIInstructions.td |  2
-rw-r--r--   test/CodeGen/R600/or.ll           | 33
4 files changed, 108 insertions, 8 deletions
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index b0a0e9af4c..6cc4dee827 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -879,6 +879,30 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       Inst->eraseFromParent();
       continue;
     }
+    case AMDGPU::S_AND_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_OR_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_XOR_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_NOT_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_BFE_U64:
+    case AMDGPU::S_BFE_I64:
+    case AMDGPU::S_BFM_B64:
+      llvm_unreachable("Moving this op to VALU not implemented");
     }
 
     unsigned NewOpcode = getVALUOp(*Inst);
@@ -968,6 +992,58 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
   return &AMDGPU::VReg_32RegClass;
 }
 
+void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                                     MachineInstr *Inst,
+                                     unsigned Opcode) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  // We shouldn't need to worry about immediate operands here.
+  MachineOperand &Dest = Inst->getOperand(0);
+  MachineOperand &Src0 = Inst->getOperand(1);
+  MachineOperand &Src1 = Inst->getOperand(2);
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineBasicBlock::iterator MII = Inst;
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  const TargetRegisterClass *RC = MRI.getRegClass(Src0.getReg());
+  const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, AMDGPU::sub0);
+  unsigned SrcReg0Sub0 = buildExtractSubReg(MII, MRI, Src0, RC,
+                                            AMDGPU::sub0, SubRC);
+  unsigned SrcReg1Sub0 = buildExtractSubReg(MII, MRI, Src1, RC,
+                                            AMDGPU::sub0, SubRC);
+
+  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+    .addReg(SrcReg0Sub0)
+    .addReg(SrcReg1Sub0);
+
+  unsigned SrcReg0Sub1 = buildExtractSubReg(MII, MRI, Src0, RC,
+                                            AMDGPU::sub1, SubRC);
+  unsigned SrcReg1Sub1 = buildExtractSubReg(MII, MRI, Src1, RC,
+                                            AMDGPU::sub1, SubRC);
+
+  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+    .addReg(SrcReg0Sub1)
+    .addReg(SrcReg1Sub1);
+
+  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+    .addReg(DestSub0)
+    .addImm(AMDGPU::sub0)
+    .addReg(DestSub1)
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+  // Try to legalize the operands in case we need to swap the order to keep it
+  // valid.
+  Worklist.push_back(LoHalf);
+  Worklist.push_back(HiHalf);
+}
+
 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                    MachineBasicBlock *MBB,
                                    MachineBasicBlock::iterator I,
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 8c0fb6fbd5..6eefd3ac98 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -38,6 +38,10 @@ private:
                           const TargetRegisterClass *RC,
                           const MachineOperand &Op) const;
 
+  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                          MachineInstr *Inst, unsigned Opcode) const;
+
+
 public:
 
   explicit SIInstrInfo(AMDGPUTargetMachine &tm);
@@ -92,6 +96,7 @@ public:
 
   bool isSALUInstr(const MachineInstr &MI) const;
   static unsigned getVALUOp(const MachineInstr &MI);
+  bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
 
   /// \brief Return the correct register class for \p OpNo.  For target-specific
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 8ec29713e0..8e320929fb 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1222,7 +1222,7 @@ def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
 >;
 
 def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
-  []
+  [(set i64:$dst, (or i64:$src0, i64:$src1))]
 >;
 
 def : Pat <
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 35fc8b33e0..05d1e0f041 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -56,15 +56,34 @@ define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b)
   ret void
 }
 
-; EG-CHECK-LABEL: @or_i64
+; EG-CHECK-LABEL: @scalar_or_i64
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-CHECK-LABEL: @or_i64
+; SI-CHECK-LABEL: @scalar_or_i64
+; SI-CHECK: S_OR_B64
+define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, %b
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @vector_or_i64
 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-define void @or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
-  %0 = or i64 %a, %b
-  store i64 %0, i64 addrspace(1)* %out
-  ret void
+define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 8
+  %loadb = load i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, %loadb
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_vector_or_i64
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
+  %loada = load i64 addrspace(1)* %a
+  %or = or i64 %loada, %b
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
 }
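
For context, the same splitting path now covers the other 64-bit SALU bitwise ops handled by moveToVALU. Below is a minimal sketch of a test in the style of or.ll; it is not part of the patch and assumes a 64-bit xor with VGPR (loaded) operands is selected as S_XOR_B64 and then split by splitScalar64BitOp, so the function name and CHECK lines are illustrative only:

; SI-CHECK-LABEL: @vector_xor_i64
; SI-CHECK: V_XOR_B32_e32 v{{[0-9]}}
; SI-CHECK: V_XOR_B32_e32 v{{[0-9]}}
define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  ; The loaded values live in VGPRs, so the 64-bit SALU xor cannot be used and
  ; is instead split into two 32-bit VALU ops (low and high halves).
  %loada = load i64 addrspace(1)* %a, align 8
  %loadb = load i64 addrspace(1)* %b, align 8
  %xor = xor i64 %loada, %loadb
  store i64 %xor, i64 addrspace(1)* %out
  ret void
}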