From 06b423452c85f5a78a1b0555b767cf27b36c0752 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 14 Nov 2012 22:09:20 +0000 Subject: Remove the CellSPU port. Approved by Chris Lattner. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167984 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/CMakeLists.txt | 30 - lib/Target/CellSPU/CellSDKIntrinsics.td | 449 -- lib/Target/CellSPU/LLVMBuild.txt | 32 - lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt | 6 - lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt | 23 - lib/Target/CellSPU/MCTargetDesc/Makefile | 16 - lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp | 43 - lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h | 30 - .../CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 94 - lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 38 - lib/Target/CellSPU/Makefile | 20 - lib/Target/CellSPU/README.txt | 106 - lib/Target/CellSPU/SPU.h | 31 - lib/Target/CellSPU/SPU.td | 66 - lib/Target/CellSPU/SPU128InstrInfo.td | 41 - lib/Target/CellSPU/SPU64InstrInfo.td | 408 -- lib/Target/CellSPU/SPUAsmPrinter.cpp | 333 -- lib/Target/CellSPU/SPUCallingConv.td | 53 - lib/Target/CellSPU/SPUFrameLowering.cpp | 256 -- lib/Target/CellSPU/SPUFrameLowering.h | 80 - lib/Target/CellSPU/SPUHazardRecognizers.cpp | 135 - lib/Target/CellSPU/SPUHazardRecognizers.h | 37 - lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1192 ------ lib/Target/CellSPU/SPUISelLowering.cpp | 3267 -------------- lib/Target/CellSPU/SPUISelLowering.h | 178 - lib/Target/CellSPU/SPUInstrBuilder.h | 43 - lib/Target/CellSPU/SPUInstrFormats.td | 320 -- lib/Target/CellSPU/SPUInstrInfo.cpp | 449 -- lib/Target/CellSPU/SPUInstrInfo.h | 84 - lib/Target/CellSPU/SPUInstrInfo.td | 4484 -------------------- lib/Target/CellSPU/SPUMachineFunction.cpp | 14 - lib/Target/CellSPU/SPUMachineFunction.h | 50 - lib/Target/CellSPU/SPUMathInstr.td | 97 - lib/Target/CellSPU/SPUNodes.td | 159 - lib/Target/CellSPU/SPUNopFiller.cpp | 153 - lib/Target/CellSPU/SPUOperands.td | 664 --- lib/Target/CellSPU/SPURegisterInfo.cpp | 357 -- lib/Target/CellSPU/SPURegisterInfo.h | 106 - lib/Target/CellSPU/SPURegisterInfo.td | 183 - lib/Target/CellSPU/SPURegisterNames.h | 19 - lib/Target/CellSPU/SPUSchedule.td | 59 - lib/Target/CellSPU/SPUSelectionDAGInfo.cpp | 23 - lib/Target/CellSPU/SPUSelectionDAGInfo.h | 31 - lib/Target/CellSPU/SPUSubtarget.cpp | 65 - lib/Target/CellSPU/SPUSubtarget.h | 97 - lib/Target/CellSPU/SPUTargetMachine.cpp | 94 - lib/Target/CellSPU/SPUTargetMachine.h | 96 - lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 7 - .../CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20 - lib/Target/CellSPU/TargetInfo/LLVMBuild.txt | 23 - lib/Target/CellSPU/TargetInfo/Makefile | 15 - lib/Target/LLVMBuild.txt | 2 +- 52 files changed, 1 insertion(+), 14677 deletions(-) delete mode 100644 lib/Target/CellSPU/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/CellSDKIntrinsics.td delete mode 100644 lib/Target/CellSPU/LLVMBuild.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/Makefile delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h delete mode 100644 lib/Target/CellSPU/Makefile delete mode 100644 lib/Target/CellSPU/README.txt delete mode 100644 lib/Target/CellSPU/SPU.h delete mode 100644 lib/Target/CellSPU/SPU.td delete mode 100644 lib/Target/CellSPU/SPU128InstrInfo.td delete mode 100644 lib/Target/CellSPU/SPU64InstrInfo.td delete mode 100644 lib/Target/CellSPU/SPUAsmPrinter.cpp delete mode 100644 lib/Target/CellSPU/SPUCallingConv.td delete mode 100644 lib/Target/CellSPU/SPUFrameLowering.cpp delete mode 100644 lib/Target/CellSPU/SPUFrameLowering.h delete mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.cpp delete mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.h delete mode 100644 lib/Target/CellSPU/SPUISelDAGToDAG.cpp delete mode 100644 lib/Target/CellSPU/SPUISelLowering.cpp delete mode 100644 lib/Target/CellSPU/SPUISelLowering.h delete mode 100644 lib/Target/CellSPU/SPUInstrBuilder.h delete mode 100644 lib/Target/CellSPU/SPUInstrFormats.td delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.cpp delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.h delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.td delete mode 100644 lib/Target/CellSPU/SPUMachineFunction.cpp delete mode 100644 lib/Target/CellSPU/SPUMachineFunction.h delete mode 100644 lib/Target/CellSPU/SPUMathInstr.td delete mode 100644 lib/Target/CellSPU/SPUNodes.td delete mode 100644 lib/Target/CellSPU/SPUNopFiller.cpp delete mode 100644 lib/Target/CellSPU/SPUOperands.td delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.cpp delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.h delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.td delete mode 100644 lib/Target/CellSPU/SPURegisterNames.h delete mode 100644 lib/Target/CellSPU/SPUSchedule.td delete mode 100644 lib/Target/CellSPU/SPUSelectionDAGInfo.cpp delete mode 100644 lib/Target/CellSPU/SPUSelectionDAGInfo.h delete mode 100644 lib/Target/CellSPU/SPUSubtarget.cpp delete mode 100644 lib/Target/CellSPU/SPUSubtarget.h delete mode 100644 lib/Target/CellSPU/SPUTargetMachine.cpp delete mode 100644 lib/Target/CellSPU/SPUTargetMachine.h delete mode 100644 lib/Target/CellSPU/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp delete mode 100644 lib/Target/CellSPU/TargetInfo/LLVMBuild.txt delete mode 100644 lib/Target/CellSPU/TargetInfo/Makefile (limited to 'lib/Target') diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt deleted file mode 100644 index 1f8ca8681c..0000000000 --- a/lib/Target/CellSPU/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS SPU.td) - -tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter) -tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info) -tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv) -add_public_tablegen_target(CellSPUCommonTableGen) - -add_llvm_target(CellSPUCodeGen - SPUAsmPrinter.cpp - SPUHazardRecognizers.cpp - SPUInstrInfo.cpp - SPUISelDAGToDAG.cpp - SPUISelLowering.cpp - SPUFrameLowering.cpp - SPUMachineFunction.cpp - SPURegisterInfo.cpp - SPUSubtarget.cpp - SPUTargetMachine.cpp - SPUSelectionDAGInfo.cpp - SPUNopFiller.cpp - ) - -add_dependencies(LLVMCellSPUCodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td deleted file mode 100644 index cdb4099ffb..0000000000 --- a/lib/Target/CellSPU/CellSDKIntrinsics.td +++ /dev/null @@ -1,449 +0,0 @@ -//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -///--==-- Arithmetic ops intrinsics --==-- -def CellSDKah: - RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>; -def CellSDKahi: - RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>; -def CellSDKa: - RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>; -def CellSDKai: - RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>; -def CellSDKsfh: - RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>; -def CellSDKsfhi: - RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>; -def CellSDKsf: - RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>; -def CellSDKsfi: - RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>; -def CellSDKaddx: - RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>; -def CellSDKcg: - RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>; -def CellSDKcgx: - RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>; -def CellSDKsfx: - RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>; -def CellSDKbg: - RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>; -def CellSDKbgx: - RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>; - -def CellSDKmpy: - RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpy $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyu: - RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyu $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))] >; - -def CellSDKmpyi: - RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "mpyi $rT, $rA, $val", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA), - i16ImmSExt10:$val))]>; - -def CellSDKmpyui: - RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "mpyui $rT, $rA, $val", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA), - i16ImmSExt10:$val))]>; - -def CellSDKmpya: - RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "mpya $rT, $rA, $rB, $rC", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB), - (v8i16 VECREG:$rC)))]>; - -def CellSDKmpyh: - RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyh $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpys: - RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpys $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyhh: - RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhh $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyhha: - RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhha $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave -// as an intrinsic for the time being -def CellSDKmpyhhu: - RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhhu $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKmpyhhau: - RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpyhhau $rT, $rA, $rB", IntegerMulDiv, - [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def CellSDKand: - RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "and\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKandc: - RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "andc\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKandbi: - RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "andbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKandhi: - RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "andhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKandi: - RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "andi\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKor: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "or\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKorc: - RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "addc\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "orbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKorhi: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "orhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKori: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ori\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKxor: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "xor\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKxorbi: - RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "xorbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKxorhi: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "xorhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKxori: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "xori\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKnor: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "nor\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKnand: - RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "nand\t $rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), - (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -//===----------------------------------------------------------------------===// -// Shift/rotate intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKshli: - Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val), - (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def CellSDKshlqbi: - Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB), - (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>; - -def CellSDKshlqii: - Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val), - (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def CellSDKshlqby: - Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB), - (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>; - -def CellSDKshlqbyi: - Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val), - (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>; - - -//===----------------------------------------------------------------------===// -// Branch/compare intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKceq: - RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ceq\t $rT, $rA, $rB", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKceqi: - RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ceqi\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKceqb: - RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ceqb\t $rT, $rA, $rB", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; - -def CellSDKceqbi: - RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "ceqbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKceqh: - RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ceqh\t $rT, $rA, $rB", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; - -def CellSDKceqhi: - RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ceqhi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; -def CellSDKcgth: - RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "cgth\t $rT, $rA, $rB", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; - -def CellSDKcgthi: - RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "cgthi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKcgt: - RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "cgt\t $rT, $rA, $rB", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKcgti: - RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "cgti\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKcgtb: - RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "cgtb\t $rT, $rA, $rB", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; - -def CellSDKcgtbi: - RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "cgtbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>; - -def CellSDKclgth: - RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "clgth\t $rT, $rA, $rB", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; - -def CellSDKclgthi: - RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "clgthi\t $rT, $rA, $val", BranchResolv, - [(set (v8i16 VECREG:$rT), - (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; - -def CellSDKclgt: - RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "clgt\t $rT, $rA, $rB", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - -def CellSDKclgti: - RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "clgti\t $rT, $rA, $val", BranchResolv, - [(set (v4i32 VECREG:$rT), - (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; - -def CellSDKclgtb: - RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "clgtb\t $rT, $rA, $rB", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; - -def CellSDKclgtbi: - RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), - "clgtbi\t $rT, $rA, $val", BranchResolv, - [(set (v16i8 VECREG:$rT), - (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>; - -//===----------------------------------------------------------------------===// -// Floating-point intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKfa: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fa\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfs: - RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fs\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfm: - RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fm\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfceq: - RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fceq\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfcgt: - RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fcgt\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfcmeq: - RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fcmeq\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfcmgt: - RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fcmgt\t $rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def CellSDKfma: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t $rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rC)))]>; - -def CellSDKfnms: - RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t $rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rC)))]>; - -def CellSDKfms: - RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t $rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rC)))]>; - -//===----------------------------------------------------------------------===// -// Double precision floating-point intrinsics: -//===----------------------------------------------------------------------===// - -def CellSDKdfa: - RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfa\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfs: - RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfs\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfm: - RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfm\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfma: - RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfma\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfnma: - RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfnma\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfnms: - RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfnms\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; - -def CellSDKdfms: - RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfms\t $rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))]>; diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt deleted file mode 100644 index 277620bf4e..0000000000 --- a/lib/Target/CellSPU/LLVMBuild.txt +++ /dev/null @@ -1,32 +0,0 @@ -;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = CellSPU -parent = Target -has_asmprinter = 1 - -[component_1] -type = Library -name = CellSPUCodeGen -parent = CellSPU -required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = CellSPU diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 0027bdbf6c..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMCellSPUDesc - SPUMCTargetDesc.cpp - SPUMCAsmInfo.cpp - ) - -add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen) diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index 71e5bbc629..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = CellSPUDesc -parent = CellSPU -required_libraries = CellSPUInfo MC -add_to_library_groups = CellSPU diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/CellSPU/MCTargetDesc/Makefile deleted file mode 100644 index 10d9a42239..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp deleted file mode 100644 index 4bad37eaca..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the SPUMCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "SPUMCAsmInfo.h" -using namespace llvm; - -void SPULinuxMCAsmInfo::anchor() { } - -SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { - IsLittleEndian = false; - - ZeroDirective = "\t.space\t"; - Data64bitsDirective = "\t.quad\t"; - AlignmentIsInBytes = false; - - PCSymbol = "."; - CommentString = "#"; - GlobalPrefix = ""; - PrivateGlobalPrefix = ".L"; - - // Has leb128 - HasLEB128 = true; - - SupportsDebugInformation = true; - - // Exception handling is not supported on CellSPU (think about it: you only - // have 256K for code+data. Would you support exception handling?) - ExceptionsType = ExceptionHandling::None; - - // SPU assembly requires ".section" before ".bss" - UsesELFSectionDirectiveForBSS = true; -} - diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h deleted file mode 100644 index f786147b92..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the SPUMCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef SPUTARGETASMINFO_H -#define SPUTARGETASMINFO_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - - class SPULinuxMCAsmInfo : public MCAsmInfo { - virtual void anchor(); - public: - explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT); - }; -} // namespace llvm - -#endif /* SPUTARGETASMINFO_H */ diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp deleted file mode 100644 index 8450e2c663..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides Cell SPU specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "SPUMCTargetDesc.h" -#include "SPUMCAsmInfo.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "SPUGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "SPUGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "SPUGenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createSPUMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitSPUMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitSPUMCRegisterInfo(X, SPU::R0); - return X; -} - -static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitSPUMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) { - MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT); - - // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(SPU::R1, 0); - MAI->addInitialFrameState(0, Dst, Src); - - return MAI; -} - -static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - // For the time being, use static relocations, since there's really no - // support for PIC yet. - X->InitMCCodeGenInfo(Reloc::Static, CM, OL); - return X; -} - -// Force static initialization. -extern "C" void LLVMInitializeCellSPUTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget, - createSPUMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget, - createCellSPUMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, - createSPUMCSubtargetInfo); -} diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h deleted file mode 100644 index d26449e890..0000000000 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h +++ /dev/null @@ -1,38 +0,0 @@ -//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides CellSPU specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef SPUMCTARGETDESC_H -#define SPUMCTARGETDESC_H - -namespace llvm { -class Target; - -extern Target TheCellSPUTarget; - -} // End llvm namespace - -// Define symbolic names for Cell registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "SPUGenRegisterInfo.inc" - -// Defines symbolic names for the SPU instructions. -// -#define GET_INSTRINFO_ENUM -#include "SPUGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "SPUGenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile deleted file mode 100644 index d7a8247f57..0000000000 --- a/lib/Target/CellSPU/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMCellSPUCodeGen -TARGET = SPU -BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \ - SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ - SPUGenDAGISel.inc \ - SPUGenSubtargetInfo.inc SPUGenCallingConv.inc - -DIRS = TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt deleted file mode 100644 index 3bce9609bf..0000000000 --- a/lib/Target/CellSPU/README.txt +++ /dev/null @@ -1,106 +0,0 @@ -//===- README.txt - Notes for improving CellSPU-specific code gen ---------===// - -This code was contributed by a team from the Computer Systems Research -Department in The Aerospace Corporation: - -- Scott Michel (head bottle washer and much of the non-floating point - instructions) -- Mark Thomas (floating point instructions) -- Michael AuYeung (intrinsics) -- Chandler Carruth (LLVM expertise) -- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise) - -Some minor fixes added by Kalle Raiskila. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR -OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES -OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING -OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT -LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR -REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, -OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR -SUCH DAMAGES ARE FORESEEABLE. - ---------------------------------------------------------------------------- ---WARNING--: ---WARNING--: The CellSPU work is work-in-progress and "alpha" quality code. ---WARNING--: - -If you are brave enough to try this code or help to hack on it, be sure -to add 'spu' to configure's --enable-targets option, e.g.: - - ./configure \ - --enable-targets=x86,x86_64,powerpc,spu - ---------------------------------------------------------------------------- - -TODO: -* In commit r142152 vector legalization was set to element promotion per - default. This breaks half vectors (e.g. v2i32) badly as they get element - promoted to much slower types (v2i64). - -* Many CellSPU specific codegen tests only grep & count the number of - instructions, not checking their place with FileCheck. There have also - been some commits that change the CellSPU checks, some of which might - have not been thoroughly scrutinized w.r.t. to the changes they cause in SPU - assembly. (especially since about the time of r142152) - -* Some of the i64 math have huge tablegen rules, which sometime cause - tablegen to run out of memory. See e.g. bug 8850. i64 arithmetics - should probably be done with libraries. - -* Create a machine pass for performing dual-pipeline scheduling specifically - for CellSPU, and insert branch prediction instructions as needed. - -* i32 instructions: - - * i32 division (work-in-progress) - -* i64 support (see i64operations.c test harness): - - * shifts and comparison operators: done - * sign and zero extension: done - * addition: done - * subtraction: needed - * multiplication: done - -* i128 support: - - * zero extension, any extension: done - * sign extension: done - * arithmetic operators (add, sub, mul, div): needed - * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed - - * or: done - -* f64 support - - * Comparison operators: - SETOEQ unimplemented - SETOGT unimplemented - SETOGE unimplemented - SETOLT unimplemented - SETOLE unimplemented - SETONE unimplemented - SETO done (lowered) - SETUO done (lowered) - SETUEQ unimplemented - SETUGT unimplemented - SETUGE unimplemented - SETULT unimplemented - SETULE unimplemented - SETUNE unimplemented - -* LLVM vector suport - - * VSETCC needs to be implemented. It's pretty straightforward to code, but - needs implementation. - -* Intrinsics - - * spu.h instrinsics added but not tested. Need to have an operational - llvm-spu-gcc in order to write a unit test harness. - -===-------------------------------------------------------------------------=== diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h deleted file mode 100644 index c660131706..0000000000 --- a/lib/Target/CellSPU/SPU.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// Cell SPU back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_IBMCELLSPU_H -#define LLVM_TARGET_IBMCELLSPU_H - -#include "MCTargetDesc/SPUMCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class SPUTargetMachine; - class FunctionPass; - class formatted_raw_ostream; - - FunctionPass *createSPUISelDag(SPUTargetMachine &TM); - FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm); - -} - -#endif /* LLVM_TARGET_IBMCELLSPU_H */ diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td deleted file mode 100644 index e835b9cac8..0000000000 --- a/lib/Target/CellSPU/SPU.td +++ /dev/null @@ -1,66 +0,0 @@ -//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the top level entry point for the STI Cell SPU target machine. -// -//===----------------------------------------------------------------------===// - -// Get the target-independent interfaces which we are implementing. -// -include "llvm/Target/Target.td" - -// Holder of code fragments (you'd think this'd already be in -// a td file somewhere... :-) - -class CodeFrag { - dag Fragment = frag; -} - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "SPURegisterInfo.td" - -//===----------------------------------------------------------------------===// -// Instruction formats, instructions -//===----------------------------------------------------------------------===// - -include "SPUNodes.td" -include "SPUOperands.td" -include "SPUSchedule.td" -include "SPUInstrFormats.td" -include "SPUInstrInfo.td" - -//===----------------------------------------------------------------------===// -// Subtarget features: -//===----------------------------------------------------------------------===// - -def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">; -def LargeMemFeature: - SubtargetFeature<"large_mem","UseLargeMem", "true", - "Use large (>256) LSA memory addressing [default = false]">; - -def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>; - -//===----------------------------------------------------------------------===// -// Calling convention: -//===----------------------------------------------------------------------===// - -include "SPUCallingConv.td" - -// Target: - -def SPUInstrInfo : InstrInfo { - let isLittleEndianEncoding = 1; -} - -def SPU : Target { - let InstructionSet = SPUInstrInfo; -} diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td deleted file mode 100644 index e051e04733..0000000000 --- a/lib/Target/CellSPU/SPU128InstrInfo.td +++ /dev/null @@ -1,41 +0,0 @@ -//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===// -// -// Cell SPU 128-bit operations -// -//===----------------------------------------------------------------------===// - -// zext 32->128: Zero extend 32-bit to 128-bit -def : Pat<(i128 (zext R32C:$rSrc)), - (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; - -// zext 64->128: Zero extend 64-bit to 128-bit -def : Pat<(i128 (zext R64C:$rSrc)), - (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; - -// zext 16->128: Zero extend 16-bit to 128-bit -def : Pat<(i128 (zext R16C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; - -// zext 8->128: Zero extend 8-bit to 128-bit -def : Pat<(i128 (zext R8C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; - -// anyext 32->128: Zero extend 32-bit to 128-bit -def : Pat<(i128 (anyext R32C:$rSrc)), - (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; - -// anyext 64->128: Zero extend 64-bit to 128-bit -def : Pat<(i128 (anyext R64C:$rSrc)), - (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; - -// anyext 16->128: Zero extend 16-bit to 128-bit -def : Pat<(i128 (anyext R16C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; - -// anyext 8->128: Zero extend 8-bit to 128-bit -def : Pat<(i128 (anyext R8C:$rSrc)), - (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; - -// Shift left -def : Pat<(shl GPRC:$rA, R32C:$rB), - (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td deleted file mode 100644 index bea33b5362..0000000000 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ /dev/null @@ -1,408 +0,0 @@ -//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===// -// -// Cell SPU 64-bit operations -// -//===----------------------------------------------------------------------===// - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// 64-bit comparisons: -// -// 1. The instruction sequences for vector vice scalar differ by a -// constant. In the scalar case, we're only interested in the -// top two 32-bit slots, whereas we're interested in an exact -// all-four-slot match in the vector case. -// -// 2. There are no "immediate" forms, since loading 64-bit constants -// could be a constant pool load. -// -// 3. i64 setcc results are i32, which are subsequently converted to a FSM -// mask when used in a select pattern. -// -// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO) -// [Note: this may be moot, since gb produces v4i32 or r32.] -// -// 5. The code sequences for r64 and v2i64 are probably overly conservative, -// compared to the code that gcc produces. -// -// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!) -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// selb instruction definition for i64. Note that the selection mask is -// a vector, produced by various forms of FSM: -def SELBr64_cond: - SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), - [/* no pattern */]>; - -// The generic i64 select pattern, which assumes that the comparison result -// is in a 32-bit register that contains a select mask pattern (i.e., gather -// bits result): - -def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue), - (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>; - -// select the negative condition: -class I64SELECTNegCond: - Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), - (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>; - -// setcc the negative condition: -class I64SETCCNegCond: - Pat<(cond R64C:$rA, R64C:$rB), - (XORIr32 compare.Fragment, -1)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// The i64 seteq fragment that does the scalar->vector conversion and -// comparison: -def CEQr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>; - -// The i64 seteq fragment that does the vector comparison -def CEQv2i64compare: - CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>; - -// i64 seteq (equality): the setcc result is i32, which is converted to a -// vector FSM mask when used in a select pattern. -// -// v2i64 seteq (equality): the setcc result is v4i32 -multiclass CompareEqual64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>; - def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CEQr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CEQv2i64compare.Fragment), R32C))>; -} - -defm I64EQ: CompareEqual64; - -def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; -def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>; - -// i64 setne: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setugt/setule: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CLGTr64ugt: - CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CLGTr64eq: - CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CLGTr64compare: - CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment, - (XSWDv2i64 CLGTr64ugt.Fragment), - CLGTr64eq.Fragment)>; - -def CLGTv2i64ugt: - CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>; - -def CLGTv2i64eq: - CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>; - -def CLGTv2i64compare: - CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment, - (XSWDv2i64 CLGTr64ugt.Fragment), - CLGTv2i64eq.Fragment)>; - -multiclass CompareLogicalGreaterThan64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGTr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>; -} - -defm I64LGT: CompareLogicalGreaterThan64; - -def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>; -//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), -// I64LGTv2i64.Fragment>; - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setuge/setult: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CLGEr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment, - CLGTr64eq.Fragment)), 0xb)>; - -def CLGEv2i64compare: - CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment, - CLGTv2i64eq.Fragment)), 0xf)>; - -multiclass CompareLogicalGreaterEqual64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGEr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>; -} - -defm I64LGE: CompareLogicalGreaterEqual64; - -def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>; -def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))), - I64LGEv2i64.Fragment>; - - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setgt/setle: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CGTr64sgt: - CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CGTr64eq: - CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG))>; - -def CGTr64compare: - CodeFrag<(SELBv2i64 CGTr64sgt.Fragment, - (XSWDv2i64 CGTr64sgt.Fragment), - CGTr64eq.Fragment)>; - -def CGTv2i64sgt: - CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>; - -def CGTv2i64eq: - CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>; - -def CGTv2i64compare: - CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment, - (XSWDv2i64 CGTr64sgt.Fragment), - CGTv2i64eq.Fragment)>; - -multiclass CompareGreaterThan64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CGTr64compare.Fragment), R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS - (FSMv4i32 CGTv2i64compare.Fragment), R32C))>; -} - -defm I64GT: CompareLogicalGreaterThan64; - -def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>; -//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), -// I64GTv2i64.Fragment>; - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// i64 setge/setlt: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def CGEr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment, - CGTr64eq.Fragment)), 0xb)>; - -def CGEv2i64compare: - CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment, - CGTv2i64eq.Fragment)), 0xf)>; - -multiclass CompareGreaterEqual64 { - // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>; - def v2i64: CodeFrag; - - // SELB mask from FSM: - def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>; - def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>; -} - -defm I64GE: CompareGreaterEqual64; - -def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>; -def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))), - I64GEv2i64.Fragment>; - -// i64 setult: -def : I64SETCCNegCond; -def : I64SELECTNegCond; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v2i64, i64 add -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class v2i64_add_cg: - CodeFrag<(CGv4i32 lhs, rhs)>; - -class v2i64_add_1: - CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>; - -class v2i64_add: - v2i64_add_1.Fragment, cg_mask>; - -def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG), - (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; - -def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)), - v2i64_add<(v2i64 VECREG:$rA), - (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v2i64, i64 subtraction -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class v2i64_sub_bg: CodeFrag<(BGv4i32 lhs, rhs)>; - -class v2i64_sub: - CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>; - -def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (COPY_TO_REGCLASS - v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG), - v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment, - (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; - -def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)), - v2i64_sub<(v2i64 VECREG:$rA), - (v2i64 VECREG:$rB), - v2i64_sub_bg<(v2i64 VECREG:$rA), - (v2i64 VECREG:$rB)>.Fragment, - (v4i32 VECREG:$rCGmask)>.Fragment>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v2i64, i64 multiply -// -// Note: i64 multiply is simply the vector->scalar conversion of the -// full-on v2i64 multiply, since the entire vector has to be manipulated -// anyway. -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class v2i64_mul_ahi64 : - CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; - -class v2i64_mul_bhi64 : - CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; - -class v2i64_mul_alo64 : - CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; - -class v2i64_mul_blo64 : - CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; - -class v2i64_mul_ashlq2: - CodeFrag<(SHLQBYIv4i32 rA, 0x2)>; - -class v2i64_mul_ashlq4: - CodeFrag<(SHLQBYIv4i32 rA, 0x4)>; - -class v2i64_mul_bshlq2 : - CodeFrag<(SHLQBYIv4i32 rB, 0x2)>; - -class v2i64_mul_bshlq4 : - CodeFrag<(SHLQBYIv4i32 rB, 0x4)>; - -class v2i64_highprod: - CodeFrag<(Av4i32 - (Av4i32 - (MPYUv4i32 v2i64_mul_bshlq4.Fragment, // a1 x b3 - v2i64_mul_ahi64.Fragment), - (MPYHv4i32 v2i64_mul_ahi64.Fragment, // a0 x b3 - v2i64_mul_bshlq4.Fragment)), - (Av4i32 - (MPYHv4i32 v2i64_mul_bhi64.Fragment, - v2i64_mul_ashlq4.Fragment), - (Av4i32 - (MPYHv4i32 v2i64_mul_ashlq4.Fragment, - v2i64_mul_bhi64.Fragment), - (Av4i32 - (MPYUv4i32 v2i64_mul_ashlq4.Fragment, - v2i64_mul_bhi64.Fragment), - (Av4i32 - (MPYHv4i32 v2i64_mul_ashlq2.Fragment, - v2i64_mul_bshlq2.Fragment), - (MPYUv4i32 v2i64_mul_ashlq2.Fragment, - v2i64_mul_bshlq2.Fragment))))))>; - -class v2i64_mul_a3_b3: - CodeFrag<(MPYUv4i32 v2i64_mul_alo64.Fragment, - v2i64_mul_blo64.Fragment)>; - -class v2i64_mul_a2_b3: - CodeFrag<(SELBv4i32 (SHLQBYIv4i32 - (MPYHHUv4i32 v2i64_mul_alo64.Fragment, - v2i64_mul_bshlq2.Fragment), 0x2), - (ILv4i32 0), - (FSMBIv4i32 0xc3c3))>; - -class v2i64_mul_a3_b2: - CodeFrag<(SELBv4i32 (SHLQBYIv4i32 - (MPYHHUv4i32 v2i64_mul_blo64.Fragment, - v2i64_mul_ashlq2.Fragment), 0x2), - (ILv4i32 0), - (FSMBIv4i32 0xc3c3))>; - -class v2i64_lowsum: - v2i64_add.Fragment, - v2i64_mul_a2_b3.Fragment, rCGmask>.Fragment, - v2i64_mul_a3_b2.Fragment, rCGmask>; - -class v2i64_mul: - v2i64_add.Fragment, - (SELBv4i32 v2i64_highprod.Fragment, - (ILv4i32 0), - (FSMBIv4i32 0x0f0f)), - rCGmask>; - -def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG), - (COPY_TO_REGCLASS R64C:$rB, VECREG), - (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; - -def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)), - v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// f64 comparisons -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// selb instruction definition for i64. Note that the selection mask is -// a vector, produced by various forms of FSM: -def SELBf64_cond: - SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC), - [(set R64FP:$rT, - (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>; diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp deleted file mode 100644 index 3396e8b1ef..0000000000 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ /dev/null @@ -1,333 +0,0 @@ -//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to Cell SPU assembly language. This printer -// is the output mechanism used by `llc'. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asmprinter" -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class SPUAsmPrinter : public AsmPrinter { - public: - explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : - AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "STI CBEA SPU Assembly Printer"; - } - - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. - void printInstruction(const MachineInstr *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - - - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - void printOp(const MachineOperand &MO, raw_ostream &OS); - - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (MO.isReg()) { - O << getRegisterName(MO.getReg()); - } else if (MO.isImm()) { - O << MO.getImm(); - } else { - printOp(MO, O); - } - } - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - - void - printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - unsigned int value = MI->getOperand(OpNo).getImm(); - assert(value < (1 << 8) && "Invalid u7 argument"); - O << value; - } - - void - printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - char value = MI->getOperand(OpNo).getImm(); - O << (int) value; - O << "("; - printOperand(MI, OpNo+1, O); - O << ")"; - } - - void - printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (short) MI->getOperand(OpNo).getImm(); - } - - void - printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (unsigned short)MI->getOperand(OpNo).getImm(); - } - - void - printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // When used as the base register, r0 reads constant zero rather than - // the value contained in the register. For this reason, the darwin - // assembler requires that we print r0 as 0 (no r) when used as the base. - const MachineOperand &MO = MI->getOperand(OpNo); - O << getRegisterName(MO.getReg()) << ", "; - printOperand(MI, OpNo+1, O); - } - - void - printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - unsigned int value = MI->getOperand(OpNo).getImm(); - assert(value <= (1 << 19) - 1 && "Invalid u18 argument"); - O << value; - } - - void - printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - short value = MI->getOperand(OpNo).getImm(); - assert((value >= -(1 << 9) && value <= (1 << 9) - 1) - && "Invalid s10 argument"); - O << value; - } - - void - printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - short value = MI->getOperand(OpNo).getImm(); - assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); - O << value; - } - - void - printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - assert(MI->getOperand(OpNo).isImm() && - "printDFormAddr first operand is not immediate"); - int64_t value = int64_t(MI->getOperand(OpNo).getImm()); - int16_t value16 = int16_t(value); - assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) - && "Invalid dform s10 offset argument"); - O << (value16 & ~0xf) << "("; - printOperand(MI, OpNo+1, O); - O << ")"; - } - - void - printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - /* Note: operand 1 is an offset or symbol name. */ - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - if (MI->getOperand(OpNo+1).isImm()) { - int displ = int(MI->getOperand(OpNo+1).getImm()); - if (displ > 0) - O << "+" << displ; - else if (displ < 0) - O << displ; - } - } - } - - void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // Used to generate a ".-", but it turns out that the assembler - // really wants the target. - // - // N.B.: This operand is used for call targets. Branch hints are another - // animal entirely. - printOp(MI->getOperand(OpNo), O); - } - - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - O << "@h"; - } - } - - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - O << "@l"; - } - } - - /// Print local store address - void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - int value = (int) MI->getOperand(OpNo).getImm(); - assert((value >= 0 && value < 16) - && "Invalid negated immediate rotate 7-bit argument"); - O << -value; - } else { - llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); - } - } - - void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){ - assert(MI->getOperand(OpNo).isImm() && - "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); - int value = (int) MI->getOperand(OpNo).getImm(); - assert((value >= 0 && value <= 32) - && "Invalid negated immediate rotate 7-bit argument"); - O << -value; - } - }; -} // end of anonymous namespace - -// Include the auto-generated portion of the assembly writer -#include "SPUGenAsmWriter.inc" - -void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { - switch (MO.getType()) { - case MachineOperand::MO_Immediate: - report_fatal_error("printOp() does not handle immediate values"); - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_ExternalSymbol: - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() != Reloc::Static) { - O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName() - << "$non_lazy_ptr"; - return; - } - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - case MachineOperand::MO_GlobalAddress: - // External or weakly linked global variables need non-lazily-resolved - // stubs - if (TM.getRelocationModel() != Reloc::Static) { - const GlobalValue *GV = MO.getGlobal(); - if (((GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { - O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - return; - } - } - O << *Mang->getSymbol(MO.getGlobal()); - return; - case MachineOperand::MO_MCSymbol: - O << *(MO.getMCSymbol()); - return; - default: - O << ""; - return; - } -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: - // See if this is a generic print operand - return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); - case 'L': // Write second word of DImode reference. - // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) - return true; - ++OpNo; // Return the high-part. - break; - } - } - - printOperand(MI, OpNo, O); - return false; -} - -bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - printMemRegReg(MI, OpNo, O); - return false; -} - -// Force static initialization. -extern "C" void LLVMInitializeCellSPUAsmPrinter() { - RegisterAsmPrinter X(TheCellSPUTarget); -} diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td deleted file mode 100644 index 9bc6be7986..0000000000 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ /dev/null @@ -1,53 +0,0 @@ -//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the STI Cell SPU architecture. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Return Value Calling Convention -//===----------------------------------------------------------------------===// - -// Return-value convention for Cell SPU: return value to be passed in reg 3-74 -def RetCC_SPU : CallingConv<[ - CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74]>> -]>; - - -//===----------------------------------------------------------------------===// -// CellSPU Argument Calling Conventions -//===----------------------------------------------------------------------===// -def CCC_SPU : CallingConv<[ - CCIfType<[i8, i16, i32, i64, i128, f32, f64, - v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74]>>, - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToStack<16, 16>> -]>; diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp deleted file mode 100644 index f01199515a..0000000000 --- a/lib/Target/CellSPU/SPUFrameLowering.cpp +++ /dev/null @@ -1,256 +0,0 @@ -//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the Cell SPU target. -// -//===----------------------------------------------------------------------===// - -#include "SPUFrameLowering.h" -#include "SPU.h" -#include "SPUInstrBuilder.h" -#include "SPUInstrInfo.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// SPUFrameLowering: -//===----------------------------------------------------------------------===// - -SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), - Subtarget(sti) { - LR[0].first = SPU::R0; - LR[0].second = 16; -} - - -//-------------------------------------------------------------------------- -// hasFP - Return true if the specified function actually has a dedicated frame -// pointer register. This is true if the function needs a frame pointer and has -// a non-zero stack size. -bool SPUFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - return MFI->getStackSize() && - (MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()); -} - - -/// determineFrameLayout - Determine the size of the frame and maximum call -/// frame size. -void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); - - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned TargetAlign = getStackAlignment(); - unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment()); - assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); - unsigned AlignMask = Align - 1; - - // Get the maximum call frame size of all the calls. - unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - - // If we have dynamic alloca then maxCallFrameSize needs to be aligned so - // that allocations will be aligned. - if (MFI->hasVarSizedObjects()) - maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - - // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); - - // Include call frame size in total. - FrameSize += maxCallFrameSize; - - // Make sure the frame is aligned. - FrameSize = (FrameSize + AlignMask) & ~AlignMask; - - // Update frame info. - MFI->setStackSize(FrameSize); -} - -void SPUFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const SPUInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - MachineModuleInfo &MMI = MF.getMMI(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Prepare for debug frame info. - bool hasDebugInfo = MMI.hasDebugInfo(); - MCSymbol *FrameLabel = 0; - - // Move MBBI back to the beginning of the function. - MBBI = MBB.begin(); - - // Work out frame sizes. - determineFrameLayout(MF); - int FrameSize = MFI->getStackSize(); - - assert((FrameSize & 0xf) == 0 - && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); - - // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->adjustsStack()) { - FrameSize = -(FrameSize + SPUFrameLowering::minStackSize()); - if (hasDebugInfo) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); - } - - // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) - // for the ABI - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16) - .addReg(SPU::R1); - if (isInt<10>(FrameSize)) { - // Spill $sp to adjusted $sp - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize) - .addReg(SPU::R1); - // Adjust $sp by required amout - BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) - .addImm(FrameSize); - } else if (isInt<16>(FrameSize)) { - // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use - // $r2 to adjust $sp: - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) - .addImm(-16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) - .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) - .addReg(SPU::R2) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) - .addReg(SPU::R1) - .addReg(SPU::R2); - BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2) - .addReg(SPU::R2) - .addImm(16); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) - .addReg(SPU::R2) - .addReg(SPU::R1); - } else { - report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); - } - - if (hasDebugInfo) { - std::vector &Moves = MMI.getFrameMoves(); - - // Show update of SP. - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - - // Add callee saved registers to move list. - const std::vector &CSI = MFI->getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - if (Reg == SPU::R0) continue; - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); - } - - // Mark effective beginning of when frame pointer is ready. - MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); - - MachineLocation FPDst(SPU::R1); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); - } - } -} - -void SPUFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - const SPUInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int FrameSize = MFI->getStackSize(); - int LinkSlotOffset = SPUFrameLowering::stackSlotSize(); - DebugLoc dl = MBBI->getDebugLoc(); - - assert(MBBI->getOpcode() == SPU::RET && - "Can only insert epilog into returning blocks"); - assert((FrameSize & 0xf) == 0 && "FrameSize not aligned"); - - // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->adjustsStack()) { - FrameSize = FrameSize + SPUFrameLowering::minStackSize(); - if (isInt<10>(FrameSize + LinkSlotOffset)) { - // Reload $lr, adjust $sp by required amount - // Note: We do this to slightly improve dual issue -- not by much, but it - // is an opportunity for dual issue. - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) - .addImm(FrameSize + LinkSlotOffset) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1) - .addReg(SPU::R1) - .addImm(FrameSize); - } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { - // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use - // $r2 to adjust $sp: - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) - .addImm(16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) - .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) - .addReg(SPU::R1) - .addReg(SPU::R2); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) - .addImm(16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). - addReg(SPU::R2) - .addImm(16); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) - .addReg(SPU::R2) - .addReg(SPU::R1); - } else { - report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); - } - } -} - -void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const{ - // Mark LR and SP unused, since the prolog spills them to stack and - // we don't want anyone else to spill them for us. - // - // Also, unless R2 is really used someday, don't spill it automatically. - MF.getRegInfo().setPhysRegUnused(SPU::R0); - MF.getRegInfo().setPhysRegUnused(SPU::R1); - MF.getRegInfo().setPhysRegUnused(SPU::R2); - - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = &SPU::R32CRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); -} diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h deleted file mode 100644 index 11c52818dd..0000000000 --- a/lib/Target/CellSPU/SPUFrameLowering.h +++ /dev/null @@ -1,80 +0,0 @@ -//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains CellSPU frame information that doesn't fit anywhere else -// cleanly... -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_FRAMEINFO_H -#define SPU_FRAMEINFO_H - -#include "SPURegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class SPUSubtarget; - - class SPUFrameLowering: public TargetFrameLowering { - const SPUSubtarget &Subtarget; - std::pair LR[1]; - - public: - SPUFrameLowering(const SPUSubtarget &sti); - - //! Determine the frame's layour - void determineFrameLayout(MachineFunction &MF) const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - //! Prediate: Target has dedicated frame pointer - bool hasFP(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - - //! Return a function's saved spill slots - /*! - For CellSPU, a function's saved spill slots is just the link register. - */ - const std::pair * - getCalleeSaveSpillSlots(unsigned &NumEntries) const; - - //! Stack slot size (16 bytes) - static int stackSlotSize() { - return 16; - } - //! Maximum frame offset representable by a signed 10-bit integer - /*! - This is the maximum frame offset that can be expressed as a 10-bit - integer, used in D-form addresses. - */ - static int maxFrameOffset() { - return ((1 << 9) - 1) * stackSlotSize(); - } - //! Minimum frame offset representable by a signed 10-bit integer - static int minFrameOffset() { - return -(1 << 9) * stackSlotSize(); - } - //! Minimum frame size (enough to spill LR + SP) - static int minStackSize() { - return (2 * stackSlotSize()); - } - //! Convert frame index to stack offset - static int FItoStackOffset(int frame_index) { - return frame_index * stackSlotSize(); - } - }; -} - -#endif diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp deleted file mode 100644 index 67a83f16a6..0000000000 --- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements hazard recognizers for scheduling on Cell SPU -// processors. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sched" - -#include "SPUHazardRecognizers.h" -#include "SPU.h" -#include "SPUInstrInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Cell SPU hazard recognizer -// -// This is the pipeline hazard recognizer for the Cell SPU processor. It does -// very little right now. -//===----------------------------------------------------------------------===// - -/// Return the pipeline hazard type encountered or generated by this -/// instruction. Currently returns NoHazard. -/// -/// \return NoHazard -ScheduleHazardRecognizer::HazardType -SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls) -{ - // Initial thoughts on how to do this, but this code cannot work unless the - // function's prolog and epilog code are also being scheduled so that we can - // accurately determine which pipeline is being scheduled. -#if 0 - assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead"); - - const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); - ScheduleHazardRecognizer::HazardType retval = NoHazard; - bool mustBeOdd = false; - - switch (Node->getOpcode()) { - case SPU::LQDv16i8: - case SPU::LQDv8i16: - case SPU::LQDv4i32: - case SPU::LQDv4f32: - case SPU::LQDv2f64: - case SPU::LQDr128: - case SPU::LQDr64: - case SPU::LQDr32: - case SPU::LQDr16: - case SPU::LQAv16i8: - case SPU::LQAv8i16: - case SPU::LQAv4i32: - case SPU::LQAv4f32: - case SPU::LQAv2f64: - case SPU::LQAr128: - case SPU::LQAr64: - case SPU::LQAr32: - case SPU::LQXv4i32: - case SPU::LQXr128: - case SPU::LQXr64: - case SPU::LQXr32: - case SPU::LQXr16: - case SPU::STQDv16i8: - case SPU::STQDv8i16: - case SPU::STQDv4i32: - case SPU::STQDv4f32: - case SPU::STQDv2f64: - case SPU::STQDr128: - case SPU::STQDr64: - case SPU::STQDr32: - case SPU::STQDr16: - case SPU::STQDr8: - case SPU::STQAv16i8: - case SPU::STQAv8i16: - case SPU::STQAv4i32: - case SPU::STQAv4f32: - case SPU::STQAv2f64: - case SPU::STQAr128: - case SPU::STQAr64: - case SPU::STQAr32: - case SPU::STQAr16: - case SPU::STQAr8: - case SPU::STQXv16i8: - case SPU::STQXv8i16: - case SPU::STQXv4i32: - case SPU::STQXv4f32: - case SPU::STQXv2f64: - case SPU::STQXr128: - case SPU::STQXr64: - case SPU::STQXr32: - case SPU::STQXr16: - case SPU::STQXr8: - case SPU::RET: - mustBeOdd = true; - break; - default: - // Assume that this instruction can be on the even pipe - break; - } - - if (mustBeOdd && !EvenOdd) - retval = Hazard; - - DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " - << retval << "\n"); - EvenOdd ^= 1; - return retval; -#else - return NoHazard; -#endif -} - -void SPUHazardRecognizer::EmitInstruction(SUnit *SU) -{ -} - -void SPUHazardRecognizer::AdvanceCycle() -{ - DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n"); -} - -void SPUHazardRecognizer::EmitNoop() -{ - AdvanceCycle(); -} diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h deleted file mode 100644 index 30acaeaa36..0000000000 --- a/lib/Target/CellSPU/SPUHazardRecognizers.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines hazard recognizers for scheduling on the Cell SPU -// processor. -// -//===----------------------------------------------------------------------===// - -#ifndef SPUHAZRECS_H -#define SPUHAZRECS_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" - -namespace llvm { - -class TargetInstrInfo; - -/// SPUHazardRecognizer -class SPUHazardRecognizer : public ScheduleHazardRecognizer -{ -public: - SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {} - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void EmitNoop(); -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp deleted file mode 100644 index 5d5061054b..0000000000 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ /dev/null @@ -1,1192 +0,0 @@ -//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a pattern matching instruction selector for the Cell SPU, -// converting from a legalized dag to a SPU-target dag. -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "SPUHazardRecognizers.h" -#include "SPUFrameLowering.h" -#include "SPUTargetMachine.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Constants.h" -#include "llvm/GlobalValue.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates - bool - isI32IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values - bool - isI32IntU10Immediate(ConstantSDNode *CN) - { - return isUInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values - bool - isI16IntU10Immediate(ConstantSDNode *CN) - { - return isUInt<10>((short) CN->getZExtValue()); - } - - //! ConstantSDNode predicate for signed 16-bit values - /*! - \param CN The constant SelectionDAG node holding the value - \param Imm The returned 16-bit value, if returning true - - This predicate tests the value in \a CN to see whether it can be - represented as a 16-bit, sign-extended quantity. Returns true if - this is the case. - */ - bool - isIntS16Immediate(ConstantSDNode *CN, short &Imm) - { - EVT vt = CN->getValueType(0); - Imm = (short) CN->getZExtValue(); - if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) { - return true; - } else if (vt == MVT::i32) { - int32_t i_val = (int32_t) CN->getZExtValue(); - return i_val == SignExtend32<16>(i_val); - } else { - int64_t i_val = (int64_t) CN->getZExtValue(); - return i_val == SignExtend64<16>(i_val); - } - } - - //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. - static bool - isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) - { - EVT vt = FPN->getValueType(0); - if (vt == MVT::f32) { - int val = FloatToBits(FPN->getValueAPF().convertToFloat()); - if (val == SignExtend32<16>(val)) { - Imm = (short) val; - return true; - } - } - - return false; - } - - //! Generate the carry-generate shuffle mask. - SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size()); - } - - //! Generate the borrow-generate shuffle mask - SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { - SmallVector ShufBytes; - - // Create the shuffle mask for "rotating" the borrow up one register slot - // once the borrow is generated. - ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); - ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size()); - } - - //===------------------------------------------------------------------===// - /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine - /// instructions for SelectionDAG operations. - /// - class SPUDAGToDAGISel : - public SelectionDAGISel - { - const SPUTargetMachine &TM; - const SPUTargetLowering &SPUtli; - unsigned GlobalBaseReg; - - public: - explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), - SPUtli(*tm.getTargetLowering()) - { } - - virtual bool runOnMachineFunction(MachineFunction &MF) { - // Make sure we re-emit a set of the global base reg if necessary - GlobalBaseReg = 0; - SelectionDAGISel::runOnMachineFunction(MF); - return true; - } - - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. - inline SDValue getI32Imm(uint32_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); - } - - /// getSmallIPtrImm - Return a target constant of pointer type. - inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); - } - - SDNode *emitBuildVector(SDNode *bvNode) { - EVT vecVT = bvNode->getValueType(0); - DebugLoc dl = bvNode->getDebugLoc(); - - // Check to see if this vector can be represented as a CellSPU immediate - // constant by invoking all of the instruction selection predicates: - if (((vecVT == MVT::v8i16) && - (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || - ((vecVT == MVT::v4i32) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || - (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || - ((vecVT == MVT::v2i64) && - ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) { - HandleSDNode Dummy(SDValue(bvNode, 0)); - if (SDNode *N = Select(bvNode)) - return N; - return Dummy.getValue().getNode(); - } - - // No, need to emit a constant pool spill: - std::vector CV; - - for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { - ConstantSDNode *V = cast (bvNode->getOperand(i)); - CV.push_back(const_cast(V->getConstantIntValue())); - } - - const Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); - unsigned Alignment = cast(CPIdx)->getAlignment(); - SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, TM); - - HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, - CurDAG->getEntryNode(), CGPoolOffset, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment)); - CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } - - /// Select - Convert the specified operand from a target-independent to a - /// target-specific node if it hasn't already been changed. - SDNode *Select(SDNode *N); - - //! Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDNode *N, EVT OpVT); - - //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDNode *N, EVT OpVT); - - //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDNode *N, EVT OpVT); - - //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl); - - //! Alternate instruction emit sequence for loading i64 constants - SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl); - - //! Returns true if the address N is an A-form (local store) address - bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index); - - //! D-form address predicate - bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// Alternate D-form address using i7 offset predicate - bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base); - - /// D-form address selection workhorse - bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base, int minOffset, int maxOffset); - - //! Address predicate if N can be expressed as an indexed [r+r] operation. - bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) { - SDValue Op0, Op1; - switch (ConstraintCode) { - default: return true; - case 'm': // memory - if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) - && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) - SelectXFormAddr(Op.getNode(), Op, Op0, Op1); - break; - case 'o': // offsetable - if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) - && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) { - Op0 = Op; - Op1 = getSmallIPtrImm(0); - } - break; - case 'v': // not offsetable -#if 1 - llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled."); -#else - SelectAddrIdxOnly(Op, Op, Op0, Op1); - break; -#endif - } - - OutOps.push_back(Op0); - OutOps.push_back(Op1); - return false; - } - - virtual const char *getPassName() const { - return "Cell SPU DAG->DAG Pattern Instruction Selection"; - } - - private: - SDValue getRC( MVT ); - - // Include the pieces autogenerated from the target description. -#include "SPUGenDAGISel.inc" - }; -} - -/*! - \param Op The ISD instruction operand - \param N The address to be tested - \param Base The base address - \param Index The base address index - */ -bool -SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { - // These match the addr256k operand type: - EVT OffsVT = MVT::i16; - SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); - int64_t val; - - switch (N.getOpcode()) { - case ISD::Constant: - val = dyn_cast(N.getNode())->getSExtValue(); - Base = CurDAG->getTargetConstant( val , MVT::i32); - Index = Zero; - return true; - case ISD::ConstantPool: - case ISD::GlobalAddress: - report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered."); - /*NOTREACHED*/ - - case ISD::TargetConstant: - case ISD::TargetGlobalAddress: - case ISD::TargetJumpTable: - report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global " - "not wrapped as A-form address."); - /*NOTREACHED*/ - - case SPUISD::AFormAddr: - // Just load from memory if there's only a single use of the location, - // otherwise, this will get handled below with D-form offset addresses - if (N.hasOneUse()) { - SDValue Op0 = N.getOperand(0); - switch (Op0.getOpcode()) { - case ISD::TargetConstantPool: - case ISD::TargetJumpTable: - Base = Op0; - Index = Zero; - return true; - - case ISD::TargetGlobalAddress: { - GlobalAddressSDNode *GSDN = cast(Op0); - const GlobalValue *GV = GSDN->getGlobal(); - if (GV->getAlignment() == 16) { - Base = Op0; - Index = Zero; - return true; - } - break; - } - } - } - break; - } - return false; -} - -bool -SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base) { - const int minDForm2Offset = -(1 << 7); - const int maxDForm2Offset = (1 << 7) - 1; - return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset, - maxDForm2Offset); -} - -/*! - \param Op The ISD instruction (ignored) - \param N The address to be tested - \param Base Base address register/pointer - \param Index Base address index - - Examine the input address by a base register plus a signed 10-bit - displacement, [r+I10] (D-form address). - - \return true if \a N is a D-form address with \a Base and \a Index set - to non-empty SDValue instances. -*/ -bool -SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { - return DFormAddressPredicate(Op, N, Base, Index, - SPUFrameLowering::minFrameOffset(), - SPUFrameLowering::maxFrameOffset()); -} - -bool -SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index, int minOffset, - int maxOffset) { - unsigned Opc = N.getOpcode(); - EVT PtrTy = SPUtli.getPointerTy(); - - if (Opc == ISD::FrameIndex) { - // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FIN = cast(N); - int FI = int(FIN->getIndex()); - DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " - << FI << "\n"); - if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI, PtrTy); - return true; - } - } else if (Opc == ISD::ADD) { - // Generated by getelementptr - const SDValue Op0 = N.getOperand(0); - const SDValue Op1 = N.getOperand(1); - - if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) - || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = N; - return true; - } else if (Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = cast(Op1); - int32_t offset = int32_t(CN->getSExtValue()); - - if (Op0.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast(Op0); - int FI = int(FIN->getIndex()); - DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset - << " frame index = " << FI << "\n"); - - if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI, PtrTy); - return true; - } - } else if (offset > minOffset && offset < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = Op0; - return true; - } - } else if (Op0.getOpcode() == ISD::Constant - || Op0.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = cast(Op0); - int32_t offset = int32_t(CN->getSExtValue()); - - if (Op1.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = cast(Op1); - int FI = int(FIN->getIndex()); - DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset - << " frame index = " << FI << "\n"); - - if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = CurDAG->getTargetFrameIndex(FI, PtrTy); - return true; - } - } else if (offset > minOffset && offset < maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = Op1; - return true; - } - } - } else if (Opc == SPUISD::IndirectAddr) { - // Indirect with constant offset -> D-Form address - const SDValue Op0 = N.getOperand(0); - const SDValue Op1 = N.getOperand(1); - - if (Op0.getOpcode() == SPUISD::Hi - && Op1.getOpcode() == SPUISD::Lo) { - // (SPUindirect (SPUhi , 0), (SPUlo , 0)) - Base = CurDAG->getTargetConstant(0, PtrTy); - Index = N; - return true; - } else if (isa(Op0) || isa(Op1)) { - int32_t offset = 0; - SDValue idxOp; - - if (isa(Op1)) { - ConstantSDNode *CN = cast(Op1); - offset = int32_t(CN->getSExtValue()); - idxOp = Op0; - } else if (isa(Op0)) { - ConstantSDNode *CN = cast(Op0); - offset = int32_t(CN->getSExtValue()); - idxOp = Op1; - } - - if (offset >= minOffset && offset <= maxOffset) { - Base = CurDAG->getTargetConstant(offset, PtrTy); - Index = idxOp; - return true; - } - } - } else if (Opc == SPUISD::AFormAddr) { - Base = CurDAG->getTargetConstant(0, N.getValueType()); - Index = N; - return true; - } else if (Opc == SPUISD::LDRESULT) { - Base = CurDAG->getTargetConstant(0, N.getValueType()); - Index = N; - return true; - } else if (Opc == ISD::Register - ||Opc == ISD::CopyFromReg - ||Opc == ISD::UNDEF - ||Opc == ISD::Constant) { - unsigned OpOpc = Op->getOpcode(); - - if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { - // Direct load/store without getelementptr - SDValue Offs; - - Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2)); - - if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { - if (Offs.getOpcode() == ISD::UNDEF) - Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); - - Base = Offs; - Index = N; - return true; - } - } else { - /* If otherwise unadorned, default to D-form address with 0 offset: */ - if (Opc == ISD::CopyFromReg) { - Index = N.getOperand(1); - } else { - Index = N; - } - - Base = CurDAG->getTargetConstant(0, Index.getValueType()); - return true; - } - } - - return false; -} - -/*! - \param Op The ISD instruction operand - \param N The address operand - \param Base The base pointer operand - \param Index The offset/index operand - - If the address \a N can be expressed as an A-form or D-form address, returns - false. Otherwise, creates two operands, Base and Index that will become the - (r)(r) X-form address. -*/ -bool -SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { - if (!SelectAFormAddr(Op, N, Base, Index) - && !SelectDFormAddr(Op, N, Base, Index)) { - // If the address is neither A-form or D-form, punt and use an X-form - // address: - Base = N.getOperand(1); - Index = N.getOperand(0); - return true; - } - - return false; -} - -/*! - Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue - to be used as the last parameter of a -CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call - \param VT the value type for which we want a register class -*/ -SDValue SPUDAGToDAGISel::getRC( MVT VT ) { - switch( VT.SimpleTy ) { - case MVT::i8: - return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32); - case MVT::i16: - return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32); - case MVT::i32: - return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); - case MVT::f32: - return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32); - case MVT::i64: - return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); - case MVT::i128: - return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32); - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v4f32: - case MVT::v2i64: - case MVT::v2f64: - return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); - default: - assert( false && "add a new case here" ); - return SDValue(); - } -} - -//! Convert the operand from a target-independent to a target-specific node -/*! - */ -SDNode * -SPUDAGToDAGISel::Select(SDNode *N) { - unsigned Opc = N->getOpcode(); - int n_ops = -1; - unsigned NewOpc = 0; - EVT OpVT = N->getValueType(0); - SDValue Ops[8]; - DebugLoc dl = N->getDebugLoc(); - - if (N->isMachineOpcode()) - return NULL; // Already selected. - - if (Opc == ISD::FrameIndex) { - int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); - SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0)); - - if (FI < 128) { - NewOpc = SPU::AIr32; - Ops[0] = TFI; - Ops[1] = Imm0; - n_ops = 2; - } else { - NewOpc = SPU::Ar32; - Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); - Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, - N->getValueType(0), TFI), - 0); - n_ops = 2; - } - } else if (Opc == ISD::Constant && OpVT == MVT::i64) { - // Catch the i64 constants that end up here. Note: The backend doesn't - // attempt to legalize the constant (it's useless because DAGCombiner - // will insert 64-bit constants and we can't stop it). - return SelectI64Constant(N, OpVT, N->getDebugLoc()); - } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) - && OpVT == MVT::i64) { - SDValue Op0 = N->getOperand(0); - EVT Op0VT = Op0.getValueType(); - EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), - Op0VT, (128 / Op0VT.getSizeInBits())); - EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue shufMask; - - switch (Op0VT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); - /*NOTREACHED*/ - case MVT::i32: - shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x00010203, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x08090a0b, MVT::i32)); - break; - - case MVT::i16: - shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80800203, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80800a0b, MVT::i32)); - break; - - case MVT::i8: - shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x80808003, MVT::i32), - CurDAG->getConstant(0x80808080, MVT::i32), - CurDAG->getConstant(0x8080800b, MVT::i32)); - break; - } - - SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); - - HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, - Op0VecVT, Op0)); - - SDValue PromScalar; - if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) - PromScalar = SDValue(N, 0); - else - PromScalar = PromoteScalar.getValue(); - - SDValue zextShuffle = - CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - PromScalar, PromScalar, - SDValue(shufMaskLoad, 0)); - - HandleSDNode Dummy2(zextShuffle); - if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) - zextShuffle = SDValue(N, 0); - else - zextShuffle = Dummy2.getValue(); - HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle)); - - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - SelectCode(Dummy.getValue().getNode()); - return Dummy.getValue().getNode(); - } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - - HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0))); - - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); - - HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0))); - - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { - SDNode *CGLoad = - emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - - HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0))); - CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); - if (SDNode *N = SelectCode(Dummy.getValue().getNode())) - return N; - return Dummy.getValue().getNode(); - } else if (Opc == ISD::TRUNCATE) { - SDValue Op0 = N->getOperand(0); - if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) - && OpVT == MVT::i32 - && Op0.getValueType() == MVT::i64) { - // Catch (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32 - // - // Take advantage of the fact that the upper 32 bits are in the - // i32 preferred slot and avoid shuffle gymnastics: - ConstantSDNode *CN = dyn_cast(Op0.getOperand(1)); - if (CN != 0) { - unsigned shift_amt = unsigned(CN->getZExtValue()); - - if (shift_amt >= 32) { - SDNode *hi32 = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, - Op0.getOperand(0), getRC(MVT::i32)); - - shift_amt -= 32; - if (shift_amt > 0) { - // Take care of the additional shift, if present: - SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32); - unsigned Opc = SPU::ROTMAIr32_i32; - - if (Op0.getOpcode() == ISD::SRL) - Opc = SPU::ROTMr32; - - hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0), - shift); - } - - return hi32; - } - } - } - } else if (Opc == ISD::SHL) { - if (OpVT == MVT::i64) - return SelectSHLi64(N, OpVT); - } else if (Opc == ISD::SRL) { - if (OpVT == MVT::i64) - return SelectSRLi64(N, OpVT); - } else if (Opc == ISD::SRA) { - if (OpVT == MVT::i64) - return SelectSRAi64(N, OpVT); - } else if (Opc == ISD::FNEG - && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { - DebugLoc dl = N->getDebugLoc(); - // Check if the pattern is a special form of DFNMS: - // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) - SDValue Op0 = N->getOperand(0); - if (Op0.getOpcode() == ISD::FSUB) { - SDValue Op00 = Op0.getOperand(0); - if (Op00.getOpcode() == ISD::FMUL) { - unsigned Opc = SPU::DFNMSf64; - if (OpVT == MVT::v2f64) - Opc = SPU::DFNMSv2f64; - - return CurDAG->getMachineNode(Opc, dl, OpVT, - Op00.getOperand(0), - Op00.getOperand(1), - Op0.getOperand(1)); - } - } - - SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64); - SDNode *signMask = 0; - unsigned Opc = SPU::XORfneg64; - - if (OpVT == MVT::f64) { - signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl); - } else if (OpVT == MVT::v2f64) { - Opc = SPU::XORfnegvec; - signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, - MVT::v2i64, - negConst, negConst).getNode()); - } - - return CurDAG->getMachineNode(Opc, dl, OpVT, - N->getOperand(0), SDValue(signMask, 0)); - } else if (Opc == ISD::FABS) { - if (OpVT == MVT::f64) { - SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); - return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, - N->getOperand(0), SDValue(signMask, 0)); - } else if (OpVT == MVT::v2f64) { - SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); - SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, - absConst, absConst); - SDNode *signMask = emitBuildVector(absVec.getNode()); - return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, - N->getOperand(0), SDValue(signMask, 0)); - } - } else if (Opc == SPUISD::LDRESULT) { - // Custom select instructions for LDRESULT - EVT VT = N->getValueType(0); - SDValue Arg = N->getOperand(0); - SDValue Chain = N->getOperand(1); - SDNode *Result; - - Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT, - MVT::Other, Arg, - getRC( VT.getSimpleVT()), Chain); - return Result; - - } else if (Opc == SPUISD::IndirectAddr) { - // Look at the operands: SelectCode() will catch the cases that aren't - // specifically handled here. - // - // SPUInstrInfo catches the following patterns: - // (SPUindirect (SPUhi ...), (SPUlo ...)) - // (SPUindirect $sp, imm) - EVT VT = N->getValueType(0); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - RegisterSDNode *RN; - - if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo) - || (Op0.getOpcode() == ISD::Register - && ((RN = dyn_cast(Op0.getNode())) != 0 - && RN->getReg() != SPU::R1))) { - NewOpc = SPU::Ar32; - Ops[1] = Op1; - if (Op1.getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast(Op1); - Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT); - if (isInt<10>(CN->getSExtValue())) { - NewOpc = SPU::AIr32; - Ops[1] = Op1; - } else { - Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, - N->getValueType(0), - Op1), - 0); - } - } - Ops[0] = Op0; - n_ops = 2; - } - } - - if (n_ops > 0) { - if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops); - else - return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); - } else - return SelectCode(N); -} - -/*! - * Emit the instruction sequence for i64 left shifts. The basic algorithm - * is to fill the bottom two word slots with zeros so that zeros are shifted - * in as the entire quadword is shifted left. - * - * \note This code could also be used to implement v2i64 shl. - * - * @param Op The shl operand - * @param OpVT Op's machine value value type (doesn't need to be passed, but - * makes life easier.) - * @return The SDNode with the entire instruction sequence - */ -SDNode * -SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { - SDValue Op0 = N->getOperand(0); - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = N->getOperand(1); - EVT ShiftAmtVT = ShiftAmt.getValueType(); - SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; - SDValue SelMaskVal; - DebugLoc dl = N->getDebugLoc(); - - VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, - Op0, getRC(MVT::v2i64) ); - SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); - SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); - ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT, - CurDAG->getTargetConstant(0, OpVT)); - VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, - SDValue(ZeroFill, 0), - SDValue(VecOp0, 0), - SDValue(SelMask, 0)); - - if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { - unsigned bytes = unsigned(CN->getZExtValue()) >> 3; - unsigned bits = unsigned(CN->getZExtValue()) & 7; - - if (bytes > 0) { - Shift = - CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT, - SDValue(VecOp0, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); - } - - if (bits > 0) { - Shift = - CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : VecOp0), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); - } - } else { - SDNode *Bytes = - CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(3, ShiftAmtVT)); - SDNode *Bits = - CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(7, ShiftAmtVT)); - Shift = - CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT, - SDValue(VecOp0, 0), SDValue(Bytes, 0)); - Shift = - CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(Bits, 0)); - } - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(Shift, 0), getRC(MVT::i64)); -} - -/*! - * Emit the instruction sequence for i64 logical right shifts. - * - * @param Op The shl operand - * @param OpVT Op's machine value value type (doesn't need to be passed, but - * makes life easier.) - * @return The SDNode with the entire instruction sequence - */ -SDNode * -SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { - SDValue Op0 = N->getOperand(0); - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = N->getOperand(1); - EVT ShiftAmtVT = ShiftAmt.getValueType(); - SDNode *VecOp0, *Shift = 0; - DebugLoc dl = N->getDebugLoc(); - - VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, - Op0, getRC(MVT::v2i64) ); - - if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { - unsigned bytes = unsigned(CN->getZExtValue()) >> 3; - unsigned bits = unsigned(CN->getZExtValue()) & 7; - - if (bytes > 0) { - Shift = - CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT, - SDValue(VecOp0, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); - } - - if (bits > 0) { - Shift = - CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : VecOp0), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); - } - } else { - SDNode *Bytes = - CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(3, ShiftAmtVT)); - SDNode *Bits = - CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(7, ShiftAmtVT)); - - // Ensure that the shift amounts are negated! - Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, - SDValue(Bytes, 0), - CurDAG->getTargetConstant(0, ShiftAmtVT)); - - Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, - SDValue(Bits, 0), - CurDAG->getTargetConstant(0, ShiftAmtVT)); - - Shift = - CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT, - SDValue(VecOp0, 0), SDValue(Bytes, 0)); - Shift = - CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(Bits, 0)); - } - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(Shift, 0), getRC(MVT::i64)); -} - -/*! - * Emit the instruction sequence for i64 arithmetic right shifts. - * - * @param Op The shl operand - * @param OpVT Op's machine value value type (doesn't need to be passed, but - * makes life easier.) - * @return The SDNode with the entire instruction sequence - */ -SDNode * -SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { - // Promote Op0 to vector - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), - OpVT, (128 / OpVT.getSizeInBits())); - SDValue ShiftAmt = N->getOperand(1); - EVT ShiftAmtVT = ShiftAmt.getValueType(); - DebugLoc dl = N->getDebugLoc(); - - SDNode *VecOp0 = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - VecVT, N->getOperand(0), getRC(MVT::v2i64)); - - SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); - SDNode *SignRot = - CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, - SDValue(VecOp0, 0), SignRotAmt); - SDNode *UpperHalfSign = - CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32)); - - SDNode *UpperHalfSignMask = - CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); - SDNode *UpperLowerMask = - CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, - CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); - SDNode *UpperLowerSelect = - CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, - SDValue(UpperHalfSignMask, 0), - SDValue(VecOp0, 0), - SDValue(UpperLowerMask, 0)); - - SDNode *Shift = 0; - - if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { - unsigned bytes = unsigned(CN->getZExtValue()) >> 3; - unsigned bits = unsigned(CN->getZExtValue()) & 7; - - if (bytes > 0) { - bytes = 31 - bytes; - Shift = - CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT, - SDValue(UpperLowerSelect, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); - } - - if (bits > 0) { - bits = 8 - bits; - Shift = - CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); - } - } else { - SDNode *NegShift = - CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, - ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); - - Shift = - CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT, - SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); - Shift = - CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(NegShift, 0)); - } - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(Shift, 0), getRC(MVT::i64)); -} - -/*! - Do the necessary magic necessary to load a i64 constant - */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT, - DebugLoc dl) { - ConstantSDNode *CN = cast(N); - return SelectI64Constant(CN->getZExtValue(), OpVT, dl); -} - -SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, - DebugLoc dl) { - EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2); - SDValue i64vec = - SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl); - - // Here's where it gets interesting, because we have to parse out the - // subtree handed back in i64vec: - - if (i64vec.getOpcode() == ISD::BITCAST) { - // The degenerate case where the upper and lower bits in the splat are - // identical: - SDValue Op0 = i64vec.getOperand(0); - - ReplaceUses(i64vec, Op0); - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, - SDValue(emitBuildVector(Op0.getNode()), 0), - getRC(MVT::i64)); - } else if (i64vec.getOpcode() == SPUISD::SHUFB) { - SDValue lhs = i64vec.getOperand(0); - SDValue rhs = i64vec.getOperand(1); - SDValue shufmask = i64vec.getOperand(2); - - if (lhs.getOpcode() == ISD::BITCAST) { - ReplaceUses(lhs, lhs.getOperand(0)); - lhs = lhs.getOperand(0); - } - - SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() - ? lhs.getNode() - : emitBuildVector(lhs.getNode())); - - if (rhs.getOpcode() == ISD::BITCAST) { - ReplaceUses(rhs, rhs.getOperand(0)); - rhs = rhs.getOperand(0); - } - - SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() - ? rhs.getNode() - : emitBuildVector(rhs.getNode())); - - if (shufmask.getOpcode() == ISD::BITCAST) { - ReplaceUses(shufmask, shufmask.getOperand(0)); - shufmask = shufmask.getOperand(0); - } - - SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() - ? shufmask.getNode() - : emitBuildVector(shufmask.getNode())); - - SDValue shufNode = - CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0)); - HandleSDNode Dummy(shufNode); - SDNode *SN = SelectCode(Dummy.getValue().getNode()); - if (SN == 0) SN = Dummy.getValue().getNode(); - - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, - OpVT, SDValue(SN, 0), getRC(MVT::i64)); - } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { - return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, - SDValue(emitBuildVector(i64vec.getNode()), 0), - getRC(MVT::i64)); - } else { - report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" - "condition"); - } -} - -/// createSPUISelDag - This pass converts a legalized DAG into a -/// SPU-specific DAG, ready for instruction scheduling. -/// -FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { - return new SPUDAGToDAGISel(TM); -} diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp deleted file mode 100644 index 31b87331a9..0000000000 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ /dev/null @@ -1,3267 +0,0 @@ -//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SPUTargetLowering class. -// -//===----------------------------------------------------------------------===// - -#include "SPUISelLowering.h" -#include "SPUTargetMachine.h" -#include "SPUFrameLowering.h" -#include "SPUMachineFunction.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/CallingConv.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - // Byte offset of the preferred slot (counted from the MSB) - int prefslotOffset(EVT VT) { - int retval=0; - if (VT==MVT::i1) retval=3; - if (VT==MVT::i8) retval=3; - if (VT==MVT::i16) retval=2; - - return retval; - } - - //! Expand a library call into an actual call DAG node - /*! - \note - This code is taken from SelectionDAGLegalize, since it is not exposed as - part of the LLVM SelectionDAG API. - */ - - SDValue - ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, - bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - EVT ArgVT = Op.getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Op.getOperand(i); - Entry.Ty = ArgTy; - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); - - // Splice the libcall in wherever FindInputOutputChains tells us to. - Type *RetTy = - Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, - false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotRet=*/false, - /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op.getDebugLoc()); - std::pair CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; - } -} - -SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()), - SPUTM(TM) { - - // Use _setjmp/_longjmp instead of setjmp/longjmp. - setUseUnderscoreSetJmp(true); - setUseUnderscoreLongJmp(true); - - // Set RTLIB libcall names as used by SPU: - setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); - - // Set up the SPU's register classes: - addRegisterClass(MVT::i8, &SPU::R8CRegClass); - addRegisterClass(MVT::i16, &SPU::R16CRegClass); - addRegisterClass(MVT::i32, &SPU::R32CRegClass); - addRegisterClass(MVT::i64, &SPU::R64CRegClass); - addRegisterClass(MVT::f32, &SPU::R32FPRegClass); - addRegisterClass(MVT::f64, &SPU::R64FPRegClass); - addRegisterClass(MVT::i128, &SPU::GPRCRegClass); - - // SPU has no sign or zero extended loads for i1, i8, i16: - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - - setTruncStoreAction(MVT::i128, MVT::i64, Expand); - setTruncStoreAction(MVT::i128, MVT::i32, Expand); - setTruncStoreAction(MVT::i128, MVT::i16, Expand); - setTruncStoreAction(MVT::i128, MVT::i8, Expand); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // SPU constant load actions are custom lowered: - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Custom); - - // SPU's loads and stores have to be custom lowered: - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, Custom); - setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, Custom); - - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { - MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; - setTruncStoreAction(VT, StoreVT, Expand); - } - } - - for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; - - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - - for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { - MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; - setTruncStoreAction(VT, StoreVT, Expand); - } - } - - // Expand the jumptable branches - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - - // Custom lower SELECT_CC for most cases, but expand by default - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - - // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - - // SPU has no division/remainder instructions - setOperationAction(ISD::SREM, MVT::i8, Expand); - setOperationAction(ISD::UREM, MVT::i8, Expand); - setOperationAction(ISD::SDIV, MVT::i8, Expand); - setOperationAction(ISD::UDIV, MVT::i8, Expand); - setOperationAction(ISD::SDIVREM, MVT::i8, Expand); - setOperationAction(ISD::UDIVREM, MVT::i8, Expand); - setOperationAction(ISD::SREM, MVT::i16, Expand); - setOperationAction(ISD::UREM, MVT::i16, Expand); - setOperationAction(ISD::SDIV, MVT::i16, Expand); - setOperationAction(ISD::UDIV, MVT::i16, Expand); - setOperationAction(ISD::SDIVREM, MVT::i16, Expand); - setOperationAction(ISD::UDIVREM, MVT::i16, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - setOperationAction(ISD::SDIV, MVT::i64, Expand); - setOperationAction(ISD::UDIV, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i128, Expand); - setOperationAction(ISD::UREM, MVT::i128, Expand); - setOperationAction(ISD::SDIV, MVT::i128, Expand); - setOperationAction(ISD::UDIV, MVT::i128, Expand); - setOperationAction(ISD::SDIVREM, MVT::i128, Expand); - setOperationAction(ISD::UDIVREM, MVT::i128, Expand); - - // We don't support sin/cos/sqrt/fmod - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); - - // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt - // for f32!) - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSQRT, MVT::f32, Expand); - - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); - - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - - // SPU can do rotate right and left, so legalize it... but customize for i8 - // because instructions don't exist. - - // FIXME: Change from "expand" to appropriate type once ROTR is supported in - // .td files. - setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); - setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); - setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); - - setOperationAction(ISD::ROTL, MVT::i32, Legal); - setOperationAction(ISD::ROTL, MVT::i16, Legal); - setOperationAction(ISD::ROTL, MVT::i8, Custom); - - // SPU has no native version of shift left/right for i8 - setOperationAction(ISD::SHL, MVT::i8, Custom); - setOperationAction(ISD::SRL, MVT::i8, Custom); - setOperationAction(ISD::SRA, MVT::i8, Custom); - - // Make these operations legal and handle them during instruction selection: - setOperationAction(ISD::SHL, MVT::i64, Legal); - setOperationAction(ISD::SRL, MVT::i64, Legal); - setOperationAction(ISD::SRA, MVT::i64, Legal); - - // Custom lower i8, i32 and i64 multiplications - setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i32, Legal); - setOperationAction(ISD::MUL, MVT::i64, Legal); - - // Expand double-width multiplication - // FIXME: It would probably be reasonable to support some of these operations - setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::MULHU, MVT::i8, Expand); - setOperationAction(ISD::MULHS, MVT::i8, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::MULHU, MVT::i16, Expand); - setOperationAction(ISD::MULHS, MVT::i16, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - - // Need to custom handle (some) common i8, i64 math ops - setOperationAction(ISD::ADD, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i64, Legal); - setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i64, Legal); - - // SPU does not have BSWAP. It does have i32 support CTLZ. - // CTPOP has to be custom lowered. - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i8, Custom); - setOperationAction(ISD::CTPOP, MVT::i16, Custom); - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - setOperationAction(ISD::CTPOP, MVT::i128, Expand); - - setOperationAction(ISD::CTTZ , MVT::i8, Expand); - setOperationAction(ISD::CTTZ , MVT::i16, Expand); - setOperationAction(ISD::CTTZ , MVT::i32, Expand); - setOperationAction(ISD::CTTZ , MVT::i64, Expand); - setOperationAction(ISD::CTTZ , MVT::i128, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand); - - setOperationAction(ISD::CTLZ , MVT::i8, Promote); - setOperationAction(ISD::CTLZ , MVT::i16, Promote); - setOperationAction(ISD::CTLZ , MVT::i32, Legal); - setOperationAction(ISD::CTLZ , MVT::i64, Expand); - setOperationAction(ISD::CTLZ , MVT::i128, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand); - - // SPU has a version of select that implements (a&~c)|(b&c), just like - // select ought to work: - setOperationAction(ISD::SELECT, MVT::i8, Legal); - setOperationAction(ISD::SELECT, MVT::i16, Legal); - setOperationAction(ISD::SELECT, MVT::i32, Legal); - setOperationAction(ISD::SELECT, MVT::i64, Legal); - - setOperationAction(ISD::SETCC, MVT::i8, Legal); - setOperationAction(ISD::SETCC, MVT::i16, Legal); - setOperationAction(ISD::SETCC, MVT::i32, Legal); - setOperationAction(ISD::SETCC, MVT::i64, Legal); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - - // Custom lower i128 -> i64 truncates - setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); - - // Custom lower i32/i64 -> i128 sign extend - setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); - - setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); - // SPU has a legal FP -> signed INT instruction for f32, but for f64, need - // to expand to a libcall, hence the custom lowering: - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); - - // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall - - // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - - setOperationAction(ISD::BITCAST, MVT::i32, Legal); - setOperationAction(ISD::BITCAST, MVT::f32, Legal); - setOperationAction(ISD::BITCAST, MVT::i64, Legal); - setOperationAction(ISD::BITCAST, MVT::f64, Legal); - - // We cannot sextinreg(i1). Expand to shifts. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // We want to legalize GlobalAddress and ConstantPool nodes into the - // appropriate instructions to materialize the address. - for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; - ++sctype) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; - - setOperationAction(ISD::GlobalAddress, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::JumpTable, VT, Custom); - } - - // VASTART needs to be custom lowered to use the VarArgsFrameIndex - setOperationAction(ISD::VASTART , MVT::Other, Custom); - - // Use the default implementation. - setOperationAction(ISD::VAARG , MVT::Other, Expand); - setOperationAction(ISD::VACOPY , MVT::Other, Expand); - setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); - - // Cell SPU has instructions for converting between i64 and fp. - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - - // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); - - // BUILD_PAIR can't be handled natively, and should be expanded to shl/or - setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); - - // First set operation action for all vector types to expand. Then we - // will selectively turn on ones that can be effectively codegen'd. - addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass); - addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass); - addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass); - addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass); - addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass); - addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass); - - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - - // Set operation actions to legal types only. - if (!isTypeLegal(VT)) continue; - - // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD, VT, Legal); - setOperationAction(ISD::SUB, VT, Legal); - // mul has to be custom lowered. - setOperationAction(ISD::MUL, VT, Legal); - - setOperationAction(ISD::AND, VT, Legal); - setOperationAction(ISD::OR, VT, Legal); - setOperationAction(ISD::XOR, VT, Legal); - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Custom); - - // These operations need to be expanded: - setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); - setOperationAction(ISD::UREM, VT, Expand); - setOperationAction(ISD::FFLOOR, VT, Expand); - - // Expand all trunc stores - for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { - MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j; - setTruncStoreAction(VT, TargetVT, Expand); - } - - // Custom lower build_vector, constant pool spills, insert and - // extract vector elements: - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::ConstantPool, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - } - - setOperationAction(ISD::SHL, MVT::v2i64, Expand); - - setOperationAction(ISD::AND, MVT::v16i8, Custom); - setOperationAction(ISD::OR, MVT::v16i8, Custom); - setOperationAction(ISD::XOR, MVT::v16i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); - - setOperationAction(ISD::FDIV, MVT::v4f32, Legal); - - setBooleanContents(ZeroOrNegativeOneBooleanContent); - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? - - setStackPointerRegisterToSaveRestore(SPU::R1); - - // We have target-specific dag combine patterns for the following nodes: - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::ANY_EXTEND); - - setMinFunctionAlignment(3); - - computeRegisterProperties(); - - // Set pre-RA register scheduler default to BURR, which produces slightly - // better code than the default (could also be TDRR, but TargetLowering.h - // needs a mod to support that model): - setSchedulingPreference(Sched::RegPressure); -} - -const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return 0; - case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG"; - case SPUISD::Hi: return "SPUISD::Hi"; - case SPUISD::Lo: return "SPUISD::Lo"; - case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr"; - case SPUISD::AFormAddr: return "SPUISD::AFormAddr"; - case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr"; - case SPUISD::LDRESULT: return "SPUISD::LDRESULT"; - case SPUISD::CALL: return "SPUISD::CALL"; - case SPUISD::SHUFB: return "SPUISD::SHUFB"; - case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK"; - case SPUISD::CNTB: return "SPUISD::CNTB"; - case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC"; - case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT"; - case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS"; - case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES"; - case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL"; - case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR"; - case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT"; - case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS"; - case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK"; - case SPUISD::SELB: return "SPUISD::SELB"; - case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER"; - case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER"; - case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER"; - } -} - -//===----------------------------------------------------------------------===// -// Return the Cell SPU's SETCC result type -//===----------------------------------------------------------------------===// - -EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { - // i8, i16 and i32 are valid SETCC result types - MVT::SimpleValueType retval; - - switch(VT.getSimpleVT().SimpleTy){ - case MVT::i1: - case MVT::i8: - retval = MVT::i8; break; - case MVT::i16: - retval = MVT::i16; break; - case MVT::i32: - default: - retval = MVT::i32; - } - return retval; -} - -//===----------------------------------------------------------------------===// -// Calling convention code: -//===----------------------------------------------------------------------===// - -#include "SPUGenCallingConv.inc" - -//===----------------------------------------------------------------------===// -// LowerOperation implementation -//===----------------------------------------------------------------------===// - -/// Custom lower loads for CellSPU -/*! - All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements - within a 16-byte block, we have to rotate to extract the requested element. - - For extending loads, we also want to ensure that the following sequence is - emitted, e.g. for MVT::f32 extending load to MVT::f64: - -\verbatim -%1 v16i8,ch = load -%2 v16i8,ch = rotate %1 -%3 v4f8, ch = bitconvert %2 -%4 f32 = vec2perfslot %3 -%5 f64 = fp_extend %4 -\endverbatim -*/ -static SDValue -LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - LoadSDNode *LN = cast(Op); - SDValue the_chain = LN->getChain(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - EVT InVT = LN->getMemoryVT(); - EVT OutVT = Op.getValueType(); - ISD::LoadExtType ExtType = LN->getExtensionType(); - unsigned alignment = LN->getAlignment(); - int pso = prefslotOffset(InVT); - DebugLoc dl = Op.getDebugLoc(); - EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT, - (128 / InVT.getSizeInBits())); - - // two sanity checks - assert( LN->getAddressingMode() == ISD::UNINDEXED - && "we should get only UNINDEXED adresses"); - // clean aligned loads can be selected as-is - if (InVT.getSizeInBits() == 128 && (alignment%16) == 0) - return SDValue(); - - // Get pointerinfos to the memory chunk(s) that contain the data to load - uint64_t mpi_offset = LN->getPointerInfo().Offset; - mpi_offset -= mpi_offset%16; - MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); - MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); - - SDValue result; - SDValue basePtr = LN->getBasePtr(); - SDValue rotate; - - if ((alignment%16) == 0) { - ConstantSDNode *CN; - - // Special cases for a known aligned load to simplify the base pointer - // and the rotation amount: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast (basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - int64_t rotamt = int64_t((offset & 0xf) - pso); - - if (rotamt < 0) - rotamt += 16; - - rotate = DAG.getConstant(rotamt, MVT::i16); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) - || (basePtr.getOpcode() == SPUISD::IndirectAddr - && basePtr.getOperand(0).getOpcode() == SPUISD::Hi - && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { - // Plain aligned a-form address: rotate into preferred slot - // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) - int64_t rotamt = -pso; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getConstant(rotamt, MVT::i16); - } else { - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - int64_t rotamt = -pso; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(rotamt, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa(Op1)) { - // Convert the (add , ) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add , ) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(-pso, PtrVT)); - } - - // Do the load as a i128 to allow possible shifting - SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, - lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), false, 16); - - // When the size is not greater than alignment we get all data with just - // one load - if (alignment >= InVT.getSizeInBits()/8) { - // Update the chain - the_chain = low.getValue(1); - - // Rotate into the preferred slot: - result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128, - low.getValue(0), rotate); - - // Convert the loaded v16i8 vector to the appropriate vector type - // specified by the operand: - EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - InVT, (128 / InVT.getSizeInBits())); - result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, - DAG.getNode(ISD::BITCAST, dl, vecVT, result)); - } - // When alignment is less than the size, we might need (known only at - // run-time) two loads - // TODO: if the memory address is composed only from constants, we have - // extra kowledge, and might avoid the second load - else { - // storage position offset from lower 16 byte aligned memory chunk - SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, - basePtr, DAG.getConstant( 0xf, MVT::i32 ) ); - // get a registerfull of ones. (this implementation is a workaround: LLVM - // cannot handle 128 bit signed int constants) - SDValue ones = DAG.getConstant(-1, MVT::v4i32 ); - ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); - - SDValue high = DAG.getLoad(MVT::i128, dl, the_chain, - DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(16, PtrVT)), - highMemPtr, - LN->isVolatile(), LN->isNonTemporal(), false, - 16); - - the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), - high.getValue(1)); - - // Shift the (possible) high part right to compensate the misalignemnt. - // if there is no highpart (i.e. value is i64 and offset is 4), this - // will zero out the high value. - high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, - DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - offset - )); - - // Shift the low similarly - // TODO: add SPUISD::SHL_BYTES - low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); - - // Merge the two parts - result = DAG.getNode(ISD::BITCAST, dl, vecVT, - DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); - - if (!InVT.isVector()) { - result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); - } - - } - // Handle extending loads by extending the scalar result: - if (ExtType == ISD::SEXTLOAD) { - result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); - } else if (ExtType == ISD::ZEXTLOAD) { - result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); - } else if (ExtType == ISD::EXTLOAD) { - unsigned NewOpc = ISD::ANY_EXTEND; - - if (OutVT.isFloatingPoint()) - NewOpc = ISD::FP_EXTEND; - - result = DAG.getNode(NewOpc, dl, OutVT, result); - } - - SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); - SDValue retops[2] = { - result, - the_chain - }; - - result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, - retops, sizeof(retops) / sizeof(retops[0])); - return result; -} - -/// Custom lower stores for CellSPU -/*! - All CellSPU stores are aligned to 16-byte boundaries, so for elements - within a 16-byte block, we have to generate a shuffle to insert the - requested element into its place, then store the resulting block. - */ -static SDValue -LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - StoreSDNode *SN = cast(Op); - SDValue Value = SN->getValue(); - EVT VT = Value.getValueType(); - EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); - unsigned alignment = SN->getAlignment(); - SDValue result; - EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT, - (128 / StVT.getSizeInBits())); - // Get pointerinfos to the memory chunk(s) that contain the data to load - uint64_t mpi_offset = SN->getPointerInfo().Offset; - mpi_offset -= mpi_offset%16; - MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); - MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); - - - // two sanity checks - assert( SN->getAddressingMode() == ISD::UNINDEXED - && "we should get only UNINDEXED adresses"); - // clean aligned loads can be selected as-is - if (StVT.getSizeInBits() == 128 && (alignment%16) == 0) - return SDValue(); - - SDValue alignLoadVec; - SDValue basePtr = SN->getBasePtr(); - SDValue the_chain = SN->getChain(); - SDValue insertEltOffs; - - if ((alignment%16) == 0) { - ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer - // and insertion byte: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast(basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & 0xf), PtrVT)); - - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else { - // Otherwise, assume it's at byte 0 of basePtr - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa(Op1)) { - // Convert the (add , ) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add , ) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Insertion point is solely determined by basePtr's contents - insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - - // Load the lower part of the memory to which to store. - SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, - lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), - false, 16); - - // if we don't need to store over the 16 byte boundary, one store suffices - if (alignment >= StVT.getSizeInBits()/8) { - // Update the chain - the_chain = low.getValue(1); - - LoadSDNode *LN = cast(low); - SDValue theValue = SN->getValue(); - - if (StVT != VT - && (theValue.getOpcode() == ISD::AssertZext - || theValue.getOpcode() == ISD::AssertSext)) { - // Drill down and get the value for zero- and sign-extended - // quantities - theValue = theValue.getOperand(0); - } - - // If the base pointer is already a D-form address, then just create - // a new D-form address with a slot offset and the orignal base pointer. - // Otherwise generate a D-form address with the slot offset relative - // to the stack pointer, which is always aligned. -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "CellSPU LowerSTORE: basePtr = "; - basePtr.getNode()->dump(&DAG); - errs() << "\n"; - } -#endif - - SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, - insertEltOffs); - SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, - theValue); - - result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, - vectorizeOp, low, - DAG.getNode(ISD::BITCAST, dl, - MVT::v4i32, insertEltOp)); - - result = DAG.getStore(the_chain, dl, result, basePtr, - lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), - 16); - - } - // do the store when it might cross the 16 byte memory access boundary. - else { - // TODO issue a warning if SN->isVolatile()== true? This is likely not - // what the user wanted. - - // address offset from nearest lower 16byte alinged address - SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, - SN->getBasePtr(), - DAG.getConstant(0xf, MVT::i32)); - // 16 - offset - SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - offset); - // 16 - sizeof(Value) - SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant( 16, MVT::i32), - DAG.getConstant( VT.getSizeInBits()/8, - MVT::i32)); - // get a registerfull of ones - SDValue ones = DAG.getConstant(-1, MVT::v4i32); - ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); - - // Create the 128 bit masks that have ones where the data to store is - // located. - SDValue lowmask, himask; - // if the value to store don't fill up the an entire 128 bits, zero - // out the last bits of the mask so that only the value we want to store - // is masked. - // this is e.g. in the case of store i32, align 2 - if (!VT.isVector()){ - Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value); - lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus); - lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, - surplus); - Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); - Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask); - - } - else { - lowmask = ones; - Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); - } - // this will zero, if there are no data that goes to the high quad - himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, - offset_compl); - lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask, - offset); - - // Load in the old data and zero out the parts that will be overwritten with - // the new data to store. - SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain, - DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, - DAG.getConstant( 16, PtrVT)), - highMemPtr, - SN->isVolatile(), SN->isNonTemporal(), - false, 16); - the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), - hi.getValue(1)); - - low = DAG.getNode(ISD::AND, dl, MVT::i128, - DAG.getNode( ISD::BITCAST, dl, MVT::i128, low), - DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones)); - hi = DAG.getNode(ISD::AND, dl, MVT::i128, - DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi), - DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones)); - - // Shift the Value to store into place. rlow contains the parts that go to - // the lower memory chunk, rhi has the parts that go to the upper one. - SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset); - rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask); - SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value, - offset_compl); - - // Merge the old data and the new data and store the results - // Need to convert vectors here to integer as 'OR'ing floats assert - rlow = DAG.getNode(ISD::OR, dl, MVT::i128, - DAG.getNode(ISD::BITCAST, dl, MVT::i128, low), - DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow)); - rhi = DAG.getNode(ISD::OR, dl, MVT::i128, - DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi), - DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi)); - - low = DAG.getStore(the_chain, dl, rlow, basePtr, - lowMemPtr, - SN->isVolatile(), SN->isNonTemporal(), 16); - hi = DAG.getStore(the_chain, dl, rhi, - DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, - DAG.getConstant( 16, PtrVT)), - highMemPtr, - SN->isVolatile(), SN->isNonTemporal(), 16); - result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0), - hi.getValue(0)); - } - - return result; -} - -//! Generate the address of a constant pool entry. -static SDValue -LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - EVT PtrVT = Op.getValueType(); - ConstantPoolSDNode *CP = cast(Op); - const Constant *C = CP->getConstVal(); - SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - SDValue Zero = DAG.getConstant(0, PtrVT); - const TargetMachine &TM = DAG.getTarget(); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - // Just return the SDValue with the constant pool address in it. - return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); - } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); - } - } - - llvm_unreachable("LowerConstantPool: Relocation model other than static" - " not supported."); -} - -//! Alternate entry point for generating the address of a constant pool entry -SDValue -SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { - return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); -} - -static SDValue -LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - SDValue Zero = DAG.getConstant(0, PtrVT); - const TargetMachine &TM = DAG.getTarget(); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); - } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); - return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); - } - } - - llvm_unreachable("LowerJumpTable: Relocation model other than static" - " not supported."); -} - -static SDValue -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - EVT PtrVT = Op.getValueType(); - GlobalAddressSDNode *GSDN = cast(Op); - const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), - PtrVT, GSDN->getOffset()); - const TargetMachine &TM = DAG.getTarget(); - SDValue Zero = DAG.getConstant(0, PtrVT); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (TM.getRelocationModel() == Reloc::Static) { - if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); - } else { - SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); - SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); - return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); - } - } else { - report_fatal_error("LowerGlobalAddress: Relocation model other than static" - "not supported."); - /*NOTREACHED*/ - } -} - -//! Custom lower double precision floating point constants -static SDValue -LowerConstantFP(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); - - if (VT == MVT::f64) { - ConstantFPSDNode *FP = cast(Op.getNode()); - - assert((FP != 0) && - "LowerConstantFP: Node is not ConstantFPSDNode"); - - uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); - SDValue T = DAG.getConstant(dbits, MVT::i64); - SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); - return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec)); - } - - return SDValue(); -} - -SDValue -SPUTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { - - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SPUFunctionInfo *FuncInfo = MF.getInfo(); - - unsigned ArgOffset = SPUFrameLowering::minStackSize(); - unsigned ArgRegIdx = 0; - unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - // FIXME: allow for other calling conventions - CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); - - // Add DAG nodes to load the arguments or copy them out of registers. - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { - EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; - SDValue ArgVal; - CCValAssign &VA = ArgLocs[ArgNo]; - - if (VA.isRegLoc()) { - const TargetRegisterClass *ArgRegClass; - - switch (ObjectVT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("LowerFormalArguments Unhandled argument type: " + - Twine(ObjectVT.getEVTString())); - case MVT::i8: - ArgRegClass = &SPU::R8CRegClass; - break; - case MVT::i16: - ArgRegClass = &SPU::R16CRegClass; - break; - case MVT::i32: - ArgRegClass = &SPU::R32CRegClass; - break; - case MVT::i64: - ArgRegClass = &SPU::R64CRegClass; - break; - case MVT::i128: - ArgRegClass = &SPU::GPRCRegClass; - break; - case MVT::f32: - ArgRegClass = &SPU::R32FPRegClass; - break; - case MVT::f64: - ArgRegClass = &SPU::R64FPRegClass; - break; - case MVT::v2f64: - case MVT::v4f32: - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - ArgRegClass = &SPU::VECREGRegClass; - break; - } - - unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); - ++ArgRegIdx; - } else { - // We need to load the argument to a virtual register if we determined - // above that we ran out of physical registers of the appropriate type - // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); - SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, false, 0); - ArgOffset += StackSlotSize; - } - - InVals.push_back(ArgVal); - // Update the chain - Chain = ArgVal.getOperand(0); - } - - // vararg handling: - if (isVarArg) { - // FIXME: we should be able to query the argument registers from - // tablegen generated code. - static const uint16_t ArgRegs[] = { - SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, - SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, - SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, - SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, - SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, - SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, - SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, - SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, - SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, - SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, - SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 - }; - // size of ArgRegs array - const unsigned NumArgRegs = 77; - - // We will spill (79-3)+1 registers to the stack - SmallVector MemOps; - - // Create the frame slot - for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - FuncInfo->setVarArgsFrameIndex( - MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); - SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); - unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass); - SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), - false, false, 0); - Chain = Store.getOperand(0); - MemOps.push_back(Store); - - // Increment address by stack slot size for the next stored argument - ArgOffset += StackSlotSize; - } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); - } - - return Chain; -} - -/// isLSAAddress - Return the immediate to use if the specified -/// value is representable as a LSA address. -static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { - ConstantSDNode *C = dyn_cast(Op); - if (!C) return 0; - - int Addr = C->getZExtValue(); - if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 14 >> 14) != Addr) - return 0; // Top 14 bits have to be sext of immediate. - - return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); -} - -SDValue -SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool isVarArg = CLI.IsVarArg; - - // CellSPU target does not yet support tail call optimization. - isTailCall = false; - - const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); - unsigned NumOps = Outs.size(); - unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); - - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - // FIXME: allow for other calling conventions - CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); - - const unsigned NumArgRegs = ArgLocs.size(); - - - // Handy pointer type - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - // Set up a copy of the stack pointer for use loading and storing any - // arguments that may not fit in the registers available for argument - // passing. - SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); - - // Figure out which arguments are going to go in registers, and which in - // memory. - unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR] - unsigned ArgRegIdx = 0; - - // Keep track of registers passing arguments - std::vector > RegsToPass; - // And the arguments passed on the stack - SmallVector MemOpChains; - - for (; ArgRegIdx != NumOps; ++ArgRegIdx) { - SDValue Arg = OutVals[ArgRegIdx]; - CCValAssign &VA = ArgLocs[ArgRegIdx]; - - // PtrOff will be used to store the current argument to the stack if a - // register cannot be found for it. - SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - - switch (Arg.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected ValueType for argument!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::i128: - case MVT::f32: - case MVT::f64: - case MVT::v2i64: - case MVT::v2f64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; - } - } - - // Accumulate how many bytes are to be pushed on the stack, including the - // linkage area, and parameter passing area. According to the SPU ABI, - // we minimally need space for [LR] and [SP]. - unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize(); - - // Insert a call sequence start - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, - true)); - - if (!MemOpChains.empty()) { - // Adjust the stack pointer for the stack arguments. - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); - } - - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into the appropriate regs. - SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - SmallVector Ops; - unsigned CallOpc = SPUISD::CALL; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - const GlobalValue *GV = G->getGlobal(); - EVT CalleeVT = Callee.getValueType(); - SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); - - if (!ST->usingLargeMem()) { - // Turn calls to targets that are defined (i.e., have bodies) into BRSL - // style calls, otherwise, external symbols are BRASL calls. This assumes - // that declared/defined symbols are in the same compilation unit and can - // be reached through PC-relative jumps. - // - // NOTE: - // This may be an unsafe assumption for JIT and really large compilation - // units. - if (GV->isDeclaration()) { - Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); - } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); - } - } else { - // "Large memory" mode: Turn all calls into indirect calls with a X-form - // address pairs: - Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); - } - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - EVT CalleeVT = Callee.getValueType(); - SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), - Callee.getValueType()); - - if (!ST->usingLargeMem()) { - Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); - } else { - Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); - } - } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { - // If this is an absolute destination address that appears to be a legal - // local store address, use the munged value. - Callee = SDValue(Dest, 0); - } - - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - if (InFlag.getNode()) - Ops.push_back(InFlag); - // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue), - &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - if (!Ins.empty()) - InFlag = Chain.getValue(1); - - // If the function returns void, just return the chain. - if (Ins.empty()) - return Chain; - - // Now handle the return value(s) - SmallVector RVLocs; - CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); - - - // If the call has results, copy the values out of the ret val registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign VA = RVLocs[i]; - - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), - InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); - InVals.push_back(Val); - } - - return Chain; -} - -SDValue -SPUTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { - - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC_SPU); - - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - - SDValue Flag; - - // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); - Flag = Chain.getValue(1); - } - - if (Flag.getNode()) - return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else - return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); -} - - -//===----------------------------------------------------------------------===// -// Vector related lowering: -//===----------------------------------------------------------------------===// - -static ConstantSDNode * -getVecImm(SDNode *N) { - SDValue OpVal(0, 0); - - // Check to see if this buildvec has a single non-undef value in its elements. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) - OpVal = N->getOperand(i); - else if (OpVal != N->getOperand(i)) - return 0; - } - - if (OpVal.getNode() != 0) { - if (ConstantSDNode *CN = dyn_cast(OpVal)) { - return CN; - } - } - - return 0; -} - -/// get_vec_i18imm - Test if this vector is a vector filled with the same value -/// and the value fits into an unsigned 18-bit constant, and if so, return the -/// constant -SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - uint64_t Value = CN->getZExtValue(); - if (ValueType == MVT::i64) { - uint64_t UValue = CN->getZExtValue(); - uint32_t upper = uint32_t(UValue >> 32); - uint32_t lower = uint32_t(UValue); - if (upper != lower) - return SDValue(); - Value = Value >> 32; - } - if (Value <= 0x3ffff) - return DAG.getTargetConstant(Value, ValueType); - } - - return SDValue(); -} - -/// get_vec_i16imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 16-bit constant, and if so, return the -/// constant -SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - int64_t Value = CN->getSExtValue(); - if (ValueType == MVT::i64) { - uint64_t UValue = CN->getZExtValue(); - uint32_t upper = uint32_t(UValue >> 32); - uint32_t lower = uint32_t(UValue); - if (upper != lower) - return SDValue(); - Value = Value >> 32; - } - if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { - return DAG.getTargetConstant(Value, ValueType); - } - } - - return SDValue(); -} - -/// get_vec_i10imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 10-bit constant, and if so, return the -/// constant -SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - int64_t Value = CN->getSExtValue(); - if (ValueType == MVT::i64) { - uint64_t UValue = CN->getZExtValue(); - uint32_t upper = uint32_t(UValue >> 32); - uint32_t lower = uint32_t(UValue); - if (upper != lower) - return SDValue(); - Value = Value >> 32; - } - if (isInt<10>(Value)) - return DAG.getTargetConstant(Value, ValueType); - } - - return SDValue(); -} - -/// get_vec_i8imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 8-bit constant, and if so, return the -/// constant. -/// -/// @note: The incoming vector is v16i8 because that's the only way we can load -/// constant vectors. Thus, we test to see if the upper and lower bytes are the -/// same value. -SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - int Value = (int) CN->getZExtValue(); - if (ValueType == MVT::i16 - && Value <= 0xffff /* truncated from uint64_t */ - && ((short) Value >> 8) == ((short) Value & 0xff)) - return DAG.getTargetConstant(Value & 0xff, ValueType); - else if (ValueType == MVT::i8 - && (Value & 0xff) == Value) - return DAG.getTargetConstant(Value, ValueType); - } - - return SDValue(); -} - -/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value -/// and the value fits into a signed 16-bit constant, and if so, return the -/// constant -SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType) { - if (ConstantSDNode *CN = getVecImm(N)) { - uint64_t Value = CN->getZExtValue(); - if ((ValueType == MVT::i32 - && ((unsigned) Value & 0xffff0000) == (unsigned) Value) - || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) - return DAG.getTargetConstant(Value >> 16, ValueType); - } - - return SDValue(); -} - -/// get_v4i32_imm - Catch-all for general 32-bit constant vectors -SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { - if (ConstantSDNode *CN = getVecImm(N)) { - return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); - } - - return SDValue(); -} - -/// get_v4i32_imm - Catch-all for general 64-bit constant vectors -SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { - if (ConstantSDNode *CN = getVecImm(N)) { - return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); - } - - return SDValue(); -} - -//! Lower a BUILD_VECTOR instruction creatively: -static SDValue -LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = Op.getDebugLoc(); - BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); - assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); - unsigned minSplatBits = EltVT.getSizeInBits(); - - if (minSplatBits < 16) - minSplatBits = 16; - - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, minSplatBits) - || minSplatBits < SplatBitSize) - return SDValue(); // Wasn't a constant vector or splat exceeded min - - uint64_t SplatBits = APSplatBits.getZExtValue(); - - switch (VT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + - Twine(VT.getEVTString())); - /*NOTREACHED*/ - case MVT::v4f32: { - uint32_t Value32 = uint32_t(SplatBits); - assert(SplatBitSize == 32 - && "LowerBUILD_VECTOR: Unexpected floating point vector element."); - // NOTE: pretend the constant is an integer. LLVM won't load FP constants - SDValue T = DAG.getConstant(Value32, MVT::i32); - return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); - } - case MVT::v2f64: { - uint64_t f64val = uint64_t(SplatBits); - assert(SplatBitSize == 64 - && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); - // NOTE: pretend the constant is an integer. LLVM won't load FP constants - SDValue T = DAG.getConstant(f64val, MVT::i64); - return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); - } - case MVT::v16i8: { - // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; - SmallVector Ops; - - Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); - return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); - } - case MVT::v8i16: { - unsigned short Value16 = SplatBits; - SDValue T = DAG.getConstant(Value16, EltVT); - SmallVector Ops; - - Ops.assign(8, T); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); - } - case MVT::v4i32: { - SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); - } - case MVT::v2i64: { - return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); - } - } -} - -/*! - */ -SDValue -SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, - DebugLoc dl) { - uint32_t upper = uint32_t(SplatVal >> 32); - uint32_t lower = uint32_t(SplatVal); - - if (upper == lower) { - // Magic constant that can be matched by IL, ILA, et. al. - SDValue Val = DAG.getTargetConstant(upper, MVT::i32); - return DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Val, Val, Val, Val)); - } else { - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. - - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); - lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); - - // Both upper and lower are special, lower to a constant pool load: - if (lower_special && upper_special) { - SDValue UpperVal = DAG.getConstant(upper, MVT::i32); - SDValue LowerVal = DAG.getConstant(lower, MVT::i32); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - UpperVal, LowerVal, UpperVal, LowerVal); - return DAG.getNode(ISD::BITCAST, dl, OpVT, BV); - } - - SDValue LO32; - SDValue HI32; - SmallVector ShufBytes; - SDValue Result; - - // Create lower vector if not a special pattern - if (!lower_special) { - SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } - - // Create upper vector if not a special pattern - if (!upper_special) { - SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } - - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - - for (int i = 0; i < 4; ++i) { - uint64_t val = 0; - for (int j = 0; j < 4; ++j) { - SDValue V; - bool process_upper, process_lower; - val <<= 8; - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val |= 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val |= 0xc0; - else if ((process_upper && upper == 0x80000000) - || (process_lower && lower == 0x80000000)) - val |= (j == 0 ? 0xe0 : 0x80); - } else - val |= i * 4 + j + ((i & 1) * 16); - } - - ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); - } - - return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufBytes[0], ShufBytes.size())); - } -} - -/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on -/// which the Cell can operate. The code inspects V3 to ascertain whether the -/// permutation vector, V3, is monotonically increasing with one "exception" -/// element, e.g., (0, 1, _, 3). If this is the case, then generate a -/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. -/// In either case, the net result is going to eventually invoke SHUFB to -/// permute/shuffle the bytes from V1 and V2. -/// \note -/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate -/// control word for byte/halfword/word insertion. This takes care of a single -/// element move from V2 into V1. -/// \note -/// SPUISD::SHUFB is eventually selected as Cell's shufb instructions. -static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - const ShuffleVectorSDNode *SVN = cast(Op); - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - if (V2.getOpcode() == ISD::UNDEF) V2 = V1; - - // If we have a single element being moved from V1 to V2, this can be handled - // using the C*[DX] compute mask instructions, but the vector elements have - // to be monotonically increasing with one exception element, and the source - // slot of the element to move must be the same as the destination. - EVT VecVT = V1.getValueType(); - EVT EltVT = VecVT.getVectorElementType(); - unsigned EltsFromV2 = 0; - unsigned V2EltOffset = 0; - unsigned V2EltIdx0 = 0; - unsigned CurrElt = 0; - unsigned MaxElts = VecVT.getVectorNumElements(); - unsigned PrevElt = 0; - bool monotonic = true; - bool rotate = true; - int rotamt=0; - EVT maskVT; // which of the c?d instructions to use - - if (EltVT == MVT::i8) { - V2EltIdx0 = 16; - maskVT = MVT::v16i8; - } else if (EltVT == MVT::i16) { - V2EltIdx0 = 8; - maskVT = MVT::v8i16; - } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { - V2EltIdx0 = 4; - maskVT = MVT::v4i32; - } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { - V2EltIdx0 = 2; - maskVT = MVT::v2i64; - } else - llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); - - for (unsigned i = 0; i != MaxElts; ++i) { - if (SVN->getMaskElt(i) < 0) - continue; - - unsigned SrcElt = SVN->getMaskElt(i); - - if (monotonic) { - if (SrcElt >= V2EltIdx0) { - // TODO: optimize for the monotonic case when several consecutive - // elements are taken form V2. Do we ever get such a case? - if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) - V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); - else - monotonic = false; - ++EltsFromV2; - } else if (CurrElt != SrcElt) { - monotonic = false; - } - - ++CurrElt; - } - - if (rotate) { - if (PrevElt > 0 && SrcElt < MaxElts) { - if ((PrevElt == SrcElt - 1) - || (PrevElt == MaxElts - 1 && SrcElt == 0)) { - PrevElt = SrcElt; - } else { - rotate = false; - } - } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { - // First time or after a "wrap around" - rotamt = SrcElt-i; - PrevElt = SrcElt; - } else { - // This isn't a rotation, takes elements from vector 2 - rotate = false; - } - } - } - - if (EltsFromV2 == 1 && monotonic) { - // Compute mask and shuffle - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - // As SHUFFLE_MASK becomes a c?d instruction, feed it an address - // R1 ($sp) is used here only as it is guaranteed to have last bits zero - SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(V2EltOffset, MVT::i32)); - SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, - maskVT, Pointer); - - // Use shuffle mask in SHUFB synthetic instruction: - return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, - ShufMaskOp); - } else if (rotate) { - if (rotamt < 0) - rotamt +=MaxElts; - rotamt *= EltVT.getSizeInBits()/8; - return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), - V1, DAG.getConstant(rotamt, MVT::i16)); - } else { - // Convert the SHUFFLE_VECTOR mask's input element units to the - // actual bytes. - unsigned BytesPerElement = EltVT.getSizeInBits()/8; - - SmallVector ResultMask; - for (unsigned i = 0, e = MaxElts; i != e; ++i) { - unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); - - for (unsigned j = 0; j < BytesPerElement; ++j) - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); - } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, - &ResultMask[0], ResultMask.size()); - return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); - } -} - -static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { - SDValue Op0 = Op.getOperand(0); // Op0 = the scalar - DebugLoc dl = Op.getDebugLoc(); - - if (Op0.getNode()->getOpcode() == ISD::Constant) { - // For a constant, build the appropriate constant vector, which will - // eventually simplify to a vector register load. - - ConstantSDNode *CN = cast(Op0.getNode()); - SmallVector ConstVecValues; - EVT VT; - size_t n_copies; - - // Create a constant vector: - switch (Op.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected constant value type in " - "LowerSCALAR_TO_VECTOR"); - case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; - case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; - case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; - case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; - case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; - case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; - } - - SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); - for (size_t j = 0; j < n_copies; ++j) - ConstVecValues.push_back(CValue); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), - &ConstVecValues[0], ConstVecValues.size()); - } else { - // Otherwise, copy the value from one register to another: - switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::f32: - case MVT::f64: - return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); - } - } -} - -static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - SDValue N = Op.getOperand(0); - SDValue Elt = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - SDValue retval; - - if (ConstantSDNode *C = dyn_cast(Elt)) { - // Constant argument: - int EltNo = (int) C->getZExtValue(); - - // sanity checks: - if (VT == MVT::i8 && EltNo >= 16) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); - else if (VT == MVT::i16 && EltNo >= 8) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); - else if (VT == MVT::i32 && EltNo >= 4) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); - else if (VT == MVT::i64 && EltNo >= 2) - llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); - - if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { - // i32 and i64: Element 0 is the preferred slot - return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); - } - - // Need to generate shuffle mask and extract: - int prefslot_begin = -1, prefslot_end = -1; - int elt_byte = EltNo * VT.getSizeInBits() / 8; - - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: { - prefslot_begin = prefslot_end = 3; - break; - } - case MVT::i16: { - prefslot_begin = 2; prefslot_end = 3; - break; - } - case MVT::i32: - case MVT::f32: { - prefslot_begin = 0; prefslot_end = 3; - break; - } - case MVT::i64: - case MVT::f64: { - prefslot_begin = 0; prefslot_end = 7; - break; - } - } - - assert(prefslot_begin != -1 && prefslot_end != -1 && - "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); - - unsigned int ShufBytes[16] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - for (int i = 0; i < 16; ++i) { - // zero fill uppper part of preferred slot, don't care about the - // other slots: - unsigned int mask_val; - if (i <= prefslot_end) { - mask_val = - ((i < prefslot_begin) - ? 0x80 - : elt_byte + (i - prefslot_begin)); - - ShufBytes[i] = mask_val; - } else - ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; - } - - SDValue ShufMask[4]; - for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { - unsigned bidx = i * 4; - unsigned int bits = ((ShufBytes[bidx] << 24) | - (ShufBytes[bidx+1] << 16) | - (ShufBytes[bidx+2] << 8) | - ShufBytes[bidx+3]); - ShufMask[i] = DAG.getConstant(bits, MVT::i32); - } - - SDValue ShufMaskVec = - DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); - - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), - N, N, ShufMaskVec)); - } else { - // Variable index: Rotate the requested element into slot 0, then replicate - // slot 0 across the vector - EVT VecVT = N.getValueType(); - if (!VecVT.isSimple() || !VecVT.isVector()) { - report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" - "vector type!"); - } - - // Make life easier by making sure the index is zero-extended to i32 - if (Elt.getValueType() != MVT::i32) - Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); - - // Scale the index to a bit/byte shift quantity - APInt scaleFactor = - APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); - unsigned scaleShift = scaleFactor.logBase2(); - SDValue vecShift; - - if (scaleShift > 0) { - // Scale the shift factor: - Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, - DAG.getConstant(scaleShift, MVT::i32)); - } - - vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); - - // Replicate the bytes starting at byte 0 across the entire vector (for - // consistency with the notion of a unified register set) - SDValue replicate; - - switch (VT.getSimpleVT().SimpleTy) { - default: - report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" - "type"); - /*NOTREACHED*/ - case MVT::i8: { - SDValue factor = DAG.getConstant(0x00000000, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - factor, factor, factor, factor); - break; - } - case MVT::i16: { - SDValue factor = DAG.getConstant(0x00010001, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - factor, factor, factor, factor); - break; - } - case MVT::i32: - case MVT::f32: { - SDValue factor = DAG.getConstant(0x00010203, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - factor, factor, factor, factor); - break; - } - case MVT::i64: - case MVT::f64: { - SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); - SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); - replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - loFactor, hiFactor, loFactor, hiFactor); - break; - } - } - - retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(SPUISD::SHUFB, dl, VecVT, - vecShift, vecShift, replicate)); - } - - return retval; -} - -static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - SDValue VecOp = Op.getOperand(0); - SDValue ValOp = Op.getOperand(1); - SDValue IdxOp = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - EVT eltVT = ValOp.getValueType(); - - // use 0 when the lane to insert to is 'undef' - int64_t Offset=0; - if (IdxOp.getOpcode() != ISD::UNDEF) { - ConstantSDNode *CN = cast(IdxOp); - assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; - } - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Use $sp ($1) because it's always 16-byte aligned and it's available: - SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(Offset, PtrVT)); - // widen the mask when dealing with half vectors - EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), - 128/ VT.getVectorElementType().getSizeInBits()); - SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); - - SDValue result = - DAG.getNode(SPUISD::SHUFB, dl, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), - VecOp, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); - - return result; -} - -static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, - const TargetLowering &TLI) -{ - SDValue N0 = Op.getOperand(0); // Everything has at least one operand - DebugLoc dl = Op.getDebugLoc(); - EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); - - assert(Op.getValueType() == MVT::i8); - switch (Opc) { - default: - llvm_unreachable("Unhandled i8 math operator"); - case ISD::ADD: { - // 8-bit addition: Promote the arguments up to 16-bits and truncate - // the result: - SDValue N1 = Op.getOperand(1); - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - - } - - case ISD::SUB: { - // 8-bit subtraction: Promote the arguments up to 16-bits and truncate - // the result: - SDValue N1 = Op.getOperand(1); - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - case ISD::ROTR: - case ISD::ROTL: { - SDValue N1 = Op.getOperand(1); - EVT N1VT = N1.getValueType(); - - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); - if (!N1VT.bitsEq(ShiftVT)) { - unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) - ? ISD::ZERO_EXTEND - : ISD::TRUNCATE; - N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); - } - - // Replicate lower 8-bits into upper 8: - SDValue ExpandArg = - DAG.getNode(ISD::OR, dl, MVT::i16, N0, - DAG.getNode(ISD::SHL, dl, MVT::i16, - N0, DAG.getConstant(8, MVT::i32))); - - // Truncate back down to i8 - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); - } - case ISD::SRL: - case ISD::SHL: { - SDValue N1 = Op.getOperand(1); - EVT N1VT = N1.getValueType(); - - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); - if (!N1VT.bitsEq(ShiftVT)) { - unsigned N1Opc = ISD::ZERO_EXTEND; - - if (N1.getValueType().bitsGT(ShiftVT)) - N1Opc = ISD::TRUNCATE; - - N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); - } - - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - case ISD::SRA: { - SDValue N1 = Op.getOperand(1); - EVT N1VT = N1.getValueType(); - - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - if (!N1VT.bitsEq(ShiftVT)) { - unsigned N1Opc = ISD::SIGN_EXTEND; - - if (N1VT.bitsGT(ShiftVT)) - N1Opc = ISD::TRUNCATE; - N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); - } - - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - case ISD::MUL: { - SDValue N1 = Op.getOperand(1); - - N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); - N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, - DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - } - } -} - -//! Lower byte immediate operations for v16i8 vectors: -static SDValue -LowerByteImmed(SDValue Op, SelectionDAG &DAG) { - SDValue ConstVec; - SDValue Arg; - EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - - ConstVec = Op.getOperand(0); - Arg = Op.getOperand(1); - if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { - if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { - ConstVec = ConstVec.getOperand(0); - } else { - ConstVec = Op.getOperand(1); - Arg = Op.getOperand(0); - if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { - ConstVec = ConstVec.getOperand(0); - } - } - } - - if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { - BuildVectorSDNode *BCN = dyn_cast(ConstVec.getNode()); - assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); - - APInt APSplatBits, APSplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); - - if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, minSplatBits) - && minSplatBits <= SplatBitSize) { - uint64_t SplatBits = APSplatBits.getZExtValue(); - SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - - SmallVector tcVec; - tcVec.assign(16, tc); - return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); - } - } - - // These operations (AND, OR, XOR) are legal, they just couldn't be custom - // lowered. Return the operation, rather than a null SDValue. - return Op; -} - -//! Custom lowering for CTPOP (count population) -/*! - Custom lowering code that counts the number ones in the input - operand. SPU has such an instruction, but it counts the number of - ones per byte, which then have to be accumulated. -*/ -static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - VT, (128 / VT.getSizeInBits())); - DebugLoc dl = Op.getDebugLoc(); - - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid value type!"); - case MVT::i8: { - SDValue N = Op.getOperand(0); - SDValue Elt0 = DAG.getConstant(0, MVT::i32); - - SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); - } - - case MVT::i16: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); - - SDValue N = Op.getOperand(0); - SDValue Elt0 = DAG.getConstant(0, MVT::i16); - SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); - SDValue Shift1 = DAG.getConstant(8, MVT::i32); - - SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - - // CNTB_result becomes the chain to which all of the virtual registers - // CNTB_reg, SUM1_reg become associated: - SDValue CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); - - SDValue CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - - SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); - - return DAG.getNode(ISD::AND, dl, MVT::i16, - DAG.getNode(ISD::ADD, dl, MVT::i16, - DAG.getNode(ISD::SRL, dl, MVT::i16, - Tmp1, Shift1), - Tmp1), - Mask0); - } - - case MVT::i32: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - - SDValue N = Op.getOperand(0); - SDValue Elt0 = DAG.getConstant(0, MVT::i32); - SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); - SDValue Shift1 = DAG.getConstant(16, MVT::i32); - SDValue Shift2 = DAG.getConstant(8, MVT::i32); - - SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); - SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - - // CNTB_result becomes the chain to which all of the virtual registers - // CNTB_reg, SUM1_reg become associated: - SDValue CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); - - SDValue CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - - SDValue Comp1 = - DAG.getNode(ISD::SRL, dl, MVT::i32, - DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), - Shift1); - - SDValue Sum1 = - DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, - DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); - - SDValue Sum1_rescopy = - DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); - - SDValue Comp2 = - DAG.getNode(ISD::SRL, dl, MVT::i32, - DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), - Shift2); - SDValue Sum2 = - DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, - DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); - - return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); - } - - case MVT::i64: - break; - } - - return SDValue(); -} - -//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 -/*! - f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. - All conversions to i64 are expanded to a libcall. - */ -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - const SPUTargetLowering &TLI) { - EVT OpVT = Op.getValueType(); - SDValue Op0 = Op.getOperand(0); - EVT Op0VT = Op0.getValueType(); - - if ((OpVT == MVT::i32 && Op0VT == MVT::f64) - || OpVT == MVT::i64) { - // Convert f32 / f64 to i32 / i64 via libcall. - RTLIB::Libcall LC = - (Op.getOpcode() == ISD::FP_TO_SINT) - ? RTLIB::getFPTOSINT(Op0VT, OpVT) - : RTLIB::getFPTOUINT(Op0VT, OpVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); - SDValue Dummy; - return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); - } - - return Op; -} - -//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 -/*! - i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. - All conversions from i64 are expanded to a libcall. - */ -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - const SPUTargetLowering &TLI) { - EVT OpVT = Op.getValueType(); - SDValue Op0 = Op.getOperand(0); - EVT Op0VT = Op0.getValueType(); - - if ((OpVT == MVT::f64 && Op0VT == MVT::i32) - || Op0VT == MVT::i64) { - // Convert i32, i64 to f64 via libcall: - RTLIB::Libcall LC = - (Op.getOpcode() == ISD::SINT_TO_FP) - ? RTLIB::getSINTTOFP(Op0VT, OpVT) - : RTLIB::getUINTTOFP(Op0VT, OpVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); - SDValue Dummy; - return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); - } - - return Op; -} - -//! Lower ISD::SETCC -/*! - This handles MVT::f64 (double floating point) condition lowering - */ -static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { - CondCodeSDNode *CC = dyn_cast(Op.getOperand(2)); - DebugLoc dl = Op.getDebugLoc(); - assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); - - SDValue lhs = Op.getOperand(0); - SDValue rhs = Op.getOperand(1); - EVT lhsVT = lhs.getValueType(); - assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); - - EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); - APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); - EVT IntVT(MVT::i64); - - // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently - // selected to a NOP: - SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); - SDValue lhsHi32 = - DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::SRL, dl, IntVT, - i64lhs, DAG.getConstant(32, MVT::i32))); - SDValue lhsHi32abs = - DAG.getNode(ISD::AND, dl, MVT::i32, - lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); - SDValue lhsLo32 = - DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); - - // SETO and SETUO only use the lhs operand: - if (CC->get() == ISD::SETO) { - // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of - // SETUO - APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, ccResultVT, - DAG.getSetCC(dl, ccResultVT, - lhs, DAG.getConstantFP(0.0, lhsVT), - ISD::SETUO), - DAG.getConstant(ccResultAllOnes, ccResultVT)); - } else if (CC->get() == ISD::SETUO) { - // Evaluates to true if Op0 is [SQ]NaN - return DAG.getNode(ISD::AND, dl, ccResultVT, - DAG.getSetCC(dl, ccResultVT, - lhsHi32abs, - DAG.getConstant(0x7ff00000, MVT::i32), - ISD::SETGE), - DAG.getSetCC(dl, ccResultVT, - lhsLo32, - DAG.getConstant(0, MVT::i32), - ISD::SETGT)); - } - - SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); - SDValue rhsHi32 = - DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, - DAG.getNode(ISD::SRL, dl, IntVT, - i64rhs, DAG.getConstant(32, MVT::i32))); - - // If a value is negative, subtract from the sign magnitude constant: - SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); - - // Convert the sign-magnitude representation into 2's complement: - SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, - lhsHi32, DAG.getConstant(31, MVT::i32)); - SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); - SDValue lhsSelect = - DAG.getNode(ISD::SELECT, dl, IntVT, - lhsSelectMask, lhsSignMag2TC, i64lhs); - - SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, - rhsHi32, DAG.getConstant(31, MVT::i32)); - SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); - SDValue rhsSelect = - DAG.getNode(ISD::SELECT, dl, IntVT, - rhsSelectMask, rhsSignMag2TC, i64rhs); - - unsigned compareOp; - - switch (CC->get()) { - case ISD::SETOEQ: - case ISD::SETUEQ: - compareOp = ISD::SETEQ; break; - case ISD::SETOGT: - case ISD::SETUGT: - compareOp = ISD::SETGT; break; - case ISD::SETOGE: - case ISD::SETUGE: - compareOp = ISD::SETGE; break; - case ISD::SETOLT: - case ISD::SETULT: - compareOp = ISD::SETLT; break; - case ISD::SETOLE: - case ISD::SETULE: - compareOp = ISD::SETLE; break; - case ISD::SETUNE: - case ISD::SETONE: - compareOp = ISD::SETNE; break; - default: - report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); - } - - SDValue result = - DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, - (ISD::CondCode) compareOp); - - if ((CC->get() & 0x8) == 0) { - // Ordered comparison: - SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, - lhs, DAG.getConstantFP(0.0, MVT::f64), - ISD::SETO); - SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, - rhs, DAG.getConstantFP(0.0, MVT::f64), - ISD::SETO); - SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); - - result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); - } - - return result; -} - -//! Lower ISD::SELECT_CC -/*! - ISD::SELECT_CC can (generally) be implemented directly on the SPU using the - SELB instruction. - - \note Need to revisit this in the future: if the code path through the true - and false value computations is longer than the latency of a branch (6 - cycles), then it would be more advantageous to branch and insert a new basic - block and branch on the condition. However, this code does not make that - assumption, given the simplisitc uses so far. - */ - -static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { - EVT VT = Op.getValueType(); - SDValue lhs = Op.getOperand(0); - SDValue rhs = Op.getOperand(1); - SDValue trueval = Op.getOperand(2); - SDValue falseval = Op.getOperand(3); - SDValue condition = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); - - // NOTE: SELB's arguments: $rA, $rB, $mask - // - // SELB selects bits from $rA where bits in $mask are 0, bits from $rB - // where bits in $mask are 1. CCond will be inverted, having 1s where the - // condition was true and 0s where the condition was false. Hence, the - // arguments to SELB get reversed. - - // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's - // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up - // with another "cannot select select_cc" assert: - - SDValue compare = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(Op.getValueType()), - lhs, rhs, condition); - return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); -} - -//! Custom lower ISD::TRUNCATE -static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) -{ - // Type to truncate to - EVT VT = Op.getValueType(); - MVT simpleVT = VT.getSimpleVT(); - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), - VT, (128 / VT.getSizeInBits())); - DebugLoc dl = Op.getDebugLoc(); - - // Type to truncate from - SDValue Op0 = Op.getOperand(0); - EVT Op0VT = Op0.getValueType(); - - if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { - // Create shuffle mask, least significant doubleword of quadword - unsigned maskHigh = 0x08090a0b; - unsigned maskLow = 0x0c0d0e0f; - // Use a shuffle to perform the truncation - SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - DAG.getConstant(maskHigh, MVT::i32), - DAG.getConstant(maskLow, MVT::i32), - DAG.getConstant(maskHigh, MVT::i32), - DAG.getConstant(maskLow, MVT::i32)); - - SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, - Op0, Op0, shufMask); - - return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); - } - - return SDValue(); // Leave the truncate unmolested -} - -/*! - * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic - * algorithm is to duplicate the sign bit using rotmai to generate at - * least one byte full of sign bits. Then propagate the "sign-byte" into - * the leftmost words and the i64/i32 into the rightmost words using shufb. - * - * @param Op The sext operand - * @param DAG The current DAG - * @return The SDValue with the entire instruction sequence - */ -static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) -{ - DebugLoc dl = Op.getDebugLoc(); - - // Type to extend to - MVT OpVT = Op.getValueType().getSimpleVT(); - - // Type to extend from - SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType().getSimpleVT(); - - // extend i8 & i16 via i32 - if (Op0VT == MVT::i8 || Op0VT == MVT::i16) { - Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0); - Op0VT = MVT::i32; - } - - // The type to extend to needs to be a i128 and - // the type to extend from needs to be i64 or i32. - assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && - "LowerSIGN_EXTEND: input and/or output operand have wrong size"); - (void)OpVT; - - // Create shuffle mask - unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 - unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 - unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 - SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - DAG.getConstant(mask1, MVT::i32), - DAG.getConstant(mask1, MVT::i32), - DAG.getConstant(mask2, MVT::i32), - DAG.getConstant(mask3, MVT::i32)); - - // Word wise arithmetic right shift to generate at least one byte - // that contains sign bits. - MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; - SDValue sraVal = DAG.getNode(ISD::SRA, - dl, - mvt, - DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), - DAG.getConstant(31, MVT::i32)); - - // reinterpret as a i128 (SHUFB requires it). This gets lowered away. - SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - dl, Op0VT, Op0, - DAG.getTargetConstant( - SPU::GPRCRegClass.getID(), - MVT::i32)), 0); - // Shuffle bytes - Copy the sign bits into the upper 64 bits - // and the input value into the lower 64 bits. - SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, - extended, sraVal, shufMask); - return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle); -} - -//! Custom (target-specific) lowering entry point -/*! - This is where LLVM's DAG selection process calls to do target-specific - lowering of nodes. - */ -SDValue -SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const -{ - unsigned Opc = (unsigned) Op.getOpcode(); - EVT VT = Op.getValueType(); - - switch (Opc) { - default: { -#ifndef NDEBUG - errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; - errs() << "Op.getOpcode() = " << Opc << "\n"; - errs() << "*Op.getNode():\n"; - Op.getNode()->dump(); -#endif - llvm_unreachable(0); - } - case ISD::LOAD: - case ISD::EXTLOAD: - case ISD::SEXTLOAD: - case ISD::ZEXTLOAD: - return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::STORE: - return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::ConstantPool: - return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::JumpTable: - return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::ConstantFP: - return LowerConstantFP(Op, DAG); - - // i8, i64 math ops: - case ISD::ADD: - case ISD::SUB: - case ISD::ROTR: - case ISD::ROTL: - case ISD::SRL: - case ISD::SHL: - case ISD::SRA: { - if (VT == MVT::i8) - return LowerI8Math(Op, DAG, Opc, *this); - break; - } - - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - return LowerFP_TO_INT(Op, DAG, *this); - - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return LowerINT_TO_FP(Op, DAG, *this); - - // Vector-related lowering. - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG); - case ISD::SCALAR_TO_VECTOR: - return LowerSCALAR_TO_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: - return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: - return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return LowerINSERT_VECTOR_ELT(Op, DAG); - - // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - return LowerByteImmed(Op, DAG); - - // Vector and i8 multiply: - case ISD::MUL: - if (VT == MVT::i8) - return LowerI8Math(Op, DAG, Opc, *this); - - case ISD::CTPOP: - return LowerCTPOP(Op, DAG); - - case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG, *this); - - case ISD::SETCC: - return LowerSETCC(Op, DAG, *this); - - case ISD::TRUNCATE: - return LowerTRUNCATE(Op, DAG); - - case ISD::SIGN_EXTEND: - return LowerSIGN_EXTEND(Op, DAG); - } - - return SDValue(); -} - -void SPUTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl&Results, - SelectionDAG &DAG) const -{ -#if 0 - unsigned Opc = (unsigned) N->getOpcode(); - EVT OpVT = N->getValueType(0); - - switch (Opc) { - default: { - errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; - errs() << "Op.getOpcode() = " << Opc << "\n"; - errs() << "*Op.getNode():\n"; - N->dump(); - abort(); - /*NOTREACHED*/ - } - } -#endif - - /* Otherwise, return unchanged */ -} - -//===----------------------------------------------------------------------===// -// Target Optimization Hooks -//===----------------------------------------------------------------------===// - -SDValue -SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const -{ -#if 0 - TargetMachine &TM = getTargetMachine(); -#endif - const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); - SelectionDAG &DAG = DCI.DAG; - SDValue Op0 = N->getOperand(0); // everything has at least one operand - EVT NodeVT = N->getValueType(0); // The node's value type - EVT Op0VT = Op0.getValueType(); // The first operand's result - SDValue Result; // Initially, empty result - DebugLoc dl = N->getDebugLoc(); - - switch (N->getOpcode()) { - default: break; - case ISD::ADD: { - SDValue Op1 = N->getOperand(1); - - if (Op0.getOpcode() == SPUISD::IndirectAddr - || Op1.getOpcode() == SPUISD::IndirectAddr) { - // Normalize the operands to reduce repeated code - SDValue IndirectArg = Op0, AddArg = Op1; - - if (Op1.getOpcode() == SPUISD::IndirectAddr) { - IndirectArg = Op1; - AddArg = Op0; - } - - if (isa(AddArg)) { - ConstantSDNode *CN0 = cast (AddArg); - SDValue IndOp1 = IndirectArg.getOperand(1); - - if (CN0->isNullValue()) { - // (add (SPUindirect , ), 0) -> - // (SPUindirect , ) - -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\n" - << "Replace: (add (SPUindirect , ), 0)\n" - << "With: (SPUindirect , )\n"; - } -#endif - - return IndirectArg; - } else if (isa(IndOp1)) { - // (add (SPUindirect , ), ) -> - // (SPUindirect , ) - ConstantSDNode *CN1 = cast (IndOp1); - int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); - SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); - -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\n" - << "Replace: (add (SPUindirect , " << CN1->getSExtValue() - << "), " << CN0->getSExtValue() << ")\n" - << "With: (SPUindirect , " - << combinedConst << ")\n"; - } -#endif - - return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, - IndirectArg, combinedValue); - } - } - } - break; - } - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: { - if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { - // (any_extend (SPUextract_elt0 )) -> - // (SPUextract_elt0 ) - // Types must match, however... -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\nReplace: "; - N->dump(&DAG); - errs() << "\nWith: "; - Op0.getNode()->dump(&DAG); - errs() << "\n"; - } -#endif - - return Op0; - } - break; - } - case SPUISD::IndirectAddr: { - if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { - ConstantSDNode *CN = dyn_cast(N->getOperand(1)); - if (CN != 0 && CN->isNullValue()) { - // (SPUindirect (SPUaform , 0), 0) -> - // (SPUaform , 0) - - DEBUG(errs() << "Replace: "); - DEBUG(N->dump(&DAG)); - DEBUG(errs() << "\nWith: "); - DEBUG(Op0.getNode()->dump(&DAG)); - DEBUG(errs() << "\n"); - - return Op0; - } - } else if (Op0.getOpcode() == ISD::ADD) { - SDValue Op1 = N->getOperand(1); - if (ConstantSDNode *CN1 = dyn_cast(Op1)) { - // (SPUindirect (add , ), 0) -> - // (SPUindirect , ) - if (CN1->isNullValue()) { - -#if !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - errs() << "\n" - << "Replace: (SPUindirect (add , ), 0)\n" - << "With: (SPUindirect , )\n"; - } -#endif - - return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, - Op0.getOperand(0), Op0.getOperand(1)); - } - } - } - break; - } - case SPUISD::SHL_BITS: - case SPUISD::SHL_BYTES: - case SPUISD::ROTBYTES_LEFT: { - SDValue Op1 = N->getOperand(1); - - // Kill degenerate vector shifts: - if (ConstantSDNode *CN = dyn_cast(Op1)) { - if (CN->isNullValue()) { - Result = Op0; - } - } - break; - } - case SPUISD::PREFSLOT2VEC: { - switch (Op0.getOpcode()) { - default: - break; - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: { - // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot ))) -> - // - // but only if the SPUprefslot2vec and types match. - SDValue Op00 = Op0.getOperand(0); - if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { - SDValue Op000 = Op00.getOperand(0); - if (Op000.getValueType() == NodeVT) { - Result = Op000; - } - } - break; - } - case SPUISD::VEC2PREFSLOT: { - // (SPUprefslot2vec (SPUvec2prefslot )) -> - // - Result = Op0.getOperand(0); - break; - } - } - break; - } - } - - // Otherwise, return unchanged. -#ifndef NDEBUG - if (Result.getNode()) { - DEBUG(errs() << "\nReplace.SPU: "); - DEBUG(N->dump(&DAG)); - DEBUG(errs() << "\nWith: "); - DEBUG(Result.getNode()->dump(&DAG)); - DEBUG(errs() << "\n"); - } -#endif - - return Result; -} - -//===----------------------------------------------------------------------===// -// Inline Assembly Support -//===----------------------------------------------------------------------===// - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -SPUTargetLowering::ConstraintType -SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { - if (ConstraintLetter.size() == 1) { - switch (ConstraintLetter[0]) { - default: break; - case 'b': - case 'r': - case 'f': - case 'v': - case 'y': - return C_RegisterClass; - } - } - return TargetLowering::getConstraintType(ConstraintLetter); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -SPUTargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - // Look at the constraint type. - switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - //FIXME: Seems like the supported constraint letters were just copied - // from PPC, as the following doesn't correspond to the GCC docs. - // I'm leaving it so until someone adds the corresponding lowering support. - case 'b': - case 'r': - case 'f': - case 'd': - case 'v': - case 'y': - weight = CW_Register; - break; - } - return weight; -} - -std::pair -SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const -{ - if (Constraint.size() == 1) { - // GCC RS6000 Constraint Letters - switch (Constraint[0]) { - case 'b': // R1-R31 - case 'r': // R0-R31 - if (VT == MVT::i64) - return std::make_pair(0U, &SPU::R64CRegClass); - return std::make_pair(0U, &SPU::R32CRegClass); - case 'f': - if (VT == MVT::f32) - return std::make_pair(0U, &SPU::R32FPRegClass); - if (VT == MVT::f64) - return std::make_pair(0U, &SPU::R64FPRegClass); - break; - case 'v': - return std::make_pair(0U, &SPU::GPRCRegClass); - } - } - - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -//! Compute used/known bits for a SPU operand -void -SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth ) const { -#if 0 - const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; - - switch (Op.getOpcode()) { - default: - // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); - break; - case CALL: - case SHUFB: - case SHUFFLE_MASK: - case CNTB: - case SPUISD::PREFSLOT2VEC: - case SPUISD::LDRESULT: - case SPUISD::VEC2PREFSLOT: - case SPUISD::SHLQUAD_L_BITS: - case SPUISD::SHLQUAD_L_BYTES: - case SPUISD::VEC_ROTL: - case SPUISD::VEC_ROTR: - case SPUISD::ROTBYTES_LEFT: - case SPUISD::SELECT_MASK: - case SPUISD::SELB: - } -#endif -} - -unsigned -SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, - unsigned Depth) const { - switch (Op.getOpcode()) { - default: - return 1; - - case ISD::SETCC: { - EVT VT = Op.getValueType(); - - if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { - VT = MVT::i32; - } - return VT.getSizeInBits(); - } - } -} - -// LowerAsmOperandForConstraint -void -SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const { - // Default, for the time being, to the base class handler - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -/// isLegalAddressImmediate - Return true if the integer value can be used -/// as the offset of the target addressing mode. -bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, - Type *Ty) const { - // SPU's addresses are 256K: - return (V > -(1 << 18) && V < (1 << 18) - 1); -} - -bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { - return false; -} - -bool -SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The SPU target isn't yet aware of offsets. - return false; -} - -// can we compare to Imm without writing it into a register? -bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { - //ceqi, cgti, etc. all take s10 operand - return isInt<10>(Imm); -} - -bool -SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type * ) const{ - - // A-form: 18bit absolute address. - if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) - return true; - - // D-form: reg + 14bit offset - if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs)) - return true; - - // X-form: reg+reg - if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0) - return true; - - return false; -} diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h deleted file mode 100644 index 9f1599fa6f..0000000000 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ /dev/null @@ -1,178 +0,0 @@ -//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that Cell SPU uses to lower LLVM code into -// a selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_ISELLOWERING_H -#define SPU_ISELLOWERING_H - -#include "SPU.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/CodeGen/SelectionDAG.h" - -namespace llvm { - namespace SPUISD { - enum NodeType { - // Start the numbering where the builting ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // Pseudo instructions: - RET_FLAG, ///< Return with flag, matched by bi instruction - - Hi, ///< High address component (upper 16) - Lo, ///< Low address component (lower 16) - PCRelAddr, ///< Program counter relative address - AFormAddr, ///< A-form address (local store) - IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)" - - LDRESULT, ///< Load result (value, chain) - CALL, ///< CALL instruction - SHUFB, ///< Vector shuffle (permute) - SHUFFLE_MASK, ///< Shuffle mask - CNTB, ///< Count leading ones in bytes - PREFSLOT2VEC, ///< Promote scalar->vector - VEC2PREFSLOT, ///< Extract element 0 - SHL_BITS, ///< Shift quad left, by bits - SHL_BYTES, ///< Shift quad left, by bytes - SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros. - VEC_ROTL, ///< Vector rotate left - VEC_ROTR, ///< Vector rotate right - ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI) - ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count - SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) - SELB, ///< Select bits -> (b & mask) | (a & ~mask) - // Markers: These aren't used to generate target-dependent nodes, but - // are used during instruction selection. - ADD64_MARKER, ///< i64 addition marker - SUB64_MARKER, ///< i64 subtraction marker - MUL64_MARKER, ///< i64 multiply marker - LAST_SPUISD ///< Last user-defined instruction - }; - } - - //! Utility functions specific to CellSPU: - namespace SPU { - SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - EVT ValueType); - SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG); - SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG); - - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, - const SPUTargetMachine &TM); - //! Simplify a EVT::v2i64 constant splat to CellSPU-ready form - SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat, - DebugLoc dl); - } - - class SPUTargetMachine; // forward dec'l. - - class SPUTargetLowering : - public TargetLowering - { - SPUTargetMachine &SPUTM; - - public: - //! The venerable constructor - /*! - This is where the CellSPU backend sets operation handling (i.e., legal, - custom, expand or promote.) - */ - SPUTargetLowering(SPUTargetMachine &TM); - - //! Get the target machine - SPUTargetMachine &getSPUTargetMachine() { - return SPUTM; - } - - /// getTargetNodeName() - This method returns the name of a target specific - /// DAG node. - virtual const char *getTargetNodeName(unsigned Opcode) const; - - /// getSetCCResultType - Return the ValueType for ISD::SETCC - virtual EVT getSetCCResultType(EVT VT) const; - - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } - - //! Custom lowering hooks - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - - //! Custom lowering hook for nodes with illegal result types. - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, - SelectionDAG &DAG) const; - - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; - - virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, - unsigned Depth = 0) const; - - ConstraintType getConstraintType(const std::string &ConstraintLetter) const; - - /// Examine constraint string and operand type and determine a weight value. - /// The operand object must already have been set up with the operand type. - ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; - - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - - void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const; - - /// isLegalAddressImmediate - Return true if the integer value can be used - /// as the offset of the target addressing mode. - virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; - virtual bool isLegalAddressImmediate(GlobalValue *) const; - - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; - - virtual bool isLegalICmpImmediate(int64_t Imm) const; - - virtual bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const; - }; -} - -#endif diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h deleted file mode 100644 index b495537fc2..0000000000 --- a/lib/Target/CellSPU/SPUInstrBuilder.h +++ /dev/null @@ -1,43 +0,0 @@ -//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file exposes functions that may be used with BuildMI from the -// MachineInstrBuilder.h file to simplify generating frame and constant pool -// references. -// -// For reference, the order of operands for memory references is: -// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate -// Displacement. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_INSTRBUILDER_H -#define SPU_INSTRBUILDER_H - -#include "llvm/CodeGen/MachineInstrBuilder.h" - -namespace llvm { - -/// addFrameReference - This function is used to add a reference to the base of -/// an abstract object on the stack frame of the current function. This -/// reference has base register as the FrameIndex offset until it is resolved. -/// This allows a constant offset to be specified as well... -/// -inline const MachineInstrBuilder& -addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, - bool mem = true) { - if (mem) - return MIB.addImm(Offset).addFrameIndex(FI); - else - return MIB.addFrameIndex(FI).addImm(Offset); -} - -} // End llvm namespace - -#endif diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td deleted file mode 100644 index cd3f422143..0000000000 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ /dev/null @@ -1,320 +0,0 @@ -//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// -// Cell SPU instruction formats. Note that these are notationally similar to -// PowerPC, like "A-Form". But the sizes of operands and fields differ. - -// This was kiped from the PPC instruction formats (seemed like a good idea...) - -class SPUInstr - : Instruction { - field bits<32> Inst; - - let Namespace = "SPU"; - let OutOperandList = OOL; - let InOperandList = IOL; - let AsmString = asmstr; - let Itinerary = itin; -} - -// RR Format -class RRForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr { - bits<7> RA; - bits<7> RB; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-10} = opcode; - let Inst{11-17} = RB; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -let RB = 0 in { - // RR Format, where RB is zeroed (dont care): - class RRForm_1 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } - - let RA = 0 in { - // RR Format, where RA and RB are zeroed (dont care): - // Used for reads from status control registers (see FPSCRRr32) - class RRForm_2 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } - } -} - -let RT = 0 in { - // RR Format, where RT is zeroed (don't care), or as the instruction handbook - // says, "RT is a false target." Used in "Halt if" instructions - class RRForm_3 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } -} - -// RRR Format -class RRRForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<7> RA; - bits<7> RB; - bits<7> RC; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-3} = opcode; - let Inst{4-10} = RT; - let Inst{11-17} = RB; - let Inst{18-24} = RA; - let Inst{25-31} = RC; -} - -// RI7 Format -class RI7Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<7> i7; - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-10} = opcode; - let Inst{11-17} = i7; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -// CVTIntFp Format -class CVTIntFPForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-9} = opcode; - let Inst{10-17} = 0; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -let RA = 0 in { - class BICondForm opcode, dag OOL, dag IOL, string asmstr, list pattern> - : RRForm - { } - - let RT = 0 in { - // Branch instruction format (without D/E flag settings) - class BRForm opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RRForm - { } - - class BIForm opcode, string asmstr, list pattern> - : RRForm - { } - - let RB = 0 in { - // Return instruction (bi, branch indirect), RA is zero (LR): - class RETForm pattern> - : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv, - pattern> - { } - } - } -} - -// Branch indirect external data forms: -class BISLEDForm DE_flag, string asmstr, list pattern> - : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv> -{ - bits<7> Rcalldest; - - let Pattern = pattern; - - let Inst{0-10} = 0b11010101100; - let Inst{11} = 0; - let Inst{12-13} = DE_flag; - let Inst{14-17} = 0b0000; - let Inst{18-24} = Rcalldest; - let Inst{25-31} = 0b0000000; -} - -// RI10 Format -class RI10Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<10> i10; - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-7} = opcode; - let Inst{8-17} = i10; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -// RI10 Format, where the constant is zero (or effectively ignored by the -// SPU) -let i10 = 0 in { - class RI10Form_1 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RI10Form - { } -} - -// RI10 Format, where RT is ignored. -// This format is used primarily by the Halt If ... Immediate set of -// instructions -let RT = 0 in { - class RI10Form_2 opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : RI10Form - { } -} - -// RI16 Format -class RI16Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<16> i16; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-8} = opcode; - let Inst{9-24} = i16; - let Inst{25-31} = RT; -} - -// Specialized version of the RI16 Format for unconditional branch relative and -// branch absolute, branch and set link. Note that for branch and set link, the -// link register doesn't have to be $lr, but this is actually hard coded into -// the instruction pattern. - -let RT = 0 in { - class UncondBranch opcode, dag OOL, dag IOL, string asmstr, - list pattern> - : RI16Form - { } - - class BranchSetLink opcode, dag OOL, dag IOL, string asmstr, - list pattern> - : RI16Form - { } -} - -//===----------------------------------------------------------------------===// -// Specialized versions of RI16: -//===----------------------------------------------------------------------===// - -// RI18 Format -class RI18Form opcode, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list pattern> - : SPUInstr -{ - bits<18> i18; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-6} = opcode; - let Inst{7-24} = i18; - let Inst{25-31} = RT; -} - -//===----------------------------------------------------------------------===// -// Instruction formats for intrinsics: -//===----------------------------------------------------------------------===// - -// RI10 Format for v8i16 intrinsics -class RI10_Int_v8i16 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RI10Form; - -class RI10_Int_v4i32 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RI10Form; - -// RR Format for v8i16 intrinsics -class RR_Int_v8i16 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RRForm; - -// RR Format for v4i32 intrinsics -class RR_Int_v4i32 opcode, string opc, InstrItinClass itin, - Intrinsic IntID> : - RRForm; - -//===----------------------------------------------------------------------===// -// Pseudo instructions, like call frames: -//===----------------------------------------------------------------------===// - -class Pseudo pattern> - : SPUInstr { - let OutOperandList = OOL; - let InOperandList = IOL; - let AsmString = asmstr; - let Pattern = pattern; - let Inst{31-0} = 0; -} - -//===----------------------------------------------------------------------===// -// Branch hint formats -//===----------------------------------------------------------------------===// -// For hbrr and hbra -class HBI16Form opcode, dag IOL, string asmstr> - : Instruction { - field bits<32> Inst; - bits<16>i16; - bits<9>RO; - - let Namespace = "SPU"; - let InOperandList = IOL; - let OutOperandList = (outs); //no output - let AsmString = asmstr; - let Itinerary = BranchHints; - - let Inst{0-6} = opcode; - let Inst{7-8} = RO{8-7}; - let Inst{9-24} = i16; - let Inst{25-31} = RO{6-0}; -} diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp deleted file mode 100644 index b25a6397ec..0000000000 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ /dev/null @@ -1,449 +0,0 @@ -//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Cell SPU implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "SPUInstrInfo.h" -#include "SPUInstrBuilder.h" -#include "SPUTargetMachine.h" -#include "SPUHazardRecognizers.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -#define GET_INSTRINFO_CTOR -#include "SPUGenInstrInfo.inc" - -using namespace llvm; - -namespace { - //! Predicate for an unconditional branch instruction - inline bool isUncondBranch(const MachineInstr *I) { - unsigned opc = I->getOpcode(); - - return (opc == SPU::BR - || opc == SPU::BRA - || opc == SPU::BI); - } - - //! Predicate for a conditional branch instruction - inline bool isCondBranch(const MachineInstr *I) { - unsigned opc = I->getOpcode(); - - return (opc == SPU::BRNZr32 - || opc == SPU::BRNZv4i32 - || opc == SPU::BRZr32 - || opc == SPU::BRZv4i32 - || opc == SPU::BRHNZr16 - || opc == SPU::BRHNZv8i16 - || opc == SPU::BRHZr16 - || opc == SPU::BRHZv8i16); - } -} - -SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) - : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), - TM(tm), - RI(*TM.getSubtargetImpl(), *this) -{ /* NOP */ } - -/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for -/// this target when scheduling the DAG. -ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer( - const TargetMachine *TM, - const ScheduleDAG *DAG) const { - const TargetInstrInfo *TII = TM->getInstrInfo(); - assert(TII && "No InstrInfo?"); - return new SPUHazardRecognizer(*TII); -} - -unsigned -SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case SPU::LQDv16i8: - case SPU::LQDv8i16: - case SPU::LQDv4i32: - case SPU::LQDv4f32: - case SPU::LQDv2f64: - case SPU::LQDr128: - case SPU::LQDr64: - case SPU::LQDr32: - case SPU::LQDr16: { - const MachineOperand MOp1 = MI->getOperand(1); - const MachineOperand MOp2 = MI->getOperand(2); - if (MOp1.isImm() && MOp2.isFI()) { - FrameIndex = MOp2.getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - } - return 0; -} - -unsigned -SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case SPU::STQDv16i8: - case SPU::STQDv8i16: - case SPU::STQDv4i32: - case SPU::STQDv4f32: - case SPU::STQDv2f64: - case SPU::STQDr128: - case SPU::STQDr64: - case SPU::STQDr32: - case SPU::STQDr16: - case SPU::STQDr8: { - const MachineOperand MOp1 = MI->getOperand(1); - const MachineOperand MOp2 = MI->getOperand(2); - if (MOp1.isImm() && MOp2.isFI()) { - FrameIndex = MOp2.getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - } - return 0; -} - -void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const -{ - // We support cross register class moves for our aliases, such as R3 in any - // reg class to any other reg class containing R3. This is required because - // we instruction select bitconvert i64 -> f64 as a noop for example, so our - // types have no specific meaning. - - BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); -} - -void -SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - unsigned opc; - bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); - if (RC == &SPU::GPRCRegClass) - opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128; - else if (RC == &SPU::R64CRegClass) - opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64; - else if (RC == &SPU::R64FPRegClass) - opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64; - else if (RC == &SPU::R32CRegClass) - opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32; - else if (RC == &SPU::R32FPRegClass) - opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32; - else if (RC == &SPU::R16CRegClass) - opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16; - else if (RC == &SPU::R8CRegClass) - opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8; - else if (RC == &SPU::VECREGRegClass) - opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8; - else - llvm_unreachable("Unknown regclass!"); - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - addFrameReference(BuildMI(MBB, MI, DL, get(opc)) - .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); -} - -void -SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - unsigned opc; - bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); - if (RC == &SPU::GPRCRegClass) - opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128; - else if (RC == &SPU::R64CRegClass) - opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64; - else if (RC == &SPU::R64FPRegClass) - opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64; - else if (RC == &SPU::R32CRegClass) - opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32; - else if (RC == &SPU::R32FPRegClass) - opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32; - else if (RC == &SPU::R16CRegClass) - opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16; - else if (RC == &SPU::R8CRegClass) - opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8; - else if (RC == &SPU::VECREGRegClass) - opc = isValidFrameIdx ? SPU::LQDv16i8 : SPU::LQXv16i8; - else - llvm_unreachable("Unknown regclass in loadRegFromStackSlot!"); - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); -} - -//! Branch analysis -/*! - \note This code was kiped from PPC. There may be more branch analysis for - CellSPU than what's currently done here. - */ -bool -SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranch(LastInst)) { - // Check for jump tables - if (!LastInst->getOperand(0).isMBB()) - return true; - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else if (isCondBranch(LastInst)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(1).getMBB(); - DEBUG(errs() << "Pushing LastInst: "); - DEBUG(LastInst->dump()); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - return false; - } - // Otherwise, don't know what this is. - return true; - } - - // Get the instruction before it if it's a terminator. - MachineInstr *SecondLastInst = I; - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a conditional and unconditional branch, handle it. - if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { - TBB = SecondLastInst->getOperand(1).getMBB(); - DEBUG(errs() << "Pushing SecondLastInst: "); - DEBUG(SecondLastInst->dump()); - Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); - Cond.push_back(SecondLastInst->getOperand(0)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // Otherwise, can't handle this. - return true; -} - -// search MBB for branch hint labels and branch hit ops -static void removeHBR( MachineBasicBlock &MBB) { - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){ - if (I->getOpcode() == SPU::HBRA || - I->getOpcode() == SPU::HBR_LABEL){ - I=MBB.erase(I); - if (I == MBB.end()) - break; - } - } -} - -unsigned -SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - removeHBR(MBB); - if (I == MBB.begin()) - return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (!isCondBranch(I) && !isUncondBranch(I)) - return 0; - - // Remove the first branch. - DEBUG(errs() << "Removing branch: "); - DEBUG(I->dump()); - I->eraseFromParent(); - I = MBB.end(); - if (I == MBB.begin()) - return 1; - - --I; - if (!(isCondBranch(I) || isUncondBranch(I))) - return 1; - - // Remove the second branch. - DEBUG(errs() << "Removing second branch: "); - DEBUG(I->dump()); - I->eraseFromParent(); - return 2; -} - -/** Find the optimal position for a hint branch instruction in a basic block. - * This should take into account: - * -the branch hint delays - * -congestion of the memory bus - * -dual-issue scheduling (i.e. avoid insertion of nops) - * Current implementation is rather simplistic. - */ -static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB) -{ - MachineBasicBlock::iterator J = MBB.end(); - for( int i=0; i<8; i++) { - if( J == MBB.begin() ) return J; - J--; - } - return J; -} - -unsigned -SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && - "SPU branch conditions have two components!"); - - MachineInstrBuilder MIB; - //TODO: make a more accurate algorithm. - bool haveHBR = MBB.size()>8; - - removeHBR(MBB); - MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol(); - // Add a label just before the branch - if (haveHBR) - MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel); - - // One-way branch. - if (FBB == 0) { - if (Cond.empty()) { - // Unconditional branch - MIB = BuildMI(&MBB, DL, get(SPU::BR)); - MIB.addMBB(TBB); - - DEBUG(errs() << "Inserted one-way uncond branch: "); - DEBUG((*MIB).dump()); - - // basic blocks have just one branch so it is safe to add the hint a its - if (haveHBR) { - MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); - MIB.addSym(branchLabel); - MIB.addMBB(TBB); - } - } else { - // Conditional branch - MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - MIB.addReg(Cond[1].getReg()).addMBB(TBB); - - if (haveHBR) { - MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); - MIB.addSym(branchLabel); - MIB.addMBB(TBB); - } - - DEBUG(errs() << "Inserted one-way cond branch: "); - DEBUG((*MIB).dump()); - } - return 1; - } else { - MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); - MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); - - // Two-way Conditional Branch. - MIB.addReg(Cond[1].getReg()).addMBB(TBB); - MIB2.addMBB(FBB); - - if (haveHBR) { - MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); - MIB.addSym(branchLabel); - MIB.addMBB(FBB); - } - - DEBUG(errs() << "Inserted conditional branch: "); - DEBUG((*MIB).dump()); - DEBUG(errs() << "part 2: "); - DEBUG((*MIB2).dump()); - return 2; - } -} - -//! Reverses a branch's condition, returning false on success. -bool -SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl &Cond) - const { - // Pretty brainless way of inverting the condition, but it works, considering - // there are only two conditions... - static struct { - unsigned Opc; //! The incoming opcode - unsigned RevCondOpc; //! The reversed condition opcode - } revconds[] = { - { SPU::BRNZr32, SPU::BRZr32 }, - { SPU::BRNZv4i32, SPU::BRZv4i32 }, - { SPU::BRZr32, SPU::BRNZr32 }, - { SPU::BRZv4i32, SPU::BRNZv4i32 }, - { SPU::BRHNZr16, SPU::BRHZr16 }, - { SPU::BRHNZv8i16, SPU::BRHZv8i16 }, - { SPU::BRHZr16, SPU::BRHNZr16 }, - { SPU::BRHZv8i16, SPU::BRHNZv8i16 } - }; - - unsigned Opc = unsigned(Cond[0].getImm()); - // Pretty dull mapping between the two conditions that SPU can generate: - for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) { - if (revconds[i].Opc == Opc) { - Cond[0].setImm(revconds[i].RevCondOpc); - return false; - } - } - - return true; -} diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h deleted file mode 100644 index 85e5821aef..0000000000 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ /dev/null @@ -1,84 +0,0 @@ -//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the CellSPU implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_INSTRUCTIONINFO_H -#define SPU_INSTRUCTIONINFO_H - -#include "SPU.h" -#include "SPURegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "SPUGenInstrInfo.inc" - -namespace llvm { - //! Cell SPU instruction information class - class SPUInstrInfo : public SPUGenInstrInfo { - SPUTargetMachine &TM; - const SPURegisterInfo RI; - public: - explicit SPUInstrInfo(SPUTargetMachine &tm); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } - - ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const; - - unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - //! Store a register to a stack slot, based on its register class. - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - //! Load a register from a stack slot, based on its register class. - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - //! Reverses a branch's condition, returning false on success. - virtual - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - }; -} - -#endif diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td deleted file mode 100644 index 117acd736a..0000000000 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ /dev/null @@ -1,4484 +0,0 @@ -//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cell SPU Instructions: -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TODO Items (not urgent today, but would be nice, low priority) -// -// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by -// concatenating the byte argument b as "bbbb". Could recognize this bit pattern -// in 16-bit and 32-bit constants and reduce instruction count. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Pseudo instructions: -//===----------------------------------------------------------------------===// - -let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { - def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt), - "${:comment} ADJCALLSTACKDOWN", - [(callseq_start timm:$amt)]>; - def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt), - "${:comment} ADJCALLSTACKUP", - [(callseq_end timm:$amt)]>; - def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ), - "$targ:\t${:comment}branch hint target",[ ]>; -} - -//===----------------------------------------------------------------------===// -// Loads: -// NB: The ordering is actually important, since the instruction selection -// will try each of the instructions in sequence, i.e., the D-form first with -// the 10-bit displacement, then the A-form with the 16 bit displacement, and -// finally the X-form with the register-register. -//===----------------------------------------------------------------------===// - -let canFoldAsLoad = 1 in { - class LoadDFormVec - : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src), - "lqd\t$rT, $src", - LoadStore, - [(set (vectype VECREG:$rT), (load dform_addr:$src))]> - { } - - class LoadDForm - : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src), - "lqd\t$rT, $src", - LoadStore, - [(set rclass:$rT, (load dform_addr:$src))]> - { } - - multiclass LoadDForms - { - def v16i8: LoadDFormVec; - def v8i16: LoadDFormVec; - def v4i32: LoadDFormVec; - def v2i64: LoadDFormVec; - def v4f32: LoadDFormVec; - def v2f64: LoadDFormVec; - - def r128: LoadDForm; - def r64: LoadDForm; - def r32: LoadDForm; - def f32: LoadDForm; - def f64: LoadDForm; - def r16: LoadDForm; - def r8: LoadDForm; - } - - class LoadAFormVec - : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", - LoadStore, - [(set (vectype VECREG:$rT), (load aform_addr:$src))]> - { } - - class LoadAForm - : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src), - "lqa\t$rT, $src", - LoadStore, - [(set rclass:$rT, (load aform_addr:$src))]> - { } - - multiclass LoadAForms - { - def v16i8: LoadAFormVec; - def v8i16: LoadAFormVec; - def v4i32: LoadAFormVec; - def v2i64: LoadAFormVec; - def v4f32: LoadAFormVec; - def v2f64: LoadAFormVec; - - def r128: LoadAForm; - def r64: LoadAForm; - def r32: LoadAForm; - def f32: LoadAForm; - def f64: LoadAForm; - def r16: LoadAForm; - def r8: LoadAForm; - } - - class LoadXFormVec - : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), - "lqx\t$rT, $src", - LoadStore, - [(set (vectype VECREG:$rT), (load xform_addr:$src))]> - { } - - class LoadXForm - : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src), - "lqx\t$rT, $src", - LoadStore, - [(set rclass:$rT, (load xform_addr:$src))]> - { } - - multiclass LoadXForms - { - def v16i8: LoadXFormVec; - def v8i16: LoadXFormVec; - def v4i32: LoadXFormVec; - def v2i64: LoadXFormVec; - def v4f32: LoadXFormVec; - def v2f64: LoadXFormVec; - - def r128: LoadXForm; - def r64: LoadXForm; - def r32: LoadXForm; - def f32: LoadXForm; - def f64: LoadXForm; - def r16: LoadXForm; - def r8: LoadXForm; - } - - defm LQA : LoadAForms; - defm LQD : LoadDForms; - defm LQX : LoadXForms; - -/* Load quadword, PC relative: Not much use at this point in time. - Might be of use later for relocatable code. It's effectively the - same as LQA, but uses PC-relative addressing. - def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), - "lqr\t$rT, $disp", LoadStore, - [(set VECREG:$rT, (load iaddr:$disp))]>; - */ -} - -//===----------------------------------------------------------------------===// -// Stores: -//===----------------------------------------------------------------------===// -class StoreDFormVec - : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src), - "stqd\t$rT, $src", - LoadStore, - [(store (vectype VECREG:$rT), dform_addr:$src)]> -{ } - -class StoreDForm - : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src), - "stqd\t$rT, $src", - LoadStore, - [(store rclass:$rT, dform_addr:$src)]> -{ } - -multiclass StoreDForms -{ - def v16i8: StoreDFormVec; - def v8i16: StoreDFormVec; - def v4i32: StoreDFormVec; - def v2i64: StoreDFormVec; - def v4f32: StoreDFormVec; - def v2f64: StoreDFormVec; - - def r128: StoreDForm; - def r64: StoreDForm; - def r32: StoreDForm; - def f32: StoreDForm; - def f64: StoreDForm; - def r16: StoreDForm; - def r8: StoreDForm; -} - -class StoreAFormVec - : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src), - "stqa\t$rT, $src", - LoadStore, - [(store (vectype VECREG:$rT), aform_addr:$src)]>; - -class StoreAForm - : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src), - "stqa\t$rT, $src", - LoadStore, - [(store rclass:$rT, aform_addr:$src)]>; - -multiclass StoreAForms -{ - def v16i8: StoreAFormVec; - def v8i16: StoreAFormVec; - def v4i32: StoreAFormVec; - def v2i64: StoreAFormVec; - def v4f32: StoreAFormVec; - def v2f64: StoreAFormVec; - - def r128: StoreAForm; - def r64: StoreAForm; - def r32: StoreAForm; - def f32: StoreAForm; - def f64: StoreAForm; - def r16: StoreAForm; - def r8: StoreAForm; -} - -class StoreXFormVec - : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), - "stqx\t$rT, $src", - LoadStore, - [(store (vectype VECREG:$rT), xform_addr:$src)]> -{ } - -class StoreXForm - : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src), - "stqx\t$rT, $src", - LoadStore, - [(store rclass:$rT, xform_addr:$src)]> -{ } - -multiclass StoreXForms -{ - def v16i8: StoreXFormVec; - def v8i16: StoreXFormVec; - def v4i32: StoreXFormVec; - def v2i64: StoreXFormVec; - def v4f32: StoreXFormVec; - def v2f64: StoreXFormVec; - - def r128: StoreXForm; - def r64: StoreXForm; - def r32: StoreXForm; - def f32: StoreXForm; - def f64: StoreXForm; - def r16: StoreXForm; - def r8: StoreXForm; -} - -defm STQD : StoreDForms; -defm STQA : StoreAForms; -defm STQX : StoreXForms; - -/* Store quadword, PC relative: Not much use at this point in time. Might - be useful for relocatable code. -def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), - "stqr\t$rT, $disp", LoadStore, - [(store VECREG:$rT, iaddr:$disp)]>; -*/ - -//===----------------------------------------------------------------------===// -// Generate Controls for Insertion: -//===----------------------------------------------------------------------===// - -def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cbd\t$rT, $src", ShuffleOp, - [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src), - "cbx\t$rT, $src", ShuffleOp, - [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "chd\t$rT, $src", ShuffleOp, - [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src), - "chx\t$rT, $src", ShuffleOp, - [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cwd\t$rT, $src", ShuffleOp, - [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), - "cwx\t$rT, $src", ShuffleOp, - [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cwd\t$rT, $src", ShuffleOp, - [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), - "cwx\t$rT, $src", ShuffleOp, - [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cdd\t$rT, $src", ShuffleOp, - [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), - "cdx\t$rT, $src", ShuffleOp, - [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), - "cdd\t$rT, $src", ShuffleOp, - [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; - -def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), - "cdx\t$rT, $src", ShuffleOp, - [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; - -//===----------------------------------------------------------------------===// -// Constant formation: -//===----------------------------------------------------------------------===// - -def ILHv8i16: - RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val), - "ilh\t$rT, $val", ImmLoad, - [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>; - -def ILHr16: - RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val), - "ilh\t$rT, $val", ImmLoad, - [(set R16C:$rT, immSExt16:$val)]>; - -// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with -// the right constant") -def ILHr8: - RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val), - "ilh\t$rT, $val", ImmLoad, - [(set R8C:$rT, immSExt8:$val)]>; - -// IL does sign extension! - -class ILInst pattern>: - RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val", - ImmLoad, pattern>; - -class ILVecInst: - ILInst<(outs VECREG:$rT), (ins immtype:$val), - [(set (vectype VECREG:$rT), (vectype xform:$val))]>; - -class ILRegInst: - ILInst<(outs rclass:$rT), (ins immtype:$val), - [(set rclass:$rT, xform:$val)]>; - -multiclass ImmediateLoad -{ - def v2i64: ILVecInst; - def v4i32: ILVecInst; - - // TODO: Need v2f64, v4f32 - - def r64: ILRegInst; - def r32: ILRegInst; - def f32: ILRegInst; - def f64: ILRegInst; -} - -defm IL : ImmediateLoad; - -class ILHUInst pattern>: - RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val", - ImmLoad, pattern>; - -class ILHUVecInst: - ILHUInst<(outs VECREG:$rT), (ins immtype:$val), - [(set (vectype VECREG:$rT), (vectype xform:$val))]>; - -class ILHURegInst: - ILHUInst<(outs rclass:$rT), (ins immtype:$val), - [(set rclass:$rT, xform:$val)]>; - -multiclass ImmLoadHalfwordUpper -{ - def v2i64: ILHUVecInst; - def v4i32: ILHUVecInst; - - def r64: ILHURegInst; - def r32: ILHURegInst; - - // Loads the high portion of an address - def hi: ILHURegInst; - - // Used in custom lowering constant SFP loads: - def f32: ILHURegInst; -} - -defm ILHU : ImmLoadHalfwordUpper; - -// Immediate load address (can also be used to load 18-bit unsigned constants, -// see the zext 16->32 pattern) - -class ILAInst pattern>: - RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val", - LoadNOP, pattern>; - -class ILAVecInst: - ILAInst<(outs VECREG:$rT), (ins immtype:$val), - [(set (vectype VECREG:$rT), (vectype xform:$val))]>; - -class ILARegInst: - ILAInst<(outs rclass:$rT), (ins immtype:$val), - [(set rclass:$rT, xform:$val)]>; - -multiclass ImmLoadAddress -{ - def v2i64: ILAVecInst; - def v4i32: ILAVecInst; - - def r64: ILARegInst; - def r32: ILARegInst; - def f32: ILARegInst; - def f64: ILARegInst; - - def hi: ILARegInst; - def lo: ILARegInst; - - def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val), - [(set R32C:$rT, imm18:$val)]>; -} - -defm ILA : ImmLoadAddress; - -// Immediate OR, Halfword Lower: The "other" part of loading large constants -// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...> -// Note that these are really two operand instructions, but they're encoded -// as three operands with the first two arguments tied-to each other. - -class IOHLInst pattern>: - RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val", - ImmLoad, pattern>, - RegConstraint<"$rS = $rT">, - NoEncode<"$rS">; - -class IOHLVecInst: - IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val), - [/* no pattern */]>; - -class IOHLRegInst: - IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val), - [/* no pattern */]>; - -multiclass ImmOrHalfwordLower -{ - def v2i64: IOHLVecInst; - def v4i32: IOHLVecInst; - - def r32: IOHLRegInst; - def f32: IOHLRegInst; - - def lo: IOHLRegInst; -} - -defm IOHL: ImmOrHalfwordLower; - -// Form select mask for bytes using immediate, used in conjunction with the -// SELB instruction: - -class FSMBIVec: - RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), - "fsmbi\t$rT, $val", - SelectOp, - [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>; - -multiclass FormSelectMaskBytesImm -{ - def v16i8: FSMBIVec; - def v8i16: FSMBIVec; - def v4i32: FSMBIVec; - def v2i64: FSMBIVec; -} - -defm FSMBI : FormSelectMaskBytesImm; - -// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits -class FSMBInst pattern>: - RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp, - pattern>; - -class FSMBRegInst: - FSMBInst<(outs VECREG:$rT), (ins rclass:$rA), - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; - -class FSMBVecInst: - FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), - (SPUselmask (vectype VECREG:$rA)))]>; - -multiclass FormSelectMaskBits { - def v16i8_r16: FSMBRegInst; - def v16i8: FSMBVecInst; -} - -defm FSMB: FormSelectMaskBits; - -// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is -// only 8-bits wide (even though it's input as 16-bits here) - -class FSMHInst pattern>: - RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp, - pattern>; - -class FSMHRegInst: - FSMHInst<(outs VECREG:$rT), (ins rclass:$rA), - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; - -class FSMHVecInst: - FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), - (SPUselmask (vectype VECREG:$rA)))]>; - -multiclass FormSelectMaskHalfword { - def v8i16_r16: FSMHRegInst; - def v8i16: FSMHVecInst; -} - -defm FSMH: FormSelectMaskHalfword; - -// fsm: Form select mask for words. Like the other fsm* instructions, -// only the lower 4 bits of $rA are significant. - -class FSMInst pattern>: - RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp, - pattern>; - -class FSMRegInst: - FSMInst<(outs VECREG:$rT), (ins rclass:$rA), - [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; - -class FSMVecInst: - FSMInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>; - -multiclass FormSelectMaskWord { - def v4i32: FSMVecInst; - - def r32 : FSMRegInst; - def r16 : FSMRegInst; -} - -defm FSM : FormSelectMaskWord; - -// Special case when used for i64 math operations -multiclass FormSelectMaskWord64 { - def r32 : FSMRegInst; - def r16 : FSMRegInst; -} - -defm FSM64 : FormSelectMaskWord64; - -//===----------------------------------------------------------------------===// -// Integer and Logical Operations: -//===----------------------------------------------------------------------===// - -def AHv8i16: - RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "ah\t$rT, $rA, $rB", IntegerOp, - [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>; - -def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (AHv8i16 VECREG:$rA, VECREG:$rB)>; - -def AHr16: - RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "ah\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>; - -def AHIvec: - RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "ahi\t$rT, $rA, $val", IntegerOp, - [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA), - v8i16SExt10Imm:$val))]>; - -def AHIr16: - RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "ahi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; - -// v4i32, i32 add instruction: - -class AInst pattern>: - RRForm<0b00000011000, OOL, IOL, - "a\t$rT, $rA, $rB", IntegerOp, - pattern>; - -class AVecInst: - AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class ARegInst: - AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>; - -multiclass AddInstruction { - def v4i32: AVecInst; - def v16i8: AVecInst; - def r32: ARegInst; -} - -defm A : AddInstruction; - -class AIInst pattern>: - RI10Form<0b00111000, OOL, IOL, - "ai\t$rT, $rA, $val", IntegerOp, - pattern>; - -class AIVecInst: - AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; - -class AIFPVecInst: - AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [/* no pattern */]>; - -class AIRegInst: - AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), - [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; - -// This is used to add epsilons to floating point numbers in the f32 fdiv code: -class AIFPInst: - AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), - [/* no pattern */]>; - -multiclass AddImmediate { - def v4i32: AIVecInst; - - def r32: AIRegInst; - - def v4f32: AIFPVecInst; - def f32: AIFPInst; -} - -defm AI : AddImmediate; - -def SFHvec: - RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "sfh\t$rT, $rA, $rB", IntegerOp, - [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - -def SFHr16: - RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "sfh\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>; - -def SFHIvec: - RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "sfhi\t$rT, $rA, $val", IntegerOp, - [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val, - (v8i16 VECREG:$rA)))]>; - -def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "sfhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>; - -def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - "sf\t$rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; - - -def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "sf\t$rT, $rA, $rB", IntegerOp, - [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; - -def SFIvec: - RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - "sfi\t$rT, $rA, $val", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val, - (v4i32 VECREG:$rA)))]>; - -def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT), - (ins R32C:$rA, s10imm_i32:$val), - "sfi\t$rT, $rA, $val", IntegerOp, - [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>; - -// ADDX: only available in vector form, doesn't match a pattern. -class ADDXInst pattern>: - RRForm<0b00000010110, OOL, IOL, - "addx\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ADDXVecInst: - ADDXInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -class ADDXRegInst: - ADDXInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB, rclass:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -multiclass AddExtended { - def v2i64 : ADDXVecInst; - def v4i32 : ADDXVecInst; - def r64 : ADDXRegInst; - def r32 : ADDXRegInst; -} - -defm ADDX : AddExtended; - -// CG: Generate carry for add -class CGInst pattern>: - RRForm<0b01000011000, OOL, IOL, - "cg\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class CGVecInst: - CGInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -class CGRegInst: - CGInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB), - [/* no pattern */]>; - -multiclass CarryGenerate { - def v2i64 : CGVecInst; - def v4i32 : CGVecInst; - def r64 : CGRegInst; - def r32 : CGRegInst; -} - -defm CG : CarryGenerate; - -// SFX: Subract from, extended. This is used in conjunction with BG to subtract -// with carry (borrow, in this case) -class SFXInst pattern>: - RRForm<0b10000010110, OOL, IOL, - "sfx\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class SFXVecInst: - SFXInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -class SFXRegInst: - SFXInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB, rclass:$rCarry), - [/* no pattern */]>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -multiclass SubtractExtended { - def v2i64 : SFXVecInst; - def v4i32 : SFXVecInst; - def r64 : SFXRegInst; - def r32 : SFXRegInst; -} - -defm SFX : SubtractExtended; - -// BG: only available in vector form, doesn't match a pattern. -class BGInst pattern>: - RRForm<0b01000010000, OOL, IOL, - "bg\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class BGVecInst: - BGInst<(outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -class BGRegInst: - BGInst<(outs rclass:$rT), - (ins rclass:$rA, rclass:$rB), - [/* no pattern */]>; - -multiclass BorrowGenerate { - def v4i32 : BGVecInst; - def v2i64 : BGVecInst; - def r64 : BGRegInst; - def r32 : BGRegInst; -} - -defm BG : BorrowGenerate; - -// BGX: Borrow generate, extended. -def BGXvec: - RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, - VECREG:$rCarry), - "bgx\t$rT, $rA, $rB", IntegerOp, - []>, - RegConstraint<"$rCarry = $rT">, - NoEncode<"$rCarry">; - -// Halfword multiply variants: -// N.B: These can be used to build up larger quantities (16x16 -> 32) - -def MPYv8i16: - RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "mpy\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYr16: - RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - "mpy\t$rT, $rA, $rB", IntegerMulDiv, - [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>; - -// Unsigned 16-bit multiply: - -class MPYUInst pattern>: - RRForm<0b00110011110, OOL, IOL, - "mpyu\t$rT, $rA, $rB", IntegerMulDiv, - pattern>; - -def MPYUv4i32: - MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -def MPYUr16: - MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>; - -def MPYUr32: - MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [/* no pattern */]>; - -// mpyi: multiply 16 x s10imm -> 32 result. - -class MPYIInst pattern>: - RI10Form<0b00101110, OOL, IOL, - "mpyi\t$rT, $rA, $val", IntegerMulDiv, - pattern>; - -def MPYIvec: - MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; - -def MPYIr16: - MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>; - -// mpyui: same issues as other multiplies, plus, this doesn't match a -// pattern... but may be used during target DAG selection or lowering - -class MPYUIInst pattern>: - RI10Form<0b10101110, OOL, IOL, - "mpyui\t$rT, $rA, $val", IntegerMulDiv, - pattern>; - -def MPYUIvec: - MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - []>; - -def MPYUIr16: - MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - []>; - -// mpya: 16 x 16 + 16 -> 32 bit result -class MPYAInst pattern>: - RRRForm<0b0011, OOL, IOL, - "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, - pattern>; - -def MPYAv4i32: - MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (v4i32 VECREG:$rT), - (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))), - (v4i32 VECREG:$rC)))]>; - -def MPYAr32: - MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), - [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), - R32C:$rC))]>; - -def MPYAr32_sext: - MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), - [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)), - R32C:$rC))]>; - -def MPYAr32_sextinreg: - MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC), - [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16), - (sext_inreg R32C:$rB, i16)), - R32C:$rC))]>; - -// mpyh: multiply high, used to synthesize 32-bit multiplies -class MPYHInst pattern>: - RRForm<0b10100011110, OOL, IOL, - "mpyh\t$rT, $rA, $rB", IntegerMulDiv, - pattern>; - -def MPYHv4i32: - MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern */]>; - -def MPYHr32: - MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [/* no pattern */]>; - -// mpys: multiply high and shift right (returns the top half of -// a 16-bit multiply, sign extended to 32 bits.) - -class MPYSInst: - RRForm<0b11100011110, OOL, IOL, - "mpys\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYSv4i32: - MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYSr16: - MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; - -// mpyhh: multiply high-high (returns the 32-bit result from multiplying -// the top 16 bits of the $rA, $rB) - -class MPYHHInst: - RRForm<0b01100011110, OOL, IOL, - "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHv8i16: - MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHr32: - MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -// mpyhha: Multiply high-high, add to $rT: - -class MPYHHAInst: - RRForm<0b01100010110, OOL, IOL, - "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHAvec: - MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHAr32: - MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -// mpyhhu: Multiply high-high, unsigned, e.g.: -// -// +-------+-------+ +-------+-------+ +---------+ -// | a0 . a1 | x | b0 . b1 | = | a0 x b0 | -// +-------+-------+ +-------+-------+ +---------+ -// -// where a0, b0 are the upper 16 bits of the 32-bit word - -class MPYHHUInst: - RRForm<0b01110011110, OOL, IOL, - "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHUv4i32: - MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHUr32: - MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -// mpyhhau: Multiply high-high, unsigned - -class MPYHHAUInst: - RRForm<0b01110010110, OOL, IOL, - "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, - [/* no pattern */]>; - -def MPYHHAUvec: - MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; - -def MPYHHAUr32: - MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// clz: Count leading zeroes -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -class CLZInst pattern>: - RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA", - IntegerOp, pattern>; - -class CLZRegInst: - CLZInst<(outs rclass:$rT), (ins rclass:$rA), - [(set rclass:$rT, (ctlz rclass:$rA))]>; - -class CLZVecInst: - CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), - [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>; - -multiclass CountLeadingZeroes { - def v4i32 : CLZVecInst; - def r32 : CLZRegInst; -} - -defm CLZ : CountLeadingZeroes; - -// cntb: Count ones in bytes (aka "population count") -// -// NOTE: This instruction is really a vector instruction, but the custom -// lowering code uses it in unorthodox ways to support CTPOP for other -// data types! - -def CNTBv16i8: - RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), - "cntb\t$rT, $rA", IntegerOp, - [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>; - -def CNTBv8i16 : - RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), - "cntb\t$rT, $rA", IntegerOp, - [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>; - -def CNTBv4i32 : - RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), - "cntb\t$rT, $rA", IntegerOp, - [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>; - -// gbb: Gather the low order bits from each byte in $rA into a single 16-bit -// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are -// slots 1-3. -// -// Note: This instruction "pairs" with the fsmb instruction for all of the -// various types defined here. -// -// Note 2: The "VecInst" and "RegInst" forms refer to the result being either -// a vector or register. - -class GBBInst pattern>: - RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>; - -class GBBRegInst: - GBBInst<(outs rclass:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -class GBBVecInst: - GBBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -multiclass GatherBitsFromBytes { - def v16i8_r32: GBBRegInst; - def v16i8_r16: GBBRegInst; - def v16i8: GBBVecInst; -} - -defm GBB: GatherBitsFromBytes; - -// gbh: Gather all low order bits from each halfword in $rA into a single -// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0 -// and slots 1-3 also set to 0. -// -// See notes for GBBInst, above. - -class GBHInst pattern>: - RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp, - pattern>; - -class GBHRegInst: - GBHInst<(outs rclass:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -class GBHVecInst: - GBHInst<(outs VECREG:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -multiclass GatherBitsHalfword { - def v8i16_r32: GBHRegInst; - def v8i16_r16: GBHRegInst; - def v8i16: GBHVecInst; -} - -defm GBH: GatherBitsHalfword; - -// gb: Gather all low order bits from each word in $rA into a single -// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0, -// as well as slots 1-3. -// -// See notes for gbb, above. - -class GBInst pattern>: - RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp, - pattern>; - -class GBRegInst: - GBInst<(outs rclass:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -class GBVecInst: - GBInst<(outs VECREG:$rT), (ins VECREG:$rA), - [/* no pattern */]>; - -multiclass GatherBitsWord { - def v4i32_r32: GBRegInst; - def v4i32_r16: GBRegInst; - def v4i32: GBVecInst; -} - -defm GB: GatherBitsWord; - -// avgb: average bytes -def AVGB: - RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "avgb\t$rT, $rA, $rB", ByteOp, - []>; - -// absdb: absolute difference of bytes -def ABSDB: - RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "absdb\t$rT, $rA, $rB", ByteOp, - []>; - -// sumb: sum bytes into halfwords -def SUMB: - RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "sumb\t$rT, $rA, $rB", ByteOp, - []>; - -// Sign extension operations: -class XSBHInst pattern>: - RRForm_1<0b01101101010, OOL, IOL, - "xsbh\t$rDst, $rSrc", - IntegerOp, pattern>; - -class XSBHInRegInst pattern>: - XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), - pattern>; - -multiclass ExtendByteHalfword { - def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [ - /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>; - def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), - [(set R16C:$rDst, (sext R8C:$rSrc))]>; - def r16: XSBHInRegInst; - - // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit - // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32 - // pattern below). Intentionally doesn't match a pattern because we want the - // sext 8->32 pattern to do the work for us, namely because we need the extra - // XSHWr32. - def r32: XSBHInRegInst; - - // Same as the 32-bit version, but for i64 - def r64: XSBHInRegInst; -} - -defm XSBH : ExtendByteHalfword; - -// Sign extend halfwords to words: - -class XSHWInst pattern>: - RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc", - IntegerOp, pattern>; - -class XSHWVecInst: - XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc), - [(set (out_vectype VECREG:$rDest), - (sext (in_vectype VECREG:$rSrc)))]>; - -class XSHWInRegInst pattern>: - XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc), - pattern>; - -class XSHWRegInst: - XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc), - [(set rclass:$rDest, (sext R16C:$rSrc))]>; - -multiclass ExtendHalfwordWord { - def v4i32: XSHWVecInst; - - def r16: XSHWRegInst; - - def r32: XSHWInRegInst; - def r64: XSHWInRegInst; -} - -defm XSHW : ExtendHalfwordWord; - -// Sign-extend words to doublewords (32->64 bits) - -class XSWDInst pattern>: - RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc", - IntegerOp, pattern>; - -class XSWDVecInst: - XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [/*(set (out_vectype VECREG:$rDst), - (sext (out_vectype VECREG:$rSrc)))*/]>; - -class XSWDRegInst: - XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc), - [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>; - -multiclass ExtendWordToDoubleWord { - def v2i64: XSWDVecInst; - def r64: XSWDRegInst; - - def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc), - [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>; -} - -defm XSWD : ExtendWordToDoubleWord; - -// AND operations - -class ANDInst pattern> : - RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ANDVecInst: - ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class ANDRegInst: - ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>; - -multiclass BitwiseAnd -{ - def v16i8: ANDVecInst; - def v8i16: ANDVecInst; - def v4i32: ANDVecInst; - def v2i64: ANDVecInst; - - def r128: ANDRegInst; - def r64: ANDRegInst; - def r32: ANDRegInst; - def r16: ANDRegInst; - def r8: ANDRegInst; - - //===--------------------------------------------- - // Special instructions to perform the fabs instruction - def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), - [/* Intentionally does not match a pattern */]>; - - def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), - [/* Intentionally does not match a pattern */]>; - - def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* Intentionally does not match a pattern */]>; - - //===--------------------------------------------- - - // Hacked form of AND to zero-extend 16-bit quantities to 32-bit - // quantities -- see 16->32 zext pattern. - // - // This pattern is somewhat artificial, since it might match some - // compiler generated pattern but it is unlikely to do so. - - def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB), - [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>; -} - -defm AND : BitwiseAnd; - - -def vnot_cell_conv : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>; - -// N.B.: vnot_cell_conv is one of those special target selection pattern -// fragments, -// in which we expect there to be a bit_convert on the constant. Bear in mind -// that llvm translates "not " to "xor , -1" (or in this case, a -// constant -1 vector.) - -class ANDCInst pattern>: - RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ANDCVecInst: - ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (and (vectype VECREG:$rA), - (vnot_frag (vectype VECREG:$rB))))]>; - -class ANDCRegInst: - ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>; - -multiclass AndComplement -{ - def v16i8: ANDCVecInst; - def v8i16: ANDCVecInst; - def v4i32: ANDCVecInst; - def v2i64: ANDCVecInst; - - def r128: ANDCRegInst; - def r64: ANDCRegInst; - def r32: ANDCRegInst; - def r16: ANDCRegInst; - def r8: ANDCRegInst; - - // Sometimes, the xor pattern has a bitcast constant: - def v16i8_conv: ANDCVecInst; -} - -defm ANDC : AndComplement; - -class ANDBIInst pattern>: - RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass AndByteImm -{ - def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (v16i8 VECREG:$rT), - (and (v16i8 VECREG:$rA), - (v16i8 v16i8U8Imm:$val)))]>; - - def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), - [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>; -} - -defm ANDBI : AndByteImm; - -class ANDHIInst pattern> : - RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass AndHalfwordImm -{ - def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; - - def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), - [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>; - - // Zero-extend i8 to i16: - def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val), - [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>; -} - -defm ANDHI : AndHalfwordImm; - -class ANDIInst pattern> : - RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val", - IntegerOp, pattern>; - -multiclass AndWordImm -{ - def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>; - - def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>; - - // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32 - // pattern below. - def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), - [(set R32C:$rT, - (and (zext R8C:$rA), i32ImmSExt10:$val))]>; - - // Hacked form of ANDI to zero-extend i16 quantities to i32. See the - // zext 16->32 pattern below. - // - // Note that this pattern is somewhat artificial, since it might match - // something the compiler generates but is unlikely to occur in practice. - def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), - [(set R32C:$rT, - (and (zext R16C:$rA), i32ImmSExt10:$val))]>; -} - -defm ANDI : AndWordImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Bitwise OR group: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// Bitwise "or" (N.B.: These are also register-register copy instructions...) -class ORInst pattern>: - RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ORVecInst: - ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class ORRegInst: - ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; - - -multiclass BitwiseOr -{ - def v16i8: ORVecInst; - def v8i16: ORVecInst; - def v4i32: ORVecInst; - def v2i64: ORVecInst; - - def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4f32 VECREG:$rT), - (v4f32 (bitconvert (or (v4i32 VECREG:$rA), - (v4i32 VECREG:$rB)))))]>; - - def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v2f64 VECREG:$rT), - (v2f64 (bitconvert (or (v2i64 VECREG:$rA), - (v2i64 VECREG:$rB)))))]>; - - def r128: ORRegInst; - def r64: ORRegInst; - def r32: ORRegInst; - def r16: ORRegInst; - def r8: ORRegInst; - - // OR instructions used to copy f32 and f64 registers. - def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [/* no pattern */]>; - - def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - [/* no pattern */]>; -} - -defm OR : BitwiseOr; - -//===----------------------------------------------------------------------===// -// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers -//===----------------------------------------------------------------------===// -def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), - (COPY_TO_REGCLASS R8C:$rA, VECREG)>; - -def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), - (COPY_TO_REGCLASS R16C:$rA, VECREG)>; - -def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), - (COPY_TO_REGCLASS R32C:$rA, VECREG)>; - -def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), - (COPY_TO_REGCLASS R64C:$rA, VECREG)>; - -def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), - (COPY_TO_REGCLASS R32FP:$rA, VECREG)>; - -def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), - (COPY_TO_REGCLASS R64FP:$rA, VECREG)>; - -def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))), - (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>; - -def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))), - (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>; - -def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))), - (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>; - -def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))), - (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>; - -def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))), - (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>; - -def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))), - (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>; - -// Load Register: This is an assembler alias for a bitwise OR of a register -// against itself. It's here because it brings some clarity to assembly -// language output. - -let hasCtrlDep = 1 in { - class LRInst - : SPUInstr { - bits<7> RA; - bits<7> RT; - - let Pattern = [/*no pattern*/]; - - let Inst{0-10} = 0b10000010000; /* It's an OR operation */ - let Inst{11-17} = RA; - let Inst{18-24} = RA; - let Inst{25-31} = RT; - } - - class LRVecInst: - LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>; - - class LRRegInst: - LRInst<(outs rclass:$rT), (ins rclass:$rA)>; - - multiclass LoadRegister { - def v2i64: LRVecInst; - def v2f64: LRVecInst; - def v4i32: LRVecInst; - def v4f32: LRVecInst; - def v8i16: LRVecInst; - def v16i8: LRVecInst; - - def r128: LRRegInst; - def r64: LRRegInst; - def f64: LRRegInst; - def r32: LRRegInst; - def f32: LRRegInst; - def r16: LRRegInst; - def r8: LRRegInst; - } - - defm LR: LoadRegister; -} - -// ORC: Bitwise "or" with complement (c = a | ~b) - -class ORCInst pattern>: - RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class ORCVecInst: - ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - (vnot (vectype VECREG:$rB))))]>; - -class ORCRegInst: - ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>; - -multiclass BitwiseOrComplement -{ - def v16i8: ORCVecInst; - def v8i16: ORCVecInst; - def v4i32: ORCVecInst; - def v2i64: ORCVecInst; - - def r128: ORCRegInst; - def r64: ORCRegInst; - def r32: ORCRegInst; - def r16: ORCRegInst; - def r8: ORCRegInst; -} - -defm ORC : BitwiseOrComplement; - -// OR byte immediate -class ORBIInst pattern>: - RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val", - IntegerOp, pattern>; - -class ORBIVecInst: - ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA), - (vectype immpred:$val)))]>; - -multiclass BitwiseOrByteImm -{ - def v16i8: ORBIVecInst; - - def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), - [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>; -} - -defm ORBI : BitwiseOrByteImm; - -// OR halfword immediate -class ORHIInst pattern>: - RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val", - IntegerOp, pattern>; - -class ORHIVecInst: - ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - immpred:$val))]>; - -multiclass BitwiseOrHalfwordImm -{ - def v8i16: ORHIVecInst; - - def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), - [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>; - - // Specialized ORHI form used to promote 8-bit registers to 16-bit - def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val), - [(set R16C:$rT, (or (anyext R8C:$rA), - i16ImmSExt10:$val))]>; -} - -defm ORHI : BitwiseOrHalfwordImm; - -class ORIInst pattern>: - RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val", - IntegerOp, pattern>; - -class ORIVecInst: - ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), - immpred:$val))]>; - -// Bitwise "or" with immediate -multiclass BitwiseOrImm -{ - def v4i32: ORIVecInst; - - def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; - - // i16i32: hacked version of the ori instruction to extend 16-bit quantities - // to 32-bit quantities. used exclusively to match "anyext" conversions (vide - // infra "anyext 16->32" pattern.) - def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (or (anyext R16C:$rA), - i32ImmSExt10:$val))]>; - - // i8i32: Hacked version of the ORI instruction to extend 16-bit quantities - // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide - // infra "anyext 16->32" pattern.) - def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (or (anyext R8C:$rA), - i32ImmSExt10:$val))]>; -} - -defm ORI : BitwiseOrImm; - -// ORX: "or" across the vector: or's $rA's word slots leaving the result in -// $rT[0], slots 1-3 are zeroed. -// -// FIXME: Needs to match an intrinsic pattern. -def ORXv4i32: - RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "orx\t$rT, $rA, $rB", IntegerOp, - []>; - -// XOR: - -class XORInst pattern> : - RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class XORVecInst: - XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA), - (vectype VECREG:$rB)))]>; - -class XORRegInst: - XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>; - -multiclass BitwiseExclusiveOr -{ - def v16i8: XORVecInst; - def v8i16: XORVecInst; - def v4i32: XORVecInst; - def v2i64: XORVecInst; - - def r128: XORRegInst; - def r64: XORRegInst; - def r32: XORRegInst; - def r16: XORRegInst; - def r8: XORRegInst; - - // XOR instructions used to negate f32 and f64 quantities. - - def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), - [/* no pattern */]>; - - def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), - [/* no pattern */]>; - - def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* no pattern, see fneg{32,64} */]>; -} - -defm XOR : BitwiseExclusiveOr; - -//==---------------------------------------------------------- - -class XORBIInst pattern>: - RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val", - IntegerOp, pattern>; - -multiclass XorByteImm -{ - def v16i8: - XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>; - - def r8: - XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), - [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>; -} - -defm XORBI : XorByteImm; - -def XORHIv8i16: - RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), - "xorhi\t$rT, $rA, $val", IntegerOp, - [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA), - v8i16SExt10Imm:$val))]>; - -def XORHIr16: - RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - "xorhi\t$rT, $rA, $val", IntegerOp, - [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>; - -def XORIv4i32: - RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val), - "xori\t$rT, $rA, $val", IntegerOp, - [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA), - v4i32SExt10Imm:$val))]>; - -def XORIr32: - RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - "xori\t$rT, $rA, $val", IntegerOp, - [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>; - -// NAND: - -class NANDInst pattern>: - RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class NANDVecInst: - NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA), - (vectype VECREG:$rB))))]>; -class NANDRegInst: - NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>; - -multiclass BitwiseNand -{ - def v16i8: NANDVecInst; - def v8i16: NANDVecInst; - def v4i32: NANDVecInst; - def v2i64: NANDVecInst; - - def r128: NANDRegInst; - def r64: NANDRegInst; - def r32: NANDRegInst; - def r16: NANDRegInst; - def r8: NANDRegInst; -} - -defm NAND : BitwiseNand; - -// NOR: - -class NORInst pattern>: - RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class NORVecInst: - NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA), - (vectype VECREG:$rB))))]>; -class NORRegInst: - NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>; - -multiclass BitwiseNor -{ - def v16i8: NORVecInst; - def v8i16: NORVecInst; - def v4i32: NORVecInst; - def v2i64: NORVecInst; - - def r128: NORRegInst; - def r64: NORRegInst; - def r32: NORRegInst; - def r16: NORRegInst; - def r8: NORRegInst; -} - -defm NOR : BitwiseNor; - -// Select bits: -class SELBInst pattern>: - RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC", - IntegerOp, pattern>; - -class SELBVecInst: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (vectype VECREG:$rT), - (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)), - (and (vnot_frag (vectype VECREG:$rC)), - (vectype VECREG:$rA))))]>; - -class SELBVecVCondInst: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (vectype VECREG:$rT), - (select (vectype VECREG:$rC), - (vectype VECREG:$rB), - (vectype VECREG:$rA)))]>; - -class SELBVecCondInst: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), - [(set (vectype VECREG:$rT), - (select R32C:$rC, - (vectype VECREG:$rB), - (vectype VECREG:$rA)))]>; - -class SELBRegInst: - SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC), - [(set rclass:$rT, - (or (and rclass:$rB, rclass:$rC), - (and rclass:$rA, (not rclass:$rC))))]>; - -class SELBRegCondInst: - SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC), - [(set rclass:$rT, - (select rcond:$rC, rclass:$rB, rclass:$rA))]>; - -multiclass SelectBits -{ - def v16i8: SELBVecInst; - def v8i16: SELBVecInst; - def v4i32: SELBVecInst; - def v2i64: SELBVecInst; - - def r128: SELBRegInst; - def r64: SELBRegInst; - def r32: SELBRegInst; - def r16: SELBRegInst; - def r8: SELBRegInst; - - def v16i8_cond: SELBVecCondInst; - def v8i16_cond: SELBVecCondInst; - def v4i32_cond: SELBVecCondInst; - def v2i64_cond: SELBVecCondInst; - - def v16i8_vcond: SELBVecCondInst; - def v8i16_vcond: SELBVecCondInst; - def v4i32_vcond: SELBVecCondInst; - def v2i64_vcond: SELBVecCondInst; - - def v4f32_cond: - SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (v4f32 VECREG:$rT), - (select (v4i32 VECREG:$rC), - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rA)))]>; - - // SELBr64_cond is defined in SPU64InstrInfo.td - def r32_cond: SELBRegCondInst; - def f32_cond: SELBRegCondInst; - def r16_cond: SELBRegCondInst; - def r8_cond: SELBRegCondInst; -} - -defm SELB : SelectBits; - -class SPUselbPatVec: - Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)), - (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>; - -def : SPUselbPatVec; -def : SPUselbPatVec; -def : SPUselbPatVec; -def : SPUselbPatVec; - -class SPUselbPatReg: - Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC), - (inst rclass:$rA, rclass:$rB, rclass:$rC)>; - -def : SPUselbPatReg; -def : SPUselbPatReg; -def : SPUselbPatReg; -def : SPUselbPatReg; - -// EQV: Equivalence (1 for each same bit, otherwise 0) -// -// Note: There are a lot of ways to match this bit operator and these patterns -// attempt to be as exhaustive as possible. - -class EQVInst pattern>: - RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB", - IntegerOp, pattern>; - -class EQVVecInst: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), - (and (vnot (vectype VECREG:$rA)), - (vnot (vectype VECREG:$rB)))))]>; - -class EQVRegInst: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB), - (and (not rclass:$rA), (not rclass:$rB))))]>; - -class EQVVecPattern1: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>; - -class EQVRegPattern1: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>; - -class EQVVecPattern2: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), - (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>; - -class EQVRegPattern2: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, - (or (and rclass:$rA, rclass:$rB), - (not (or rclass:$rA, rclass:$rB))))]>; - -class EQVVecPattern3: - EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>; - -class EQVRegPattern3: - EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>; - -multiclass BitEquivalence -{ - def v16i8: EQVVecInst; - def v8i16: EQVVecInst; - def v4i32: EQVVecInst; - def v2i64: EQVVecInst; - - def v16i8_1: EQVVecPattern1; - def v8i16_1: EQVVecPattern1; - def v4i32_1: EQVVecPattern1; - def v2i64_1: EQVVecPattern1; - - def v16i8_2: EQVVecPattern2; - def v8i16_2: EQVVecPattern2; - def v4i32_2: EQVVecPattern2; - def v2i64_2: EQVVecPattern2; - - def v16i8_3: EQVVecPattern3; - def v8i16_3: EQVVecPattern3; - def v4i32_3: EQVVecPattern3; - def v2i64_3: EQVVecPattern3; - - def r128: EQVRegInst; - def r64: EQVRegInst; - def r32: EQVRegInst; - def r16: EQVRegInst; - def r8: EQVRegInst; - - def r128_1: EQVRegPattern1; - def r64_1: EQVRegPattern1; - def r32_1: EQVRegPattern1; - def r16_1: EQVRegPattern1; - def r8_1: EQVRegPattern1; - - def r128_2: EQVRegPattern2; - def r64_2: EQVRegPattern2; - def r32_2: EQVRegPattern2; - def r16_2: EQVRegPattern2; - def r8_2: EQVRegPattern2; - - def r128_3: EQVRegPattern3; - def r64_3: EQVRegPattern3; - def r32_3: EQVRegPattern3; - def r16_3: EQVRegPattern3; - def r8_3: EQVRegPattern3; -} - -defm EQV: BitEquivalence; - -//===----------------------------------------------------------------------===// -// Vector shuffle... -//===----------------------------------------------------------------------===// -// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB. -// See the SPUshuffle SDNode operand above, which sets up the DAG pattern -// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with -// the SPUISD::SHUFB opcode. -//===----------------------------------------------------------------------===// - -class SHUFBInst pattern>: - RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC", - ShuffleOp, pattern>; - -class SHUFBVecInst: - SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - [(set (resultvec VECREG:$rT), - (SPUshuffle (resultvec VECREG:$rA), - (resultvec VECREG:$rB), - (maskvec VECREG:$rC)))]>; - -class SHUFBGPRCInst: - SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC), - [/* no pattern */]>; - -multiclass ShuffleBytes -{ - def v16i8 : SHUFBVecInst; - def v16i8_m32 : SHUFBVecInst; - def v8i16 : SHUFBVecInst; - def v8i16_m32 : SHUFBVecInst; - def v4i32 : SHUFBVecInst; - def v4i32_m32 : SHUFBVecInst; - def v2i64 : SHUFBVecInst; - def v2i64_m32 : SHUFBVecInst; - - def v4f32 : SHUFBVecInst; - def v4f32_m32 : SHUFBVecInst; - - def v2f64 : SHUFBVecInst; - def v2f64_m32 : SHUFBVecInst; - - def gprc : SHUFBGPRCInst; -} - -defm SHUFB : ShuffleBytes; - -//===----------------------------------------------------------------------===// -// Shift and rotate group: -//===----------------------------------------------------------------------===// - -class SHLHInst pattern>: - RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -class SHLHVecInst: - SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; - -multiclass ShiftLeftHalfword -{ - def v8i16: SHLHVecInst; - def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>; - def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>; -} - -defm SHLH : ShiftLeftHalfword; - -//===----------------------------------------------------------------------===// - -class SHLHIInst pattern>: - RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class SHLHIVecInst: - SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), - [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>; - -multiclass ShiftLeftHalfwordImm -{ - def v8i16: SHLHIVecInst; - def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), - [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>; -} - -defm SHLHI : ShiftLeftHalfwordImm; - -def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)), - (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>; - -def : Pat<(shl R16C:$rA, (i32 uimm7:$val)), - (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>; - -//===----------------------------------------------------------------------===// - -class SHLInst pattern>: - RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -multiclass ShiftLeftWord -{ - def v4i32: - SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - def r32: - SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; -} - -defm SHL: ShiftLeftWord; - -//===----------------------------------------------------------------------===// - -class SHLIInst pattern>: - RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val", - RotShiftVec, pattern>; - -multiclass ShiftLeftWordImm -{ - def v4i32: - SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), - [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>; - - def r32: - SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val), - [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>; -} - -defm SHLI : ShiftLeftWordImm; - -//===----------------------------------------------------------------------===// -// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit -// register) to the left. Vector form is here to ensure type correctness. -// -// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift -// of 7 bits is actually possible. -// -// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI -// to shift i64 and i128. SHLQBI is the residual left over after shifting by -// bytes with SHLQBY. - -class SHLQBIInst pattern>: - RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class SHLQBIVecInst: - SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>; - -class SHLQBIRegInst: - SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass ShiftLeftQuadByBits -{ - def v16i8: SHLQBIVecInst; - def v8i16: SHLQBIVecInst; - def v4i32: SHLQBIVecInst; - def v4f32: SHLQBIVecInst; - def v2i64: SHLQBIVecInst; - def v2f64: SHLQBIVecInst; - - def r128: SHLQBIRegInst; -} - -defm SHLQBI : ShiftLeftQuadByBits; - -// See note above on SHLQBI. In this case, the predicate actually does then -// enforcement, whereas with SHLQBI, we have to "take it on faith." -class SHLQBIIInst pattern>: - RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class SHLQBIIVecInst: - SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>; - -multiclass ShiftLeftQuadByBitsImm -{ - def v16i8 : SHLQBIIVecInst; - def v8i16 : SHLQBIIVecInst; - def v4i32 : SHLQBIIVecInst; - def v4f32 : SHLQBIIVecInst; - def v2i64 : SHLQBIIVecInst; - def v2f64 : SHLQBIIVecInst; -} - -defm SHLQBII : ShiftLeftQuadByBitsImm; - -// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes, -// not by bits. See notes above on SHLQBI. - -class SHLQBYInst pattern>: - RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class SHLQBYVecInst: - SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>; - -multiclass ShiftLeftQuadBytes -{ - def v16i8: SHLQBYVecInst; - def v8i16: SHLQBYVecInst; - def v4i32: SHLQBYVecInst; - def v4f32: SHLQBYVecInst; - def v2i64: SHLQBYVecInst; - def v2f64: SHLQBYVecInst; - def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), - [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>; -} - -defm SHLQBY: ShiftLeftQuadBytes; - -class SHLQBYIInst pattern>: - RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class SHLQBYIVecInst: - SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), - [(set (vectype VECREG:$rT), - (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; - -multiclass ShiftLeftQuadBytesImm -{ - def v16i8: SHLQBYIVecInst; - def v8i16: SHLQBYIVecInst; - def v4i32: SHLQBYIVecInst; - def v4f32: SHLQBYIVecInst; - def v2i64: SHLQBYIVecInst; - def v2f64: SHLQBYIVecInst; - def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val), - [(set GPRC:$rT, - (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>; -} - -defm SHLQBYI : ShiftLeftQuadBytesImm; - -class SHLQBYBIInst pattern>: - RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class SHLQBYBIVecInst: - SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern */]>; - -class SHLQBYBIRegInst: - SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass ShiftLeftQuadBytesBitCount -{ - def v16i8: SHLQBYBIVecInst; - def v8i16: SHLQBYBIVecInst; - def v4i32: SHLQBYBIVecInst; - def v4f32: SHLQBYBIVecInst; - def v2i64: SHLQBYBIVecInst; - def v2f64: SHLQBYBIVecInst; - - def r128: SHLQBYBIRegInst; -} - -defm SHLQBYBI : ShiftLeftQuadBytesBitCount; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate halfword: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -class ROTHInst pattern>: - RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -class ROTHVecInst: - ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>; - -class ROTHRegInst: - ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), - [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>; - -multiclass RotateLeftHalfword -{ - def v8i16: ROTHVecInst; - def r16: ROTHRegInst; -} - -defm ROTH: RotateLeftHalfword; - -def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate halfword, immediate: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -class ROTHIInst pattern>: - RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class ROTHIVecInst: - ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), - [(set (vectype VECREG:$rT), - (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>; - -multiclass RotateLeftHalfwordImm -{ - def v8i16: ROTHIVecInst; - def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), - [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>; - def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val), - [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>; -} - -defm ROTHI: RotateLeftHalfwordImm; - -def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)), - (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate word: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTInst pattern>: - RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -class ROTVecInst: - ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>; - -class ROTRegInst: - ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [(set rclass:$rT, - (rotl rclass:$rA, R32C:$rB))]>; - -multiclass RotateLeftWord -{ - def v4i32: ROTVecInst; - def r32: ROTRegInst; -} - -defm ROT: RotateLeftWord; - -// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or -// 32-bit register -def ROTr32_r16_anyext: - ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB), - [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>; - -def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))), - (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; - -def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))), - (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; - -def ROTr32_r8_anyext: - ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB), - [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>; - -def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))), - (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; - -def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))), - (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate word, immediate -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTIInst pattern>: - RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class ROTIVecInst: - ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), - [(set (vectype VECREG:$rT), - (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>; - -class ROTIRegInst: - ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), - [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>; - -multiclass RotateLeftWordImm -{ - def v4i32: ROTIVecInst; - def v4i32_i16: ROTIVecInst; - def v4i32_i8: ROTIVecInst; - - def r32: ROTIRegInst; - def r32_i16: ROTIRegInst; - def r32_i8: ROTIRegInst; -} - -defm ROTI : RotateLeftWordImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad by byte (count) -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQBYInst pattern>: - RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQBYGenInst: - ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB), - [(set (type rc:$rT), - (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>; - -class ROTQBYVecInst: - ROTQBYGenInst; - -multiclass RotateQuadLeftByBytes -{ - def v16i8: ROTQBYVecInst; - def v8i16: ROTQBYVecInst; - def v4i32: ROTQBYVecInst; - def v4f32: ROTQBYVecInst; - def v2i64: ROTQBYVecInst; - def v2f64: ROTQBYVecInst; - def i128: ROTQBYGenInst; -} - -defm ROTQBY: RotateQuadLeftByBytes; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad by byte (count), immediate -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQBYIInst pattern>: - RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQBYIGenInst: - ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val), - [(set (type rclass:$rT), - (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>; - -class ROTQBYIVecInst: - ROTQBYIGenInst; - -multiclass RotateQuadByBytesImm -{ - def v16i8: ROTQBYIVecInst; - def v8i16: ROTQBYIVecInst; - def v4i32: ROTQBYIVecInst; - def v4f32: ROTQBYIVecInst; - def v2i64: ROTQBYIVecInst; - def vfi64: ROTQBYIVecInst; - def i128: ROTQBYIGenInst; -} - -defm ROTQBYI: RotateQuadByBytesImm; - -// See ROTQBY note above. -class ROTQBYBIInst pattern>: - RI7Form<0b00110011100, OOL, IOL, - "rotqbybi\t$rT, $rA, $shift", - RotShiftQuad, pattern>; - -class ROTQBYBIVecInst: - ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift), - [(set (vectype VECREG:$rT), - (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>; - -multiclass RotateQuadByBytesByBitshift { - def v16i8_r32: ROTQBYBIVecInst; - def v8i16_r32: ROTQBYBIVecInst; - def v4i32_r32: ROTQBYBIVecInst; - def v2i64_r32: ROTQBYBIVecInst; -} - -defm ROTQBYBI : RotateQuadByBytesByBitshift; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// See ROTQBY note above. -// -// Assume that the user of this instruction knows to shift the rotate count -// into bit 29 -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQBIInst pattern>: - RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQBIVecInst: - ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern yet */]>; - -class ROTQBIRegInst: - ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern yet */]>; - -multiclass RotateQuadByBitCount -{ - def v16i8: ROTQBIVecInst; - def v8i16: ROTQBIVecInst; - def v4i32: ROTQBIVecInst; - def v2i64: ROTQBIVecInst; - - def r128: ROTQBIRegInst; - def r64: ROTQBIRegInst; -} - -defm ROTQBI: RotateQuadByBitCount; - -class ROTQBIIInst pattern>: - RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQBIIVecInst: - ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), - [/* no pattern yet */]>; - -class ROTQBIIRegInst: - ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), - [/* no pattern yet */]>; - -multiclass RotateQuadByBitCountImm -{ - def v16i8: ROTQBIIVecInst; - def v8i16: ROTQBIIVecInst; - def v4i32: ROTQBIIVecInst; - def v2i64: ROTQBIIVecInst; - - def r128: ROTQBIIRegInst; - def r64: ROTQBIIRegInst; -} - -defm ROTQBII : RotateQuadByBitCountImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// ROTHM v8i16 form: -// NOTE(1): No vector rotate is generated by the C/C++ frontend (today), -// so this only matches a synthetically generated/lowered code -// fragment. -// NOTE(2): $rB must be negated before the right rotate! -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTHMInst pattern>: - RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -def ROTHMv8i16: - ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; - -// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left -// Note: This instruction doesn't match a pattern because rB must be negated -// for the instruction to work. Thus, the pattern below the instruction! - -def ROTHMr16: - ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - [/* see patterns below - $rB must be negated! */]>; - -def : Pat<(srl R16C:$rA, R32C:$rB), - (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(srl R16C:$rA, R16C:$rB), - (ROTHMr16 R16C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(srl R16C:$rA, R8C:$rB), - (ROTHMr16 R16C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; - -// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is -// that the immediate can be complemented, so that the user doesn't have to -// worry about it. - -class ROTHMIInst pattern>: - RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val", - RotShiftVec, pattern>; - -def ROTHMIv8i16: - ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), - [/* no pattern */]>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)), - (ROTHMIv8i16 VECREG:$rA, imm:$val)>; - -def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)), - (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)), - (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>; - -def ROTHMIr16: - ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val), - [/* no pattern */]>; - -def: Pat<(srl R16C:$rA, (i32 uimm7:$val)), - (ROTHMIr16 R16C:$rA, uimm7:$val)>; - -def: Pat<(srl R16C:$rA, (i16 uimm7:$val)), - (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -def: Pat<(srl R16C:$rA, (i8 uimm7:$val)), - (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -// ROTM v4i32 form: See the ROTHM v8i16 comments. -class ROTMInst pattern>: - RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB", - RotShiftVec, pattern>; - -def ROTMv4i32: - ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), - (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>; - -def ROTMr32: - ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(srl R32C:$rA, R32C:$rB), - (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(srl R32C:$rA, R16C:$rB), - (ROTMr32 R32C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(srl R32C:$rA, R8C:$rB), - (ROTMr32 R32C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; - -// ROTMI v4i32 form: See the comment for ROTHM v8i16. -def ROTMIv4i32: - RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - "rotmi\t$rT, $rA, $val", RotShiftVec, - [(set (v4i32 VECREG:$rT), - (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)), - (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)), - (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>; - -// ROTMI r32 form: know how to complement the immediate value. -def ROTMIr32: - RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val), - "rotmi\t$rT, $rA, $val", RotShiftVec, - [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>; - -def : Pat<(srl R32C:$rA, (i16 imm:$val)), - (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>; - -def : Pat<(srl R32C:$rA, (i8 imm:$val)), - (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// ROTQMBY: This is a vector form merely so that when used in an -// instruction pattern, type checking will succeed. This instruction assumes -// that the user knew to negate $rB. -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBYInst pattern>: - RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQMBYVecInst: - ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern, $rB must be negated */]>; - -class ROTQMBYRegInst: - ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass RotateQuadBytes -{ - def v16i8: ROTQMBYVecInst; - def v8i16: ROTQMBYVecInst; - def v4i32: ROTQMBYVecInst; - def v2i64: ROTQMBYVecInst; - - def r128: ROTQMBYRegInst; - def r64: ROTQMBYRegInst; -} - -defm ROTQMBY : RotateQuadBytes; - -def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB), - (ROTQMBYr128 GPRC:$rA, - (SFIr32 R32C:$rB, 0))>; - -class ROTQMBYIInst pattern>: - RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQMBYIVecInst: - ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - [/* no pattern */]>; - -class ROTQMBYIRegInst: - ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), - [/* no pattern */]>; - -// 128-bit zero extension form: -class ROTQMBYIZExtInst: - ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val), - [/* no pattern */]>; - -multiclass RotateQuadBytesImm -{ - def v16i8: ROTQMBYIVecInst; - def v8i16: ROTQMBYIVecInst; - def v4i32: ROTQMBYIVecInst; - def v2i64: ROTQMBYIVecInst; - - def r128: ROTQMBYIRegInst; - def r64: ROTQMBYIRegInst; - - def r128_zext_r8: ROTQMBYIZExtInst; - def r128_zext_r16: ROTQMBYIZExtInst; - def r128_zext_r32: ROTQMBYIZExtInst; - def r128_zext_r64: ROTQMBYIZExtInst; -} - -defm ROTQMBYI : RotateQuadBytesImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate right and mask by bit count -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBYBIInst pattern>: - RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQMBYBIVecInst: - ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern, */]>; - -multiclass RotateMaskQuadByBitCount -{ - def v16i8: ROTQMBYBIVecInst; - def v8i16: ROTQMBYBIVecInst; - def v4i32: ROTQMBYBIVecInst; - def v2i64: ROTQMBYBIVecInst; - def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), - [/*no pattern*/]>; -} - -defm ROTQMBYBI: RotateMaskQuadByBitCount; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad and mask by bits -// Note that the rotate amount has to be negated -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBIInst pattern>: - RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB", - RotShiftQuad, pattern>; - -class ROTQMBIVecInst: - ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [/* no pattern */]>; - -class ROTQMBIRegInst: - ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), - [/* no pattern */]>; - -multiclass RotateMaskQuadByBits -{ - def v16i8: ROTQMBIVecInst; - def v8i16: ROTQMBIVecInst; - def v4i32: ROTQMBIVecInst; - def v2i64: ROTQMBIVecInst; - - def r128: ROTQMBIRegInst; - def r64: ROTQMBIRegInst; -} - -defm ROTQMBI: RotateMaskQuadByBits; - -def : Pat<(srl GPRC:$rA, R32C:$rB), - (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA, - (SFIr32 R32C:$rB, 0)), - (SFIr32 R32C:$rB, 0))>; - - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Rotate quad and mask by bits, immediate -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class ROTQMBIIInst pattern>: - RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val", - RotShiftQuad, pattern>; - -class ROTQMBIIVecInst: - ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - [/* no pattern */]>; - -class ROTQMBIIRegInst: - ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), - [/* no pattern */]>; - -multiclass RotateMaskQuadByBitsImm -{ - def v16i8: ROTQMBIIVecInst; - def v8i16: ROTQMBIIVecInst; - def v4i32: ROTQMBIIVecInst; - def v2i64: ROTQMBIIVecInst; - - def r128: ROTQMBIIRegInst; - def r64: ROTQMBIIRegInst; -} - -defm ROTQMBII: RotateMaskQuadByBitsImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def ROTMAHv8i16: - RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "rotmah\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; - -def ROTMAHr16: - RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - "rotmah\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(sra R16C:$rA, R32C:$rB), - (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(sra R16C:$rA, R16C:$rB), - (ROTMAHr16 R16C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(sra R16C:$rA, R8C:$rB), - (ROTMAHr16 R16C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; - -def ROTMAHIv8i16: - RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), - "rotmahi\t$rT, $rA, $val", RotShiftVec, - [(set (v8i16 VECREG:$rT), - (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)), - (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)), - (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>; - -def ROTMAHIr16: - RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val), - "rotmahi\t$rT, $rA, $val", RotShiftVec, - [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>; - -def : Pat<(sra R16C:$rA, (i32 imm:$val)), - (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -def : Pat<(sra R16C:$rA, (i8 imm:$val)), - (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; - -def ROTMAv4i32: - RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "rotma\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), - (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>; - -def ROTMAr32: - RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "rotma\t$rT, $rA, $rB", RotShiftVec, - [/* see patterns below - $rB must be negated */]>; - -def : Pat<(sra R32C:$rA, R32C:$rB), - (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(sra R32C:$rA, R16C:$rB), - (ROTMAr32 R32C:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(sra R32C:$rA, R8C:$rB), - (ROTMAr32 R32C:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; - -class ROTMAIInst pattern>: - RRForm<0b01011110000, OOL, IOL, - "rotmai\t$rT, $rA, $val", - RotShiftVec, pattern>; - -class ROTMAIVecInst: - ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val), - [(set (vectype VECREG:$rT), - (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>; - -class ROTMAIRegInst: - ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val), - [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>; - -multiclass RotateMaskAlgebraicImm { - def v2i64_i32 : ROTMAIVecInst; - def v4i32_i32 : ROTMAIVecInst; - def r64_i32 : ROTMAIRegInst; - def r32_i32 : ROTMAIRegInst; -} - -defm ROTMAI : RotateMaskAlgebraicImm; - -//===----------------------------------------------------------------------===// -// Branch and conditionals: -//===----------------------------------------------------------------------===// - -let isTerminator = 1, isBarrier = 1 in { - // Halt If Equal (r32 preferred slot only, no vector form) - def HEQr32: - RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB), - "heq\t$rA, $rB", BranchResolv, - [/* no pattern to match */]>; - - def HEQIr32 : - RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val), - "heqi\t$rA, $val", BranchResolv, - [/* no pattern to match */]>; - - // HGT/HGTI: These instructions use signed arithmetic for the comparison, - // contrasting with HLGT/HLGTI, which use unsigned comparison: - def HGTr32: - RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB), - "hgt\t$rA, $rB", BranchResolv, - [/* no pattern to match */]>; - - def HGTIr32: - RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val), - "hgti\t$rA, $val", BranchResolv, - [/* no pattern to match */]>; - - def HLGTr32: - RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB), - "hlgt\t$rA, $rB", BranchResolv, - [/* no pattern to match */]>; - - def HLGTIr32: - RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val), - "hlgti\t$rA, $val", BranchResolv, - [/* no pattern to match */]>; -} - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// Comparison operators for i8, i16 and i32: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class CEQBInst pattern> : - RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpEqualByte -{ - def v16i8 : - CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r8 : - CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>; -} - -class CEQBIInst pattern> : - RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpEqualByteImm -{ - def v16i8 : - CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), - [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA), - v16i8SExt8Imm:$val))]>; - def r8: - CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), - [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>; -} - -class CEQHInst pattern> : - RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpEqualHalfword -{ - def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>; -} - -class CEQHIInst pattern> : - RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpEqualHalfwordImm -{ - def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (seteq (v8i16 VECREG:$rA), - (v8i16 v8i16SExt10Imm:$val)))]>; - def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>; -} - -class CEQInst pattern> : - RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpEqualWord -{ - def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - - def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>; -} - -class CEQIInst pattern> : - RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpEqualWordImm -{ - def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (seteq (v4i32 VECREG:$rA), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>; -} - -class CGTBInst pattern> : - RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpGtrByte -{ - def v16i8 : - CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r8 : - CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>; -} - -class CGTBIInst pattern> : - RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpGtrByteImm -{ - def v16i8 : - CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), - [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA), - v16i8SExt8Imm:$val))]>; - def r8: - CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), - [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>; -} - -class CGTHInst pattern> : - RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpGtrHalfword -{ - def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>; -} - -class CGTHIInst pattern> : - RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpGtrHalfwordImm -{ - def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (setgt (v8i16 VECREG:$rA), - (v8i16 v8i16SExt10Imm:$val)))]>; - def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>; -} - -class CGTInst pattern> : - RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpGtrWord -{ - def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - - def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>; -} - -class CGTIInst pattern> : - RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpGtrWordImm -{ - def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (setgt (v4i32 VECREG:$rA), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; - - // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: - def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), - [/* no pattern */]>; -} - -class CLGTBInst pattern> : - RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpLGtrByte -{ - def v16i8 : - CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r8 : - CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), - [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>; -} - -class CLGTBIInst pattern> : - RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpLGtrByteImm -{ - def v16i8 : - CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), - [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA), - v16i8SExt8Imm:$val))]>; - def r8: - CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), - [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>; -} - -class CLGTHInst pattern> : - RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpLGtrHalfword -{ - def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA), - (v8i16 VECREG:$rB)))]>; - - def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), - [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>; -} - -class CLGTHIInst pattern> : - RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpLGtrHalfwordImm -{ - def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v8i16 VECREG:$rT), - (setugt (v8i16 VECREG:$rA), - (v8i16 v8i16SExt10Imm:$val)))]>; - def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), - [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>; -} - -class CLGTInst pattern> : - RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB", - ByteOp, pattern>; - -multiclass CmpLGtrWord -{ - def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (v4i32 VECREG:$rT), - (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; - - def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>; -} - -class CLGTIInst pattern> : - RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val", - ByteOp, pattern>; - -multiclass CmpLGtrWordImm -{ - def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), - [(set (v4i32 VECREG:$rT), - (setugt (v4i32 VECREG:$rA), - (v4i32 v4i32SExt16Imm:$val)))]>; - - def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), - [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>; -} - -defm CEQB : CmpEqualByte; -defm CEQBI : CmpEqualByteImm; -defm CEQH : CmpEqualHalfword; -defm CEQHI : CmpEqualHalfwordImm; -defm CEQ : CmpEqualWord; -defm CEQI : CmpEqualWordImm; -defm CGTB : CmpGtrByte; -defm CGTBI : CmpGtrByteImm; -defm CGTH : CmpGtrHalfword; -defm CGTHI : CmpGtrHalfwordImm; -defm CGT : CmpGtrWord; -defm CGTI : CmpGtrWordImm; -defm CLGTB : CmpLGtrByte; -defm CLGTBI : CmpLGtrByteImm; -defm CLGTH : CmpLGtrHalfword; -defm CLGTHI : CmpLGtrHalfwordImm; -defm CLGT : CmpLGtrWord; -defm CLGTI : CmpLGtrWordImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// For SETCC primitives not supported above (setlt, setle, setge, etc.) -// define a pattern to generate the right code, as a binary operator -// (in a manner of speaking.) -// -// Notes: -// 1. This only matches the setcc set of conditionals. Special pattern -// matching is used for select conditionals. -// -// 2. The "DAG" versions of these classes is almost exclusively used for -// i64 comparisons. See the tblgen fundamentals documentation for what -// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern -// class for where ResultInstrs originates. -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class SETCCNegCondReg: - Pat<(cond rclass:$rA, rclass:$rB), - (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>; - -class SETCCNegCondImm: - Pat<(cond rclass:$rA, (inttype immpred:$imm)), - (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>; - -def : SETCCNegCondReg; -def : SETCCNegCondImm; - -def : SETCCNegCondReg; -def : SETCCNegCondImm; - -def : SETCCNegCondReg; -def : SETCCNegCondImm; - -class SETCCBinOpReg: - Pat<(cond rclass:$rA, rclass:$rB), - (binop (cmpOp1 rclass:$rA, rclass:$rB), - (cmpOp2 rclass:$rA, rclass:$rB))>; - -class SETCCBinOpImm: - Pat<(cond rclass:$rA, (immtype immpred:$imm)), - (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)), - (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setle R8C:$rA, R8C:$rB), - (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>; -def : Pat<(setle R8C:$rA, immU8:$imm), - (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setle R16C:$rA, R16C:$rB), - (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; -def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm), - (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setle R32C:$rA, R32C:$rB), - (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; -def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm), - (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setule R8C:$rA, R8C:$rB), - (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>; -def : Pat<(setule R8C:$rA, immU8:$imm), - (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setule R16C:$rA, R16C:$rB), - (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; -def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm), - (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; - -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : SETCCBinOpReg; -def : SETCCBinOpImm; -def : Pat<(setule R32C:$rA, R32C:$rB), - (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; -def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm), - (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// select conditional patterns: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -class SELECTNegCondReg: - Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rTrue, rclass:$rFalse, - (cmpare rclass:$rA, rclass:$rB))>; - -class SELECTNegCondImm: - Pat<(select (inttype (cond rclass:$rA, immpred:$imm)), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rTrue, rclass:$rFalse, - (cmpare rclass:$rA, immpred:$imm))>; - -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; - -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; - -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; -def : SELECTNegCondReg; -def : SELECTNegCondImm; - -class SELECTBinOpReg: - Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rFalse, rclass:$rTrue, - (binop (cmpOp1 rclass:$rA, rclass:$rB), - (cmpOp2 rclass:$rA, rclass:$rB)))>; - -class SELECTBinOpImm: - Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))), - rclass:$rTrue, rclass:$rFalse), - (selinstr rclass:$rFalse, rclass:$rTrue, - (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)), - (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -def : SELECTBinOpReg; -def : SELECTBinOpImm; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -let isCall = 1, - // All calls clobber the non-callee-saved registers: - Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, - R10,R11,R12,R13,R14,R15,R16,R17,R18,R19, - R20,R21,R22,R23,R24,R25,R26,R27,R28,R29, - R30,R31,R32,R33,R34,R35,R36,R37,R38,R39, - R40,R41,R42,R43,R44,R45,R46,R47,R48,R49, - R50,R51,R52,R53,R54,R55,R56,R57,R58,R59, - R60,R61,R62,R63,R64,R65,R66,R67,R68,R69, - R70,R71,R72,R73,R74,R75,R76,R77,R78,R79], - // All of these instructions use $lr (aka $0) - Uses = [R0] in { - // Branch relative and set link: Used if we actually know that the target - // is within [-32768, 32767] bytes of the target - def BRSL: - BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func), - "brsl\t$$lr, $func", - [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>; - - // Branch absolute and set link: Used if we actually know that the target - // is an absolute address - def BRASL: - BranchSetLink<0b011001100, (outs), (ins calltarget:$func), - "brasl\t$$lr, $func", - [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; - - // Branch indirect and set link if external data. These instructions are not - // actually generated, matched by an intrinsic: - def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>; - def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>; - def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>; - def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>; - - // Branch indirect and set link. This is the "X-form" address version of a - // function call - def BISL: - BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>; -} - -// Support calls to external symbols: -def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)), - (BRSL texternalsym:$func)>; - -def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), - (BRASL texternalsym:$func)>; - -// Unconditional branches: -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { - let isBarrier = 1 in { - def BR : - UncondBranch<0b001001100, (outs), (ins brtarget:$dest), - "br\t$dest", - [(br bb:$dest)]>; - - // Unconditional, absolute address branch - def BRA: - UncondBranch<0b001100000, (outs), (ins brtarget:$dest), - "bra\t$dest", - [/* no pattern */]>; - - // Indirect branch - let isIndirectBranch = 1 in { - def BI: - BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; - } - } - - // Conditional branches: - class BRNZInst pattern>: - RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest", - BranchResolv, pattern>; - - class BRNZRegInst: - BRNZInst<(ins rclass:$rCond, brtarget:$dest), - [(brcond rclass:$rCond, bb:$dest)]>; - - class BRNZVecInst: - BRNZInst<(ins VECREG:$rCond, brtarget:$dest), - [(brcond (vectype VECREG:$rCond), bb:$dest)]>; - - multiclass BranchNotZero { - def v4i32 : BRNZVecInst; - def r32 : BRNZRegInst; - } - - defm BRNZ : BranchNotZero; - - class BRZInst pattern>: - RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest", - BranchResolv, pattern>; - - class BRZRegInst: - BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; - - class BRZVecInst: - BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; - - multiclass BranchZero { - def v4i32: BRZVecInst; - def r32: BRZRegInst; - } - - defm BRZ: BranchZero; - - // Note: LLVM doesn't do branch conditional, indirect. Otherwise these would - // be useful: - /* - class BINZInst pattern>: - BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>; - - class BINZRegInst: - BINZInst<(ins rclass:$rA, brtarget:$dest), - [(brcond rclass:$rA, R32C:$dest)]>; - - class BINZVecInst: - BINZInst<(ins VECREG:$rA, R32C:$dest), - [(brcond (vectype VECREG:$rA), R32C:$dest)]>; - - multiclass BranchNotZeroIndirect { - def v4i32: BINZVecInst; - def r32: BINZRegInst; - } - - defm BINZ: BranchNotZeroIndirect; - - class BIZInst pattern>: - BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>; - - class BIZRegInst: - BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>; - - class BIZVecInst: - BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>; - - multiclass BranchZeroIndirect { - def v4i32: BIZVecInst; - def r32: BIZRegInst; - } - - defm BIZ: BranchZeroIndirect; - */ - - class BRHNZInst pattern>: - RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv, - pattern>; - - class BRHNZRegInst: - BRHNZInst<(ins rclass:$rCond, brtarget:$dest), - [(brcond rclass:$rCond, bb:$dest)]>; - - class BRHNZVecInst: - BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>; - - multiclass BranchNotZeroHalfword { - def v8i16: BRHNZVecInst; - def r16: BRHNZRegInst; - } - - defm BRHNZ: BranchNotZeroHalfword; - - class BRHZInst pattern>: - RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv, - pattern>; - - class BRHZRegInst: - BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; - - class BRHZVecInst: - BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; - - multiclass BranchZeroHalfword { - def v8i16: BRHZVecInst; - def r16: BRHZRegInst; - } - - defm BRHZ: BranchZeroHalfword; -} - -//===----------------------------------------------------------------------===// -// setcc and brcond patterns: -//===----------------------------------------------------------------------===// - -def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), - (BRHZr16 R16C:$rA, bb:$dest)>; -def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), - (BRHNZr16 R16C:$rA, bb:$dest)>; - -def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), - (BRZr32 R32C:$rA, bb:$dest)>; -def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), - (BRNZr32 R32C:$rA, bb:$dest)>; - -multiclass BranchCondEQ -{ - def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - - def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; -} - -defm BRCONDeq : BranchCondEQ; -defm BRCONDne : BranchCondEQ; - -multiclass BranchCondLGT -{ - def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - - def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; -} - -defm BRCONDugt : BranchCondLGT; -defm BRCONDule : BranchCondLGT; - -multiclass BranchCondLGTEQ -{ - def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), - (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), - bb:$dest)>; - - def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB), - (CEQHr16 R16C:$rA, R16:$rB)), - bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), - (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), - bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB), - (CEQr32 R32C:$rA, R32C:$rB)), - bb:$dest)>; -} - -defm BRCONDuge : BranchCondLGTEQ; -defm BRCONDult : BranchCondLGTEQ; - -multiclass BranchCondGT -{ - def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; - - def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; -} - -defm BRCONDgt : BranchCondGT; -defm BRCONDle : BranchCondGT; - -multiclass BranchCondGTEQ -{ - def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), - (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), - (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), - bb:$dest)>; - - def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), - (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB), - (CEQHr16 R16C:$rA, R16:$rB)), - bb:$dest)>; - - def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), - (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), - (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), - bb:$dest)>; - - def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), - (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB), - (CEQr32 R32C:$rA, R32C:$rB)), - bb:$dest)>; -} - -defm BRCONDge : BranchCondGTEQ; -defm BRCONDlt : BranchCondGTEQ; - -let isTerminator = 1, isBarrier = 1 in { - let isReturn = 1 in { - def RET: - RETForm<"bi\t$$lr", [(retflag)]>; - } -} - -//===----------------------------------------------------------------------===// -// Single precision floating point instructions -//===----------------------------------------------------------------------===// - -class FAInst pattern>: - RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", - SPrecFP, pattern>; - -class FAVecInst: - FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; - -multiclass SFPAdd -{ - def v4f32: FAVecInst; - def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; -} - -defm FA : SFPAdd; - -class FSInst pattern>: - RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB", - SPrecFP, pattern>; - -class FSVecInst: - FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (vectype VECREG:$rT), - (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; - -multiclass SFPSub -{ - def v4f32: FSVecInst; - def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; -} - -defm FS : SFPSub; - -class FMInst pattern>: - RRForm<0b01100011010, OOL, IOL, - "fm\t$rT, $rA, $rB", SPrecFP, - pattern>; - -class FMVecInst: - FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - [(set (type VECREG:$rT), - (fmul (type VECREG:$rA), (type VECREG:$rB)))]>; - -multiclass SFPMul -{ - def v4f32: FMVecInst; - def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; -} - -defm FM : SFPMul; - -// Floating point multiply and add -// e.g. d = c + (a * b) -def FMAv4f32: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fadd (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; - -def FMAf32: - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -// FP multiply and subtract -// Subtracts value in rC from product -// res = a * b - c -def FMSv4f32 : - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (v4f32 VECREG:$rC)))]>; - -def FMSf32 : - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, - (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; - -// Floating Negative Mulitply and Subtract -// Subtracts product from value in rC -// res = fneg(fms a b c) -// = - (a * b - c) -// = c - a * b -// NOTE: subtraction order -// fsub a b = a - b -// fs a b = b - a? -def FNMSf32 : - RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -def FNMSv4f32 : - RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB))))]>; - - - - -// Floating point reciprocal estimate - -class FRESTInst: - RRForm_1<0b00110111000, OOL, IOL, - "frest\t$rT, $rA", SPrecFP, - [/* no pattern */]>; - -def FRESTv4f32 : - FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; - -def FRESTf32 : - FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; - -// Floating point interpolate (used in conjunction with reciprocal estimate) -def FIv4f32 : - RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fi\t$rT, $rA, $rB", SPrecFP, - [/* no pattern */]>; - -def FIf32 : - RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - "fi\t$rT, $rA, $rB", SPrecFP, - [/* no pattern */]>; - -//-------------------------------------------------------------------------- -// Basic single precision floating point comparisons: -// -// Note: There is no support on SPU for single precision NaN. Consequently, -// ordered and unordered comparisons are the same. -//-------------------------------------------------------------------------- - -def FCEQf32 : - RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fceq\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>; - -def : Pat<(setoeq R32FP:$rA, R32FP:$rB), - (FCEQf32 R32FP:$rA, R32FP:$rB)>; - -def FCMEQf32 : - RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fcmeq\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; - -def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)), - (FCMEQf32 R32FP:$rA, R32FP:$rB)>; - -def FCGTf32 : - RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fcgt\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>; - -def : Pat<(setogt R32FP:$rA, R32FP:$rB), - (FCGTf32 R32FP:$rA, R32FP:$rB)>; - -def FCMGTf32 : - RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), - "fcmgt\t$rT, $rA, $rB", SPrecFP, - [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; - -def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)), - (FCMGTf32 R32FP:$rA, R32FP:$rB)>; - -//-------------------------------------------------------------------------- -// Single precision floating point comparisons and SETCC equivalents: -//-------------------------------------------------------------------------- - -def : SETCCNegCondReg; -def : SETCCNegCondReg; - -def : SETCCBinOpReg; -def : SETCCBinOpReg; - -def : SETCCBinOpReg; -def : SETCCBinOpReg; - -def : Pat<(setule R32FP:$rA, R32FP:$rB), - (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; -def : Pat<(setole R32FP:$rA, R32FP:$rB), - (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; - -// FP Status and Control Register Write -// Why isn't rT a don't care in the ISA? -// Should we create a special RRForm_3 for this guy and zero out the rT? -def FSCRWf32 : - RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA), - "fscrwr\t$rA", SPrecFP, - [/* This instruction requires an intrinsic. Note: rT is unused. */]>; - -// FP Status and Control Register Read -def FSCRRf32 : - RRForm_2<0b01011101110, (outs R32FP:$rT), (ins), - "fscrrd\t$rT", SPrecFP, - [/* This instruction requires an intrinsic */]>; - -// llvm instruction space -// How do these map onto cell instructions? -// fdiv rA rB -// frest rC rB # c = 1/b (both lines) -// fi rC rB rC -// fm rD rA rC # d = a * 1/b -// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world -// fma rB rB rC rD # b = b * c + d -// = -(d *b -a) * c + d -// = a * c - c ( a *b *c - a) - -// fcopysign (???) - -// Library calls: -// These llvm instructions will actually map to library calls. -// All that's needed, then, is to check that the appropriate library is -// imported and do a brsl to the proper function name. -// frem # fmod(x, y): x - (x/y) * y -// (Note: fmod(double, double), fmodf(float,float) -// fsqrt? -// fsin? -// fcos? -// Unimplemented SPU instruction space -// floating reciprocal absolute square root estimate (frsqest) - -// The following are probably just intrinsics -// status and control register write -// status and control register read - -//-------------------------------------- -// Floating Point Conversions -// Signed conversions: -def CSiFv4f32: - CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "csflt\t$rT, $rA, 0", SPrecFP, - [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>; - -// Convert signed integer to floating point -def CSiFf32 : - CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA), - "csflt\t$rT, $rA, 0", SPrecFP, - [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>; - -// Convert unsigned into to float -def CUiFv4f32 : - CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "cuflt\t$rT, $rA, 0", SPrecFP, - [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>; - -def CUiFf32 : - CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA), - "cuflt\t$rT, $rA, 0", SPrecFP, - [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>; - -// Convert float to unsigned int -// Assume that scale = 0 - -def CFUiv4f32 : - CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "cfltu\t$rT, $rA, 0", SPrecFP, - [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>; - -def CFUif32 : - CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA), - "cfltu\t$rT, $rA, 0", SPrecFP, - [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>; - -// Convert float to signed int -// Assume that scale = 0 - -def CFSiv4f32 : - CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), - "cflts\t$rT, $rA, 0", SPrecFP, - [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>; - -def CFSif32 : - CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA), - "cflts\t$rT, $rA, 0", SPrecFP, - [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>; - -//===----------------------------------------------------------------------==// -// Single<->Double precision conversions -//===----------------------------------------------------------------------==// - -// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a -// v4f32, output is v2f64--which goes in the name?) - -// Floating point extend single to double -// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it -// operates on two double-word slots (i.e. 1st and 3rd fp numbers -// are ignored). -def FESDvec : - RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA), - "fesd\t$rT, $rA", SPrecFP, - [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>; - -def FESDf32 : - RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA), - "fesd\t$rT, $rA", SPrecFP, - [(set R64FP:$rT, (fextend R32FP:$rA))]>; - -// Floating point round double to single -//def FRDSvec : -// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA), -// "frds\t$rT, $rA,", SPrecFP, -// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>; - -def FRDSf64 : - RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA), - "frds\t$rT, $rA", SPrecFP, - [(set R32FP:$rT, (fround R64FP:$rA))]>; - -//ToDo include anyextend? - -//===----------------------------------------------------------------------==// -// Double precision floating point instructions -//===----------------------------------------------------------------------==// -def FAf64 : - RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - "dfa\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>; - -def FAv2f64 : - RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfa\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; - -def FSf64 : - RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - "dfs\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>; - -def FSv2f64 : - RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfs\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; - -def FMf64 : - RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), - "dfm\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>; - -def FMv2f64: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "dfm\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; - -def FMAf64: - RRForm<0b00111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfma\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FMAv2f64: - RRForm<0b00111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfma\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fadd (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FMSf64 : - RRForm<0b10111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfms\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FMSv2f64 : - RRForm<0b10111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfms\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)), - (v2f64 VECREG:$rC)))]>; - -// DFNMS: - (a * b - c) -// - (a * b) + c => c - (a * b) - -class DFNMSInst pattern>: - RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB", - DPrecFP, pattern>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -class DFNMSVecInst pattern>: - DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - pattern>; - -class DFNMSRegInst pattern>: - DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - pattern>; - -multiclass DFMultiplySubtract -{ - def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT), - (fsub (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB))))]>; - - def f64 : DFNMSRegInst<[(set R64FP:$rT, - (fsub R64FP:$rC, - (fmul R64FP:$rA, R64FP:$rB)))]>; -} - -defm DFNMS : DFMultiplySubtract; - -// - (a * b + c) -// - (a * b) - c -def FNMAf64 : - RRForm<0b11111010110, (outs R64FP:$rT), - (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), - "dfnma\t$rT, $rA, $rB", DPrecFP, - [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -def FNMAv2f64 : - RRForm<0b11111010110, (outs VECREG:$rT), - (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "dfnma\t$rT, $rA, $rB", DPrecFP, - [(set (v2f64 VECREG:$rT), - (fneg (fadd (v2f64 VECREG:$rC), - (fmul (v2f64 VECREG:$rA), - (v2f64 VECREG:$rB)))))]>, - RegConstraint<"$rC = $rT">, - NoEncode<"$rC">; - -//===----------------------------------------------------------------------==// -// Floating point negation and absolute value -//===----------------------------------------------------------------------==// - -def : Pat<(fneg (v4f32 VECREG:$rA)), - (XORfnegvec (v4f32 VECREG:$rA), - (v4f32 (ILHUv4i32 0x8000)))>; - -def : Pat<(fneg R32FP:$rA), - (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>; - -// Floating point absolute value -// Note: f64 fabs is custom-selected. - -def : Pat<(fabs R32FP:$rA), - (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>; - -def : Pat<(fabs (v4f32 VECREG:$rA)), - (ANDfabsvec (v4f32 VECREG:$rA), - (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>; - -//===----------------------------------------------------------------------===// -// Hint for branch instructions: -//===----------------------------------------------------------------------===// -def HBRA : - HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">; - -//===----------------------------------------------------------------------===// -// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong -// in the odd pipeline) -//===----------------------------------------------------------------------===// - -def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> { - let Pattern = []; - - let Inst{0-10} = 0b10000000010; - let Inst{11-17} = 0; - let Inst{18-24} = 0; - let Inst{25-31} = 0; -} - -def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> { - let Pattern = []; - - let Inst{0-10} = 0b10000000000; - let Inst{11-17} = 0; - let Inst{18-24} = 0; - let Inst{25-31} = 0; -} - -//===----------------------------------------------------------------------===// -// Bit conversions (type conversions between vector/packed types) -// NOTE: Promotions are handled using the XS* instructions. -//===----------------------------------------------------------------------===// -def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>; - -def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>; - -def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>; - -def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>; -def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>; - -def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>; - -def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>; - -def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; -def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))), - (COPY_TO_REGCLASS VECREG:$src, GPRC)>; - -def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))), - (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))), - (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))), - (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))), - (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))), - (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; -def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))), - (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; - -def : Pat<(i32 (bitconvert R32FP:$rA)), - (COPY_TO_REGCLASS R32FP:$rA, R32C)>; - -def : Pat<(f32 (bitconvert R32C:$rA)), - (COPY_TO_REGCLASS R32C:$rA, R32FP)>; - -def : Pat<(i64 (bitconvert R64FP:$rA)), - (COPY_TO_REGCLASS R64FP:$rA, R64C)>; - -def : Pat<(f64 (bitconvert R64C:$rA)), - (COPY_TO_REGCLASS R64C:$rA, R64FP)>; - - -//===----------------------------------------------------------------------===// -// Instruction patterns: -//===----------------------------------------------------------------------===// - -// General 32-bit constants: -def : Pat<(i32 imm:$imm), - (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>; - -// Single precision float constants: -def : Pat<(f32 fpimm:$imm), - (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>; - -// General constant 32-bit vectors -def : Pat<(v4i32 v4i32Imm:$imm), - (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))), - (LO16_vec v4i32Imm:$imm))>; - -// 8-bit constants -def : Pat<(i8 imm:$imm), - (ILHr8 imm:$imm)>; - -//===----------------------------------------------------------------------===// -// Zero/Any/Sign extensions -//===----------------------------------------------------------------------===// - -// sext 8->32: Sign extend bytes to words -def : Pat<(sext_inreg R32C:$rSrc, i8), - (XSHWr32 (XSBHr32 R32C:$rSrc))>; - -def : Pat<(i32 (sext R8C:$rSrc)), - (XSHWr16 (XSBHr8 R8C:$rSrc))>; - -// sext 8->64: Sign extend bytes to double word -def : Pat<(sext_inreg R64C:$rSrc, i8), - (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>; - -def : Pat<(i64 (sext R8C:$rSrc)), - (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>; - -// zext 8->16: Zero extend bytes to halfwords -def : Pat<(i16 (zext R8C:$rSrc)), - (ANDHIi8i16 R8C:$rSrc, 0xff)>; - -// zext 8->32: Zero extend bytes to words -def : Pat<(i32 (zext R8C:$rSrc)), - (ANDIi8i32 R8C:$rSrc, 0xff)>; - -// zext 8->64: Zero extend bytes to double words -def : Pat<(i64 (zext R8C:$rSrc)), - (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32 - (COPY_TO_REGCLASS - (ANDIi8i32 R8C:$rSrc,0xff), VECREG), - 0x4), - (ILv4i32 0x0), - (FSMBIv4i32 0x0f0f)), R64C)>; - -// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits -def : Pat<(i16 (anyext R8C:$rSrc)), - (ORHIi8i16 R8C:$rSrc, 0)>; - -// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits -def : Pat<(i32 (anyext R8C:$rSrc)), - (COPY_TO_REGCLASS R8C:$rSrc, R32C)>; - -// sext 16->64: Sign extend halfword to double word -def : Pat<(sext_inreg R64C:$rSrc, i16), - (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>; - -def : Pat<(sext R16C:$rSrc), - (XSWDr64 (XSHWr16 R16C:$rSrc))>; - -// zext 16->32: Zero extend halfwords to words -def : Pat<(i32 (zext R16C:$rSrc)), - (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>; - -def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))), - (ANDIi16i32 R16C:$rSrc, 0xf)>; - -def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))), - (ANDIi16i32 R16C:$rSrc, 0xff)>; - -def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))), - (ANDIi16i32 R16C:$rSrc, 0xfff)>; - -// anyext 16->32: Extend 16->32 bits, irrespective of sign -def : Pat<(i32 (anyext R16C:$rSrc)), - (COPY_TO_REGCLASS R16C:$rSrc, R32C)>; - -//===----------------------------------------------------------------------===// -// Truncates: -// These truncates are for the SPU's supported types (i8, i16, i32). i64 and -// above are custom lowered. -//===----------------------------------------------------------------------===// - -def : Pat<(i8 (trunc GPRC:$src)), - (COPY_TO_REGCLASS - (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>; - -def : Pat<(i8 (trunc R64C:$src)), - (COPY_TO_REGCLASS - (SHUFBv2i64_m32 - (COPY_TO_REGCLASS R64C:$src, VECREG), - (COPY_TO_REGCLASS R64C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>; - -def : Pat<(i8 (trunc R32C:$src)), - (COPY_TO_REGCLASS - (SHUFBv4i32_m32 - (COPY_TO_REGCLASS R32C:$src, VECREG), - (COPY_TO_REGCLASS R32C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; - -def : Pat<(i8 (trunc R16C:$src)), - (COPY_TO_REGCLASS - (SHUFBv4i32_m32 - (COPY_TO_REGCLASS R16C:$src, VECREG), - (COPY_TO_REGCLASS R16C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; - -def : Pat<(i16 (trunc GPRC:$src)), - (COPY_TO_REGCLASS - (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>; - -def : Pat<(i16 (trunc R64C:$src)), - (COPY_TO_REGCLASS - (SHUFBv2i64_m32 - (COPY_TO_REGCLASS R64C:$src, VECREG), - (COPY_TO_REGCLASS R64C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>; - -def : Pat<(i16 (trunc R32C:$src)), - (COPY_TO_REGCLASS - (SHUFBv4i32_m32 - (COPY_TO_REGCLASS R32C:$src, VECREG), - (COPY_TO_REGCLASS R32C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>; - -def : Pat<(i32 (trunc GPRC:$src)), - (COPY_TO_REGCLASS - (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>; - -def : Pat<(i32 (trunc R64C:$src)), - (COPY_TO_REGCLASS - (SHUFBv2i64_m32 - (COPY_TO_REGCLASS R64C:$src, VECREG), - (COPY_TO_REGCLASS R64C:$src, VECREG), - (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>; - -//===----------------------------------------------------------------------===// -// Address generation: SPU, like PPC, has to split addresses into high and -// low parts in order to load them into a register. -//===----------------------------------------------------------------------===// - -def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; -def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>; -def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; -def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; - -def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0), - (SPUlo tglobaladdr:$in, 0)), - (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; - -def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0), - (SPUlo texternalsym:$in, 0)), - (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; - -def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0), - (SPUlo tjumptable:$in, 0)), - (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; - -def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), - (SPUlo tconstpool:$in, 0)), - (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; - -def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), - (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; - -def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)), - (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; - -def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)), - (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; - -def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), - (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; - -// Intrinsics: -include "CellSDKIntrinsics.td" -// Various math operator instruction sequences -include "SPUMathInstr.td" -// 64-bit "instructions"/support -include "SPU64InstrInfo.td" -// 128-bit "instructions"/support -include "SPU128InstrInfo.td" diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/CellSPU/SPUMachineFunction.cpp deleted file mode 100644 index 3e948d071d..0000000000 --- a/lib/Target/CellSPU/SPUMachineFunction.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "SPUMachineFunction.h" - -using namespace llvm; - -void SPUFunctionInfo::anchor() { } diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h deleted file mode 100644 index 399684bb08..0000000000 --- a/lib/Target/CellSPU/SPUMachineFunction.h +++ /dev/null @@ -1,50 +0,0 @@ -//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_MACHINE_FUNCTION_INFO_H -#define SPU_MACHINE_FUNCTION_INFO_H - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// SPUFunctionInfo - Cell SPU target-specific information for each -/// MachineFunction -class SPUFunctionInfo : public MachineFunctionInfo { - virtual void anchor(); - - /// UsesLR - Indicates whether LR is used in the current function. - /// - bool UsesLR; - - // VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; - -public: - SPUFunctionInfo(MachineFunction& MF) - : UsesLR(false), - VarArgsFrameIndex(0) - {} - - void setUsesLR(bool U) { UsesLR = U; } - bool usesLR() { return UsesLR; } - - int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } - void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } -}; - -} // end of namespace llvm - - -#endif - diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td deleted file mode 100644 index 9a5c3976af..0000000000 --- a/lib/Target/CellSPU/SPUMathInstr.td +++ /dev/null @@ -1,97 +0,0 @@ -//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===// -// -// Cell SPU math operations -// -// This target description file contains instruction sequences for various -// math operations, such as vector multiplies, i32 multiply, etc., for the -// SPU's i32, i16 i8 and corresponding vector types. -// -// Any resemblance to libsimdmath or the Cell SDK simdmath library is -// purely and completely coincidental. -//===----------------------------------------------------------------------===// - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v16i8 multiply instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), - (ORv4i32 - (ANDv4i32 - (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), - (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), - (ROTMAHIv8i16 VECREG:$rB, 8)), 8), - (FSMBIv8i16 0x2222)), - (ILAv4i32 0x0000ffff)), - (SHLIv4i32 - (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), - (ROTMAIv4i32_i32 VECREG:$rB, 16)), - (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), - (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), - (FSMBIv8i16 0x2222)), 16))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v8i16 multiply instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), - (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), - (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), - (FSMBIv8i16 0xcccc))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v4i32, i32 multiply instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -def MPYv4i32: - Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), - (Av4i32 - (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)), - (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))), - (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>; - -def MPYi32: - Pat<(mul R32C:$rA, R32C:$rB), - (Ar32 - (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), - (MPYHr32 R32C:$rB, R32C:$rA)), - (MPYUr32 R32C:$rA, R32C:$rB))>; - -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// f32, v4f32 divide instruction sequence: -//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ - -// Reciprocal estimate and interpolation -def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; -// Division estimate -def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; -// Newton-Raphson iteration -def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), - Interpf32.Fragment, - DivEstf32.Fragment)>; -// Epsilon addition -def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; - -def : Pat<(fdiv R32FP:$rA, R32FP:$rB), - (SELBf32_cond NRaphf32.Fragment, - Epsilonf32.Fragment, - (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; - -// Reciprocal estimate and interpolation -def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; -// Division estimate -def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; -// Newton-Raphson iteration -def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, - (v4f32 VECREG:$rB), - (v4f32 VECREG:$rA)), - Interpv4f32.Fragment, - DivEstv4f32.Fragment)>; -// Epsilon addition -def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; - -def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (SELBv4f32_cond NRaphv4f32.Fragment, - Epsilonv4f32.Fragment, - (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), - Epsilonv4f32.Fragment, - (v4f32 VECREG:$rA)), -1))>; diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td deleted file mode 100644 index a47e9ef016..0000000000 --- a/lib/Target/CellSPU/SPUNodes.td +++ /dev/null @@ -1,159 +0,0 @@ -//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Type profiles and SelectionDAG nodes used by CellSPU -// -//===----------------------------------------------------------------------===// - -// Type profile for a call sequence -def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; - -// SPU_GenControl: Type profile for generating control words for insertions -def SPU_GenControl : SDTypeProfile<1, 1, []>; -def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; - -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; -//===----------------------------------------------------------------------===// -// Operand constraints: -//===----------------------------------------------------------------------===// - -def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -// Operand type constraints for vector shuffle/permute operations -def SDT_SPUshuffle : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> -]>; - -// Vector binary operator type constraints (needs a further constraint to -// ensure that operand 0 is a vector...): - -def SPUVecBinop: SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> -]>; - -// Trinary operators, e.g., addx, carry generate -def SPUIntTrinaryOp : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> -]>; - -// SELECT_MASK type constraints: There are several variations for the various -// vector types (this avoids having to bit_convert all over the place.) -def SPUselmask_type: SDTypeProfile<1, 1, [ - SDTCisInt<1> -]>; - -// SELB type constraints: -def SPUselb_type: SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>; - -// SPU Vector shift pseudo-instruction type constraints -def SPUvecshift_type: SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisInt<2>]>; - -// "marker" type for i64 operators that need a shuffle mask -// (i.e., uses cg or bg or another instruction that needs to -// use shufb to get things in the right place.) -// Op0: The result -// Op1, 2: LHS, RHS -// Op3: Carry-generate shuffle mask - -def SPUmarker_type : SDTypeProfile<1, 3, [ - SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; - -//===----------------------------------------------------------------------===// -// Synthetic/pseudo-instructions -//===----------------------------------------------------------------------===// - -// SPU CNTB: -def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; - -// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see -// SPUISelLowering.h): -def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; - -// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): -def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>; -def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>; -def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>; - -def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; -def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; - -// Vector rotate left, bits shifted out of the left are rotated in on the right -def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", - SPUvecshift_type, []>; - -// Vector rotate left by bytes, but the count is given in bits and the SPU -// internally converts it to bytes (saves an instruction to mask off lower -// three bits) -def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS", - SPUvecshift_type>; - -// Shift entire quad left by bytes/bits. Zeros are shifted in on the right -// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128 -def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>; -def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>; -def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>; - -// SPU form select mask for bytes, immediate -def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>; - -// SPU select bits instruction -def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; - -def SDTprefslot2vec: SDTypeProfile<1, 1, []>; -def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; - -def SPU_vec_demote : SDTypeProfile<1, 1, []>; -def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>; - -// Address high and low components, used for [r+r] type addressing -def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>; -def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; - -// PC-relative address -def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; - -// A-Form local store addresses -def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; - -// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses -def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; - -// i64 markers: supplies extra operands used to generate the i64 operator -// instruction sequences -def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>; -def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>; -def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>; - -//===----------------------------------------------------------------------===// -// Constraints: (taken from PPCInstrInfo.td) -//===----------------------------------------------------------------------===// - -class RegConstraint { - string Constraints = C; -} - -class NoEncode { - string DisableEncoding = E; -} - -//===----------------------------------------------------------------------===// -// Return (flag isn't quite what it means: the operations are flagged so that -// instruction scheduling doesn't disassociate them.) -//===----------------------------------------------------------------------===// - -def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp deleted file mode 100644 index 7c58041e3b..0000000000 --- a/lib/Target/CellSPU/SPUNopFiller.cpp +++ /dev/null @@ -1,153 +0,0 @@ -//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The final pass just before assembly printing. This pass is the last -// checkpoint where nops and lnops are added to the instruction stream to -// satisfy the dual issue requirements. The actual dual issue scheduling is -// done (TODO: nowhere, currently) -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - struct SPUNopFiller : public MachineFunctionPass { - - TargetMachine &TM; - const TargetInstrInfo *TII; - const InstrItineraryData *IID; - bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd - - static char ID; - SPUNopFiller(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()), - IID(tm.getInstrItineraryData()) - { - DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; ); - } - - virtual const char *getPassName() const { - return "SPU nop/lnop Filler"; - } - - void runOnMachineBasicBlock(MachineBasicBlock &MBB); - - bool runOnMachineFunction(MachineFunction &F) { - isEvenPlace = true; //all functions get an .align 3 directive at start - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - runOnMachineBasicBlock(*FI); - return true; //never-ever do any more modifications, just print it! - } - - typedef enum { none = 0, // no more instructions in this function / BB - pseudo = 1, // this does not get executed - even = 2, - odd = 3 } SPUOpPlace; - SPUOpPlace getOpPlacement( MachineInstr &instr ); - - }; - char SPUNopFiller::ID = 0; - -} - -// Fill a BasicBlock to alignment. -// In the assebly we align the functions to 'even' adresses, but -// basic blocks have an implicit alignmnet. We hereby define -// basic blocks to have the same, even, alignment. -void SPUNopFiller:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) -{ - assert( isEvenPlace && "basic block start from odd address"); - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - { - SPUOpPlace this_optype, next_optype; - MachineBasicBlock::iterator J = I; - J++; - - this_optype = getOpPlacement( *I ); - next_optype = none; - while (J!=MBB.end()){ - next_optype = getOpPlacement( *J ); - ++J; - if (next_optype != pseudo ) - break; - } - - // padd: odd(wrong), even(wrong), ... - // to: nop(corr), odd(corr), even(corr)... - if( isEvenPlace && this_optype == odd && next_optype == even ) { - DEBUG( dbgs() <<"Adding NOP before: "; ); - DEBUG( I->dump(); ); - BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP)); - isEvenPlace=false; - } - - // padd: even(wrong), odd(wrong), ... - // to: lnop(corr), even(corr), odd(corr)... - else if ( !isEvenPlace && this_optype == even && next_optype == odd){ - DEBUG( dbgs() <<"Adding LNOP before: "; ); - DEBUG( I->dump(); ); - BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP)); - isEvenPlace=true; - } - - // now go to next mem slot - if( this_optype != pseudo ) - isEvenPlace = !isEvenPlace; - - } - - // padd basicblock end - if( !isEvenPlace ){ - MachineBasicBlock::iterator J = MBB.end(); - J--; - if (getOpPlacement( *J ) == odd) { - DEBUG( dbgs() <<"Padding basic block with NOP\n"; ); - BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP)); - } - else { - J++; - DEBUG( dbgs() <<"Padding basic block with LNOP\n"; ); - BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP)); - } - isEvenPlace=true; - } -} - -FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) { - return new SPUNopFiller(tm); -} - -// Figure out if 'instr' is executed in the even or odd pipeline -SPUNopFiller::SPUOpPlace -SPUNopFiller::getOpPlacement( MachineInstr &instr ) { - int sc = instr.getDesc().getSchedClass(); - const InstrStage *stage = IID->beginStage(sc); - unsigned FUs = stage->getUnits(); - SPUOpPlace retval; - - switch( FUs ) { - case 0: retval = pseudo; break; - case 1: retval = odd; break; - case 2: retval = even; break; - default: retval= pseudo; - assert( false && "got unknown FuncUnit\n"); - break; - }; - return retval; -} diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td deleted file mode 100644 index 6f8deef553..0000000000 --- a/lib/Target/CellSPU/SPUOperands.td +++ /dev/null @@ -1,664 +0,0 @@ -//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cell SPU Instruction Operands: -//===----------------------------------------------------------------------===// - -// TO_IMM32 - Convert an i8/i16 to i32. -def TO_IMM32 : SDNodeXFormgetZExtValue()); -}]>; - -// TO_IMM16 - Convert an i8/i32 to i16. -def TO_IMM16 : SDNodeXFormgetTargetConstant(N->getZExtValue(), MVT::i16); -}]>; - - -def LO16 : SDNodeXFormgetZExtValue(); - // Transformation function: get the low 16 bits. - return getI32Imm(val & 0xffff); -}]>; - -def LO16_vec : SDNodeXFormgetOpcode() == ISD::BUILD_VECTOR - && "LO16_vec got something other than a BUILD_VECTOR"); - - // Get first constant operand... - for (unsigned i = 0, e = N->getNumOperands(); - OpVal.getNode() == 0 && i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) - OpVal = N->getOperand(i); - } - - assert(OpVal.getNode() != 0 && "LO16_vec did not locate a node"); - ConstantSDNode *CN = cast(OpVal); - return getI32Imm((unsigned)CN->getZExtValue() & 0xffff); -}]>; - -// Transform an immediate, returning the high 16 bits shifted down: -def HI16 : SDNodeXFormgetZExtValue() >> 16); -}]>; - -// Transformation function: shift the high 16 bit immediate from a build_vector -// node into the low 16 bits, and return a 16-bit constant. -def HI16_vec : SDNodeXFormgetOpcode() == ISD::BUILD_VECTOR - && "HI16_vec got something other than a BUILD_VECTOR"); - - // Get first constant operand... - for (unsigned i = 0, e = N->getNumOperands(); - OpVal.getNode() == 0 && i != e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) - OpVal = N->getOperand(i); - } - - assert(OpVal.getNode() != 0 && "HI16_vec did not locate a node"); - ConstantSDNode *CN = cast(OpVal); - return getI32Imm((unsigned)CN->getZExtValue() >> 16); -}]>; - -// simm7 predicate - True if the immediate fits in an 7-bit signed -// field. -def simm7: PatLeaf<(imm), [{ - int sextVal = int(N->getSExtValue()); - return (sextVal >= -64 && sextVal <= 63); -}]>; - -// uimm7 predicate - True if the immediate fits in an 7-bit unsigned -// field. -def uimm7: PatLeaf<(imm), [{ - return (N->getZExtValue() <= 0x7f); -}]>; - -// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended -// field. -def immSExt8 : PatLeaf<(imm), [{ - int Value = int(N->getSExtValue()); - return (Value >= -(1 << 8) && Value <= (1 << 8) - 1); -}]>; - -// immU8: immediate, unsigned 8-bit quantity -def immU8 : PatLeaf<(imm), [{ - return (N->getZExtValue() <= 0xff); -}]>; - -// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i32ImmSExt10 : PatLeaf<(imm), [{ - return isI32IntS10Immediate(N); -}]>; - -// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned -// field. Used by RI10Form instructions like 'ldq'. -def i32ImmUns10 : PatLeaf<(imm), [{ - return isI32IntU10Immediate(N); -}]>; - -// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i16ImmSExt10 : PatLeaf<(imm), [{ - return isI16IntS10Immediate(N); -}]>; - -// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned -// value. Used by RI10Form instructions. -def i16ImmUns10 : PatLeaf<(imm), [{ - return isI16IntU10Immediate(N); -}]>; - -def immSExt16 : PatLeaf<(imm), [{ - // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended - // field. - short Ignored; - return isIntS16Immediate(N, Ignored); -}]>; - -def immZExt16 : PatLeaf<(imm), [{ - // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended - // field. - return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); -}], LO16>; - -def immU16 : PatLeaf<(imm), [{ - // immU16 predicate- True if the immediate fits into a 16-bit unsigned field. - return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff); -}]>; - -def imm18 : PatLeaf<(imm), [{ - // imm18 predicate: True if the immediate fits into an 18-bit unsigned field. - int Value = (int) N->getZExtValue(); - return isUInt<18>(Value); -}]>; - -def lo16 : PatLeaf<(imm), [{ - // lo16 predicate - returns true if the immediate has all zeros in the - // low order bits and is a 32-bit constant: - if (N->getValueType(0) == MVT::i32) { - uint32_t val = N->getZExtValue(); - return ((val & 0x0000ffff) == val); - } - - return false; -}], LO16>; - -def hi16 : PatLeaf<(imm), [{ - // hi16 predicate - returns true if the immediate has all zeros in the - // low order bits and is a 32-bit constant: - if (N->getValueType(0) == MVT::i32) { - uint32_t val = uint32_t(N->getZExtValue()); - return ((val & 0xffff0000) == val); - } else if (N->getValueType(0) == MVT::i64) { - uint64_t val = N->getZExtValue(); - return ((val & 0xffff0000ULL) == val); - } - - return false; -}], HI16>; - -def bitshift : PatLeaf<(imm), [{ - // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII - // (shift left quadword by bits immediate) - int64_t Val = N->getZExtValue(); - return (Val > 0 && Val <= 7); -}]>; - -//===----------------------------------------------------------------------===// -// Floating point operands: -//===----------------------------------------------------------------------===// - -// Transform a float, returning the high 16 bits shifted down, as if -// the float was really an unsigned integer: -def HI16_f32 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm(FloatToBits(fval) >> 16); -}]>; - -// Transformation function on floats: get the low 16 bits as if the float was -// an unsigned integer. -def LO16_f32 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm(FloatToBits(fval) & 0xffff); -}]>; - -def FPimm_sext16 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16)); -}]>; - -def FPimm_u18 : SDNodeXFormgetValueAPF().convertToFloat(); - return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1)); -}]>; - -def fpimmSExt16 : PatLeaf<(fpimm), [{ - short Ignored; - return isFPS16Immediate(N, Ignored); -}], FPimm_sext16>; - -// Does the SFP constant only have upp 16 bits set? -def hi16_f32 : PatLeaf<(fpimm), [{ - if (N->getValueType(0) == MVT::f32) { - uint32_t val = FloatToBits(N->getValueAPF().convertToFloat()); - return ((val & 0xffff0000) == val); - } - - return false; -}], HI16_f32>; - -// Does the SFP constant fit into 18 bits? -def fpimm18 : PatLeaf<(fpimm), [{ - if (N->getValueType(0) == MVT::f32) { - uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat()); - return isUInt<18>(Value); - } - - return false; -}], FPimm_u18>; - -//===----------------------------------------------------------------------===// -// 64-bit operands (TODO): -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// build_vector operands: -//===----------------------------------------------------------------------===// - -// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended -// immediate constant load for v16i8 vectors. N.B.: The incoming constant has -// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). -def v16i8SExt8Imm_xform: SDNodeXForm; - -// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant -// load, works in conjunction with its transform function. N.B.: This relies the -// incoming constant being a 16-bit quantity, where the upper and lower bytes -// are EXACTLY the same (e.g., 0x2a2a) -def v16i8SExt8Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0; -}], v16i8SExt8Imm_xform>; - -// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit -// immediate constant load for v16i8 vectors. N.B.: The incoming constant has -// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). -def v16i8U8Imm_xform: SDNodeXForm; - -// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant -// load, works in conjunction with its transform function. N.B.: This relies the -// incoming constant being a 16-bit quantity, where the upper and lower bytes -// are EXACTLY the same (e.g., 0x2a2a) -def v16i8U8Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0; -}], v16i8U8Imm_xform>; - -// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended -// immediate constant load for v8i16 vectors. -def v8i16SExt8Imm_xform: SDNodeXForm; - -// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v8i16SExt8Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16SExt8Imm_xform>; - -// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v8i16 vectors. -def v8i16SExt10Imm_xform: SDNodeXForm; - -// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v8i16SExt10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16SExt10Imm_xform>; - -// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned -// immediate constant load for v8i16 vectors. -def v8i16Uns10Imm_xform: SDNodeXForm; - -// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant -// load, works in conjunction with its transform function. -def v8i16Uns10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16Uns10Imm_xform>; - -// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v8i16 vectors. -def v8i16Uns16Imm_xform: SDNodeXForm; - -// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v8i16SExt16Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0; -}], v8i16Uns16Imm_xform>; - -// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended -// immediate constant load for v4i32 vectors. -def v4i32SExt10Imm_xform: SDNodeXForm; - -// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v4i32SExt10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32SExt10Imm_xform>; - -// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned -// immediate constant load for v4i32 vectors. -def v4i32Uns10Imm_xform: SDNodeXForm; - -// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant -// load, works in conjunction with its transform function. -def v4i32Uns10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32Uns10Imm_xform>; - -// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v4i32 vectors. -def v4i32SExt16Imm_xform: SDNodeXForm; - -// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v4i32SExt16Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32SExt16Imm_xform>; - -// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned -// immediate constant load for v4i32 vectors. -def v4i32Uns18Imm_xform: SDNodeXForm; - -// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, -// works in conjunction with its transform function. -def v4i32Uns18Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], v4i32Uns18Imm_xform>; - -// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant -// load. -def ILHUvec_get_imm: SDNodeXForm; - -/// immILHUvec: Predicate test for a ILHU constant vector. -def immILHUvec: PatLeaf<(build_vector), [{ - return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0; -}], ILHUvec_get_imm>; - -// Catch-all for any other i32 vector constants -def v4i32_get_imm: SDNodeXForm; - -def v4i32Imm: PatLeaf<(build_vector), [{ - return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0; -}], v4i32_get_imm>; - -// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended -// immediate constant load for v2i64 vectors. -def v2i64SExt10Imm_xform: SDNodeXForm; - -// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v2i64SExt10Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], v2i64SExt10Imm_xform>; - -// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended -// immediate constant load for v2i64 vectors. -def v2i64SExt16Imm_xform: SDNodeXForm; - -// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant -// load, works in conjunction with its transform function. -def v2i64SExt16Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], v2i64SExt16Imm_xform>; - -// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned -// immediate constant load for v2i64 vectors. -def v2i64Uns18Imm_xform: SDNodeXForm; - -// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, -// works in conjunction with its transform function. -def v2i64Uns18Imm: PatLeaf<(build_vector), [{ - return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], v2i64Uns18Imm_xform>; - -/// immILHUvec: Predicate test for a ILHU constant vector. -def immILHUvec_i64: PatLeaf<(build_vector), [{ - return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0; -}], ILHUvec_get_imm>; - -// Catch-all for any other i32 vector constants -def v2i64_get_imm: SDNodeXForm; - -def v2i64Imm: PatLeaf<(build_vector), [{ - return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0; -}], v2i64_get_imm>; - -//===----------------------------------------------------------------------===// -// Operand Definitions. - -def s7imm: Operand { - let PrintMethod = "printS7ImmOperand"; -} - -def s7imm_i8: Operand { - let PrintMethod = "printS7ImmOperand"; -} - -def u7imm: Operand { - let PrintMethod = "printU7ImmOperand"; -} - -def u7imm_i8: Operand { - let PrintMethod = "printU7ImmOperand"; -} - -def u7imm_i32: Operand { - let PrintMethod = "printU7ImmOperand"; -} - -// Halfword, signed 10-bit constant -def s10imm : Operand { - let PrintMethod = "printS10ImmOperand"; -} - -def s10imm_i8: Operand { - let PrintMethod = "printS10ImmOperand"; -} - -def s10imm_i32: Operand { - let PrintMethod = "printS10ImmOperand"; -} - -def s10imm_i64: Operand { - let PrintMethod = "printS10ImmOperand"; -} - -// Unsigned 10-bit integers: -def u10imm: Operand { - let PrintMethod = "printU10ImmOperand"; -} - -def u10imm_i8: Operand { - let PrintMethod = "printU10ImmOperand"; -} - -def u10imm_i32: Operand { - let PrintMethod = "printU10ImmOperand"; -} - -def s16imm : Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_i8: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_i32: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_i64: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_f32: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def s16imm_f64: Operand { - let PrintMethod = "printS16ImmOperand"; -} - -def u16imm_i64 : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def u16imm_i32 : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def u16imm : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def f16imm : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -def s18imm : Operand { - let PrintMethod = "printS18ImmOperand"; -} - -def u18imm : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -def u18imm_i64 : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -def f18imm : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -def f18imm_f64 : Operand { - let PrintMethod = "printU18ImmOperand"; -} - -// Negated 7-bit halfword rotate immediate operands -def rothNeg7imm : Operand { - let PrintMethod = "printROTHNeg7Imm"; -} - -def rothNeg7imm_i16 : Operand { - let PrintMethod = "printROTHNeg7Imm"; -} - -// Negated 7-bit word rotate immediate operands -def rotNeg7imm : Operand { - let PrintMethod = "printROTNeg7Imm"; -} - -def rotNeg7imm_i16 : Operand { - let PrintMethod = "printROTNeg7Imm"; -} - -def rotNeg7imm_i8 : Operand { - let PrintMethod = "printROTNeg7Imm"; -} - -def target : Operand { - let PrintMethod = "printBranchOperand"; -} - -// Absolute address call target -def calltarget : Operand { - let PrintMethod = "printCallOperand"; - let MIOperandInfo = (ops u18imm:$calldest); -} - -// PC relative call target -def relcalltarget : Operand { - let PrintMethod = "printPCRelativeOperand"; - let MIOperandInfo = (ops s16imm:$calldest); -} - -// Branch targets: -def brtarget : Operand { - let PrintMethod = "printPCRelativeOperand"; -} - -// Hint for branch target -def hbrtarget : Operand { - let PrintMethod = "printHBROperand"; -} - -// Indirect call target -def indcalltarget : Operand { - let PrintMethod = "printCallOperand"; - let MIOperandInfo = (ops ptr_rc:$calldest); -} - -def symbolHi: Operand { - let PrintMethod = "printSymbolHi"; -} - -def symbolLo: Operand { - let PrintMethod = "printSymbolLo"; -} - -def symbolLSA: Operand { - let PrintMethod = "printSymbolLSA"; -} - -// Shuffle address memory operaand [s7imm(reg) d-format] -def shufaddr : Operand { - let PrintMethod = "printShufAddr"; - let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg); -} - -// memory s10imm(reg) operand -def dformaddr : Operand { - let PrintMethod = "printDFormAddr"; - let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg); -} - -// 256K local store address -// N.B.: The tblgen code generator expects to have two operands, an offset -// and a pointer. Of these, only the immediate is actually used. -def addr256k : Operand { - let PrintMethod = "printAddr256K"; - let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg); -} - -// memory s18imm(reg) operand -def memri18 : Operand { - let PrintMethod = "printMemRegImmS18"; - let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg); -} - -// memory register + register operand -def memrr : Operand { - let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b); -} - -// Define SPU-specific addressing modes: These come in three basic -// flavors: -// -// D-form : [r+I10] (10-bit signed offset + reg) -// X-form : [r+r] (reg+reg) -// A-form : abs (256K LSA offset) -// D-form(2): [r+I7] (7-bit signed offset + reg) - -def dform_addr : ComplexPattern; -def xform_addr : ComplexPattern; -def aform_addr : ComplexPattern; -def dform2_addr : ComplexPattern; diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp deleted file mode 100644 index e6c872d0bb..0000000000 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Cell implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "reginfo" -#include "SPURegisterInfo.h" -#include "SPU.h" -#include "SPUInstrBuilder.h" -#include "SPUSubtarget.h" -#include "SPUMachineFunction.h" -#include "SPUFrameLowering.h" -#include "llvm/Constants.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include - -#define GET_REGINFO_TARGET_DESC -#include "SPUGenRegisterInfo.inc" - -using namespace llvm; - -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). -unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { - using namespace SPU; - switch (RegEnum) { - case SPU::R0: return 0; - case SPU::R1: return 1; - case SPU::R2: return 2; - case SPU::R3: return 3; - case SPU::R4: return 4; - case SPU::R5: return 5; - case SPU::R6: return 6; - case SPU::R7: return 7; - case SPU::R8: return 8; - case SPU::R9: return 9; - case SPU::R10: return 10; - case SPU::R11: return 11; - case SPU::R12: return 12; - case SPU::R13: return 13; - case SPU::R14: return 14; - case SPU::R15: return 15; - case SPU::R16: return 16; - case SPU::R17: return 17; - case SPU::R18: return 18; - case SPU::R19: return 19; - case SPU::R20: return 20; - case SPU::R21: return 21; - case SPU::R22: return 22; - case SPU::R23: return 23; - case SPU::R24: return 24; - case SPU::R25: return 25; - case SPU::R26: return 26; - case SPU::R27: return 27; - case SPU::R28: return 28; - case SPU::R29: return 29; - case SPU::R30: return 30; - case SPU::R31: return 31; - case SPU::R32: return 32; - case SPU::R33: return 33; - case SPU::R34: return 34; - case SPU::R35: return 35; - case SPU::R36: return 36; - case SPU::R37: return 37; - case SPU::R38: return 38; - case SPU::R39: return 39; - case SPU::R40: return 40; - case SPU::R41: return 41; - case SPU::R42: return 42; - case SPU::R43: return 43; - case SPU::R44: return 44; - case SPU::R45: return 45; - case SPU::R46: return 46; - case SPU::R47: return 47; - case SPU::R48: return 48; - case SPU::R49: return 49; - case SPU::R50: return 50; - case SPU::R51: return 51; - case SPU::R52: return 52; - case SPU::R53: return 53; - case SPU::R54: return 54; - case SPU::R55: return 55; - case SPU::R56: return 56; - case SPU::R57: return 57; - case SPU::R58: return 58; - case SPU::R59: return 59; - case SPU::R60: return 60; - case SPU::R61: return 61; - case SPU::R62: return 62; - case SPU::R63: return 63; - case SPU::R64: return 64; - case SPU::R65: return 65; - case SPU::R66: return 66; - case SPU::R67: return 67; - case SPU::R68: return 68; - case SPU::R69: return 69; - case SPU::R70: return 70; - case SPU::R71: return 71; - case SPU::R72: return 72; - case SPU::R73: return 73; - case SPU::R74: return 74; - case SPU::R75: return 75; - case SPU::R76: return 76; - case SPU::R77: return 77; - case SPU::R78: return 78; - case SPU::R79: return 79; - case SPU::R80: return 80; - case SPU::R81: return 81; - case SPU::R82: return 82; - case SPU::R83: return 83; - case SPU::R84: return 84; - case SPU::R85: return 85; - case SPU::R86: return 86; - case SPU::R87: return 87; - case SPU::R88: return 88; - case SPU::R89: return 89; - case SPU::R90: return 90; - case SPU::R91: return 91; - case SPU::R92: return 92; - case SPU::R93: return 93; - case SPU::R94: return 94; - case SPU::R95: return 95; - case SPU::R96: return 96; - case SPU::R97: return 97; - case SPU::R98: return 98; - case SPU::R99: return 99; - case SPU::R100: return 100; - case SPU::R101: return 101; - case SPU::R102: return 102; - case SPU::R103: return 103; - case SPU::R104: return 104; - case SPU::R105: return 105; - case SPU::R106: return 106; - case SPU::R107: return 107; - case SPU::R108: return 108; - case SPU::R109: return 109; - case SPU::R110: return 110; - case SPU::R111: return 111; - case SPU::R112: return 112; - case SPU::R113: return 113; - case SPU::R114: return 114; - case SPU::R115: return 115; - case SPU::R116: return 116; - case SPU::R117: return 117; - case SPU::R118: return 118; - case SPU::R119: return 119; - case SPU::R120: return 120; - case SPU::R121: return 121; - case SPU::R122: return 122; - case SPU::R123: return 123; - case SPU::R124: return 124; - case SPU::R125: return 125; - case SPU::R126: return 126; - case SPU::R127: return 127; - default: - report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); - } -} - -SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, - const TargetInstrInfo &tii) : - SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii) -{ -} - -/// getPointerRegClass - Return the register class to use to hold pointers. -/// This is used for addressing modes. -const TargetRegisterClass * -SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) - const { - return &SPU::R32CRegClass; -} - -const uint16_t * -SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const -{ - // Cell ABI calling convention - static const uint16_t SPU_CalleeSaveRegs[] = { - SPU::R80, SPU::R81, SPU::R82, SPU::R83, - SPU::R84, SPU::R85, SPU::R86, SPU::R87, - SPU::R88, SPU::R89, SPU::R90, SPU::R91, - SPU::R92, SPU::R93, SPU::R94, SPU::R95, - SPU::R96, SPU::R97, SPU::R98, SPU::R99, - SPU::R100, SPU::R101, SPU::R102, SPU::R103, - SPU::R104, SPU::R105, SPU::R106, SPU::R107, - SPU::R108, SPU::R109, SPU::R110, SPU::R111, - SPU::R112, SPU::R113, SPU::R114, SPU::R115, - SPU::R116, SPU::R117, SPU::R118, SPU::R119, - SPU::R120, SPU::R121, SPU::R122, SPU::R123, - SPU::R124, SPU::R125, SPU::R126, SPU::R127, - SPU::R2, /* environment pointer */ - SPU::R1, /* stack pointer */ - SPU::R0, /* link register */ - 0 /* end */ - }; - - return SPU_CalleeSaveRegs; -} - -/*! - R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is - generally unused) are the Cell's reserved registers - */ -BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - Reserved.set(SPU::R0); // LR - Reserved.set(SPU::R1); // SP - Reserved.set(SPU::R2); // environment pointer - return Reserved; -} - -//===----------------------------------------------------------------------===// -// Stack Frame Processing methods -//===----------------------------------------------------------------------===// - -//-------------------------------------------------------------------------- -void -SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) - const -{ - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - -void -SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const -{ - unsigned i = 0; - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - DebugLoc dl = II->getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - MachineOperand &SPOp = MI.getOperand(i); - int FrameIndex = SPOp.getIndex(); - - // Now add the frame object offset to the offset from r1. - int Offset = MFI->getObjectOffset(FrameIndex); - - // Most instructions, except for generated FrameIndex additions using AIr32 - // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the - // immediate in operand 2. - unsigned OpNo = 1; - if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32) - OpNo = 2; - - MachineOperand &MO = MI.getOperand(OpNo); - - // Offset is biased by $lr's slot at the bottom. - Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize(); - assert((Offset & 0xf) == 0 - && "16-byte alignment violated in eliminateFrameIndex"); - - // Replace the FrameIndex with base register with $sp (aka $r1) - SPOp.ChangeToRegister(SPU::R1, false); - - // if 'Offset' doesn't fit to the D-form instruction's - // immediate, convert the instruction to X-form - // if the instruction is not an AI (which takes a s10 immediate), assume - // it is a load/store that can take a s14 immediate - if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset)) - || !isInt<14>(Offset)) { - int newOpcode = convertDFormToXForm(MI.getOpcode()); - unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj); - BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg ) - .addImm(Offset); - BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg()) - .addReg(tmpReg, RegState::Kill) - .addReg(SPU::R1); - // remove the replaced D-form instruction - MBB.erase(II); - } else { - MO.ChangeToImmediate(Offset); - } -} - -unsigned -SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const -{ - return SPU::R1; -} - -int -SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const -{ - switch(dFormOpcode) - { - case SPU::AIr32: return SPU::Ar32; - case SPU::LQDr32: return SPU::LQXr32; - case SPU::LQDr128: return SPU::LQXr128; - case SPU::LQDv16i8: return SPU::LQXv16i8; - case SPU::LQDv4i32: return SPU::LQXv4i32; - case SPU::LQDv4f32: return SPU::LQXv4f32; - case SPU::STQDr32: return SPU::STQXr32; - case SPU::STQDr128: return SPU::STQXr128; - case SPU::STQDv16i8: return SPU::STQXv16i8; - case SPU::STQDv4i32: return SPU::STQXv4i32; - case SPU::STQDv4f32: return SPU::STQXv4f32; - - default: assert( false && "Unhandled D to X-form conversion"); - } - // default will assert, but need to return something to keep the - // compiler happy. - return dFormOpcode; -} - -// TODO this is already copied from PPC. Could this convenience function -// be moved to the RegScavenger class? -unsigned -SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, - RegScavenger *RS, - const TargetRegisterClass *RC, - int SPAdj) const -{ - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - assert( Reg && "Register scavenger failed"); - return Reg; -} diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h deleted file mode 100644 index e9f9aba63a..0000000000 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ /dev/null @@ -1,106 +0,0 @@ -//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Cell SPU implementation of the TargetRegisterInfo -// class. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_REGISTERINFO_H -#define SPU_REGISTERINFO_H - -#include "SPU.h" - -#define GET_REGINFO_HEADER -#include "SPUGenRegisterInfo.inc" - -namespace llvm { - class SPUSubtarget; - class TargetInstrInfo; - class Type; - - class SPURegisterInfo : public SPUGenRegisterInfo { - private: - const SPUSubtarget &Subtarget; - const TargetInstrInfo &TII; - - //! Predicate: Does the machine function use the link register? - bool usesLR(MachineFunction &MF) const; - - public: - SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii); - - //! Translate a register's enum value to a register number - /*! - This method translates a register's enum value to it's regiser number, - e.g. SPU::R14 -> 14. - */ - static unsigned getRegisterNumbering(unsigned RegEnum); - - /// getPointerRegClass - Return the register class to use to hold pointers. - /// This is used for addressing modes. - virtual const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; - - /// After allocating this many registers, the allocator should feel - /// register pressure. The value is a somewhat random guess, based on the - /// number of non callee saved registers in the C calling convention. - virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC, - MachineFunction &MF) const{ - return 50; - } - - //! Return the array of callee-saved registers - virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; - - //! Allow for scavenging, so we can get scratch registers when needed. - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const - { return true; } - - //! Enable tracking of liveness after register allocation, since register - // scavenging is enabled. - virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const - { return true; } - - //! Return the reserved registers - BitVector getReservedRegs(const MachineFunction &MF) const; - - //! Eliminate the call frame setup pseudo-instructions - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - //! Convert frame indicies into machine operands - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS = NULL) const; - - //! Get the stack frame register (SP, aka R1) - unsigned getFrameRegister(const MachineFunction &MF) const; - - //------------------------------------------------------------------------ - // New methods added: - //------------------------------------------------------------------------ - - //! Convert D-form load/store to X-form load/store - /*! - Converts a regiser displacement load/store into a register-indexed - load/store for large stack frames, when the stack frame exceeds the - range of a s10 displacement. - */ - int convertDFormToXForm(int dFormOpcode) const; - - //! Acquire an unused register in an emergency. - unsigned findScratchRegister(MachineBasicBlock::iterator II, - RegScavenger *RS, - const TargetRegisterClass *RC, - int SPAdj) const; - - }; -} // end namespace llvm - -#endif diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td deleted file mode 100644 index f27b042edd..0000000000 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ /dev/null @@ -1,183 +0,0 @@ -//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -class SPUReg : Register { - let Namespace = "SPU"; -} - -// The SPU's register are all 128-bits wide, which makes specifying the -// registers relatively easy, if relatively mundane: - -class SPUVecReg num, string n> : SPUReg { - field bits<7> Num = num; -} - -def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>; -def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>; -def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>; -def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>; -def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>; -def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>; -def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>; -def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>; -def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>; -def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>; -def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>; -def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>; -def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>; -def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>; -def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>; -def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>; -def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>; -def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>; -def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>; -def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>; -def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>; -def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>; -def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>; -def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>; -def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>; -def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>; -def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>; -def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>; -def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>; -def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>; -def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>; -def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>; -def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>; -def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>; -def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>; -def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>; -def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>; -def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>; -def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>; -def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>; -def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>; -def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>; -def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>; -def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>; -def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>; -def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>; -def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>; -def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>; -def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>; -def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>; -def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>; -def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>; -def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>; -def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>; -def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>; -def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>; -def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>; -def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>; -def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>; -def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>; -def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>; -def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>; -def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>; -def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>; -def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>; -def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>; -def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>; -def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>; -def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>; -def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>; -def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>; -def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>; -def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>; -def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>; -def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>; -def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>; -def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>; -def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>; -def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>; -def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>; -def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>; -def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>; -def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>; -def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>; -def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>; -def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>; -def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>; -def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>; -def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>; -def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>; -def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>; -def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>; -def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>; -def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>; -def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>; -def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>; -def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>; -def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>; -def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>; -def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>; -def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>; -def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>; -def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>; -def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>; -def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>; -def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>; -def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>; -def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>; -def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>; -def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>; -def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>; -def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>; -def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>; -def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>; -def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>; -def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>; -def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>; -def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>; -def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>; -def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>; -def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>; -def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>; -def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>; -def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>; -def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>; -def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>; -def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>; -def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>; - -/* Need floating point status register here: */ -/* def FPCSR : ... */ - -// The SPU's registers as 128-bit wide entities, and can function as general -// purpose registers, where the operands are in the "preferred slot": -// The non-volatile registers are allocated in reverse order, like PPC does it. -def GPRC : RegisterClass<"SPU", [i128], 128, - (add (sequence "R%u", 0, 79), - (sequence "R%u", 127, 80))>; - -// The SPU's registers as 64-bit wide (double word integer) "preferred slot": -def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>; - -// The SPU's registers as 64-bit wide (double word) FP "preferred slot": -def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>; - -// The SPU's registers as 32-bit wide (word) "preferred slot": -def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>; - -// The SPU's registers as single precision floating point "preferred slot": -def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>; - -// The SPU's registers as 16-bit wide (halfword) "preferred slot": -def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>; - -// The SPU's registers as 8-bit wide (byte) "preferred slot": -def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>; - -// The SPU's registers as vector registers: -def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, - (add GPRC)>; diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h deleted file mode 100644 index e557ed340a..0000000000 --- a/lib/Target/CellSPU/SPURegisterNames.h +++ /dev/null @@ -1,19 +0,0 @@ -//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_REGISTER_NAMES_H -#define SPU_REGISTER_NAMES_H - -// Define symbolic names for Cell registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "SPUGenRegisterInfo.inc" - -#endif diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td deleted file mode 100644 index 9ccd0844e4..0000000000 --- a/lib/Target/CellSPU/SPUSchedule.td +++ /dev/null @@ -1,59 +0,0 @@ -//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Even pipeline: - -def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000) -def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100) - -//===----------------------------------------------------------------------===// -// Instruction Itinerary classes used for Cell SPU -//===----------------------------------------------------------------------===// - -def LoadStore : InstrItinClass; // ODD_UNIT -def BranchHints : InstrItinClass; // ODD_UNIT -def BranchResolv : InstrItinClass; // ODD_UNIT -def ChanOpSPR : InstrItinClass; // ODD_UNIT -def ShuffleOp : InstrItinClass; // ODD_UNIT -def SelectOp : InstrItinClass; // ODD_UNIT -def GatherOp : InstrItinClass; // ODD_UNIT -def LoadNOP : InstrItinClass; // ODD_UNIT -def ExecNOP : InstrItinClass; // EVEN_UNIT -def SPrecFP : InstrItinClass; // EVEN_UNIT -def DPrecFP : InstrItinClass; // EVEN_UNIT -def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer) -def ByteOp : InstrItinClass; // EVEN_UNIT -def IntegerOp : InstrItinClass; // EVEN_UNIT -def IntegerMulDiv: InstrItinClass; // EVEN_UNIT -def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector -def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad -def ImmLoad : InstrItinClass; // EVEN_UNIT - -/* Note: The itinerary for the Cell SPU is somewhat contrived... */ -def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> - ]>; diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp deleted file mode 100644 index 5732fd43cd..0000000000 --- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SPUSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "cellspu-selectiondag-info" -#include "SPUTargetMachine.h" -using namespace llvm; - -SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} - -SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { -} diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h deleted file mode 100644 index 39257d92c4..0000000000 --- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the CellSPU subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef CELLSPUSELECTIONDAGINFO_H -#define CELLSPUSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class SPUTargetMachine; - -class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM); - ~SPUSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp deleted file mode 100644 index eec2d250be..0000000000 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the CellSPU-specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "SPUSubtarget.h" -#include "SPU.h" -#include "SPURegisterInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "SPUGenSubtargetInfo.inc" - -using namespace llvm; - -SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) : - SPUGenSubtargetInfo(TT, CPU, FS), - StackAlignment(16), - ProcDirective(SPU::DEFAULT_PROC), - UseLargeMem(false) -{ - // Should be the target SPU processor type. For now, since there's only - // one, simply default to the current "v0" default: - std::string default_cpu("v0"); - - // Parse features string. - ParseSubtargetFeatures(default_cpu, FS); - - // Initialize scheduling itinerary for the specified CPU. - InstrItins = getInstrItineraryForCPU(default_cpu); -} - -/// SetJITMode - This is called to inform the subtarget info that we are -/// producing code for the JIT. -void SPUSubtarget::SetJITMode() { -} - -/// Enable PostRA scheduling for optimization levels -O2 and -O3. -bool SPUSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - // CriticalPathsRCs seems to be the set of - // RegisterClasses that antidep breakings are performed for. - // Do it for all register classes - CriticalPathRCs.clear(); - CriticalPathRCs.push_back(&SPU::R8CRegClass); - CriticalPathRCs.push_back(&SPU::R16CRegClass); - CriticalPathRCs.push_back(&SPU::R32CRegClass); - CriticalPathRCs.push_back(&SPU::R32FPRegClass); - CriticalPathRCs.push_back(&SPU::R64CRegClass); - CriticalPathRCs.push_back(&SPU::VECREGRegClass); - return OptLevel >= CodeGenOpt::Default; -} diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h deleted file mode 100644 index 27d28b22dd..0000000000 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ /dev/null @@ -1,97 +0,0 @@ -//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef CELLSUBTARGET_H -#define CELLSUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "SPUGenSubtargetInfo.inc" - -namespace llvm { - class GlobalValue; - class StringRef; - - namespace SPU { - enum { - PROC_NONE, - DEFAULT_PROC - }; - } - - class SPUSubtarget : public SPUGenSubtargetInfo { - protected: - /// stackAlignment - The minimum alignment known to hold of the stack frame - /// on entry to the function and which must be maintained by every function. - unsigned StackAlignment; - - /// Selected instruction itineraries (one entry per itinerary class.) - InstrItineraryData InstrItins; - - /// Which SPU processor (this isn't really used, but it's there to keep - /// the C compiler happy) - unsigned ProcDirective; - - /// Use (assume) large memory -- effectively disables the LQA/STQA - /// instructions that assume 259K local store. - bool UseLargeMem; - - public: - /// This constructor initializes the data members to match that - /// of the specified triple. - /// - SPUSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - /// SetJITMode - This is called to inform the subtarget info that we are - /// producing code for the JIT. - void SetJITMode(); - - /// getStackAlignment - Returns the minimum alignment known to hold of the - /// stack frame on entry to the function and which must be maintained by - /// every function for this subtarget. - unsigned getStackAlignment() const { return StackAlignment; } - - /// getInstrItins - Return the instruction itineraies based on subtarget - /// selection. - const InstrItineraryData &getInstrItineraryData() const { - return InstrItins; - } - - /// Use large memory addressing predicate - bool usingLargeMem() const { - return UseLargeMem; - } - - /// getDataLayoutString - Return the pointer size and type alignment - /// properties of this subtarget. - const char *getDataLayoutString() const { - return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" - "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128" - "-s:128:128-n32:64"; - } - - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; - }; -} // End llvm namespace - -#endif diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp deleted file mode 100644 index 918316572a..0000000000 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the Cell SPU target. -// -//===----------------------------------------------------------------------===// - -#include "SPUTargetMachine.h" -#include "SPU.h" -#include "llvm/PassManager.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -extern "C" void LLVMInitializeCellSPUTarget() { - // Register the target. - RegisterTargetMachine X(TheCellSPUTarget); -} - -const std::pair * -SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { - NumEntries = 1; - return &LR[0]; -} - -SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - DL(Subtarget.getDataLayoutString()), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), - TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { -} - -//===----------------------------------------------------------------------===// -// Pass Pipeline Configuration -//===----------------------------------------------------------------------===// - -namespace { -/// SPU Code Generator Pass Configuration Options. -class SPUPassConfig : public TargetPassConfig { -public: - SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - SPUTargetMachine &getSPUTargetMachine() const { - return getTM(); - } - - virtual bool addInstSelector(); - virtual bool addPreEmitPass(); -}; -} // namespace - -TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SPUPassConfig(this, PM); -} - -bool SPUPassConfig::addInstSelector() { - // Install an instruction selector. - addPass(createSPUISelDag(getSPUTargetMachine())); - return false; -} - -// passes to run just before printing the assembly -bool SPUPassConfig::addPreEmitPass() { - // load the TCE instruction scheduler, if available via - // loaded plugins - typedef llvm::FunctionPass* (*BuilderFunc)(const char*); - BuilderFunc schedulerCreator = - (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( - "createTCESchedulerPass"); - if (schedulerCreator != NULL) - addPass(schedulerCreator("cellspu")); - - //align instructions with nops/lnops for dual issue - addPass(createSPUNopFillerPass(getSPUTargetMachine())); - return true; -} diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h deleted file mode 100644 index 7f53ea6fbe..0000000000 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ /dev/null @@ -1,96 +0,0 @@ -//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the CellSPU-specific subclass of TargetMachine. -// -//===----------------------------------------------------------------------===// - -#ifndef SPU_TARGETMACHINE_H -#define SPU_TARGETMACHINE_H - -#include "SPUSubtarget.h" -#include "SPUInstrInfo.h" -#include "SPUISelLowering.h" -#include "SPUSelectionDAGInfo.h" -#include "SPUFrameLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" -#include "llvm/DataLayout.h" - -namespace llvm { - -/// SPUTargetMachine -/// -class SPUTargetMachine : public LLVMTargetMachine { - SPUSubtarget Subtarget; - const DataLayout DL; - SPUInstrInfo InstrInfo; - SPUFrameLowering FrameLowering; - SPUTargetLowering TLInfo; - SPUSelectionDAGInfo TSInfo; - InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; -public: - SPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); - - /// Return the subtarget implementation object - virtual const SPUSubtarget *getSubtargetImpl() const { - return &Subtarget; - } - virtual const SPUInstrInfo *getInstrInfo() const { - return &InstrInfo; - } - virtual const SPUFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - /*! - \note Cell SPU does not support JIT today. It could support JIT at some - point. - */ - virtual TargetJITInfo *getJITInfo() { - return NULL; - } - - virtual const SPUTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - virtual const SPURegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const DataLayout *getDataLayout() const { - return &DL; - } - - virtual const InstrItineraryData *getInstrItineraryData() const { - return &InstrItins; - } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt deleted file mode 100644 index 6a98f95db6..0000000000 --- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMCellSPUInfo - CellSPUTargetInfo.cpp - ) - -add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen) diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp deleted file mode 100644 index 84aadfad6f..0000000000 --- a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp +++ /dev/null @@ -1,20 +0,0 @@ -//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheCellSPUTarget; - -extern "C" void LLVMInitializeCellSPUTargetInfo() { - RegisterTarget - X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]"); -} diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 6937e705ff..0000000000 --- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = CellSPUInfo -parent = CellSPU -required_libraries = MC Support Target -add_to_library_groups = CellSPU diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/CellSPU/TargetInfo/Makefile deleted file mode 100644 index 9cb6827b43..0000000000 --- a/lib/Target/CellSPU/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 8995080974..eb6c779f45 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore +subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the -- cgit v1.2.3