author     Jakob Stoklund Olesen <stoklund@2pi.dk>   2010-03-23 23:14:44 +0000
committer  Jakob Stoklund Olesen <stoklund@2pi.dk>   2010-03-23 23:14:44 +0000
commit     c75c5fa12582956fc6b7d7d756b2bdd49fa61f71 (patch)
tree       7e65cf89ff161af91b980dc0f9c823caa74df7e9 /lib
parent     a271174771adce635d29219225843ddb3fb17770 (diff)
Add a late SSEDomainFix pass that twiddles SSE instructions to avoid domain crossings.
This is work in progress. So far, SSE execution domain tables are added to X86InstrInfo, and a skeleton pass is enabled with -sse-domain-fix.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99345 91177308-0d34-0410-b5e6-96231b3b80d8
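For example, PXOR, XORPS, and XORPD all compute the same bitwise result, but on Nehalem feeding a PXOR result into single-precision code such as MULPS can pay an extra bypass delay for crossing from the integer to the floating point domain; choosing XORPS instead keeps the value in the floating point domain. The tables added below group such equivalent opcodes so a later version of the pass can pick the variant that matches the surrounding code.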
Diffstat (limited to 'lib')
-rw-r--r--  lib/Target/X86/CMakeLists.txt        |   1
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp      | 536
-rw-r--r--  lib/Target/X86/X86.h                 |   4
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp      |   3
-rw-r--r--  lib/Target/X86/X86InstrInfo.h        |  14
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp  |  15
-rw-r--r--  lib/Target/X86/X86TargetMachine.h    |   1
7 files changed, 574 insertions, 0 deletions
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4d3dedf0e5..22285f1932 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ SSEDomainFix.cpp
X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644
index 0000000000..419c675190
--- /dev/null
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -0,0 +1,536 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer CPUs there is extra latency when
+// transferring data between the integer and floating point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class SSEDomainFixPass : public MachineFunctionPass {
+ static char ID;
+ const X86InstrInfo *TII;
+
+ MachineFunction *MF;
+ MachineBasicBlock *MBB;
+public:
+ SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ void enterBasicBlock(MachineBasicBlock *MBB);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+void SSEDomainFixPass::enterBasicBlock(MachineBasicBlock *mbb) {
+ MBB = mbb;
+ DEBUG(dbgs() << "Entering MBB " << MBB->getName() << "\n");
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*,
+ SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ enterBasicBlock(*DFI);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *MI = I;
+ const unsigned *equiv = 0;
+ X86InstrInfo::SSEDomain domain = TII->GetSSEDomain(MI, equiv);
+ DEBUG(dbgs() << "isd-"[domain] << (equiv ? "* " : " ") << *MI);
+ }
+ }
+ return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+ return new SSEDomainFixPass();
+}
+
+// These are the replaceable instructions. Some of these have _Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedInt PackedSingle PackedDouble
+ { X86::MOVDQAmr, X86::MOVAPSmr, X86::MOVAPDmr },
+ { X86::MOVDQArm, X86::MOVAPSrm, X86::MOVAPDrm },
+ { X86::MOVDQArr, X86::MOVAPSrr, X86::MOVAPDrr },
+ { X86::MOVDQUmr, X86::MOVUPSmr, X86::MOVUPDmr },
+ { X86::MOVDQUrm, X86::MOVUPSrm, X86::MOVUPDrm },
+ { X86::MOVNTDQmr, X86::MOVNTPSmr, X86::MOVNTPDmr },
+ { X86::PANDNrm, X86::ANDNPSrm, X86::ANDNPDrm },
+ { X86::PANDNrr, X86::ANDNPSrr, X86::ANDNPDrr },
+ { X86::PANDrm, X86::ANDPSrm, X86::ANDPDrm },
+ { X86::PANDrr, X86::ANDPSrr, X86::ANDPDrr },
+ { X86::PORrm, X86::ORPSrm, X86::ORPDrm },
+ { X86::PORrr, X86::ORPSrr, X86::ORPDrr },
+ { X86::PUNPCKHQDQrm, X86::UNPCKHPSrm, X86::UNPCKHPDrm },
+ { X86::PUNPCKHQDQrr, X86::UNPCKHPSrr, X86::UNPCKHPDrr },
+ { X86::PUNPCKLQDQrm, X86::UNPCKLPSrm, X86::UNPCKLPDrm },
+ { X86::PUNPCKLQDQrr, X86::UNPCKLPSrr, X86::UNPCKLPDrr },
+ { X86::PXORrm, X86::XORPSrm, X86::XORPDrm },
+ { X86::PXORrr, X86::XORPSrr, X86::XORPDrr },
+};
+
+void X86InstrInfo::populateSSEInstrDomainTable() {
+ // Instructions that execute in the packed integer domain.
+ static const unsigned PackedIntInstrs[] = {
+ X86::LDDQUrm,
+ X86::MASKMOVDQU,
+ X86::MASKMOVDQU64,
+ X86::MOVDI2PDIrm,
+ X86::MOVDI2PDIrr,
+ X86::MOVDQUmr_Int,
+ X86::MOVDQUrm_Int,
+ X86::MOVLQ128mr,
+ X86::MOVNTDQArm,
+ X86::MOVNTDQmr_Int,
+ X86::MOVNTDQ_64mr,
+ X86::MOVPDI2DImr,
+ X86::MOVPDI2DIrr,
+ X86::MOVPQI2QImr,
+ X86::MOVPQIto64rr,
+ X86::MOVQI2PQIrm,
+ X86::MOVQxrxr,
+ X86::MOVZDI2PDIrm,
+ X86::MOVZDI2PDIrr,
+ X86::MOVZPQILo2PQIrm,
+ X86::MOVZPQILo2PQIrr,
+ X86::MOVZQI2PQIrm,
+ X86::MOVZQI2PQIrr,
+ X86::MPSADBWrmi,
+ X86::MPSADBWrri,
+ X86::PABSBrm128,
+ X86::PABSBrr128,
+ X86::PABSDrm128,
+ X86::PABSDrr128,
+ X86::PABSWrm128,
+ X86::PABSWrr128,
+ X86::PACKSSDWrm,
+ X86::PACKSSDWrr,
+ X86::PACKSSWBrm,
+ X86::PACKSSWBrr,
+ X86::PACKUSDWrm,
+ X86::PACKUSDWrr,
+ X86::PACKUSWBrm,
+ X86::PACKUSWBrr,
+ X86::PADDBrm,
+ X86::PADDBrr,
+ X86::PADDDrm,
+ X86::PADDDrr,
+ X86::PADDQrm,
+ X86::PADDQrr,
+ X86::PADDSBrm,
+ X86::PADDSBrr,
+ X86::PADDSWrm,
+ X86::PADDSWrr,
+ X86::PADDUSBrm,
+ X86::PADDUSBrr,
+ X86::PADDUSWrm,
+ X86::PADDUSWrr,
+ X86::PADDWrm,
+ X86::PADDWrr,
+ X86::PALIGNR128rm,
+ X86::PALIGNR128rr,
+ X86::PAVGBrm,
+ X86::PAVGBrr,
+ X86::PAVGWrm,
+ X86::PAVGWrr,
+ X86::PBLENDVBrm0,
+ X86::PBLENDVBrr0,
+ X86::PBLENDWrmi,
+ X86::PBLENDWrri,
+ X86::PCMPEQBrm,
+ X86::PCMPEQBrr,
+ X86::PCMPEQDrm,
+ X86::PCMPEQDrr,
+ X86::PCMPEQQrm,
+ X86::PCMPEQQrr,
+ X86::PCMPEQWrm,
+ X86::PCMPEQWrr,
+ X86::PCMPESTRIArm,
+ X86::PCMPESTRIArr,
+ X86::PCMPESTRICrm,
+ X86::PCMPESTRICrr,
+ X86::PCMPESTRIOrm,
+ X86::PCMPESTRIOrr,
+ X86::PCMPESTRIrm,
+ X86::PCMPESTRIrr,
+ X86::PCMPESTRISrm,
+ X86::PCMPESTRISrr,
+ X86::PCMPESTRIZrm,
+ X86::PCMPESTRIZrr,
+ X86::PCMPESTRM128MEM,
+ X86::PCMPESTRM128REG,
+ X86::PCMPESTRM128rm,
+ X86::PCMPESTRM128rr,
+ X86::PCMPGTBrm,
+ X86::PCMPGTBrr,
+ X86::PCMPGTDrm,
+ X86::PCMPGTDrr,
+ X86::PCMPGTQrm,
+ X86::PCMPGTQrr,
+ X86::PCMPGTWrm,
+ X86::PCMPGTWrr,
+ X86::PCMPISTRIArm,
+ X86::PCMPISTRIArr,
+ X86::PCMPISTRICrm,
+ X86::PCMPISTRICrr,
+ X86::PCMPISTRIOrm,
+ X86::PCMPISTRIOrr,
+ X86::PCMPISTRIrm,
+ X86::PCMPISTRIrr,
+ X86::PCMPISTRISrm,
+ X86::PCMPISTRISrr,
+ X86::PCMPISTRIZrm,
+ X86::PCMPISTRIZrr,
+ X86::PCMPISTRM128MEM,
+ X86::PCMPISTRM128REG,
+ X86::PCMPISTRM128rm,
+ X86::PCMPISTRM128rr,
+ X86::PEXTRBmr,
+ X86::PEXTRBrr,
+ X86::PEXTRDmr,
+ X86::PEXTRDrr,
+ X86::PEXTRQmr,
+ X86::PEXTRQrr,
+ X86::PEXTRWmr,
+ X86::PEXTRWri,
+ X86::PHADDDrm128,
+ X86::PHADDDrr128,
+ X86::PHADDSWrm128,
+ X86::PHADDSWrr128,
+ X86::PHADDWrm128,
+ X86::PHADDWrr128,
+ X86::PHMINPOSUWrm128,
+ X86::PHMINPOSUWrr128,
+ X86::PHSUBDrm128,
+ X86::PHSUBDrr128,
+ X86::PHSUBSWrm128,
+ X86::PHSUBSWrr128,
+ X86::PHSUBWrm128,
+ X86::PHSUBWrr128,
+ X86::PINSRBrm,
+ X86::PINSRBrr,
+ X86::PINSRDrm,
+ X86::PINSRDrr,
+ X86::PINSRQrm,
+ X86::PINSRQrr,
+ X86::PINSRWrmi,
+ X86::PINSRWrri,
+ X86::PMADDUBSWrm128,
+ X86::PMADDUBSWrr128,
+ X86::PMADDWDrm,
+ X86::PMADDWDrr,
+ X86::PMAXSBrm,
+ X86::PMAXSBrr,
+ X86::PMAXSDrm,
+ X86::PMAXSDrr,
+ X86::PMAXSWrm,
+ X86::PMAXSWrr,
+ X86::PMAXUBrm,
+ X86::PMAXUBrr,
+ X86::PMAXUDrm,
+ X86::PMAXUDrr,
+ X86::PMAXUWrm,
+ X86::PMAXUWrr,
+ X86::PMINSBrm,
+ X86::PMINSBrr,
+ X86::PMINSDrm,
+ X86::PMINSDrr,
+ X86::PMINSWrm,
+ X86::PMINSWrr,
+ X86::PMINUBrm,
+ X86::PMINUBrr,
+ X86::PMINUDrm,
+ X86::PMINUDrr,
+ X86::PMINUWrm,
+ X86::PMINUWrr,
+ X86::PMOVSXBDrm,
+ X86::PMOVSXBDrr,
+ X86::PMOVSXBQrm,
+ X86::PMOVSXBQrr,
+ X86::PMOVSXBWrm,
+ X86::PMOVSXBWrr,
+ X86::PMOVSXDQrm,
+ X86::PMOVSXDQrr,
+ X86::PMOVSXWDrm,
+ X86::PMOVSXWDrr,
+ X86::PMOVSXWQrm,
+ X86::PMOVSXWQrr,
+ X86::PMOVZXBDrm,
+ X86::PMOVZXBDrr,
+ X86::PMOVZXBQrm,
+ X86::PMOVZXBQrr,
+ X86::PMOVZXBWrm,
+ X86::PMOVZXBWrr,
+ X86::PMOVZXDQrm,
+ X86::PMOVZXDQrr,
+ X86::PMOVZXWDrm,
+ X86::PMOVZXWDrr,
+ X86::PMOVZXWQrm,
+ X86::PMOVZXWQrr,
+ X86::PMULDQrm,
+ X86::PMULDQrr,
+ X86::PMULHRSWrm128,
+ X86::PMULHRSWrr128,
+ X86::PMULHUWrm,
+ X86::PMULHUWrr,
+ X86::PMULHWrm,
+ X86::PMULHWrr,
+ X86::PMULLDrm,
+ X86::PMULLDrm_int,
+ X86::PMULLDrr,
+ X86::PMULLDrr_int,
+ X86::PMULLWrm,
+ X86::PMULLWrr,
+ X86::PMULUDQrm,
+ X86::PMULUDQrr,
+ X86::PSADBWrm,
+ X86::PSADBWrr,
+ X86::PSHUFBrm128,
+ X86::PSHUFBrr128,
+ X86::PSHUFHWmi,
+ X86::PSHUFHWri,
+ X86::PSHUFLWmi,
+ X86::PSHUFLWri,
+ X86::PSIGNBrm128,
+ X86::PSIGNBrr128,
+ X86::PSIGNDrm128,
+ X86::PSIGNDrr128,
+ X86::PSIGNWrm128,
+ X86::PSIGNWrr128,
+ X86::PSLLDQri,
+ X86::PSLLDri,
+ X86::PSLLDrm,
+ X86::PSLLDrr,
+ X86::PSLLQri,
+ X86::PSLLQrm,
+ X86::PSLLQrr,
+ X86::PSLLWri,
+ X86::PSLLWrm,
+ X86::PSLLWrr,
+ X86::PSRADri,
+ X86::PSRADrm,
+ X86::PSRADrr,
+ X86::PSRAWri,
+ X86::PSRAWrm,
+ X86::PSRAWrr,
+ X86::PSRLDQri,
+ X86::PSRLDri,
+ X86::PSRLDrm,
+ X86::PSRLDrr,
+ X86::PSRLQri,
+ X86::PSRLQrm,
+ X86::PSRLQrr,
+ X86::PSRLWri,
+ X86::PSRLWrm,
+ X86::PSRLWrr,
+ X86::PSUBBrm,
+ X86::PSUBBrr,
+ X86::PSUBDrm,
+ X86::PSUBDrr,
+ X86::PSUBQrm,
+ X86::PSUBQrr,
+ X86::PSUBSBrm,
+ X86::PSUBSBrr,
+ X86::PSUBSWrm,
+ X86::PSUBSWrr,
+ X86::PSUBUSBrm,
+ X86::PSUBUSBrr,
+ X86::PSUBUSWrm,
+ X86::PSUBUSWrr,
+ X86::PSUBWrm,
+ X86::PSUBWrr,
+ X86::PUNPCKHBWrm,
+ X86::PUNPCKHBWrr,
+ X86::PUNPCKHWDrm,
+ X86::PUNPCKHWDrr,
+ X86::PUNPCKLBWrm,
+ X86::PUNPCKLBWrr,
+ X86::PUNPCKLWDrm,
+ X86::PUNPCKLWDrr,
+ };
+
+ // Instructions that execute in the packed single domain.
+ static const unsigned PackedSingleInstrs[] = {
+ X86::ADDPSrm,
+ X86::ADDPSrr,
+ X86::ADDSUBPSrm,
+ X86::ADDSUBPSrr,
+ X86::BLENDPSrmi,
+ X86::BLENDPSrri,
+ X86::BLENDVPSrm0,
+ X86::BLENDVPSrr0,
+ X86::CMPPSrmi,
+ X86::CMPPSrri,
+ X86::DIVPSrm,
+ X86::DIVPSrr,
+ X86::DPPSrmi,
+ X86::DPPSrri,
+ X86::EXTRACTPSmr,
+ X86::EXTRACTPSrr,
+ X86::HADDPSrm,
+ X86::HADDPSrr,
+ X86::HSUBPSrm,
+ X86::HSUBPSrr,
+ X86::INSERTPSrm,
+ X86::INSERTPSrr,
+ X86::MAXPSrm,
+ X86::MAXPSrm_Int,
+ X86::MAXPSrr,
+ X86::MAXPSrr_Int,
+ X86::MINPSrm,
+ X86::MINPSrm_Int,
+ X86::MINPSrr,
+ X86::MINPSrr_Int,
+ X86::MOVHLPSrr,
+ X86::MOVHPSmr,
+ X86::MOVHPSrm,
+ X86::MOVLHPSrr,
+ X86::MOVLPSmr,
+ X86::MOVLPSrm,
+ X86::MOVMSKPSrr,
+ X86::MOVNTPSmr_Int,
+ X86::MOVSHDUPrm,
+ X86::MOVSHDUPrr,
+ X86::MOVSLDUPrm,
+ X86::MOVSLDUPrr,
+ X86::MOVUPSmr_Int,
+ X86::MOVUPSrm_Int,
+ X86::MULPSrm,
+ X86::MULPSrr,
+ X86::RCPPSm,
+ X86::RCPPSm_Int,
+ X86::RCPPSr,
+ X86::RCPPSr_Int,
+ X86::ROUNDPSm_Int,
+ X86::ROUNDPSr_Int,
+ X86::RSQRTPSm,
+ X86::RSQRTPSm_Int,
+ X86::RSQRTPSr,
+ X86::RSQRTPSr_Int,
+ X86::SQRTPSm,
+ X86::SQRTPSm_Int,
+ X86::SQRTPSr,
+ X86::SQRTPSr_Int,
+ X86::SUBPSrm,
+ X86::SUBPSrr,
+ };
+
+ // Instructions that execute in the packed double domain.
+ static const unsigned PackedDoubleInstrs[] = {
+ X86::ADDPDrm,
+ X86::ADDPDrr,
+ X86::ADDSUBPDrm,
+ X86::ADDSUBPDrr,
+ X86::BLENDPDrmi,
+ X86::BLENDPDrri,
+ X86::BLENDVPDrm0,
+ X86::BLENDVPDrr0,
+ X86::CMPPDrmi,
+ X86::CMPPDrri,
+ X86::DIVPDrm,
+ X86::DIVPDrr,
+ X86::DPPDrmi,
+ X86::DPPDrri,
+ X86::HADDPDrm,
+ X86::HADDPDrr,
+ X86::HSUBPDrm,
+ X86::HSUBPDrr,
+ X86::MAXPDrm,
+ X86::MAXPDrm_Int,
+ X86::MAXPDrr,
+ X86::MAXPDrr_Int,
+ X86::MINPDrm,
+ X86::MINPDrm_Int,
+ X86::MINPDrr,
+ X86::MINPDrr_Int,
+ X86::MOVHPDmr,
+ X86::MOVHPDrm,
+ X86::MOVLPDmr,
+ X86::MOVLPDrm,
+ X86::MOVMSKPDrr,
+ X86::MOVNTPDmr_Int,
+ X86::MOVUPDmr_Int,
+ X86::MOVUPDrm_Int,
+ X86::MULPDrm,
+ X86::MULPDrr,
+ X86::ROUNDPDm_Int,
+ X86::ROUNDPDr_Int,
+ X86::SQRTPDm,
+ X86::SQRTPDm_Int,
+ X86::SQRTPDr,
+ X86::SQRTPDr_Int,
+ X86::SUBPDrm,
+ X86::SUBPDrr,
+ };
+
+  // Instructions that are confined to a single domain get an entry holding
+  // just the domain value (i.e. row number 0).
+ for (unsigned i = 0, e = array_lengthof(PackedIntInstrs); i != e; ++i)
+ SSEInstrDomainTable.insert(std::make_pair(PackedIntInstrs[i],
+ PackedInt));
+ for (unsigned i = 0, e = array_lengthof(PackedSingleInstrs); i != e; ++i)
+ SSEInstrDomainTable.insert(std::make_pair(PackedSingleInstrs[i],
+ PackedSingle));
+ for (unsigned i = 0, e = array_lengthof(PackedDoubleInstrs); i != e; ++i)
+ SSEInstrDomainTable.insert(std::make_pair(PackedDoubleInstrs[i],
+ PackedDouble));
+
+  // Replaceable instructions are stored as domain + 4*(row+1) so that
+  // GetSSEDomain() can recover both the domain and the ReplaceableInstrs row.
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ for (unsigned c = 0; c != 3; ++c)
+ SSEInstrDomainTable.insert(std::make_pair(ReplaceableInstrs[i][c],
+ c + 4*(i+1)));
+}
+
+X86InstrInfo::SSEDomain X86InstrInfo::GetSSEDomain(const MachineInstr *MI,
+ const unsigned *&equiv) const {
+ DenseMap<unsigned,unsigned>::const_iterator i =
+ SSEInstrDomainTable.find(MI->getOpcode());
+ if (i == SSEInstrDomainTable.end())
+ return NotSSEDomain;
+ unsigned value = i->second;
+ if (value/4)
+ equiv = ReplaceableInstrs[value/4 - 1];
+ else
+ equiv = 0;
+ return SSEDomain(value & 3);
+}
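Note that this commit only classifies instructions and prints them; nothing is rewritten yet. As a minimal sketch (not part of this patch) of how the equiv row returned by GetSSEDomain could later be used to move a replaceable instruction into a chosen domain, assuming a hypothetical helper and a NewDomain picked from the surrounding instructions:

  // Hypothetical helper, not in this commit: rewrite a replaceable
  // instruction to the equivalent opcode in NewDomain.
  static void switchSSEDomain(MachineInstr *MI, const X86InstrInfo *TII,
                              X86InstrInfo::SSEDomain NewDomain) {
    const unsigned *equiv = 0;
    X86InstrInfo::SSEDomain Dom = TII->GetSSEDomain(MI, equiv);
    // equiv is only set for rows of ReplaceableInstrs; it is indexed by
    // domain: {PackedInt, PackedSingle, PackedDouble}.
    if (!equiv || Dom == NewDomain || NewDomain == X86InstrInfo::NotSSEDomain)
      return;
    MI->setDesc(TII->get(equiv[NewDomain]));
  }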
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index c753cf2a53..9be38a4b56 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
///
FunctionPass *createX86FloatingPointStackifierPass();
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
/// createX87FPRegKillInserterPass - This function returns a pass which
/// inserts FP_REG_KILL instructions where needed.
///
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2323f5790a..71a4dae6a5 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -665,6 +665,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// Remove ambiguous entries.
assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
+
+ if (TM.getSubtarget<X86Subtarget>().hasSSE2())
+ populateSSEInstrDomainTable();
}
bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5111719a20..c3dbae9a5e 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -486,6 +486,9 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+
+ /// SSEInstrDomainTable - Map SSE opcodes to execution domain info.
+ DenseMap<unsigned, unsigned> SSEInstrDomainTable;
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,14 @@ public:
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ /// Some SSE instructions come in variants for three domains.
+ enum SSEDomain { PackedInt, PackedSingle, PackedDouble, NotSSEDomain };
+
+ /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for
+ /// unknown instructions. If the instruction has equivalents in other domains,
+ /// equiv points to a list of opcodes indexed by domain.
+ SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
@@ -732,6 +743,9 @@ private:
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;
+
+ // Implemented in SSEDomainFix.cpp
+ void populateSSEInstrDomainTable();
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f13e6f3525..06a481de25 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,11 +17,17 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
+static cl::opt<bool>
+SSEDomainFix("sse-domain-fix",
+ cl::desc("Enable fixing of SSE execution domain"),
+ cl::init(false), cl::Hidden);
+
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -169,6 +175,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
return true; // -print-machineinstr should print after this.
}
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (SSEDomainFix && OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
+
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
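With the hidden flag above, the new pass stays off by default; it can be exercised explicitly from the command line, e.g. (assuming an SSE2-capable target and optimization enabled):

  llc -O2 -sse-domain-fix test.ll -o test.s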
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 2bb54544d4..ae7b5b29af 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -66,6 +66,7 @@ public:
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};