summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/Passes.h4
-rw-r--r--include/llvm/InitializePasses.h1
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp2
-rw-r--r--lib/CodeGen/BranchFolding.cpp20
-rw-r--r--lib/CodeGen/CMakeLists.txt1
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp2
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp33
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp240
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp2
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp7
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.h5
-rw-r--r--test/CodeGen/ARM/code-placement.ll6
-rw-r--r--test/CodeGen/X86/machine-cp.ll36
13 files changed, 318 insertions, 41 deletions
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index b4f6cac858..f077f2b8d6 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -193,6 +193,10 @@ namespace llvm {
/// instructions.
FunctionPass *createMachineSinkingPass();
+ /// createMachineCopyPropagationPass - This pass performs copy propagation on
+ /// machine instructions.
+ FunctionPass *createMachineCopyPropagationPass();
+
/// createPeepholeOptimizerPass - This pass performs peephole optimizations -
/// like extension and comparison eliminations.
FunctionPass *createPeepholeOptimizerPass();
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index ce87f16c2e..ed044c0ad9 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -79,6 +79,7 @@ void initializeCallGraphAnalysisGroup(PassRegistry&);
void initializeCodeGenPreparePass(PassRegistry&);
void initializeConstantMergePass(PassRegistry&);
void initializeConstantPropagationPass(PassRegistry&);
+void initializeMachineCopyPropagationPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 6cf4571d73..77958d9c95 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -186,7 +186,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const unsigned *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
for (const unsigned *Alias = TRI->getOverlaps(Reg);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 89894c37ee..29e8545030 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -180,7 +180,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TRI = tri;
MMI = mmi;
- RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+ RS = new RegScavenger();
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
@@ -368,16 +368,14 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
MachineBasicBlock *NewMBB) {
- if (RS) {
- RS->enterBasicBlock(CurMBB);
- if (!CurMBB->empty())
- RS->forward(prior(CurMBB->end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
- if (RegsLiveAtExit[i])
- NewMBB->addLiveIn(i);
- }
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ce9d0d4d0d..7fa5e47f9e 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -40,6 +40,7 @@ add_llvm_library(LLVMCodeGen
MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
MachineCodeEmitter.cpp
+ MachineCopyPropagation.cpp
MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 128143e70f..5c325396db 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -102,7 +102,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const unsigned *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 62227fd4d6..7fd089a5db 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -452,23 +452,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (addPostRegAlloc(PM))
printAndVerify(PM, "After PostRegAlloc passes");
- PM.add(createExpandPostRAPseudosPass());
- printAndVerify(PM, "After ExpandPostRAPseudos");
-
// Insert prolog/epilog code. Eliminate abstract frame index references...
PM.add(createPrologEpilogCodeInserter());
printAndVerify(PM, "After PrologEpilogCodeInserter");
- // Run pre-sched2 passes.
- if (addPreSched2(PM))
- printAndVerify(PM, "After PreSched2 passes");
-
- // Second pass scheduler.
- if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) {
- PM.add(createPostRAScheduler(getOptLevel()));
- printAndVerify(PM, "After PostRAScheduler");
- }
-
// Branch folding must be run after regalloc and prolog/epilog insertion.
if (getOptLevel() != CodeGenOpt::None && !DisableBranchFold) {
PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
@@ -481,6 +468,26 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printNoVerify(PM, "After TailDuplicate");
}
+ // Copy propagation.
+ if (getOptLevel() != CodeGenOpt::None) {
+ PM.add(createMachineCopyPropagationPass());
+ printNoVerify(PM, "After copy propagation pass");
+ }
+
+ // Expand pseudo instructions before second scheduling pass.
+ PM.add(createExpandPostRAPseudosPass());
+ printNoVerify(PM, "After ExpandPostRAPseudos");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM))
+ printNoVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) {
+ PM.add(createPostRAScheduler(getOptLevel()));
+ printNoVerify(PM, "After PostRAScheduler");
+ }
+
PM.add(createGCMachineCodeAnalysisPass());
if (PrintGCInfo)
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 0000000000..861025ca0b
--- /dev/null
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,240 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ BitVector ReservedRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ void SourceNoLongerAvailable(unsigned Reg,
+ DenseMap<unsigned, unsigned> &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+ bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ };
+}
+char MachineCopyPropagation::ID = 0;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+FunctionPass *llvm::createMachineCopyPropagationPass() {
+ return new MachineCopyPropagation();
+}
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+ DenseMap<unsigned, unsigned> &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+ DenseMap<unsigned, unsigned>::iterator SI = SrcMap.find(Reg);
+ if (SI != SrcMap.end()) {
+ unsigned MappedDef = SI->second;
+ // Source of copy is no longer available for propagation.
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const unsigned *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ SI = SrcMap.find(*AS);
+ if (SI != SrcMap.end()) {
+ unsigned MappedDef = SI->second;
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const unsigned *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+}
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+ DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
+ DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
+ DenseMap<unsigned, unsigned> SrcMap; // Src -> Def map
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Def) ||
+ TargetRegisterInfo::isVirtualRegister(Src))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+ if (CI != AvailCopyMap.end()) {
+ MachineInstr *CopyMI = CI->second;
+ unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+ if (!ReservedRegs.test(Def) &&
+ (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX<kill>
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ CopyMI->getOperand(1).setIsKill(false);
+ MI->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ continue;
+ }
+ }
+
+ // If Src is defined by a previous copy, it cannot be eliminated.
+ CI = CopyMap.find(Src);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+
+ // Copy is now a candidate for deletion.
+ MaybeDeadCopies.insert(MI);
+
+ // If 'Src' is previously source of another copy, then this earlier copy's
+ // source is no longer available. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+ // Remember Def is defined by the copy.
+ CopyMap[Def] = MI;
+ AvailCopyMap[Def] = MI;
+ for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ SrcMap[Src] = Def;
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+
+ // No longer defined by a copy.
+ CopyMap.erase(Reg);
+ AvailCopyMap.erase(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+
+ // If 'Reg' is previously source of a copy, it is no longer available for
+ // copy propagation.
+ SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+ }
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+ // If MBB does have successors, then conservative assume the defs are live-out
+ // since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ TRI = MF.getTarget().getRegisterInfo();
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= CopyPropagateBlock(*I);
+
+ return Changed;
+}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index ca02aa1b81..07cf027691 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -96,7 +96,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
- const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ const unsigned *CSRegs = TRI->getCalleeSavedRegs(&MF);
if (CSRegs != NULL)
for (unsigned i = 0; CSRegs[i]; ++i)
CalleeSavedRegs.set(CSRegs[i]);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index b02f3b6e1e..a6556a51c8 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -136,13 +136,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
- if (BB == ML->getLoopLatch()) {
- MachineBasicBlock *Header = ML->getHeader();
- for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
- E = Header->livein_end(); I != E; ++I)
- LoopLiveInRegs.insert(*I);
+ if (BB == ML->getLoopLatch())
LoopRegs.VisitLoop(ML);
- }
}
/// AddSchedBarrierDeps - Add dependencies from instructions in the current
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 666bdf548c..a6233d3482 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -120,11 +120,6 @@ namespace llvm {
///
LoopDependencies LoopRegs;
- /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
- /// back-edge-aware scheduling.
- ///
- SmallSet<unsigned, 8> LoopLiveInRegs;
-
protected:
/// DbgValues - Remember instruction that preceeds DBG_VALUE.
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef659252..487ec690ea 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644
index 0000000000..54fa01c38f
--- /dev/null
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: jne
+ %cmp1 = icmp eq i32 %b, 0
+ br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body: ; preds = %entry, %while.body
+ %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+ %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+ %rem = srem i32 %a.addr.03, %b.addr.02
+ %cmp = icmp eq i32 %rem, 0
+ br i1 %cmp, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+ ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancels out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+ %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+ ret <8 x i16> %tmp8
+}