diff options
author | Nadav Rotem <nrotem@apple.com> | 2013-04-26 20:19:41 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2013-04-26 20:19:41 +0000 |
commit | 0d840bb2faa6c8cabedaa64826e3c98988d30abb (patch) | |
tree | ee0c278aba4345463d356faaa4eef23db5455b7a | |
parent | 2173e1839c2d00f7f980450dd537047b7b376e6b (diff) | |
download | llvm-0d840bb2faa6c8cabedaa64826e3c98988d30abb.tar.gz llvm-0d840bb2faa6c8cabedaa64826e3c98988d30abb.tar.bz2 llvm-0d840bb2faa6c8cabedaa64826e3c98988d30abb.tar.xz |
Teach the interpreter to handle vector compares and additional vector arithmetic operations.
Patch by Yuri Veselov.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180626 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/ExecutionEngine/Interpreter/Execution.cpp | 324 | ||||
-rw-r--r-- | test/ExecutionEngine/test-interp-vec-arithm_float.ll | 20 | ||||
-rw-r--r-- | test/ExecutionEngine/test-interp-vec-arithm_int.ll | 37 | ||||
-rw-r--r-- | test/ExecutionEngine/test-interp-vec-logical.ll | 22 | ||||
-rw-r--r-- | test/ExecutionEngine/test-interp-vec-setcond-fp.ll | 25 | ||||
-rw-r--r-- | test/ExecutionEngine/test-interp-vec-setcond-int.ll | 69 |
6 files changed, 453 insertions, 44 deletions
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 526c04e082..b95a9e867c 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \ break; +#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) \ + case Type::VectorTyID: { \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\ + } break; + // Handle pointers specially because they must be compared with only as much // width as the host has. We _do not_ want to be comparing 64 bit values when // running on a 32-bit target, otherwise the upper 32 bits might mess up @@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(eq,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; @@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ne,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; @@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ult,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; @@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(slt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; @@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ugt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; @@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sgt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; @@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ule,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; @@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sle,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; @@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(uge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; @@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; @@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) { Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \ break +#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\ + break; + +#define IMPLEMENT_VECTOR_FCMP(OP) \ + case Type::VectorTyID: \ + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Float); \ + } else { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Double); \ + } + static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); + IMPLEMENT_VECTOR_FCMP(==); default: dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -298,17 +334,65 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, return Dest; } +#define IMPLEMENT_SCALAR_NANS(TY, X,Y) \ + if (TY->isFloatTy()) { \ + if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } else { \ + if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } + +#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) \ + assert(X.AggregateVal.size() == Y.AggregateVal.size()); \ + Dest.AggregateVal.resize( X.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) { \ + if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val || \ + Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val) \ + Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); \ + else { \ + Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); \ + } \ + } + +#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \ + if (TY->isVectorTy()) { \ + if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ + MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \ + } else { \ + MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \ + } \ + } \ + + + static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, - Type *Ty) { + Type *Ty) +{ GenericValue Dest; + // if input is scalar value and Src1 or Src2 is NaN return false + IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2) + // if vector input detect NaNs and fill mask + MASK_VECTOR_NANS(Ty, Src1, Src2, false) + GenericValue DestMask = Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(!=, Float); IMPLEMENT_FCMP(!=, Double); - - default: - dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + IMPLEMENT_VECTOR_FCMP(!=); + default: + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } + // in vector case mask out NaN elements + if (Ty->isVectorTy()) + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + if (DestMask.AggregateVal[_i].IntVal == false) + Dest.AggregateVal[_i].IntVal = APInt(1,false); + return Dest; } @@ -318,6 +402,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); + IMPLEMENT_VECTOR_FCMP(<=); default: dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -331,6 +416,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); + IMPLEMENT_VECTOR_FCMP(>=); default: dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -344,6 +430,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); + IMPLEMENT_VECTOR_FCMP(<); default: dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -357,6 +444,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); + IMPLEMENT_VECTOR_FCMP(>); default: dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -375,18 +463,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, return Dest; \ } +#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \ + if (TY->isVectorTy()) { \ + GenericValue DestMask = Dest; \ + Dest = _FUNC(Src1, Src2, Ty); \ + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \ + if (DestMask.AggregateVal[_i].IntVal == true) \ + Dest.AggregateVal[_i].IntVal = APInt(1,true); \ + return Dest; \ + } static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ) return executeFCMP_OEQ(Src1, Src2, Ty); + } static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE) return executeFCMP_ONE(Src1, Src2, Ty); } @@ -394,6 +496,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE) return executeFCMP_OLE(Src1, Src2, Ty); } @@ -401,6 +505,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE) return executeFCMP_OGE(Src1, Src2, Ty); } @@ -408,6 +514,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT) return executeFCMP_OLT(Src1, Src2, Ty); } @@ -415,33 +523,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT) return executeFCMP_OGT(Src1, Src2, Ty); } static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal == + Src1.AggregateVal[_i].FloatVal) && + (Src2.AggregateVal[_i].FloatVal == + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal == + Src1.AggregateVal[_i].DoubleVal) && + (Src2.AggregateVal[_i].DoubleVal == + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && Src2.DoubleVal == Src2.DoubleVal)); + } return Dest; } static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal != + Src1.AggregateVal[_i].FloatVal) || + (Src2.AggregateVal[_i].FloatVal != + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal != + Src1.AggregateVal[_i].DoubleVal) || + (Src2.AggregateVal[_i].DoubleVal != + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || Src2.DoubleVal != Src2.DoubleVal)); + } return Dest; } +static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2, + const Type *Ty, const bool val) { + GenericValue Dest; + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + Dest.AggregateVal[_i].IntVal = APInt(1,val); + } else { + Dest.IntVal = APInt(1, val); + } + + return Dest; +} + void Interpreter::visitFCmpInst(FCmpInst &I) { ExecutionContext &SF = ECStack.back(); Type *Ty = I.getOperand(0)->getType(); @@ -450,8 +613,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { GenericValue R; // Result switch (I.getPredicate()) { - case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break; - case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break; + default: + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + llvm_unreachable(0); + break; + case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); + break; + case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true); + break; case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break; @@ -466,9 +635,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break; case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; - default: - dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; - llvm_unreachable(0); } SetValue(&I, R, SF); @@ -502,16 +668,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty); case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty); case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty); - case FCmpInst::FCMP_FALSE: { - GenericValue Result; - Result.IntVal = APInt(1, false); - return Result; - } - case FCmpInst::FCMP_TRUE: { - GenericValue Result; - Result.IntVal = APInt(1, true); - return Result; - } + case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false); + case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true); default: dbgs() << "Unhandled Cmp predicate\n"; llvm_unreachable(0); @@ -525,27 +683,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result - switch (I.getOpcode()) { - case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; - case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; - case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; - case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; - case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; - case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; - case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; - case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; - case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; - case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; - case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; - case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; - case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; - case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; - case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; - default: - dbgs() << "Don't know how to handle this binary operator!\n-->" << I; - llvm_unreachable(0); + // First process vector operation + if (Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + R.AggregateVal.resize(Src1.AggregateVal.size()); + + // Macros to execute binary operation 'OP' over integer vectors +#define INTEGER_VECTOR_OPERATION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal; + + // Additional macros to execute binary operations udiv/sdiv/urem/srem since + // they have different notation. +#define INTEGER_VECTOR_FUNCTION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal); + + // Macros to execute binary operation 'OP' over floating point type TY + // (float or double) vectors +#define FLOAT_VECTOR_FUNCTION(OP, TY) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].TY = \ + Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY; + + // Macros to choose appropriate TY: float or double and run operation + // execution +#define FLOAT_VECTOR_OP(OP) { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ + FLOAT_VECTOR_FUNCTION(OP, FloatVal) \ + else { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ + FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \ + else { \ + dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \ + llvm_unreachable(0); \ + } \ + } \ +} + + switch(I.getOpcode()){ + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break; + case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break; + case Instruction::Mul: INTEGER_VECTOR_OPERATION(*) break; + case Instruction::UDiv: INTEGER_VECTOR_FUNCTION(udiv) break; + case Instruction::SDiv: INTEGER_VECTOR_FUNCTION(sdiv) break; + case Instruction::URem: INTEGER_VECTOR_FUNCTION(urem) break; + case Instruction::SRem: INTEGER_VECTOR_FUNCTION(srem) break; + case Instruction::And: INTEGER_VECTOR_OPERATION(&) break; + case Instruction::Or: INTEGER_VECTOR_OPERATION(|) break; + case Instruction::Xor: INTEGER_VECTOR_OPERATION(^) break; + case Instruction::FAdd: FLOAT_VECTOR_OP(+) break; + case Instruction::FSub: FLOAT_VECTOR_OP(-) break; + case Instruction::FMul: FLOAT_VECTOR_OP(*) break; + case Instruction::FDiv: FLOAT_VECTOR_OP(/) break; + case Instruction::FRem: + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].FloatVal = + fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); + else { + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].DoubleVal = + fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); + else { + dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + } + break; + } + } else { + switch (I.getOpcode()) { + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; + case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; + case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; + case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; + case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; + case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; + case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; + case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; + case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; + case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; + case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; + case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; + case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; + } } - SetValue(&I, R, SF); } diff --git a/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/test/ExecutionEngine/test-interp-vec-arithm_float.ll new file mode 100644 index 0000000000..d7f4ac90a9 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-arithm_float.ll @@ -0,0 +1,20 @@ +; RUN: %lli %s > /dev/null + + +define i32 @main() { + + %A_float = fadd <4 x float> <float 0.0, float 11.0, float 22.0, float 33.0>, <float 44.0, float 55.0, float 66.0, float 77.0> + %B_float = fsub <4 x float> %A_float, <float 88.0, float 99.0, float 100.0, float 111.0> + %C_float = fmul <4 x float> %B_float, %B_float + %D_float = fdiv <4 x float> %C_float, %B_float + %E_float = frem <4 x float> %D_float, %A_float + + + %A_double = fadd <3 x double> <double 0.0, double 111.0, double 222.0>, <double 444.0, double 555.0, double 665.0> + %B_double = fsub <3 x double> %A_double, <double 888.0, double 999.0, double 1001.0> + %C_double = fmul <3 x double> %B_double, %B_double + %D_double = fdiv <3 x double> %C_double, %B_double + %E_double = frem <3 x double> %D_double, %A_double + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/test/ExecutionEngine/test-interp-vec-arithm_int.ll new file mode 100644 index 0000000000..0ee14fe31b --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-arithm_int.ll @@ -0,0 +1,37 @@ +; RUN: %lli %s > /dev/null + +define i32 @main() { + %A_i8 = add <5 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4>, <i8 12, i8 34, i8 56, i8 78, i8 89> + %B_i8 = sub <5 x i8> %A_i8, <i8 11, i8 22, i8 33, i8 44, i8 55> + %C_i8 = mul <5 x i8> %B_i8, %B_i8 + %D_i8 = sdiv <5 x i8> %C_i8, %C_i8 + %E_i8 = srem <5 x i8> %D_i8, %D_i8 + %F_i8 = udiv <5 x i8> <i8 5, i8 6, i8 7, i8 8, i8 9>, <i8 6, i8 5, i8 4, i8 3, i8 2> + %G_i8 = urem <5 x i8> <i8 6, i8 7, i8 8, i8 9, i8 10>, <i8 5, i8 4, i8 2, i8 2, i8 1> + + %A_i16 = add <4 x i16> <i16 0, i16 1, i16 2, i16 3>, <i16 123, i16 345, i16 567, i16 789> + %B_i16 = sub <4 x i16> %A_i16, <i16 111, i16 222, i16 333, i16 444> + %C_i16 = mul <4 x i16> %B_i16, %B_i16 + %D_i16 = sdiv <4 x i16> %C_i16, %C_i16 + %E_i16 = srem <4 x i16> %D_i16, %D_i16 + %F_i16 = udiv <4 x i16> <i16 5, i16 6, i16 7, i16 8>, <i16 6, i16 5, i16 4, i16 3> + %G_i16 = urem <4 x i16> <i16 6, i16 7, i16 8, i16 9>, <i16 5, i16 4, i16 3, i16 2> + + %A_i32 = add <3 x i32> <i32 0, i32 1, i32 2>, <i32 1234, i32 3456, i32 5678> + %B_i32 = sub <3 x i32> %A_i32, <i32 1111, i32 2222, i32 3333> + %C_i32 = mul <3 x i32> %B_i32, %B_i32 + %D_i32 = sdiv <3 x i32> %C_i32, %C_i32 + %E_i32 = srem <3 x i32> %D_i32, %D_i32 + %F_i32 = udiv <3 x i32> <i32 5, i32 6, i32 7>, <i32 6, i32 5, i32 4> + %G_i32 = urem <3 x i32> <i32 6, i32 7, i32 8>, <i32 5, i32 4, i32 3> + + %A_i64 = add <2 x i64> <i64 0, i64 1>, <i64 12455, i64 34567> + %B_i64 = sub <2 x i64> %A_i64, <i64 11111, i64 22222> + %C_i64 = mul <2 x i64> %B_i64, %B_i64 + %D_i64 = sdiv <2 x i64> %C_i64, %C_i64 + %E_i64 = srem <2 x i64> %D_i64, %D_i64 + %F_i64 = udiv <2 x i64> <i64 5, i64 6>, <i64 6, i64 5> + %G_i64 = urem <2 x i64> <i64 6, i64 7>, <i64 5, i64 3> + + ret i32 0 +} diff --git a/test/ExecutionEngine/test-interp-vec-logical.ll b/test/ExecutionEngine/test-interp-vec-logical.ll new file mode 100644 index 0000000000..f8f1f0d863 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-logical.ll @@ -0,0 +1,22 @@ +; RUN: %lli %s > /dev/null + +define i32 @main() { + %A_i8 = and <5 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4>, <i8 8, i8 8, i8 8, i8 8, i8 8> + %B_i8 = or <5 x i8> %A_i8, <i8 7, i8 7, i8 7, i8 7, i8 7> + %C_i8 = xor <5 x i8> %B_i8, %A_i8 + + %A_i16 = and <4 x i16> <i16 4, i16 4, i16 4, i16 4>, <i16 8, i16 8, i16 8, i16 8> + %B_i16 = or <4 x i16> %A_i16, <i16 7, i16 7, i16 7, i16 7> + %C_i16 = xor <4 x i16> %B_i16, %A_i16 + + %A_i32 = and <3 x i32> <i32 4, i32 4, i32 4>, <i32 8, i32 8, i32 8> + %B_i32 = or <3 x i32> %A_i32, <i32 7, i32 7, i32 7> + %C_i32 = xor <3 x i32> %B_i32, %A_i32 + + %A_i64 = and <2 x i64> <i64 4, i64 4>, <i64 8, i64 8> + %B_i64 = or <2 x i64> %A_i64, <i64 7, i64 7> + %C_i64 = xor <2 x i64> %B_i64, %A_i64 + + ret i32 0 +} + diff --git a/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/test/ExecutionEngine/test-interp-vec-setcond-fp.ll new file mode 100644 index 0000000000..8b9b7c76d3 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-setcond-fp.ll @@ -0,0 +1,25 @@ +; RUN: %lli %s > /dev/null + +define i32 @main() { + %double1 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0> + %double2 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0> + %float1 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0> + %float2 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0> + %test49 = fcmp oeq <3 x float> %float1, %float2 + %test50 = fcmp oge <3 x float> %float1, %float2 + %test51 = fcmp ogt <3 x float> %float1, %float2 + %test52 = fcmp ole <3 x float> %float1, %float2 + %test53 = fcmp olt <3 x float> %float1, %float2 + %test54 = fcmp une <3 x float> %float1, %float2 + + %test55 = fcmp oeq <2 x double> %double1, %double2 + %test56 = fcmp oge <2 x double> %double1, %double2 + %test57 = fcmp ogt <2 x double> %double1, %double2 + %test58 = fcmp ole <2 x double> %double1, %double2 + %test59 = fcmp olt <2 x double> %double1, %double2 + %test60 = fcmp une <2 x double> %double1, %double2 + + ret i32 0 +} + + diff --git a/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/test/ExecutionEngine/test-interp-vec-setcond-int.ll new file mode 100644 index 0000000000..4c89109105 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-setcond-int.ll @@ -0,0 +1,69 @@ +; RUN: %lli %s > /dev/null + +define i32 @main() { + %int1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0> + %int2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0> + %long1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0> + %long2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0> + %sbyte1 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0> + %sbyte2 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0> + %short1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0> + %short2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0> + %ubyte1 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0> + %ubyte2 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0> + %uint1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0> + %uint2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0> + %ulong1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0> + %ulong2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0> + %ushort1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0> + %ushort2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0> + %test1 = icmp eq <5 x i8> %ubyte1, %ubyte2 + %test2 = icmp uge <5 x i8> %ubyte1, %ubyte2 + %test3 = icmp ugt <5 x i8> %ubyte1, %ubyte2 + %test4 = icmp ule <5 x i8> %ubyte1, %ubyte2 + %test5 = icmp ult <5 x i8> %ubyte1, %ubyte2 + %test6 = icmp ne <5 x i8> %ubyte1, %ubyte2 + %test7 = icmp eq <4 x i16> %ushort1, %ushort2 + %test8 = icmp uge <4 x i16> %ushort1, %ushort2 + %test9 = icmp ugt <4 x i16> %ushort1, %ushort2 + %test10 = icmp ule <4 x i16> %ushort1, %ushort2 + %test11 = icmp ult <4 x i16> %ushort1, %ushort2 + %test12 = icmp ne <4 x i16> %ushort1, %ushort2 + %test13 = icmp eq <3 x i32> %uint1, %uint2 + %test14 = icmp uge <3 x i32> %uint1, %uint2 + %test15 = icmp ugt <3 x i32> %uint1, %uint2 + %test16 = icmp ule <3 x i32> %uint1, %uint2 + %test17 = icmp ult <3 x i32> %uint1, %uint2 + %test18 = icmp ne <3 x i32> %uint1, %uint2 + %test19 = icmp eq <2 x i64> %ulong1, %ulong2 + %test20 = icmp uge <2 x i64> %ulong1, %ulong2 + %test21 = icmp ugt <2 x i64> %ulong1, %ulong2 + %test22 = icmp ule <2 x i64> %ulong1, %ulong2 + %test23 = icmp ult <2 x i64> %ulong1, %ulong2 + %test24 = icmp ne <2 x i64> %ulong1, %ulong2 + %test25 = icmp eq <5 x i8> %sbyte1, %sbyte2 + %test26 = icmp sge <5 x i8> %sbyte1, %sbyte2 + %test27 = icmp sgt <5 x i8> %sbyte1, %sbyte2 + %test28 = icmp sle <5 x i8> %sbyte1, %sbyte2 + %test29 = icmp slt <5 x i8> %sbyte1, %sbyte2 + %test30 = icmp ne <5 x i8> %sbyte1, %sbyte2 + %test31 = icmp eq <4 x i16> %short1, %short2 + %test32 = icmp sge <4 x i16> %short1, %short2 + %test33 = icmp sgt <4 x i16> %short1, %short2 + %test34 = icmp sle <4 x i16> %short1, %short2 + %test35 = icmp slt <4 x i16> %short1, %short2 + %test36 = icmp ne <4 x i16> %short1, %short2 + %test37 = icmp eq <3 x i32> %int1, %int2 + %test38 = icmp sge <3 x i32> %int1, %int2 + %test39 = icmp sgt <3 x i32> %int1, %int2 + %test40 = icmp sle <3 x i32> %int1, %int2 + %test41 = icmp slt <3 x i32> %int1, %int2 + %test42 = icmp ne <3 x i32> %int1, %int2 + %test43 = icmp eq <2 x i64> %long1, %long2 + %test44 = icmp sge <2 x i64> %long1, %long2 + %test45 = icmp sgt <2 x i64> %long1, %long2 + %test46 = icmp sle <2 x i64> %long1, %long2 + %test47 = icmp slt <2 x i64> %long1, %long2 + %test48 = icmp ne <2 x i64> %long1, %long2 + ret i32 0 +} |