summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-04-12 20:45:20 +0000
committerNadav Rotem <nrotem@apple.com>2013-04-12 20:45:20 +0000
commitaffe889d08fe95ab4ef3d818efa1a2a98fd7e736 (patch)
tree07c782c73ac47be9f5a0bba9d3c5189647d7f13d
parent3f42936af8be447c339a7c41bcd2b1468e39135b (diff)
downloadllvm-affe889d08fe95ab4ef3d818efa1a2a98fd7e736.tar.gz
llvm-affe889d08fe95ab4ef3d818efa1a2a98fd7e736.tar.bz2
llvm-affe889d08fe95ab4ef3d818efa1a2a98fd7e736.tar.xz
Add support for additional vector instructions in the interpreter.
patch by Veselov, Yuri <Yuri.Veselov@intel.com>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179409 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp321
-rw-r--r--test/ExecutionEngine/test-interp-vec-arithm_float.ll20
-rw-r--r--test/ExecutionEngine/test-interp-vec-arithm_int.ll37
-rw-r--r--test/ExecutionEngine/test-interp-vec-logical.ll22
-rw-r--r--test/ExecutionEngine/test-interp-vec-setcond-fp.ll25
-rw-r--r--test/ExecutionEngine/test-interp-vec-setcond-int.ll69
6 files changed, 450 insertions, 44 deletions
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 526c04e082..eb75e492e7 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
break;
+#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) \
+ case Type::VectorTyID: { \
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \
+ for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \
+ Dest.AggregateVal[_i].IntVal = APInt(1, \
+ Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\
+ } break;
+
// Handle pointers specially because they must be compared with only as much
// width as the host has. We _do not_ want to be comparing 64 bit values when
// running on a 32-bit target, otherwise the upper 32 bits might mess up
@@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(eq,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty);
IMPLEMENT_POINTER_ICMP(==);
default:
dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
@@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ne,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty);
IMPLEMENT_POINTER_ICMP(!=);
default:
dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
@@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ult,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
@@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(slt,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
@@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
@@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
@@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ule,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
@@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sle,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
@@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(uge,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
@@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sge,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
@@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
break
+#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) \
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \
+ for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \
+ Dest.AggregateVal[_i].IntVal = APInt(1, \
+ Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\
+ break;
+
+#define IMPLEMENT_VECTOR_FCMP(OP) \
+ case Type::VectorTyID: \
+ if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \
+ IMPLEMENT_VECTOR_FCMP_T(OP, Float); \
+ } else { \
+ IMPLEMENT_VECTOR_FCMP_T(OP, Double); \
+ }
+
static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(==, Float);
IMPLEMENT_FCMP(==, Double);
+ IMPLEMENT_VECTOR_FCMP(==);
default:
dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -298,17 +334,62 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
return Dest;
}
+#define IMPLEMENT_SCALAR_NANS(TY, X,Y) \
+ if (TY->isFloatTy()) { \
+ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
+ Dest.IntVal = APInt(1,false); \
+ return Dest; \
+ } \
+ } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \
+ Dest.IntVal = APInt(1,false); \
+ return Dest; \
+ }
+
+#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) \
+ assert(X.AggregateVal.size() == Y.AggregateVal.size()); \
+ Dest.AggregateVal.resize( X.AggregateVal.size() ); \
+ for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) { \
+ if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val || \
+ Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val) \
+ Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); \
+ else { \
+ Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); \
+ } \
+ }
+
+#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \
+ if (TY->isVectorTy()) \
+ if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \
+ MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \
+ } else { \
+ MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \
+ } \
+
+
+
static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
- Type *Ty) {
+ Type *Ty)
+{
GenericValue Dest;
+ // if input is scalar value and Src1 or Src2 is NaN return false
+ IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2)
+ // if vector input detect NaNs and fill mask
+ MASK_VECTOR_NANS(Ty, Src1, Src2, false)
+ GenericValue DestMask = Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(!=, Float);
IMPLEMENT_FCMP(!=, Double);
-
- default:
- dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ IMPLEMENT_VECTOR_FCMP(!=);
+ default:
+ dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
+ // in vector case mask out NaN elements
+ if (Ty->isVectorTy())
+ for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++)
+ if (DestMask.AggregateVal[_i].IntVal == false)
+ Dest.AggregateVal[_i].IntVal = APInt(1,false);
+
return Dest;
}
@@ -318,6 +399,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(<=, Float);
IMPLEMENT_FCMP(<=, Double);
+ IMPLEMENT_VECTOR_FCMP(<=);
default:
dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -331,6 +413,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(>=, Float);
IMPLEMENT_FCMP(>=, Double);
+ IMPLEMENT_VECTOR_FCMP(>=);
default:
dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -344,6 +427,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(<, Float);
IMPLEMENT_FCMP(<, Double);
+ IMPLEMENT_VECTOR_FCMP(<);
default:
dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -357,6 +441,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(>, Float);
IMPLEMENT_FCMP(>, Double);
+ IMPLEMENT_VECTOR_FCMP(>);
default:
dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -375,18 +460,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
return Dest; \
}
+#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \
+ if (TY->isVectorTy()) { \
+ GenericValue DestMask = Dest; \
+ Dest = _FUNC(Src1, Src2, Ty); \
+ for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \
+ if (DestMask.AggregateVal[_i].IntVal == true) \
+ Dest.AggregateVal[_i].IntVal = APInt(1,true); \
+ return Dest; \
+ }
static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ)
return executeFCMP_OEQ(Src1, Src2, Ty);
+
}
static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE)
return executeFCMP_ONE(Src1, Src2, Ty);
}
@@ -394,6 +493,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE)
return executeFCMP_OLE(Src1, Src2, Ty);
}
@@ -401,6 +502,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE)
return executeFCMP_OGE(Src1, Src2, Ty);
}
@@ -408,6 +511,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT)
return executeFCMP_OLT(Src1, Src2, Ty);
}
@@ -415,33 +520,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT)
return executeFCMP_OGT(Src1, Src2, Ty);
}
static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
- if (Ty->isFloatTy())
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].FloatVal ==
+ Src1.AggregateVal[_i].FloatVal) &&
+ (Src2.AggregateVal[_i].FloatVal ==
+ Src2.AggregateVal[_i].FloatVal)));
+ } else {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].DoubleVal ==
+ Src1.AggregateVal[_i].DoubleVal) &&
+ (Src2.AggregateVal[_i].DoubleVal ==
+ Src2.AggregateVal[_i].DoubleVal)));
+ }
+ } else if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
Src2.FloatVal == Src2.FloatVal));
- else
+ else {
Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
Src2.DoubleVal == Src2.DoubleVal));
+ }
return Dest;
}
static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
- if (Ty->isFloatTy())
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].FloatVal !=
+ Src1.AggregateVal[_i].FloatVal) ||
+ (Src2.AggregateVal[_i].FloatVal !=
+ Src2.AggregateVal[_i].FloatVal)));
+ } else {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].DoubleVal !=
+ Src1.AggregateVal[_i].DoubleVal) ||
+ (Src2.AggregateVal[_i].DoubleVal !=
+ Src2.AggregateVal[_i].DoubleVal)));
+ }
+ } else if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
Src2.FloatVal != Src2.FloatVal));
- else
+ else {
Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
Src2.DoubleVal != Src2.DoubleVal));
+ }
return Dest;
}
+static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2,
+ const Type *Ty, const bool val) {
+ GenericValue Dest;
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,val);
+ } else {
+ Dest.IntVal = APInt(1, val);
+ }
+
+ return Dest;
+}
+
void Interpreter::visitFCmpInst(FCmpInst &I) {
ExecutionContext &SF = ECStack.back();
Type *Ty = I.getOperand(0)->getType();
@@ -450,8 +610,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
GenericValue R; // Result
switch (I.getPredicate()) {
- case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
- case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break;
+ default:
+ dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ break;
+ case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
+ break;
+ case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true);
+ break;
case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break;
@@ -466,9 +632,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
- default:
- dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
- llvm_unreachable(0);
}
SetValue(&I, R, SF);
@@ -502,16 +665,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty);
case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty);
case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty);
- case FCmpInst::FCMP_FALSE: {
- GenericValue Result;
- Result.IntVal = APInt(1, false);
- return Result;
- }
- case FCmpInst::FCMP_TRUE: {
- GenericValue Result;
- Result.IntVal = APInt(1, true);
- return Result;
- }
+ case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false);
+ case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true);
default:
dbgs() << "Unhandled Cmp predicate\n";
llvm_unreachable(0);
@@ -525,27 +680,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
- switch (I.getOpcode()) {
- case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
- case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
- case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
- case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
- case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
- case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
- case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
- case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
- case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
- case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
- case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
- case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
- case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
- case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
- case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
- default:
- dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
- llvm_unreachable(0);
+ // First process vector operation
+ if (Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ R.AggregateVal.resize(Src1.AggregateVal.size());
+
+ // Macros to execute binary operation 'OP' over integer vectors
+#define INTEGER_VECTOR_OPERATION(OP) \
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \
+ R.AggregateVal[i].IntVal = \
+ Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal;
+
+ // Additional macros to execute binary operations udiv/sdiv/urem/srem since
+ // they have different notation.
+#define INTEGER_VECTOR_FUNCTION(OP) \
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \
+ R.AggregateVal[i].IntVal = \
+ Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal);
+
+ // Macros to execute binary operation 'OP' over floating point type TY
+ // (float or double) vectors
+#define FLOAT_VECTOR_FUNCTION(OP, TY) \
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \
+ R.AggregateVal[i].TY = \
+ Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY;
+
+ // Macros to choose appropriate TY: float or double and run operation
+ // execution
+#define FLOAT_VECTOR_OP(OP) { \
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \
+ FLOAT_VECTOR_FUNCTION(OP, FloatVal) \
+ else { \
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \
+ FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \
+ else { \
+ dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \
+ llvm_unreachable(0); \
+ } \
+ } \
+}
+
+ switch(I.getOpcode()){
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ break;
+ case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break;
+ case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break;
+ case Instruction::Mul: INTEGER_VECTOR_OPERATION(*) break;
+ case Instruction::UDiv: INTEGER_VECTOR_FUNCTION(udiv) break;
+ case Instruction::SDiv: INTEGER_VECTOR_FUNCTION(sdiv) break;
+ case Instruction::URem: INTEGER_VECTOR_FUNCTION(urem) break;
+ case Instruction::SRem: INTEGER_VECTOR_FUNCTION(srem) break;
+ case Instruction::And: INTEGER_VECTOR_OPERATION(&) break;
+ case Instruction::Or: INTEGER_VECTOR_OPERATION(|) break;
+ case Instruction::Xor: INTEGER_VECTOR_OPERATION(^) break;
+ case Instruction::FAdd: FLOAT_VECTOR_OP(+) break;
+ case Instruction::FSub: FLOAT_VECTOR_OP(-) break;
+ case Instruction::FMul: FLOAT_VECTOR_OP(*) break;
+ case Instruction::FDiv: FLOAT_VECTOR_OP(/) break;
+ case Instruction::FRem:
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+ R.AggregateVal[i].FloatVal =
+ fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
+ else {
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+ R.AggregateVal[i].DoubleVal =
+ fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
+ else {
+ dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ break;
+ }
+ } else {
+ switch (I.getOpcode()) {
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ break;
+ case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
+ case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
+ case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
+ case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
+ case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
+ case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+ case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+ case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+ case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+ case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
+ case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
+ case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+ }
}
-
SetValue(&I, R, SF);
}
diff --git a/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/test/ExecutionEngine/test-interp-vec-arithm_float.ll
new file mode 100644
index 0000000000..d7f4ac90a9
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-arithm_float.ll
@@ -0,0 +1,20 @@
+; RUN: %lli %s > /dev/null
+
+
+define i32 @main() {
+
+ %A_float = fadd <4 x float> <float 0.0, float 11.0, float 22.0, float 33.0>, <float 44.0, float 55.0, float 66.0, float 77.0>
+ %B_float = fsub <4 x float> %A_float, <float 88.0, float 99.0, float 100.0, float 111.0>
+ %C_float = fmul <4 x float> %B_float, %B_float
+ %D_float = fdiv <4 x float> %C_float, %B_float
+ %E_float = frem <4 x float> %D_float, %A_float
+
+
+ %A_double = fadd <3 x double> <double 0.0, double 111.0, double 222.0>, <double 444.0, double 555.0, double 665.0>
+ %B_double = fsub <3 x double> %A_double, <double 888.0, double 999.0, double 1001.0>
+ %C_double = fmul <3 x double> %B_double, %B_double
+ %D_double = fdiv <3 x double> %C_double, %B_double
+ %E_double = frem <3 x double> %D_double, %A_double
+
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/test/ExecutionEngine/test-interp-vec-arithm_int.ll
new file mode 100644
index 0000000000..0ee14fe31b
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-arithm_int.ll
@@ -0,0 +1,37 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+ %A_i8 = add <5 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4>, <i8 12, i8 34, i8 56, i8 78, i8 89>
+ %B_i8 = sub <5 x i8> %A_i8, <i8 11, i8 22, i8 33, i8 44, i8 55>
+ %C_i8 = mul <5 x i8> %B_i8, %B_i8
+ %D_i8 = sdiv <5 x i8> %C_i8, %C_i8
+ %E_i8 = srem <5 x i8> %D_i8, %D_i8
+ %F_i8 = udiv <5 x i8> <i8 5, i8 6, i8 7, i8 8, i8 9>, <i8 6, i8 5, i8 4, i8 3, i8 2>
+ %G_i8 = urem <5 x i8> <i8 6, i8 7, i8 8, i8 9, i8 10>, <i8 5, i8 4, i8 2, i8 2, i8 1>
+
+ %A_i16 = add <4 x i16> <i16 0, i16 1, i16 2, i16 3>, <i16 123, i16 345, i16 567, i16 789>
+ %B_i16 = sub <4 x i16> %A_i16, <i16 111, i16 222, i16 333, i16 444>
+ %C_i16 = mul <4 x i16> %B_i16, %B_i16
+ %D_i16 = sdiv <4 x i16> %C_i16, %C_i16
+ %E_i16 = srem <4 x i16> %D_i16, %D_i16
+ %F_i16 = udiv <4 x i16> <i16 5, i16 6, i16 7, i16 8>, <i16 6, i16 5, i16 4, i16 3>
+ %G_i16 = urem <4 x i16> <i16 6, i16 7, i16 8, i16 9>, <i16 5, i16 4, i16 3, i16 2>
+
+ %A_i32 = add <3 x i32> <i32 0, i32 1, i32 2>, <i32 1234, i32 3456, i32 5678>
+ %B_i32 = sub <3 x i32> %A_i32, <i32 1111, i32 2222, i32 3333>
+ %C_i32 = mul <3 x i32> %B_i32, %B_i32
+ %D_i32 = sdiv <3 x i32> %C_i32, %C_i32
+ %E_i32 = srem <3 x i32> %D_i32, %D_i32
+ %F_i32 = udiv <3 x i32> <i32 5, i32 6, i32 7>, <i32 6, i32 5, i32 4>
+ %G_i32 = urem <3 x i32> <i32 6, i32 7, i32 8>, <i32 5, i32 4, i32 3>
+
+ %A_i64 = add <2 x i64> <i64 0, i64 1>, <i64 12455, i64 34567>
+ %B_i64 = sub <2 x i64> %A_i64, <i64 11111, i64 22222>
+ %C_i64 = mul <2 x i64> %B_i64, %B_i64
+ %D_i64 = sdiv <2 x i64> %C_i64, %C_i64
+ %E_i64 = srem <2 x i64> %D_i64, %D_i64
+ %F_i64 = udiv <2 x i64> <i64 5, i64 6>, <i64 6, i64 5>
+ %G_i64 = urem <2 x i64> <i64 6, i64 7>, <i64 5, i64 3>
+
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-logical.ll b/test/ExecutionEngine/test-interp-vec-logical.ll
new file mode 100644
index 0000000000..f8f1f0d863
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-logical.ll
@@ -0,0 +1,22 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+ %A_i8 = and <5 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4>, <i8 8, i8 8, i8 8, i8 8, i8 8>
+ %B_i8 = or <5 x i8> %A_i8, <i8 7, i8 7, i8 7, i8 7, i8 7>
+ %C_i8 = xor <5 x i8> %B_i8, %A_i8
+
+ %A_i16 = and <4 x i16> <i16 4, i16 4, i16 4, i16 4>, <i16 8, i16 8, i16 8, i16 8>
+ %B_i16 = or <4 x i16> %A_i16, <i16 7, i16 7, i16 7, i16 7>
+ %C_i16 = xor <4 x i16> %B_i16, %A_i16
+
+ %A_i32 = and <3 x i32> <i32 4, i32 4, i32 4>, <i32 8, i32 8, i32 8>
+ %B_i32 = or <3 x i32> %A_i32, <i32 7, i32 7, i32 7>
+ %C_i32 = xor <3 x i32> %B_i32, %A_i32
+
+ %A_i64 = and <2 x i64> <i64 4, i64 4>, <i64 8, i64 8>
+ %B_i64 = or <2 x i64> %A_i64, <i64 7, i64 7>
+ %C_i64 = xor <2 x i64> %B_i64, %A_i64
+
+ ret i32 0
+}
+
diff --git a/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/test/ExecutionEngine/test-interp-vec-setcond-fp.ll
new file mode 100644
index 0000000000..8b9b7c76d3
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-setcond-fp.ll
@@ -0,0 +1,25 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+ %double1 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0>
+ %double2 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0>
+ %float1 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0>
+ %float2 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0>
+ %test49 = fcmp oeq <3 x float> %float1, %float2
+ %test50 = fcmp oge <3 x float> %float1, %float2
+ %test51 = fcmp ogt <3 x float> %float1, %float2
+ %test52 = fcmp ole <3 x float> %float1, %float2
+ %test53 = fcmp olt <3 x float> %float1, %float2
+ %test54 = fcmp une <3 x float> %float1, %float2
+
+ %test55 = fcmp oeq <2 x double> %double1, %double2
+ %test56 = fcmp oge <2 x double> %double1, %double2
+ %test57 = fcmp ogt <2 x double> %double1, %double2
+ %test58 = fcmp ole <2 x double> %double1, %double2
+ %test59 = fcmp olt <2 x double> %double1, %double2
+ %test60 = fcmp une <2 x double> %double1, %double2
+
+ ret i32 0
+}
+
+
diff --git a/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/test/ExecutionEngine/test-interp-vec-setcond-int.ll
new file mode 100644
index 0000000000..4c89109105
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-setcond-int.ll
@@ -0,0 +1,69 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+ %int1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+ %int2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+ %long1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+ %long2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+ %sbyte1 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+ %sbyte2 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+ %short1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+ %short2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+ %ubyte1 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+ %ubyte2 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+ %uint1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+ %uint2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+ %ulong1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+ %ulong2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+ %ushort1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+ %ushort2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+ %test1 = icmp eq <5 x i8> %ubyte1, %ubyte2
+ %test2 = icmp uge <5 x i8> %ubyte1, %ubyte2
+ %test3 = icmp ugt <5 x i8> %ubyte1, %ubyte2
+ %test4 = icmp ule <5 x i8> %ubyte1, %ubyte2
+ %test5 = icmp ult <5 x i8> %ubyte1, %ubyte2
+ %test6 = icmp ne <5 x i8> %ubyte1, %ubyte2
+ %test7 = icmp eq <4 x i16> %ushort1, %ushort2
+ %test8 = icmp uge <4 x i16> %ushort1, %ushort2
+ %test9 = icmp ugt <4 x i16> %ushort1, %ushort2
+ %test10 = icmp ule <4 x i16> %ushort1, %ushort2
+ %test11 = icmp ult <4 x i16> %ushort1, %ushort2
+ %test12 = icmp ne <4 x i16> %ushort1, %ushort2
+ %test13 = icmp eq <3 x i32> %uint1, %uint2
+ %test14 = icmp uge <3 x i32> %uint1, %uint2
+ %test15 = icmp ugt <3 x i32> %uint1, %uint2
+ %test16 = icmp ule <3 x i32> %uint1, %uint2
+ %test17 = icmp ult <3 x i32> %uint1, %uint2
+ %test18 = icmp ne <3 x i32> %uint1, %uint2
+ %test19 = icmp eq <2 x i64> %ulong1, %ulong2
+ %test20 = icmp uge <2 x i64> %ulong1, %ulong2
+ %test21 = icmp ugt <2 x i64> %ulong1, %ulong2
+ %test22 = icmp ule <2 x i64> %ulong1, %ulong2
+ %test23 = icmp ult <2 x i64> %ulong1, %ulong2
+ %test24 = icmp ne <2 x i64> %ulong1, %ulong2
+ %test25 = icmp eq <5 x i8> %sbyte1, %sbyte2
+ %test26 = icmp sge <5 x i8> %sbyte1, %sbyte2
+ %test27 = icmp sgt <5 x i8> %sbyte1, %sbyte2
+ %test28 = icmp sle <5 x i8> %sbyte1, %sbyte2
+ %test29 = icmp slt <5 x i8> %sbyte1, %sbyte2
+ %test30 = icmp ne <5 x i8> %sbyte1, %sbyte2
+ %test31 = icmp eq <4 x i16> %short1, %short2
+ %test32 = icmp sge <4 x i16> %short1, %short2
+ %test33 = icmp sgt <4 x i16> %short1, %short2
+ %test34 = icmp sle <4 x i16> %short1, %short2
+ %test35 = icmp slt <4 x i16> %short1, %short2
+ %test36 = icmp ne <4 x i16> %short1, %short2
+ %test37 = icmp eq <3 x i32> %int1, %int2
+ %test38 = icmp sge <3 x i32> %int1, %int2
+ %test39 = icmp sgt <3 x i32> %int1, %int2
+ %test40 = icmp sle <3 x i32> %int1, %int2
+ %test41 = icmp slt <3 x i32> %int1, %int2
+ %test42 = icmp ne <3 x i32> %int1, %int2
+ %test43 = icmp eq <2 x i64> %long1, %long2
+ %test44 = icmp sge <2 x i64> %long1, %long2
+ %test45 = icmp sgt <2 x i64> %long1, %long2
+ %test46 = icmp sle <2 x i64> %long1, %long2
+ %test47 = icmp slt <2 x i64> %long1, %long2
+ %test48 = icmp ne <2 x i64> %long1, %long2
+ ret i32 0
+}