summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Quentin Colombet <qcolombet@apple.com> 2014-05-21 22:00:39 +0000
committer Quentin Colombet <qcolombet@apple.com> 2014-05-21 22:00:39 +0000
commit fd0096a42c6d21e922e99669b1752a03987ebc84 (patch)
tree ac0cf379547a9df9e459becef1b32e81f5368a91
parent d1b5bdaebdcdfc85854e6dac538bcc273b6a486a (diff)
download llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.gz
llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.bz2
llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.xz
[X86] Fix a bug in the lowering of BLENDI introduced in r209043.
ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the
second argument.
On the other hand, BLENDI uses 0 to identify the first argument and 1 to
identify the second argument.
Fix the generation of the blend mask to account for this difference.

The bug did not show up with r209043, because we were not checking for the
actual arguments of the blend instruction!

This commit also fixes the test cases.

Note: The same mask works for the BLENDr variant because the arguments are
swapped during instruction selection (see the BLENDXXrr patterns).

<rdar://problem/16975435>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209324 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 10
-rw-r--r-- test/CodeGen/X86/avx-blend.ll 38
-rw-r--r-- test/CodeGen/X86/blend-msb.ll 12
3 files changed, 48 insertions, 12 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 72743a97ab..61828759fc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7980,7 +7980,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// This function assumes its argument is a BUILD_VECTOR of constand or
+// This function assumes its argument is a BUILD_VECTOR of constants or
// undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
// true.
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
@@ -8004,9 +8004,13 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
Lane2Cond = !isZero(SndLaneEltCond);
if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
- MaskValue |= !!Lane1Cond << i;
+ // Lane1Cond != 0, means we want the first argument.
+ // Lane1Cond == 0, means we want the second argument.
+ // The BLENDI immediate encodes 0 for the first argument, 1
+ // for the second. Therefore, invert the condition.
+ MaskValue |= !Lane1Cond << i;
else if (Lane1Cond < 0)
- MaskValue |= !!Lane2Cond << i;
+ MaskValue |= !Lane2Cond << i;
else
return false;
}
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 8577a616c3..4d4f6c1a03 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -3,7 +3,16 @@
; AVX128 tests:
;CHECK-LABEL: vsel_float:
-;CHECK: vblendps $5
+; select mask is <i1 true, i1 false, i1 true, i1 false>.
+; Written with element 0 as the least significant bit, this is 0101 = 5.
+; In the select mask, '1' takes the first argument, '0' takes the second.
+; The blend immediate uses the opposite convention, thus we expect
+; the inverted mask: 1010 = 10.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+; result is in xmm0 => destination argument.
+;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
@@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps $5
+;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
;CHECK-LABEL: vsel_float8:
;CHECK-NOT: vinsertf128
-;CHECK: vblendps $17
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the bit mask (element 0 is the least significant bit):
+; 00010001 = 17.
+; In the select mask, '1' takes the first argument, '0' takes the second.
+; The blend immediate uses the opposite convention, thus we expect
+; the inverted mask: 11101110 = 238.
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
;CHECK: ret
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
;CHECK-LABEL: vsel_i328:
;CHECK-NOT: vinsertf128
-;CHECK: vblendps $17
+;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
;CHECK-NEXT: ret
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
@@ -69,8 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
}
;CHECK-LABEL: vsel_double8:
-;CHECK: vblendpd $1
-;CHECK: vblendpd $1
+; select mask splits into two halves, each 0001 => blend immediate: ~0001 = 1110 = 14
+; ABI:
+; v1 is in ymm0 and ymm1.
+; v2 is in ymm2 and ymm3.
+; result is in ymm0 and ymm1.
+; Compute the low part: res.low = blend v1.low, v2.low, blendmask
+;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
+; Compute the high part.
+;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
@@ -78,8 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
}
;CHECK-LABEL: vsel_i648:
-;CHECK: vblendpd $1
-;CHECK: vblendpd $1
+;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
+;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
;CHECK: ret
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 4e17a714bf..34aaf2c31a 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -22,7 +22,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
}
;CHECK-LABEL: vsel_8xi16:
-;CHECK: pblendw $17
+; The select mask is
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the bit mask (element 0 is the least significant bit):
+; 00010001 = 17.
+; In the select mask, '1' takes the first argument, '0' takes the second.
+; The blend immediate uses the opposite convention, thus we expect
+; the inverted mask: 11101110 = 238.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+;CHECK: pblendw $238, %xmm1, %xmm0
;CHECK: ret
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2