[X86] Fix a bug in the lowering of BLENDI introduced in r209043.

ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the second argument. On the other hand, BLENDI uses 0 to identify the first argument and 1 to identify the second argument. Fix the generation of the blend mask to account for this difference. The bug did not show up with r209043, because we were not checking for the actual arguments of the blend instruction! This commit also fixes the test cases. Note: The same mask works for the BLENDr variant because the arguments are swapped during instruction selection (see the BLENDXXrr patterns). <rdar://problem/16975435> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209324 91177308-0d34-0410-b5e6-96231b3b80d8
author: Quentin Colombet <qcolombet@apple.com> 2014-05-21 22:00:39 +0000
committer: Quentin Colombet <qcolombet@apple.com> 2014-05-21 22:00:39 +0000
commit: fd0096a42c6d21e922e99669b1752a03987ebc84 (patch)
tree: ac0cf379547a9df9e459becef1b32e81f5368a91
parent: d1b5bdaebdcdfc85854e6dac538bcc273b6a486a (diff)
download: llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.gz
llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.bz2
llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.xz
3 files changed, 48 insertions, 12 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 72743a97ab..61828759fc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7980,7 +7980,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-// This function assumes its argument is a BUILD_VECTOR of constand or
+// This function assumes its argument is a BUILD_VECTOR of constants or
 // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
 // true.
 static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
@@ -8004,9 +8004,13 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
       Lane2Cond = !isZero(SndLaneEltCond);
 
     if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
-      MaskValue |= !!Lane1Cond << i;
+      // Lane1Cond != 0 means we want the first argument.
+      // Lane1Cond == 0 means we want the second argument.
+      // The blend mask bit is 0 for the first argument and 1 for the
+      // second. Therefore, invert the condition.
+      MaskValue |= !Lane1Cond << i;
     else if (Lane1Cond < 0)
-      MaskValue |= !!Lane2Cond << i;
+      MaskValue |= !Lane2Cond << i;
     else
       return false;
   }
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
index 8577a616c3..4d4f6c1a03 100644
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -3,7 +3,16 @@
 ; AVX128 tests:
 
 ;CHECK-LABEL: vsel_float:
-;CHECK: vblendps    $5
+; select mask is <i1 true, i1 false, i1 true, i1 false>.
+; Written most-significant bit first (lane 3 down to lane 0): 0101 = 5.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel blend immediate encoding, thus we expect
+; the inverted mask: 1010 = 10.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+; result is in xmm0 => destination argument.
+;CHECK: vblendps    $10, %xmm1, %xmm0, %xmm0
 ;CHECK: ret
 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
@@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps   $5
+;CHECK: vblendps   $10, %xmm1, %xmm0, %xmm0
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
 
 ;CHECK-LABEL: vsel_float8:
 ;CHECK-NOT: vinsertf128
-;CHECK: vblendps    $17
+; select mask is <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (most-significant bit first):
+; 00010001 = 17.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel blend immediate encoding, thus we expect
+; the inverted mask: 11101110 = 238.
+;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
 ;CHECK: ret
 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
 
 ;CHECK-LABEL: vsel_i328:
 ;CHECK-NOT: vinsertf128
-;CHECK: vblendps    $17
+;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
 ;CHECK-NEXT: ret
 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
@@ -69,8 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
 }
 
 ;CHECK-LABEL: vsel_double8:
-;CHECK: vblendpd    $1
-;CHECK: vblendpd    $1
+; select mask is 0001 for each 4-element half => Intel blend mask: ~0001 = 1110 = 14
+; ABI:
+; v1 is in ymm0 and ymm1.
+; v2 is in ymm2 and ymm3.
+; result is in ymm0 and ymm1.
+; Compute the low part: res.low = blend v1.low, v2.low, blendmask
+;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
+; Compute the high part.
+;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
 ;CHECK: ret
 define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
@@ -78,8 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
 }
 
 ;CHECK-LABEL: vsel_i648:
-;CHECK: vblendpd    $1
-;CHECK: vblendpd    $1
+;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
+;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
 ;CHECK: ret
 define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 4e17a714bf..34aaf2c31a 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -22,7 +22,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 }
 
 ;CHECK-LABEL: vsel_8xi16:
-;CHECK: pblendw $17
+; The select mask is
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (most-significant bit first):
+; 00010001 = 17.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel blend immediate encoding, thus we expect
+; the inverted mask: 11101110 = 238.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+;CHECK: pblendw $238, %xmm1, %xmm0
 ;CHECK: ret
 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
author	Quentin Colombet <qcolombet@apple.com>	2014-05-21 22:00:39 +0000
committer	Quentin Colombet <qcolombet@apple.com>	2014-05-21 22:00:39 +0000
commit	fd0096a42c6d21e922e99669b1752a03987ebc84 (patch)
tree	ac0cf379547a9df9e459becef1b32e81f5368a91
parent	d1b5bdaebdcdfc85854e6dac538bcc273b6a486a (diff)
download	llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.gz llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.bz2 llvm-fd0096a42c6d21e922e99669b1752a03987ebc84.tar.xz