summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Lattner <sabre@nondot.org> 2008-03-09 05:42:06 +0000
committer Chris Lattner <sabre@nondot.org> 2008-03-09 05:42:06 +0000
commit 67f453aae7ac55f30af9e961a21029beabaaf47b (patch)
tree 7a7fd9ea19992b97cc52890188a1cdab8940c738
parent 529de8a45702cd34968d79d13f95ed1e5d5fa250 (diff)
download llvm-67f453aae7ac55f30af9e961a21029beabaaf47b.tar.gz
llvm-67f453aae7ac55f30af9e961a21029beabaaf47b.tar.bz2
llvm-67f453aae7ac55f30af9e961a21029beabaaf47b.tar.xz
Finish implementing a readme entry: when inserting an i64 variable
into a vector of zeros or undef, and when the top part is obviously zero, we can
just use movd + shuffle. This allows us to compile vec_set-B.ll into:

_test3:
        movl $1234567, %eax
        andl 4(%esp), %eax
        movd %eax, %xmm0
        ret

instead of:

_test3:
        subl $28, %esp
        movl $1234567, %eax
        andl 32(%esp), %eax
        movl %eax, (%esp)
        movl $0, 4(%esp)
        movq (%esp), %xmm0
        addl $28, %esp
        ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48090 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/README-SSE.txt 38
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp 10
-rw-r--r-- test/CodeGen/X86/vec_set-B.ll 24
3 files changed, 29 insertions, 43 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7087c68173..4d7224514c 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -781,41 +781,3 @@ LLVM should be able to generate the same thing as gcc. This looks like it is
just a matter of matching (scalar_to_vector (load x)) to movd.
//===---------------------------------------------------------------------===//
-
-These two functions should compile to identical code on x86-32:
-
-define <2 x i64> @test2(i64 %arg) {
-entry:
- %A = and i64 %arg, 1234567
- %B = insertelement <2 x i64> undef, i64 %A, i32 0
- ret <2 x i64> %B
-}
-
-define <2 x i64> @test2(i64 %arg) {
-entry:
- %A = and i64 %arg, 1234567
- %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
- ret <2 x i64> %B
-}
-
-The later compiles to:
-
-_test2:
- movl $1234567, %eax
- andl 4(%esp), %eax
- movd %eax, %xmm0
- ret
-
-the former compiles to:
-
-_test2:
- subl $28, %esp
- movl $1234567, %eax
- andl 32(%esp), %eax
- movl %eax, (%esp)
- movl $0, 4(%esp)
- movaps (%esp), %xmm0
- addl $28, %esp
- ret
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 133a2da1f6..be7f91c6ae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3063,11 +3063,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::UNDEF, VT);
}
- // Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
- return SDOperand();
-
- // Special case for single non-zero element.
+ // Special case for single non-zero, non-undef, element.
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
@@ -3141,6 +3137,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
}
}
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1)
+ return SDOperand();
+
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
new file mode 100644
index 0000000000..e4e5667d2f
--- /dev/null
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep movaps
+; RUN: llvm-as < %s | llc -march=x86 | grep esp | count 2
+
+; These should both generate something like this:
+;_test3:
+; movl $1234567, %eax
+; andl 4(%esp), %eax
+; movd %eax, %xmm0
+; ret
+
+define <2 x i64> @test3(i64 %arg) {
+entry:
+ %A = and i64 %arg, 1234567
+ %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
+ ret <2 x i64> %B
+}
+
+define <2 x i64> @test2(i64 %arg) {
+entry:
+ %A = and i64 %arg, 1234567
+ %B = insertelement <2 x i64> undef, i64 %A, i32 0
+ ret <2 x i64> %B
+}
+