summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-04-07 20:22:56 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-04-07 20:22:56 +0000
commitcd3d60c4505efad809a3d8b4ba9aed315568f8d8 (patch)
treeb2c693de3c8970350de1585f3c608a8824768169
parent8764c8979c66966b5af62a0a316acead47c038cd (diff)
downloadllvm-cd3d60c4505efad809a3d8b4ba9aed315568f8d8.tar.gz
llvm-cd3d60c4505efad809a3d8b4ba9aed315568f8d8.tar.bz2
llvm-cd3d60c4505efad809a3d8b4ba9aed315568f8d8.tar.xz
TargetLowering: Fix getTypeConversion handling of extended vector types
The code in getTypeConversion attempts to promote the element vector type before it trys to split or widen the vector. After it failed finding a legal vector type by promoting it would continue using the promoted vector element type. Thereby missing legal splitted vector types. For example the type v32i32 that has a legal split of 4 x v3i32 on x86/sse2 would be transformed to: v32i256 and from there on successively split to: v16i256, v8i256, v1i256 and then finally ends up as an i64 type. By resetting the vector element type to the original vector element type that existed before the promotion the code will attempt to split the vector type to smaller vector widths of the same type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178999 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetLowering.h5
-rw-r--r--test/Analysis/CostModel/X86/testshiftashr.ll8
-rw-r--r--test/Analysis/CostModel/X86/testshiftlshr.ll9
-rw-r--r--test/Analysis/CostModel/X86/testshiftshl.ll9
4 files changed, 17 insertions, 14 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index cb62ed33a1..e169bcf9e4 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1523,6 +1523,7 @@ public:
// or until the element integer type is too big. If a legal type was not
// found, fallback to the usual mechanism of widening/splitting the
// vector.
+ EVT OldEltVT = EltVT;
while (1) {
// Increase the bitwidth of the element to the next pow-of-two
// (which is greater than 8 bits).
@@ -1541,6 +1542,10 @@ public:
return LegalizeKind(TypePromoteInteger,
EVT::getVectorVT(Context, EltVT, NumElts));
}
+
+ // Reset the type to the unexpanded type if we did not find a legal vector
+ // type with a promoted vector element type.
+ EltVT = OldEltVT;
}
// Try to widen the vector until a legal type is found.
diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll
index f35eea8716..d96a92fe2a 100644
--- a/test/Analysis/CostModel/X86/testshiftashr.ll
+++ b/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -113,7 +113,7 @@ entry:
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
; SSE2: shift32i32
- ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2: cost of 320 {{.*}} ashr
; SSE2-CODEGEN: shift32i32
; SSE2-CODEGEN: sarl %cl
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2: cost of 320 {{.*}} ashr
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: sarq %cl
@@ -373,7 +373,7 @@ define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
entry:
; SSE2: shift32i32c
; getTypeConversion fails here and promotes this to a i64.
- ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2: cost of 8 {{.*}} ashr
; SSE2-CODEGEN: shift32i32c
; SSE2-CODEGEN: psrad $3
%0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
@@ -443,7 +443,7 @@ entry:
define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
entry:
; SSE2: shift32i64c
- ; SSE2: cost of 256 {{.*}} ashr
+ ; SSE2: cost of 320 {{.*}} ashr
; SSE2-CODEGEN: shift32i64c
; SSE2-CODEGEN: sarq $3
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 8d6ef38742..7bc8d89e4a 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -113,7 +113,7 @@ entry:
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
; SSE2: shift32i32
- ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2: cost of 320 {{.*}} lshr
; SSE2-CODEGEN: shift32i32
; SSE2-CODEGEN: shrl %cl
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2: cost of 320 {{.*}} lshr
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: shrq %cl
@@ -372,8 +372,7 @@ entry:
define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
entry:
; SSE2: shift32i32c
- ; getTypeConversion fails here and promotes this to a i64.
- ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2: cost of 8 {{.*}} lshr
; SSE2-CODEGEN: shift32i32c
; SSE2-CODEGEN: psrld $3
%0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
@@ -443,7 +442,7 @@ entry:
define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
entry:
; SSE2: shift32i64c
- ; SSE2: cost of 256 {{.*}} lshr
+ ; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift32i64c
; SSE2-CODEGEN: psrlq $3
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
index f45a698792..40effd0292 100644
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -113,7 +113,7 @@ entry:
define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
entry:
; SSE2: shift32i32
- ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2: cost of 80 {{.*}} shl
; SSE2-CODEGEN: shift32i32
; SSE2-CODEGEN: pmuludq
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2: cost of 320 {{.*}} shl
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: shlq %cl
@@ -372,8 +372,7 @@ entry:
define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
entry:
; SSE2: shift32i32c
- ; getTypeConversion fails here and promotes this to a i64.
- ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2: cost of 8 {{.*}} shl
; SSE2-CODEGEN: shift32i32c
; SSE2-CODEGEN: pslld $3
%0 = shl %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
@@ -443,7 +442,7 @@ entry:
define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
entry:
; SSE2: shift32i64c
- ; SSE2: cost of 256 {{.*}} shl
+ ; SSE2: cost of 16 {{.*}} shl
; SSE2-CODEGEN: shift32i64c
; SSE2-CODEGEN: psllq $3