SHLD/SHRD are VectorPath (microcoded) instructions known to have poor latency on certain architectures. While generating SHLD/SHRD instructions is acceptable when optimizing for size, code optimized for speed on these platforms should instead use alternative sequences composed of the add, adc, shr, shl, or, and lea instructions, which are DirectPath instructions. These alternative sequences not only have lower latency but also increase decode bandwidth by allowing simultaneous decoding of a third DirectPath instruction.

AMD processor families K7, K8, K10, K12, K15 and K16 are known to have SHLD/SHRD instructions with very poor latency. The optimization guides for these processors recommend using an alternative sequence of instructions. For these AMD processors, I disabled the folding of (x << c) | (y >> (64 - c)) into SHLD/SHRD when we are not optimizing for size. It might be beneficial to disable this folding for some Intel processors as well. However, since I couldn't find specific recommendations regarding the use of SHLD/SHRD instructions on Intel processors, I haven't disabled this peephole for Intel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195383 91177308-0d34-0410-b5e6-96231b3b80d8
author: Ekaterina Romanova <katya_romanova@playstation.sony.com> 2013-11-21 23:21:26 +0000
committer: Ekaterina Romanova <katya_romanova@playstation.sony.com> 2013-11-21 23:21:26 +0000
commit: 46f7257ed1bbd2a169f6c930a805e702407d955b (patch)
tree: 35e063ff39eb0d44f13c7e126c9e964902fb49b4 /test/CodeGen/X86/x86-64-double-precision-shift-left.ll
parent: 934d1f83aecf8f01646f4b2a09167309a4c1bb8d (diff)
download: llvm-46f7257ed1bbd2a169f6c930a805e702407d955b.tar.gz
llvm-46f7257ed1bbd2a169f6c930a805e702407d955b.tar.bz2
llvm-46f7257ed1bbd2a169f6c930a805e702407d955b.tar.xz
1 files changed, 77 insertions, 0 deletions
diff --git a/test/CodeGen/X86/x86-64-double-precision-shift-left.ll b/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
new file mode 100644
index 0000000000..f2380f23b8
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; Verify that for the architectures that are known to have poor latency
+; double precision shift instructions we generate an alternative sequence
+; of instructions with lower latencies instead of the shld instruction.
+
+;uint64_t lshift1(uint64_t a, uint64_t b)
+;{
+;    return (a << 1) | (b >> 63);
+;}
+
+; CHECK:             lshift1:
+; CHECK:             addq    {{.*}},{{.*}}
+; CHECK-NEXT:        shrq    $63, {{.*}}
+; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
+
+
+define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable { ; returns (a << 1) | (b >> 63)
+entry:
+  %shl = shl i64 %a, 1      ; a shifted left by 1; bit 0 becomes zero
+  %shr = lshr i64 %b, 63    ; logical (zero-filling) right shift of b by 64 - 1; only bit 0 can be set
+  %or = or i64 %shr, %shl   ; the two shifted values occupy disjoint bits, so OR merges them
+  ret i64 %or
+}
+
+;uint64_t lshift2(uint64_t a, uint64_t b)
+;{
+;    return (a << 2) | (b >> 62);
+;}
+
+; CHECK:             lshift2:
+; CHECK:             shlq    $2, {{.*}}
+; CHECK-NEXT:        shrq    $62, {{.*}}
+; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable { ; returns (a << 2) | (b >> 62)
+entry:
+  %shl = shl i64 %a, 2      ; a shifted left by 2; bits 0-1 become zero
+  %shr = lshr i64 %b, 62    ; logical (zero-filling) right shift of b by 64 - 2; only bits 0-1 can be set
+  %or = or i64 %shr, %shl   ; the two shifted values occupy disjoint bits, so OR merges them
+  ret i64 %or
+}
+
+;uint64_t lshift7(uint64_t a, uint64_t b)
+;{
+;    return (a << 7) | (b >> 57);
+;}
+
+; CHECK:             lshift7:
+; CHECK:             shlq    $7, {{.*}}
+; CHECK-NEXT:        shrq    $57, {{.*}}
+; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift7(i64 %a, i64 %b) nounwind readnone uwtable { ; returns (a << 7) | (b >> 57)
+entry:
+  %shl = shl i64 %a, 7      ; a shifted left by 7; bits 0-6 become zero
+  %shr = lshr i64 %b, 57    ; logical (zero-filling) right shift of b by 64 - 7; only bits 0-6 can be set
+  %or = or i64 %shr, %shl   ; the two shifted values occupy disjoint bits, so OR merges them
+  ret i64 %or
+}
+
+;uint64_t lshift63(uint64_t a, uint64_t b)
+;{
+;    return (a << 63) | (b >> 1);
+;}
+
+; CHECK:             lshift63:
+; CHECK:             shlq    $63, {{.*}}
+; CHECK-NEXT:        shrq    {{.*}}
+; CHECK-NEXT:        leaq    ({{.*}},{{.*}}), {{.*}}
+
+define i64 @lshift63(i64 %a, i64 %b) nounwind readnone uwtable { ; returns (a << 63) | (b >> 1)
+entry:
+  %shl = shl i64 %a, 63     ; a shifted left by 63; only bit 63 can be set
+  %shr = lshr i64 %b, 1     ; logical (zero-filling) right shift of b by 64 - 63; bit 63 becomes zero
+  %or = or i64 %shr, %shl   ; the two shifted values occupy disjoint bits, so OR merges them
+  ret i64 %or
+}
author	Ekaterina Romanova <katya_romanova@playstation.sony.com>	2013-11-21 23:21:26 +0000
committer	Ekaterina Romanova <katya_romanova@playstation.sony.com>	2013-11-21 23:21:26 +0000
commit	46f7257ed1bbd2a169f6c930a805e702407d955b (patch)
tree	35e063ff39eb0d44f13c7e126c9e964902fb49b4 /test/CodeGen/X86/x86-64-double-precision-shift-left.ll
parent	934d1f83aecf8f01646f4b2a09167309a4c1bb8d (diff)
download	llvm-46f7257ed1bbd2a169f6c930a805e702407d955b.tar.gz llvm-46f7257ed1bbd2a169f6c930a805e702407d955b.tar.bz2 llvm-46f7257ed1bbd2a169f6c930a805e702407d955b.tar.xz