summaryrefslogtreecommitdiff
path: root/lib/Support
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-10-15 10:08:31 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-10-15 10:08:31 +0000
commit6e6a558ebce556476d8fd659b419a2760f2ab154 (patch)
tree0331aaeaa342b5fd9f0adf98895d3d91e2182f4f /lib/Support
parente9b58d0aac4e89b53a4be0e6f289b66649e1512b (diff)
downloadllvm-6e6a558ebce556476d8fd659b419a2760f2ab154.tar.gz
llvm-6e6a558ebce556476d8fd659b419a2760f2ab154.tar.bz2
llvm-6e6a558ebce556476d8fd659b419a2760f2ab154.tar.xz
Add a bad char heuristic to StringRef::find.
Based on Horspool's simplified version of Boyer-Moore. We use a constant-sized table of uint8_ts to keep cache thrashing low, needles bigger than 255 bytes are uncommon anyways. The worst case is still O(n*m) but we do a lot better on the average case now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142061 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Support')
-rw-r--r--lib/Support/StringRef.cpp29
1 files changed, 26 insertions, 3 deletions
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index b5b4f94760..a862ed2fa9 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -144,9 +144,32 @@ size_t StringRef::find(StringRef Str, size_t From) const {
size_t N = Str.size();
if (N > Length)
return npos;
- for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
- if (substr(i, N).equals(Str))
- return i;
+
+ // For short haystacks or unsupported needles fall back to the naive algorithm
+ if (Length < 16 || N > 255 || N == 0) {
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+ }
+
+ // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
+ uint8_t BadCharSkip[256];
+ std::memset(BadCharSkip, N, 256);
+ for (unsigned i = 0; i != N-1; ++i)
+ BadCharSkip[(uint8_t)Str[i]] = N-1-i;
+
+ unsigned Len = Length, Pos = min(From, Length);
+ while (Len >= N) {
+ if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
+ return Pos;
+
+ // Otherwise skip the appropriate number of bytes.
+ uint8_t Skip = BadCharSkip[(uint8_t)Data[Pos+N-1]];
+ Len -= Skip;
+ Pos += Skip;
+ }
+
return npos;
}