lib/Support/StringMap.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

//===--- CStringMap.cpp - CString Hash table map implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the CStringMap class.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/CStringMap.h"
#include <cassert>
using namespace llvm;

CStringMapVisitor::~CStringMapVisitor() {
}

CStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) {
  assert((InitSize & (InitSize-1)) == 0 &&
         "Init Size must be a power of 2 or zero!");
  NumBuckets = InitSize ? InitSize : 512;
  ItemSize = itemSize;
  NumItems = 0;
  
  TheTable = new ItemBucket[NumBuckets]();
  memset(TheTable, 0, NumBuckets*sizeof(ItemBucket));
}


/// HashString - Compute a hash code for the specified string.
///
static unsigned HashString(const char *Start, const char *End) {
  // Bernstein hash function.
  unsigned int Result = 0;
  // TODO: investigate whether a modified bernstein hash function performs
  // better: http://eternallyconfuzzled.com/tuts/hashing.html#existing
  //   X*33+c -> X*33^c
  while (Start != End)
    Result = Result * 33 + *Start++;
  Result = Result + (Result >> 5);
  return Result;
}

/// LookupBucketFor - Look up the bucket that the specified string should end
/// up in.  If it already exists as a key in the map, the Item pointer for the
/// specified bucket will be non-null.  Otherwise, it will be null.  In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
unsigned CStringMapImpl::LookupBucketFor(const char *NameStart,
                                         const char *NameEnd) {
  unsigned HTSize = NumBuckets;
  unsigned FullHashValue = HashString(NameStart, NameEnd);
  unsigned BucketNo = FullHashValue & (HTSize-1);
  
  unsigned ProbeAmt = 1;
  while (1) {
    ItemBucket &Bucket = TheTable[BucketNo];
    void *BucketItem = Bucket.Item;
    // If we found an empty bucket, this key isn't in the table yet, return it.
    if (BucketItem == 0) {
      Bucket.FullHashValue = FullHashValue;
      return BucketNo;
    }
    
    // If the full hash value matches, check deeply for a match.  The common
    // case here is that we are only looking at the buckets (for item info
    // being non-null and for the full hash value) not at the items.  This
    // is important for cache locality.
    if (Bucket.FullHashValue == FullHashValue) {
      // Do the comparison like this because NameStart isn't necessarily
      // null-terminated!
      char *ItemStr = (char*)BucketItem+ItemSize;
      if (strlen(ItemStr) == unsigned(NameEnd-NameStart) &&
          memcmp(ItemStr, NameStart, (NameEnd-NameStart)) == 0) {
        // We found a match!
        return BucketNo;
      }
    }
    
    // Okay, we didn't find the item.  Probe to the next bucket.
    BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
    
    // Use quadratic probing, it has fewer clumping artifacts than linear
    // probing and has good cache behavior in the common case.
    ++ProbeAmt;
  }
}

/// RehashTable - Grow the table, redistributing values into the buckets with
/// the appropriate mod-of-hashtable-size.
void CStringMapImpl::RehashTable() {
  unsigned NewSize = NumBuckets*2;
  ItemBucket *NewTableArray = new ItemBucket[NewSize]();
  memset(NewTableArray, 0, NewSize*sizeof(ItemBucket));
  
  // Rehash all the items into their new buckets.  Luckily :) we already have
  // the hash values available, so we don't have to rehash any strings.
  for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
    if (IB->Item) {
      // Fast case, bucket available.
      unsigned FullHash = IB->FullHashValue;
      unsigned NewBucket = FullHash & (NewSize-1);
      if (NewTableArray[NewBucket].Item == 0) {
        NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
        NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
        continue;
      }
      
      unsigned ProbeSize = 1;
      do {
        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
      } while (NewTableArray[NewBucket].Item);
      
      // Finally found a slot.  Fill it in.
      NewTableArray[NewBucket].Item = IB->Item;
      NewTableArray[NewBucket].FullHashValue = FullHash;
    }
  }
  
  delete[] TheTable;
  
  TheTable = NewTableArray;
  NumBuckets = NewSize;
}


/// VisitEntries - This method walks through all of the items,
/// invoking Visitor.Visit for each of them.
void CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const {
  for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
    if (void *Id = IB->Item)
      Visitor.Visit((char*)Id + ItemSize, Id);
  }
}