9
9
/* Include guard: the macro that is tested and the macro that is defined must
 * be the same name, otherwise a second inclusion is not suppressed */
#ifndef HT__ARRAY_HASH_H
#define HT__ARRAY_HASH_H
11
11
12
+ #include < vector>
13
+ #include < string>
14
+
12
15
#include < hash.hpp>
13
16
14
- #include < cstdlib>
15
- #include < cstring>
16
17
#include < iostream>
17
18
18
19
namespace ht {
19
20
template <
20
21
class Value ,
21
- class Hash =superfast ,
22
- class Allocator =std::allocator<char > >
22
+ class Hash =crapwow ,
23
+ class Allocator =std::allocator<std::pair<std::string,Value> > >
23
24
class ArrayHash {
24
25
public:
25
26
/* Some typedefs */
26
- typedef size_t size_type;
27
- typedef Value value_type;
28
- typedef Hash hash_type;
29
- typedef Allocator allocator_type;
27
+ typedef size_t size_type;
28
+ typedef std::string key_type;
29
+ typedef Value value_type;
30
+ typedef std::pair<key_type,value_type> pair_type;
31
+ typedef Hash hash_type;
32
+ typedef Allocator allocator_type;
30
33
31
34
/* Share the hash function object across all such classes */
32
35
static const hash_type hasher;
@@ -37,15 +40,9 @@ namespace ht {
37
40
/* Allocate the number of bins that we'll need. For this initial
38
41
* allocation, we'll actually use malloc, but for the allocations of
39
42
* the contents of each bin, we'll use the allocator */
40
- std::allocator<char *> tmp;
41
- try {
42
- bins = tmp.allocate (num_bins);
43
- for (size_type i = 0 ; i < num_bins; ++i) {
44
- bins[i] = NULL ;
45
- }
46
- } catch (std::bad_alloc e) {
43
+ bins = new std::vector<pair_type>[b];
44
+ if (bins == NULL ) {
47
45
std::cout << " Could not allocate" << std::endl;
48
- bins = NULL ;
49
46
}
50
47
}
51
48
@@ -54,15 +51,7 @@ namespace ht {
54
51
/* Bail out early -- nothing to see here */
55
52
return ;
56
53
}
57
-
58
- for (size_type i = 0 ; i < num_bins; ++i) {
59
- if (bins[i] == NULL ) {
60
- continue ;
61
- }
62
- /* Now, we have to deallocate the char * we were given */
63
- free (static_cast <void *>(bins[i]));
64
- bins[i] = NULL ;
65
- }
54
+ delete[] bins;
66
55
}
67
56
68
57
/* *
@@ -75,208 +64,75 @@ namespace ht {
75
64
*
76
65
* If the key doesn't exist, create it, and return a reference to where
77
66
* we'd store it */
78
- value_type& operator [](const char * key) {
79
- value_type& ref (get (key, strlen (key)));
80
- return ref;
81
- }
82
-
83
67
value_type& operator [](const std::string& key) {
    /* Hash the key's bytes (c_str plus explicit length) to choose a bin,
     * then scan that bin for an entry already stored under this key */
    const size_type slot = hasher (key.c_str (), key.length ()) % num_bins;
    std::vector<pair_type>& bucket = bins[slot];
    for (size_type i = 0; i < bucket.size (); ++i) {
        if (bucket[i].first == key) {
            /* Found it: hand back a reference to the stored value */
            return bucket[i].second;
        }
    }

    /* No entry yet: append one holding a default-constructed value and
     * return a reference to that new value */
    bucket.push_back (pair_type (key, value_type ()));
    return bucket.back ().second;
}
158
88
159
89
/* Insert
160
90
*
161
91
* This interface is provided for both continuity and to support binary
162
92
* data (where the length of the char* buffer is provided)
163
93
*/
164
- value_type& insert (const char * key, const value_type& value) {
165
- return insert (key, strlen (key), value);
166
- }
167
-
168
- value_type& insert (const char * k, size_type len, const value_type& v) {
169
- return (get (k, len) = v);
170
- }
171
-
172
- value_type& insert (const std::string& key, const value_type& value) {
173
- return insert (key.c_str (), key.length (), value);
94
+ value_type& insert (const std::string& key, const value_type& v) {
95
+ value_type& ref (operator [](key));
96
+ ref = v;
97
+ return ref;
174
98
}
175
99
176
100
/* Remove
177
101
*
178
102
* If the provided key exists, then it is removed. If the key does not
179
103
* exist, it has no effect. Normally, it would return a reference, but
180
104
* since it's possible the key doesn't exist, we can't. */
181
- void remove (const char * k) {
182
- remove (k, strlen (k));
183
- }
184
-
185
- void remove (const char * k, size_type len) {
186
- size_type position = hasher (k, len) % num_bins;
187
- char * bin = bins[position];
188
- if (bin == NULL ) { return ; }
189
-
190
- if (!find_in_bin (bin, k, len)) { return ; }
191
-
192
- /* Otherwise, we've got just a little bit of work to do. First
193
- * things first, we need to decrement the number of items we have
194
- * in the bin */
195
- // --(*reinterpret_cast<size_type*>(bin));
196
- }
197
-
198
105
void remove (const std::string& key) {
199
- remove (key.c_str (), key.length ());
106
+ size_type position = hasher (key.c_str (), key.length ()) % num_bins;
107
+ std::vector<pair_type>& bin (bins[position]);
108
+ typename std::vector<pair_type>::iterator it (bin.begin ());
109
+ for (; it != bin.end (); ++it) {
110
+ if (it->first == key) {
111
+ bin.erase (it, it);
112
+ return ;
113
+ }
114
+ }
200
115
}
201
116
202
117
/* Existence
203
118
*
204
119
* Returns true if the provided key exists, else, false */
205
- bool exists (const char * k) {
206
- return exists (k, strlen (k));
207
- }
208
-
209
- bool exists (const char * k, size_type len) {
210
- size_type position = hasher (k, len) % num_bins;
211
- char * bin = bins[position];
212
- if (bin == NULL ) { return false ; }
213
-
214
- return find_in_bin (bin, k, len);
215
- }
216
-
217
120
bool exists (const std::string& key) {
218
- return exists (key.c_str (), key.length ());
219
- }
220
-
221
- private:
222
- /* How many bins are we using? */
223
- size_type num_bins;
224
- /* We need an array of char*'s */
225
- char ** bins;
226
-
227
- /* This is for byte alignment purposes. Turns out we have to align
228
- * things well */
229
- size_type aligned (size_type len, size_type multiple=8 ) {
230
- size_t remainder = len % multiple;
231
- if (remainder ) {
232
- return len + multiple - remainder ;
233
- }
234
- return len;
235
- }
236
-
237
- /* Return a reference to the value at the provided record as returned by
238
- * find_in_bin */
239
- value_type& get_value (char * record) {
240
- size_type *len = reinterpret_cast <size_type* >(record);
241
- value_type* cpy = reinterpret_cast <value_type*>(record +
242
- sizeof (size_type) + aligned (*len));
243
- return *cpy;
244
- }
245
-
246
- /* Fill in a record starting at the provided pointer, and return a
247
- * reference to the value_type stored in it */
248
- value_type& set_record (char * r, const char * k, size_type len) {
249
- /* Now, set the length of the string key that we're inserting */
250
- *(reinterpret_cast <size_type*>(r)) = len;
251
- r += sizeof (size_type);
252
- /* Now, copy the string into the new memory */
253
- memcpy (reinterpret_cast <void *>(r),
254
- reinterpret_cast <const void *>(k), len);
255
- r += aligned (len);
256
- /* Lastly, we have have to create a new value type */
257
- value_type* cpy = new (r) value_type ();
258
- return *cpy;
259
- }
260
-
261
- /* Find the start of a record in the provided bin. Returns NULL if not
262
- * found and returns a pointer starting where the length is encoded */
263
- bool find_in_bin (char *& bin, const char * k, size_type len) {
264
- size_type* count = reinterpret_cast <size_type*>(bin);
265
- /* Let's advance the pointer as we're moving along */
266
- bin += sizeof (size_type);
267
- for (size_type i = 0 ; i < *count; ++i) {
268
- /* If the lengths of the two keys aren't equal, then they can't
269
- * be equal */
270
- size_type* l = reinterpret_cast <size_type*>(bin);
271
- if (*l == len && !strncmp (bin + sizeof (size_type), k, len)) {
121
+ size_type position = hasher (key.c_str (), key.length ()) % num_bins;
122
+ std::vector<pair_type>& bin (bins[position]);
123
+ typename std::vector<pair_type>::iterator it (bin.begin ());
124
+ for (; it != bin.end (); ++it) {
125
+ if (it->first == key) {
272
126
return true ;
273
127
}
274
- /* Advance the pointer to just past this item */
275
- bin += (sizeof (size_type) + aligned (*l) +
276
- aligned (sizeof (value_type)));
277
128
}
278
129
return false ;
279
130
}
131
+ private:
132
+ /* How many bins are we using? */
133
+ size_type num_bins;
134
+ /* Array of bins; each bin is a vector of (key, value) pairs */
135
+ std::vector<pair_type>* bins;
280
136
};
281
137
}
282
138
0 commit comments