1 | /* |
1 | /* |
2 | * static char *rcsid_shstr_c = |
|
|
3 | * "$Id: shstr.C,v 1.1 2006/08/13 17:16:01 elmex Exp $"; |
|
|
4 | * |
|
|
5 | * shstr.c |
2 | * shstr.C |
6 | * |
|
|
7 | * This is a simple shared strings package with a simple interface. |
|
|
8 | * |
|
|
9 | * char *find_string(const char *str) |
|
|
10 | * char *add_string(const char *str) |
|
|
11 | * char *add_refcount(char *str) |
|
|
12 | * void free_string(char *str) |
|
|
13 | * char *ss_dump_table(int what) |
|
|
14 | * void ss_dump_statistics() |
|
|
15 | * |
|
|
16 | * Author: Kjetil T. Homme, Oslo 1992. |
|
|
17 | */ |
3 | */ |
18 | |
4 | |
19 | #include <stdio.h> |
|
|
20 | #include <stddef.h> |
|
|
21 | #include <stdlib.h> |
|
|
22 | #include <sys/types.h> |
|
|
23 | #include <limits.h> |
|
|
24 | #include <string.h> |
5 | #include <cstring> |
|
|
6 | #include <cstdlib> |
25 | |
7 | |
26 | #if defined (__sun__) && defined (StupidSunHeaders) |
8 | #include <tr1/unordered_set> |
27 | #include <sys/time.h> |
|
|
28 | #include "sunos.h" |
|
|
29 | #endif |
|
|
30 | |
9 | |
31 | #include "shstr.h" |
10 | #include "shstr.h" |
32 | |
11 | |
33 | #ifndef WIN32 |
12 | // NOTE: even with lots of stuff loaded, we do not usually have >>20000 strings. |
34 | #include <autoconf.h> |
13 | // maybe refcounting is just overhead? |
35 | #endif |
|
|
36 | #ifdef HAVE_LIBDMALLOC |
|
|
37 | #include <dmalloc.h> |
|
|
38 | #endif |
|
|
39 | |
14 | |
40 | static shared_string *hash_table[TABLESIZE]; |
15 | struct hash |
|
|
16 | { |
|
|
17 | std::size_t operator ()(const char *s) const |
|
|
18 | { |
|
|
19 | unsigned long hash = 0; |
|
|
20 | unsigned int i = 0; |
41 | |
21 | |
42 | /* |
22 | /* use the one-at-a-time hash function, which supposedly is |
43 | * Initialises the hash-table used by the shared string library. |
23 | * better than the djb2-like one used by perl5.005, but |
44 | */ |
24 | * certainly is better then the bug used here before. |
|
|
25 | * see http://burtleburtle.net/bob/hash/doobs.html |
|
|
26 | */ |
|
|
27 | while (*s) |
|
|
28 | { |
|
|
29 | hash += *s++; |
|
|
30 | hash += hash << 10; |
|
|
31 | hash ^= hash >> 6; |
|
|
32 | } |
45 | |
33 | |
46 | void |
34 | hash += hash << 3; |
47 | init_hash_table(void) { |
35 | hash ^= hash >> 11; |
48 | /* A static object should be zeroed out always */ |
36 | hash += hash << 15; |
49 | #if !defined(__STDC__) |
37 | |
50 | (void) memset((void *)hash_table, 0, TABLESIZE * sizeof(shared_string *)); |
38 | return hash; |
51 | #endif |
39 | } |
|
|
40 | }; |
|
|
41 | |
|
|
42 | struct equal |
|
|
43 | { |
|
|
44 | bool operator ()(const char *a, const char *b) const |
|
|
45 | { |
|
|
46 | return !strcmp (a, b); |
|
|
47 | } |
|
|
48 | }; |
|
|
49 | |
|
|
50 | typedef std::tr1::unordered_set<const char *, hash, equal> HT; |
|
|
51 | |
|
|
52 | static HT ht; |
|
|
53 | |
|
|
54 | const char * |
|
|
55 | shstr::find (const char *s) |
|
|
56 | { |
|
|
57 | if (!s) |
|
|
58 | return s; |
|
|
59 | |
|
|
60 | HT::iterator i = ht.find (s); |
|
|
61 | |
|
|
62 | return i != ht.end () |
|
|
63 | ? *i |
|
|
64 | : 0; |
52 | } |
65 | } |
53 | |
66 | |
54 | /* |
67 | const char * |
55 | * Hashing-function used by the shared string library. |
68 | shstr::intern (const char *s) |
56 | */ |
69 | { |
|
|
70 | if (!s) |
|
|
71 | return s; |
57 | |
72 | |
58 | static int |
73 | if (const char *found = find (s)) |
59 | hashstr(const char *str) { |
74 | { |
60 | unsigned long hash = 0; |
75 | ++refcnt (found); |
61 | int i = 0; |
76 | return found; |
62 | unsigned rot = 0; |
77 | } |
63 | const char *p; |
|
|
64 | |
78 | |
65 | GATHER(hash_stats.calls); |
79 | int len = strlen (s); |
66 | |
80 | |
67 | for (p = str; i < MAXSTRING && *p; p++, i++) { |
81 | const char *v = (const char *)(2 + (int *)malloc (sizeof (int) * 2 + len + 1)); |
68 | hash ^= (unsigned long) *p << rot; |
82 | |
69 | rot += 2; |
83 | length (v) = len; |
70 | if (rot >= (sizeof(long) - sizeof(char)) * CHAR_BIT) |
84 | refcnt (v) = 1; |
71 | rot = 0; |
85 | |
72 | } |
86 | memcpy ((char *)v, s, len + 1); |
73 | return (hash % TABLESIZE); |
87 | |
|
|
88 | ht.insert (v); |
|
|
89 | |
|
|
90 | return v; |
74 | } |
91 | } |
75 | |
92 | |
76 | /* |
93 | // periodically test refcounts == 0 for a few strings |
77 | * Allocates and initialises a new shared_string structure, containing |
94 | // this is the ONLY thing that erases stuff from ht. keep it that way. |
78 | * the string str. |
95 | void |
79 | */ |
96 | shstr::gc () |
|
|
97 | { |
|
|
98 | static const char *curpos; |
80 | |
99 | |
81 | static shared_string * |
100 | HT::iterator i = curpos ? ht.find (curpos) : ht.begin (); |
82 | new_shared_string(const char *str) { |
|
|
83 | shared_string *ss; |
|
|
84 | |
101 | |
85 | /* Allocate room for a struct which can hold str. Note |
102 | if (i == ht.end ()) |
86 | * that some bytes for the string are already allocated in the |
103 | i = ht.begin (); |
87 | * shared_string struct. |
|
|
88 | */ |
|
|
89 | ss = (shared_string *) malloc(sizeof(shared_string) - PADDING + |
|
|
90 | strlen(str) + 1); |
|
|
91 | ss->u.previous = NULL; |
|
|
92 | ss->next = NULL; |
|
|
93 | ss->refcount = 1; |
|
|
94 | strcpy(ss->string, str); |
|
|
95 | |
104 | |
96 | return ss; |
105 | // go through all strings roughly once every 4 minutes |
|
|
106 | for (int n = ht.size () / 256 + 16; --n; ) |
|
|
107 | { |
|
|
108 | if (i == ht.end ()) |
|
|
109 | { |
|
|
110 | curpos = 0; |
|
|
111 | return; |
|
|
112 | } |
|
|
113 | |
|
|
114 | if (!refcnt (*i)) |
|
|
115 | { |
|
|
116 | HT::iterator o = i++; |
|
|
117 | const char *s = *o; |
|
|
118 | ht.erase (o); |
|
|
119 | |
|
|
120 | //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s)); |
|
|
121 | free (-2 + (int *)s); |
|
|
122 | } |
|
|
123 | else |
|
|
124 | ++i; |
|
|
125 | } |
|
|
126 | |
|
|
127 | curpos = *i; |
97 | } |
128 | } |
98 | |
129 | |
99 | /* |
130 | //TODO: this should of course not be here |
100 | * Description: |
|
|
101 | * This will add 'str' to the hash table. If there's no entry for this |
|
|
102 | * string, a copy will be allocated, and a pointer to that is returned. |
|
|
103 | * Return values: |
|
|
104 | * - pointer to string identical to str |
|
|
105 | */ |
|
|
106 | |
|
|
107 | const char * |
|
|
108 | add_string(const char *str) { |
|
|
109 | shared_string *ss; |
|
|
110 | int ind; |
|
|
111 | |
|
|
112 | GATHER(add_stats.calls); |
|
|
113 | |
|
|
114 | /* Should really core dump here, since functions should not be calling |
|
|
115 | * add_string with a null parameter. But this will prevent a few |
|
|
116 | * core dumps. |
|
|
117 | */ |
|
|
118 | if (str==NULL) { |
|
|
119 | #ifdef MANY_CORES |
|
|
120 | abort(); |
|
|
121 | #else |
|
|
122 | return NULL; |
|
|
123 | #endif |
|
|
124 | } |
|
|
125 | |
|
|
126 | ind = hashstr(str); |
|
|
127 | ss = hash_table[ind]; |
|
|
128 | |
|
|
129 | /* Is there an entry for that hash? |
|
|
130 | */ |
|
|
131 | if (ss) { |
|
|
132 | /* Simple case first: See if the first pointer matches. |
|
|
133 | */ |
|
|
134 | if (str != ss->string) { |
|
|
135 | GATHER(add_stats.strcmps); |
|
|
136 | if (strcmp(ss->string, str)) { |
|
|
137 | /* Apparantly, a string with the same hash value has this |
|
|
138 | * slot. We must see in the list if "str" has been |
|
|
139 | * registered earlier. |
|
|
140 | */ |
|
|
141 | while (ss->next) { |
|
|
142 | GATHER(add_stats.search); |
|
|
143 | ss = ss->next; |
|
|
144 | if (ss->string != str) { |
|
|
145 | GATHER(add_stats.strcmps); |
|
|
146 | if (strcmp(ss->string, str)) { |
|
|
147 | /* This wasn't the right string... |
|
|
148 | */ |
|
|
149 | continue; |
|
|
150 | } |
|
|
151 | } |
|
|
152 | /* We found an entry for this string. Fix the |
|
|
153 | * refcount and exit. |
|
|
154 | */ |
|
|
155 | GATHER(add_stats.linked); |
|
|
156 | ++(ss->refcount); |
|
|
157 | |
|
|
158 | return ss->string; |
|
|
159 | } |
|
|
160 | /* There are no occurences of this string in the hash table. |
|
|
161 | */ |
|
|
162 | { |
|
|
163 | shared_string *new_ss; |
|
|
164 | |
|
|
165 | GATHER(add_stats.linked); |
|
|
166 | new_ss = new_shared_string(str); |
|
|
167 | ss->next = new_ss; |
|
|
168 | new_ss->u.previous = ss; |
|
|
169 | return new_ss->string; |
|
|
170 | } |
|
|
171 | } |
|
|
172 | /* Fall through. |
|
|
173 | */ |
|
|
174 | } |
|
|
175 | GATHER(add_stats.hashed); |
|
|
176 | ++(ss->refcount); |
|
|
177 | return ss->string; |
|
|
178 | } else { |
|
|
179 | /* The string isn't registered, and the slot is empty. |
|
|
180 | */ |
|
|
181 | GATHER(add_stats.hashed); |
|
|
182 | hash_table[ind] = new_shared_string(str); |
|
|
183 | |
|
|
184 | /* One bit in refcount is used to keep track of the union. |
|
|
185 | */ |
|
|
186 | hash_table[ind]->refcount |= TOPBIT; |
|
|
187 | hash_table[ind]->u.array = &(hash_table[ind]); |
|
|
188 | |
|
|
189 | return hash_table[ind]->string; |
|
|
190 | } |
|
|
191 | } |
|
|
192 | |
|
|
193 | /* |
|
|
194 | * Description: |
|
|
195 | * This will increase the refcount of the string str, which *must* |
|
|
196 | * have been returned from a previous add_string(). |
|
|
197 | * Return values: |
|
|
198 | * - str |
|
|
199 | */ |
|
|
200 | |
|
|
201 | const char * |
|
|
202 | add_refcount(const char *str) { |
|
|
203 | GATHER(add_ref_stats.calls); |
|
|
204 | ++(SS(str)->refcount); |
|
|
205 | /* This function should be declared |
|
|
206 | * const char *add_refcount(const char *) |
|
|
207 | * Unfortunately, that would require changing a lot of structs |
|
|
208 | * |
|
|
209 | * Yup, many changes, but may have make some bugs more visible :) |
|
|
210 | * -- Ryo 2005-08-12 |
|
|
211 | */ |
|
|
212 | return (char*)str; |
|
|
213 | } |
|
|
214 | |
|
|
215 | /* |
|
|
216 | * Description: |
|
|
217 | * This will return the refcount of the string str, which *must* |
|
|
218 | * have been returned from a previous add_string(). |
|
|
219 | * Return values: |
|
|
220 | * - length |
|
|
221 | */ |
|
|
222 | |
|
|
223 | int |
|
|
224 | query_refcount(const char *str) { |
|
|
225 | return (SS(str)->refcount) & ~TOPBIT; |
|
|
226 | } |
|
|
227 | |
|
|
228 | /* |
|
|
229 | * Description: |
|
|
230 | * This will see if str is in the hash table, and return the address |
|
|
231 | * of that string if it exists. |
|
|
232 | * Return values: |
|
|
233 | * - pointer to identical string or NULL |
|
|
234 | */ |
|
|
235 | |
|
|
236 | const char * |
|
|
237 | find_string(const char *str) { |
|
|
238 | shared_string *ss; |
|
|
239 | int ind; |
|
|
240 | |
|
|
241 | GATHER(find_stats.calls); |
|
|
242 | |
|
|
243 | ind = hashstr(str); |
|
|
244 | ss = hash_table[ind]; |
|
|
245 | |
|
|
246 | /* Is there an entry for that hash? |
|
|
247 | */ |
|
|
248 | if (ss) { |
|
|
249 | /* Simple case first: Is the first string the right one? |
|
|
250 | */ |
|
|
251 | GATHER(find_stats.strcmps); |
|
|
252 | if (!strcmp(ss->string, str)) { |
|
|
253 | GATHER(find_stats.hashed); |
|
|
254 | return ss->string; |
|
|
255 | } else { |
|
|
256 | /* Recurse through the linked list, if there's one. |
|
|
257 | */ |
|
|
258 | while (ss->next) { |
|
|
259 | GATHER(find_stats.search); |
|
|
260 | GATHER(find_stats.strcmps); |
|
|
261 | ss = ss->next; |
|
|
262 | if (!strcmp(ss->string, str)) { |
|
|
263 | GATHER(find_stats.linked); |
|
|
264 | return ss->string; |
|
|
265 | } |
|
|
266 | } |
|
|
267 | /* No match. Fall through. |
|
|
268 | */ |
|
|
269 | } |
|
|
270 | } |
|
|
271 | return NULL; |
|
|
272 | } |
|
|
273 | |
|
|
274 | /* |
|
|
275 | * Description: |
|
|
276 | * This will reduce the refcount, and if it has reached 0, str will |
|
|
277 | * be freed. |
|
|
278 | * Return values: |
|
|
279 | * None |
|
|
280 | */ |
|
|
281 | |
|
|
282 | void |
|
|
283 | free_string(const char *str) { |
|
|
284 | shared_string *ss; |
|
|
285 | |
|
|
286 | GATHER(free_stats.calls); |
|
|
287 | |
|
|
288 | ss = SS(str); |
|
|
289 | |
|
|
290 | if ((--ss->refcount & ~TOPBIT) == 0) { |
|
|
291 | /* Remove this entry. |
|
|
292 | */ |
|
|
293 | if (ss->refcount & TOPBIT) { |
|
|
294 | /* We must put a new value into the hash_table[]. |
|
|
295 | */ |
|
|
296 | if (ss->next) { |
|
|
297 | *(ss->u.array) = ss->next; |
|
|
298 | ss->next->u.array = ss->u.array; |
|
|
299 | ss->next->refcount |= TOPBIT; |
|
|
300 | } else { |
|
|
301 | *(ss->u.array) = NULL; |
|
|
302 | } |
|
|
303 | free(ss); |
|
|
304 | } else { |
|
|
305 | /* Relink and free this struct. |
|
|
306 | */ |
|
|
307 | if (ss->next) |
|
|
308 | ss->next->u.previous = ss->u.previous; |
|
|
309 | ss->u.previous->next = ss->next; |
|
|
310 | free(ss); |
|
|
311 | } |
|
|
312 | } |
|
|
313 | } |
|
|
314 | |
|
|
315 | #ifdef SS_STATISTICS |
|
|
316 | |
|
|
317 | extern char errmsg[]; |
|
|
318 | |
|
|
319 | /* |
|
|
320 | * Description: |
|
|
321 | * The routines will gather statistics if SS_STATISTICS is defined. |
|
|
322 | * A call to this function will cause the statistics to be dumped |
|
|
323 | * into an external string errmsg, which must be large. |
|
|
324 | * Return values: |
|
|
325 | * None |
|
|
326 | */ |
|
|
327 | |
|
|
328 | void |
|
|
329 | ss_dump_statistics(void) { |
|
|
330 | static char line[80]; |
|
|
331 | |
|
|
332 | sprintf(errmsg, "%-13s %6s %6s %6s %6s %6s\n", |
|
|
333 | "", "calls", "hashed", "strcmp", "search", "linked"); |
|
|
334 | sprintf(line, "%-13s %6d %6d %6d %6d %6d\n", |
|
|
335 | "add_string:", add_stats.calls, add_stats.hashed, |
|
|
336 | add_stats.strcmps, add_stats.search, add_stats.linked); |
|
|
337 | strcat(errmsg, line); |
|
|
338 | sprintf(line, "%-13s %6d\n", |
|
|
339 | "add_refcount:", add_ref_stats.calls); |
|
|
340 | strcat(errmsg, line); |
|
|
341 | sprintf(line, "%-13s %6d\n", |
|
|
342 | "free_string:", free_stats.calls); |
|
|
343 | strcat(errmsg, line); |
|
|
344 | sprintf(line, "%-13s %6d %6d %6d %6d %6d\n", |
|
|
345 | "find_string:", find_stats.calls, find_stats.hashed, |
|
|
346 | find_stats.strcmps, find_stats.search, find_stats.linked); |
|
|
347 | strcat(errmsg, line); |
|
|
348 | sprintf(line, "%-13s %6d\n", |
|
|
349 | "hashstr:", hash_stats.calls); |
|
|
350 | strcat(errmsg, line); |
|
|
351 | } |
|
|
352 | #endif /* SS_STATISTICS */ |
|
|
353 | |
|
|
354 | /* |
|
|
355 | * Description: |
|
|
356 | * If (what & SS_DUMP_TABLE) dump the contents of the hash table to |
|
|
357 | * stderr. If (what & SS_DUMP_TOTALS) return a string which |
|
|
358 | * says how many entries etc. there are in the table. |
|
|
359 | * Return values: |
|
|
360 | * - a string or NULL |
|
|
361 | */ |
|
|
362 | |
|
|
363 | const char * |
|
|
364 | ss_dump_table(int what) { |
|
|
365 | static char totals[80]; |
|
|
366 | int entries = 0, refs = 0, links = 0; |
|
|
367 | int i; |
|
|
368 | |
|
|
369 | for (i = 0; i < TABLESIZE; i++) { |
|
|
370 | shared_string *ss; |
|
|
371 | |
|
|
372 | if ((ss = hash_table[i])!=NULL) { |
|
|
373 | ++entries; |
|
|
374 | refs += (ss->refcount & ~TOPBIT); |
|
|
375 | #if 1 /* Can't use stderr any longer, need to include global.h and |
|
|
376 | if (what & SS_DUMP_TABLE) |
|
|
377 | * use logfile. */ |
|
|
378 | fprintf(stderr, "%4d -- %4d refs '%s' %c\n", |
|
|
379 | i, (ss->refcount & ~TOPBIT), ss->string, |
|
|
380 | (ss->refcount & TOPBIT ? ' ' : '#')); |
|
|
381 | #endif |
|
|
382 | while (ss->next) { |
|
|
383 | ss = ss->next; |
|
|
384 | ++links; |
|
|
385 | refs += (ss->refcount & ~TOPBIT); |
|
|
386 | #if 1 |
|
|
387 | if (what & SS_DUMP_TABLE) |
|
|
388 | fprintf(stderr, " -- %4d refs '%s' %c\n", |
|
|
389 | (ss->refcount & ~TOPBIT), ss->string, |
|
|
390 | (ss->refcount & TOPBIT ? '*' : ' ')); |
|
|
391 | #endif |
|
|
392 | } |
|
|
393 | } |
|
|
394 | } |
|
|
395 | |
|
|
396 | if (what & SS_DUMP_TOTALS) { |
|
|
397 | sprintf(totals, "\n%d entries, %d refs, %d links.", |
|
|
398 | entries, refs, links); |
|
|
399 | return totals; |
|
|
400 | } |
|
|
401 | return NULL; |
|
|
402 | } |
|
|
403 | |
|
|
404 | /* buf_overflow() - we don't want to exceed the buffer size of |
131 | /* buf_overflow() - we don't want to exceed the buffer size of |
405 | * buf1 by adding on buf2! Returns true if overflow will occur. |
132 | * buf1 by adding on buf2! Returns true if overflow will occur. |
406 | */ |
133 | */ |
407 | |
134 | |
408 | int |
135 | int |
409 | buf_overflow (const char *buf1, const char *buf2, int bufsize) |
136 | buf_overflow (const char *buf1, const char *buf2, int bufsize) |
410 | { |
137 | { |
411 | int len1 = 0, len2 = 0; |
138 | int len1 = 0, len2 = 0; |
412 | |
139 | |
413 | if (buf1) |
140 | if (buf1) |
414 | len1 = strlen (buf1); |
141 | len1 = strlen (buf1); |
415 | if (buf2) |
142 | if (buf2) |
416 | len2 = strlen (buf2); |
143 | len2 = strlen (buf2); |
417 | if ((len1 + len2) >= bufsize) |
144 | if ((len1 + len2) >= bufsize) |
418 | return 1; |
145 | return 1; |
419 | return 0; |
146 | return 0; |
420 | } |
147 | } |
421 | |
148 | |