|
|
1 | |
1 | /* |
2 | /* |
2 | * static char *rcsid_shstr_c = |
|
|
3 | * "$Id: shstr.C,v 1.1 2006/08/13 17:16:01 elmex Exp $"; |
|
|
4 | * |
|
|
5 | * shstr.c |
3 | * shstr.C |
6 | * |
|
|
7 | * This is a simple shared strings package with a simple interface. |
|
|
8 | * |
|
|
9 | * char *find_string(const char *str) |
|
|
10 | * char *add_string(const char *str) |
|
|
11 | * char *add_refcount(char *str) |
|
|
12 | * void free_string(char *str) |
|
|
13 | * char *ss_dump_table(int what) |
|
|
14 | * void ss_dump_statistics() |
|
|
15 | * |
|
|
16 | * Author: Kjetil T. Homme, Oslo 1992. |
|
|
17 | */ |
4 | */ |
18 | |
5 | |
19 | #include <stdio.h> |
|
|
20 | #include <stddef.h> |
|
|
21 | #include <stdlib.h> |
|
|
22 | #include <sys/types.h> |
|
|
23 | #include <limits.h> |
|
|
24 | #include <string.h> |
6 | #include <cstring> |
|
|
7 | #include <cstdlib> |
25 | |
8 | |
26 | #if defined (__sun__) && defined (StupidSunHeaders) |
9 | #include <glib.h> |
27 | #include <sys/time.h> |
10 | |
28 | #include "sunos.h" |
11 | #include <tr1/unordered_set> |
29 | #endif |
|
|
30 | |
12 | |
31 | #include "shstr.h" |
13 | #include "shstr.h" |
|
|
14 | #include "util.h" |
32 | |
15 | |
33 | #ifndef WIN32 |
16 | typedef |
34 | #include <autoconf.h> |
17 | std::tr1::unordered_set < const char *, |
35 | #endif |
18 | str_hash, |
36 | #ifdef HAVE_LIBDMALLOC |
19 | str_equal > |
37 | #include <dmalloc.h> |
20 | HT; |
38 | #endif |
|
|
39 | |
21 | |
40 | static shared_string *hash_table[TABLESIZE]; |
22 | static HT |
|
|
23 | ht; |
41 | |
24 | |
42 | /* |
25 | static const char * |
43 | * Initialises the hash-table used by the shared string library. |
26 | makevec (const char *s) |
44 | */ |
27 | { |
|
|
28 | int |
|
|
29 | len = strlen (s); |
45 | |
30 | |
46 | void |
31 | const char * |
47 | init_hash_table(void) { |
32 | v = (const char *) (2 + (int *) g_slice_alloc (sizeof (int) * 2 + len + 1)); |
48 | /* A static object should be zeroed out always */ |
33 | |
49 | #if !defined(__STDC__) |
34 | shstr::length (v) = len; |
50 | (void) memset((void *)hash_table, 0, TABLESIZE * sizeof(shared_string *)); |
35 | shstr::refcnt (v) = 1; |
51 | #endif |
36 | |
|
|
37 | memcpy ((char *) v, s, len + 1); |
|
|
38 | |
|
|
39 | return v; |
52 | } |
40 | } |
53 | |
41 | |
54 | /* |
42 | const char * |
55 | * Hashing-function used by the shared string library. |
43 | shstr::null = makevec ("<nil>"); |
56 | */ |
|
|
57 | |
44 | |
58 | static int |
45 | // what weird misoptimisation is this again? |
59 | hashstr(const char *str) { |
46 | const shstr undead_name ("undead"); |
60 | unsigned long hash = 0; |
|
|
61 | int i = 0; |
|
|
62 | unsigned rot = 0; |
|
|
63 | const char *p; |
|
|
64 | |
47 | |
65 | GATHER(hash_stats.calls); |
48 | shstr skill_names[NUM_SKILLS]; |
66 | |
49 | |
67 | for (p = str; i < MAXSTRING && *p; p++, i++) { |
50 | const char * |
68 | hash ^= (unsigned long) *p << rot; |
51 | shstr::find (const char *s) |
69 | rot += 2; |
52 | { |
70 | if (rot >= (sizeof(long) - sizeof(char)) * CHAR_BIT) |
53 | if (!s) |
71 | rot = 0; |
54 | return s; |
72 | } |
55 | |
73 | return (hash % TABLESIZE); |
56 | HT::iterator i = ht.find (s); |
|
|
57 | |
|
|
58 | return i != ht.end ()? *i : 0; |
74 | } |
59 | } |
75 | |
60 | |
76 | /* |
61 | const char * |
77 | * Allocates and initialises a new shared_string structure, containing |
62 | shstr::intern (const char *s) |
78 | * the string str. |
63 | { |
79 | */ |
64 | if (!s) |
|
|
65 | return null; |
80 | |
66 | |
81 | static shared_string * |
67 | if (const char *found = find (s)) |
82 | new_shared_string(const char *str) { |
68 | { |
83 | shared_string *ss; |
69 | ++refcnt (found); |
|
|
70 | return found; |
|
|
71 | } |
84 | |
72 | |
85 | /* Allocate room for a struct which can hold str. Note |
73 | s = makevec (s); |
86 | * that some bytes for the string are already allocated in the |
74 | ht.insert (s); |
87 | * shared_string struct. |
|
|
88 | */ |
|
|
89 | ss = (shared_string *) malloc(sizeof(shared_string) - PADDING + |
|
|
90 | strlen(str) + 1); |
|
|
91 | ss->u.previous = NULL; |
|
|
92 | ss->next = NULL; |
|
|
93 | ss->refcount = 1; |
|
|
94 | strcpy(ss->string, str); |
|
|
95 | |
|
|
96 | return ss; |
75 | return s; |
97 | } |
76 | } |
98 | |
77 | |
99 | /* |
78 | // periodically test refcounts == 0 for a few strings |
100 | * Description: |
79 | // this is the ONLY thing that erases stuff from ht. keep it that way. |
101 | * This will add 'str' to the hash table. If there's no entry for this |
80 | void |
102 | * string, a copy will be allocated, and a pointer to that is returned. |
81 | shstr::gc () |
103 | * Return values: |
82 | { |
104 | * - pointer to string identical to str |
83 | return; //D |
105 | */ |
84 | //D currently disabled: some datastructures might still store them |
|
|
85 | //D but their pointers will become invalidated |
|
|
86 | static const char *curpos; |
106 | |
87 | |
107 | const char * |
88 | HT::iterator i = curpos ? ht.find (curpos) : ht.begin (); |
108 | add_string(const char *str) { |
|
|
109 | shared_string *ss; |
|
|
110 | int ind; |
|
|
111 | |
89 | |
112 | GATHER(add_stats.calls); |
90 | if (i == ht.end ()) |
|
|
91 | i = ht.begin (); |
113 | |
92 | |
114 | /* Should really core dump here, since functions should not be calling |
93 | // go through all strings roughly once every 4 minutes |
115 | * add_string with a null parameter. But this will prevent a few |
94 | int n = ht.size () / 256 + 16; |
116 | * core dumps. |
95 | |
117 | */ |
96 | for (;;) |
118 | if (str==NULL) { |
97 | { |
119 | #ifdef MANY_CORES |
98 | if (i == ht.end ()) |
120 | abort(); |
99 | { |
121 | #else |
100 | curpos = 0; |
122 | return NULL; |
101 | return; |
123 | #endif |
102 | } |
|
|
103 | else if (!--n) |
|
|
104 | break; |
|
|
105 | else if (!refcnt (*i)) |
|
|
106 | { |
|
|
107 | HT::iterator o = i++; |
|
|
108 | const char *s = *o; |
|
|
109 | |
|
|
110 | ht.erase (o); |
|
|
111 | |
|
|
112 | //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s)); |
|
|
113 | g_slice_free1 (sizeof (int) * 2 + length (s) + 1, -2 + (int *) s); |
|
|
114 | } |
|
|
115 | else |
|
|
116 | ++i; |
124 | } |
117 | } |
125 | |
118 | |
126 | ind = hashstr(str); |
119 | curpos = *i; |
127 | ss = hash_table[ind]; |
|
|
128 | |
|
|
129 | /* Is there an entry for that hash? |
|
|
130 | */ |
|
|
131 | if (ss) { |
|
|
132 | /* Simple case first: See if the first pointer matches. |
|
|
133 | */ |
|
|
134 | if (str != ss->string) { |
|
|
135 | GATHER(add_stats.strcmps); |
|
|
136 | if (strcmp(ss->string, str)) { |
|
|
137 | /* Apparantly, a string with the same hash value has this |
|
|
138 | * slot. We must see in the list if "str" has been |
|
|
139 | * registered earlier. |
|
|
140 | */ |
|
|
141 | while (ss->next) { |
|
|
142 | GATHER(add_stats.search); |
|
|
143 | ss = ss->next; |
|
|
144 | if (ss->string != str) { |
|
|
145 | GATHER(add_stats.strcmps); |
|
|
146 | if (strcmp(ss->string, str)) { |
|
|
147 | /* This wasn't the right string... |
|
|
148 | */ |
|
|
149 | continue; |
|
|
150 | } |
|
|
151 | } |
|
|
152 | /* We found an entry for this string. Fix the |
|
|
153 | * refcount and exit. |
|
|
154 | */ |
|
|
155 | GATHER(add_stats.linked); |
|
|
156 | ++(ss->refcount); |
|
|
157 | |
|
|
158 | return ss->string; |
|
|
159 | } |
|
|
160 | /* There are no occurences of this string in the hash table. |
|
|
161 | */ |
|
|
162 | { |
|
|
163 | shared_string *new_ss; |
|
|
164 | |
|
|
165 | GATHER(add_stats.linked); |
|
|
166 | new_ss = new_shared_string(str); |
|
|
167 | ss->next = new_ss; |
|
|
168 | new_ss->u.previous = ss; |
|
|
169 | return new_ss->string; |
|
|
170 | } |
|
|
171 | } |
|
|
172 | /* Fall through. |
|
|
173 | */ |
|
|
174 | } |
|
|
175 | GATHER(add_stats.hashed); |
|
|
176 | ++(ss->refcount); |
|
|
177 | return ss->string; |
|
|
178 | } else { |
|
|
179 | /* The string isn't registered, and the slot is empty. |
|
|
180 | */ |
|
|
181 | GATHER(add_stats.hashed); |
|
|
182 | hash_table[ind] = new_shared_string(str); |
|
|
183 | |
|
|
184 | /* One bit in refcount is used to keep track of the union. |
|
|
185 | */ |
|
|
186 | hash_table[ind]->refcount |= TOPBIT; |
|
|
187 | hash_table[ind]->u.array = &(hash_table[ind]); |
|
|
188 | |
|
|
189 | return hash_table[ind]->string; |
|
|
190 | } |
|
|
191 | } |
120 | } |
192 | |
121 | |
193 | /* |
122 | //TODO: this should of course not be here |
194 | * Description: |
|
|
195 | * This will increase the refcount of the string str, which *must* |
|
|
196 | * have been returned from a previous add_string(). |
|
|
197 | * Return values: |
|
|
198 | * - str |
|
|
199 | */ |
|
|
200 | |
|
|
201 | const char * |
|
|
202 | add_refcount(const char *str) { |
|
|
203 | GATHER(add_ref_stats.calls); |
|
|
204 | ++(SS(str)->refcount); |
|
|
205 | /* This function should be declared |
|
|
206 | * const char *add_refcount(const char *) |
|
|
207 | * Unfortunately, that would require changing a lot of structs |
|
|
208 | * |
|
|
209 | * Yup, many changes, but may have make some bugs more visible :) |
|
|
210 | * -- Ryo 2005-08-12 |
|
|
211 | */ |
|
|
212 | return (char*)str; |
|
|
213 | } |
|
|
214 | |
|
|
215 | /* |
|
|
216 | * Description: |
|
|
217 | * This will return the refcount of the string str, which *must* |
|
|
218 | * have been returned from a previous add_string(). |
|
|
219 | * Return values: |
|
|
220 | * - length |
|
|
221 | */ |
|
|
222 | |
|
|
223 | int |
|
|
224 | query_refcount(const char *str) { |
|
|
225 | return (SS(str)->refcount) & ~TOPBIT; |
|
|
226 | } |
|
|
227 | |
|
|
228 | /* |
|
|
229 | * Description: |
|
|
230 | * This will see if str is in the hash table, and return the address |
|
|
231 | * of that string if it exists. |
|
|
232 | * Return values: |
|
|
233 | * - pointer to identical string or NULL |
|
|
234 | */ |
|
|
235 | |
|
|
236 | const char * |
|
|
237 | find_string(const char *str) { |
|
|
238 | shared_string *ss; |
|
|
239 | int ind; |
|
|
240 | |
|
|
241 | GATHER(find_stats.calls); |
|
|
242 | |
|
|
243 | ind = hashstr(str); |
|
|
244 | ss = hash_table[ind]; |
|
|
245 | |
|
|
246 | /* Is there an entry for that hash? |
|
|
247 | */ |
|
|
248 | if (ss) { |
|
|
249 | /* Simple case first: Is the first string the right one? |
|
|
250 | */ |
|
|
251 | GATHER(find_stats.strcmps); |
|
|
252 | if (!strcmp(ss->string, str)) { |
|
|
253 | GATHER(find_stats.hashed); |
|
|
254 | return ss->string; |
|
|
255 | } else { |
|
|
256 | /* Recurse through the linked list, if there's one. |
|
|
257 | */ |
|
|
258 | while (ss->next) { |
|
|
259 | GATHER(find_stats.search); |
|
|
260 | GATHER(find_stats.strcmps); |
|
|
261 | ss = ss->next; |
|
|
262 | if (!strcmp(ss->string, str)) { |
|
|
263 | GATHER(find_stats.linked); |
|
|
264 | return ss->string; |
|
|
265 | } |
|
|
266 | } |
|
|
267 | /* No match. Fall through. |
|
|
268 | */ |
|
|
269 | } |
|
|
270 | } |
|
|
271 | return NULL; |
|
|
272 | } |
|
|
273 | |
|
|
274 | /* |
|
|
275 | * Description: |
|
|
276 | * This will reduce the refcount, and if it has reached 0, str will |
|
|
277 | * be freed. |
|
|
278 | * Return values: |
|
|
279 | * None |
|
|
280 | */ |
|
|
281 | |
|
|
282 | void |
|
|
283 | free_string(const char *str) { |
|
|
284 | shared_string *ss; |
|
|
285 | |
|
|
286 | GATHER(free_stats.calls); |
|
|
287 | |
|
|
288 | ss = SS(str); |
|
|
289 | |
|
|
290 | if ((--ss->refcount & ~TOPBIT) == 0) { |
|
|
291 | /* Remove this entry. |
|
|
292 | */ |
|
|
293 | if (ss->refcount & TOPBIT) { |
|
|
294 | /* We must put a new value into the hash_table[]. |
|
|
295 | */ |
|
|
296 | if (ss->next) { |
|
|
297 | *(ss->u.array) = ss->next; |
|
|
298 | ss->next->u.array = ss->u.array; |
|
|
299 | ss->next->refcount |= TOPBIT; |
|
|
300 | } else { |
|
|
301 | *(ss->u.array) = NULL; |
|
|
302 | } |
|
|
303 | free(ss); |
|
|
304 | } else { |
|
|
305 | /* Relink and free this struct. |
|
|
306 | */ |
|
|
307 | if (ss->next) |
|
|
308 | ss->next->u.previous = ss->u.previous; |
|
|
309 | ss->u.previous->next = ss->next; |
|
|
310 | free(ss); |
|
|
311 | } |
|
|
312 | } |
|
|
313 | } |
|
|
314 | |
|
|
315 | #ifdef SS_STATISTICS |
|
|
316 | |
|
|
317 | extern char errmsg[]; |
|
|
318 | |
|
|
319 | /* |
|
|
320 | * Description: |
|
|
321 | * The routines will gather statistics if SS_STATISTICS is defined. |
|
|
322 | * A call to this function will cause the statistics to be dumped |
|
|
323 | * into an external string errmsg, which must be large. |
|
|
324 | * Return values: |
|
|
325 | * None |
|
|
326 | */ |
|
|
327 | |
|
|
328 | void |
|
|
329 | ss_dump_statistics(void) { |
|
|
330 | static char line[80]; |
|
|
331 | |
|
|
332 | sprintf(errmsg, "%-13s %6s %6s %6s %6s %6s\n", |
|
|
333 | "", "calls", "hashed", "strcmp", "search", "linked"); |
|
|
334 | sprintf(line, "%-13s %6d %6d %6d %6d %6d\n", |
|
|
335 | "add_string:", add_stats.calls, add_stats.hashed, |
|
|
336 | add_stats.strcmps, add_stats.search, add_stats.linked); |
|
|
337 | strcat(errmsg, line); |
|
|
338 | sprintf(line, "%-13s %6d\n", |
|
|
339 | "add_refcount:", add_ref_stats.calls); |
|
|
340 | strcat(errmsg, line); |
|
|
341 | sprintf(line, "%-13s %6d\n", |
|
|
342 | "free_string:", free_stats.calls); |
|
|
343 | strcat(errmsg, line); |
|
|
344 | sprintf(line, "%-13s %6d %6d %6d %6d %6d\n", |
|
|
345 | "find_string:", find_stats.calls, find_stats.hashed, |
|
|
346 | find_stats.strcmps, find_stats.search, find_stats.linked); |
|
|
347 | strcat(errmsg, line); |
|
|
348 | sprintf(line, "%-13s %6d\n", |
|
|
349 | "hashstr:", hash_stats.calls); |
|
|
350 | strcat(errmsg, line); |
|
|
351 | } |
|
|
352 | #endif /* SS_STATISTICS */ |
|
|
353 | |
|
|
354 | /* |
|
|
355 | * Description: |
|
|
356 | * If (what & SS_DUMP_TABLE) dump the contents of the hash table to |
|
|
357 | * stderr. If (what & SS_DUMP_TOTALS) return a string which |
|
|
358 | * says how many entries etc. there are in the table. |
|
|
359 | * Return values: |
|
|
360 | * - a string or NULL |
|
|
361 | */ |
|
|
362 | |
|
|
363 | const char * |
|
|
364 | ss_dump_table(int what) { |
|
|
365 | static char totals[80]; |
|
|
366 | int entries = 0, refs = 0, links = 0; |
|
|
367 | int i; |
|
|
368 | |
|
|
369 | for (i = 0; i < TABLESIZE; i++) { |
|
|
370 | shared_string *ss; |
|
|
371 | |
|
|
372 | if ((ss = hash_table[i])!=NULL) { |
|
|
373 | ++entries; |
|
|
374 | refs += (ss->refcount & ~TOPBIT); |
|
|
375 | #if 1 /* Can't use stderr any longer, need to include global.h and |
|
|
376 | if (what & SS_DUMP_TABLE) |
|
|
377 | * use logfile. */ |
|
|
378 | fprintf(stderr, "%4d -- %4d refs '%s' %c\n", |
|
|
379 | i, (ss->refcount & ~TOPBIT), ss->string, |
|
|
380 | (ss->refcount & TOPBIT ? ' ' : '#')); |
|
|
381 | #endif |
|
|
382 | while (ss->next) { |
|
|
383 | ss = ss->next; |
|
|
384 | ++links; |
|
|
385 | refs += (ss->refcount & ~TOPBIT); |
|
|
386 | #if 1 |
|
|
387 | if (what & SS_DUMP_TABLE) |
|
|
388 | fprintf(stderr, " -- %4d refs '%s' %c\n", |
|
|
389 | (ss->refcount & ~TOPBIT), ss->string, |
|
|
390 | (ss->refcount & TOPBIT ? '*' : ' ')); |
|
|
391 | #endif |
|
|
392 | } |
|
|
393 | } |
|
|
394 | } |
|
|
395 | |
|
|
396 | if (what & SS_DUMP_TOTALS) { |
|
|
397 | sprintf(totals, "\n%d entries, %d refs, %d links.", |
|
|
398 | entries, refs, links); |
|
|
399 | return totals; |
|
|
400 | } |
|
|
401 | return NULL; |
|
|
402 | } |
|
|
403 | |
123 | |
404 | /* buf_overflow() - we don't want to exceed the buffer size of |
124 | /* buf_overflow() - we don't want to exceed the buffer size of |
405 | * buf1 by adding on buf2! Returns true if overflow will occur. |
125 | * buf1 by adding on buf2! Returns true if overflow will occur. |
406 | */ |
126 | */ |
407 | |
127 | |
408 | int |
128 | int |
409 | buf_overflow (const char *buf1, const char *buf2, int bufsize) |
129 | buf_overflow (const char *buf1, const char *buf2, int bufsize) |
410 | { |
130 | { |
411 | int len1 = 0, len2 = 0; |
131 | int len1 = 0, len2 = 0; |
412 | |
132 | |
413 | if (buf1) |
133 | if (buf1) |
414 | len1 = strlen (buf1); |
134 | len1 = strlen (buf1); |
415 | if (buf2) |
135 | if (buf2) |
416 | len2 = strlen (buf2); |
136 | len2 = strlen (buf2); |
417 | if ((len1 + len2) >= bufsize) |
137 | if ((len1 + len2) >= bufsize) |
418 | return 1; |
|
|
419 | return 0; |
138 | return 1; |
|
|
139 | return 0; |
420 | } |
140 | } |
421 | |
|
|