1 |
elmex |
1.1 |
/* |
2 |
root |
1.3 |
* shstr.C |
3 |
elmex |
1.1 |
*/ |
4 |
|
|
|
5 |
root |
1.3 |
#include <cstdlib>
#include <cstring>
#include <new>

#include <tr1/unordered_set>

#include "shstr.h"
11 |
|
|
|
12 |
root |
1.3 |
/*
 * Hash functor for NUL-terminated C strings.
 *
 * operator () walks the bytes of s up to (not including) the terminating
 * NUL and returns the accumulated value converted to std::size_t.
 * s must not be a null pointer: it is dereferenced unconditionally.
 */
struct hash
{
  std::size_t operator ()(const char *s) const
  {
    unsigned long h = 0;

    /* use the one-at-a-time hash function, which supposedly is
     * better than the djb2-like one used by perl5.005, but
     * certainly is better than the bug used here before.
     * see http://burtleburtle.net/bob/hash/doobs.html
     */
    while (*s)
      {
        // Add each byte as an unsigned value: a plain char may be signed,
        // in which case bytes >= 0x80 would be sign-extended and the
        // result would depend on the platform's char signedness.
        h += static_cast<unsigned char> (*s++);
        h += h << 10;
        h ^= h >> 6;
      }

    // final avalanche steps of the one-at-a-time hash
    h += h << 3;
    h ^= h >> 11;
    h += h << 15;

    return h;
  }
};
38 |
elmex |
1.1 |
|
39 |
root |
1.3 |
/*
 * Equality predicate for NUL-terminated C strings: two pointers compare
 * equal when strcmp reports identical contents, regardless of address.
 */
struct equal
{
  bool operator ()(const char *lhs, const char *rhs) const
  {
    return strcmp (lhs, rhs) == 0;
  }
};
46 |
elmex |
1.1 |
|
47 |
root |
1.3 |
/* Set of C-string pointers, hashed and compared by string contents
 * (via the hash and equal functors) rather than by pointer value.
 */
typedef std::tr1::unordered_set<
  const char *,
  hash,
  equal
> HT;

/* The single, file-local table instance used by the shstr functions below. */
static HT ht;
50 |
elmex |
1.1 |
|
51 |
|
|
const char * |
52 |
root |
1.3 |
shstr::find (const char *s) |
53 |
|
|
{ |
54 |
|
|
HT::iterator i = ht.find (s); |
55 |
elmex |
1.1 |
|
56 |
root |
1.3 |
return i != ht.end () |
57 |
|
|
? (char *)*i |
58 |
|
|
: 0; |
59 |
elmex |
1.1 |
} |
60 |
|
|
|
61 |
|
|
const char * |
62 |
root |
1.3 |
shstr::intern (const char *s) |
63 |
|
|
{ |
64 |
|
|
HT::iterator i = ht.find (s); |
65 |
elmex |
1.1 |
|
66 |
root |
1.3 |
if (i != ht.end ()) |
67 |
|
|
return (char *)*i; |
68 |
elmex |
1.1 |
|
69 |
root |
1.3 |
int len = strlen (s); |
70 |
elmex |
1.1 |
|
71 |
root |
1.3 |
int *v = (int *)malloc (sizeof (int) * 2 + len + 1); |
72 |
elmex |
1.1 |
|
73 |
root |
1.3 |
v [0] = len; |
74 |
|
|
v [1] = 0; |
75 |
elmex |
1.1 |
|
76 |
root |
1.3 |
v += 2; |
77 |
elmex |
1.1 |
|
78 |
root |
1.3 |
memcpy (v, s, len + 1); |
79 |
elmex |
1.1 |
|
80 |
root |
1.3 |
ht.insert ((char *)v); |
81 |
elmex |
1.1 |
|
82 |
root |
1.3 |
return (char *)v; |
83 |
elmex |
1.1 |
} |
84 |
|
|
|
85 |
root |
1.3 |
// TODO: periodically test refcounts == 0 for a few strings (e.g. one hash bucket, |
86 |
|
|
// exploiting the fact that iterators stay valid for unordered_set). |
87 |
elmex |
1.1 |
void |
88 |
root |
1.3 |
shstr::gc () |
89 |
|
|
{ |
90 |
elmex |
1.1 |
} |
91 |
|
|
|
92 |
|
|
/* buf_overflow() - we don't want to exceed the buffer size of
 * buf1 by adding on buf2!
 *
 * buf1, buf2: NUL-terminated strings; a null pointer counts as length 0.
 * bufsize:    total size of the destination buffer in bytes.
 *
 * Returns 1 if strlen (buf1) + strlen (buf2) >= bufsize, i.e. the
 * concatenation plus its terminating NUL would not fit, and 0 otherwise.
 * A bufsize <= 0 always yields 1.
 */
int
buf_overflow (const char *buf1, const char *buf2, int bufsize)
{
  // a non-positive size is reached by any length, including 0
  if (bufsize <= 0)
    return 1;

  const std::size_t capacity = static_cast<std::size_t> (bufsize);
  const std::size_t len1 = buf1 ? strlen (buf1) : 0;

  if (len1 >= capacity)
    return 1;

  const std::size_t len2 = buf2 ? strlen (buf2) : 0;

  // same test as len1 + len2 >= capacity, arranged so that it cannot
  // overflow (capacity - len1 is known to be positive here)
  return len2 >= capacity - len1 ? 1 : 0;
}
109 |
|
|
|