… | |
… | |
46 | |
46 | |
// HT: unordered set of C-string pointers, parameterised with this file's
// `hash` and `equal` types as hasher and key-equality predicate.
47 | typedef std::tr1::unordered_set<const char *, hash, equal> HT; |
47 | typedef std::tr1::unordered_set<const char *, hash, equal> HT; |
48 | |
48 | |
// ht: the file-local table instance that shstr::find, shstr::intern and
// shstr::gc operate on.
49 | static HT ht; |
49 | static HT ht; |
50 | |
50 | |
|
|
51 | static const char *makevec (const char *s) |
|
|
52 | { |
|
|
53 | int len = strlen (s); |
|
|
54 | |
|
|
55 | const char *v = (const char *)(2 + (int *)malloc (sizeof (int) * 2 + len + 1)); |
|
|
56 | |
|
|
57 | shstr::length (v) = len; |
|
|
58 | shstr::refcnt (v) = 1; |
|
|
59 | |
|
|
60 | memcpy ((char *)v, s, len + 1); |
|
|
61 | |
|
|
62 | return s; |
|
|
63 | } |
|
|
64 | |
|
|
// shstr::null: string built once via makevec ("<nil>"); shstr::intern ()
// returns it when it is handed a null pointer.
65 | const char *shstr::null = makevec ("<nil>"); |
|
|
66 | |
51 | const char * |
67 | const char * |
52 | shstr::find (const char *s) |
68 | shstr::find (const char *s) |
53 | { |
69 | { |
54 | if (!s) |
70 | if (!s) |
55 | return s; |
71 | return s; |
56 | |
72 | |
57 | HT::iterator i = ht.find (s); |
73 | HT::iterator i = ht.find (s); |
58 | |
74 | |
59 | return i != ht.end () |
75 | return i != ht.end () |
60 | ? (char *)*i |
76 | ? *i |
61 | : 0; |
77 | : 0; |
62 | } |
78 | } |
63 | |
79 | |
64 | const char * |
80 | const char * |
65 | shstr::intern (const char *s) |
81 | shstr::intern (const char *s) |
66 | { |
82 | { |
67 | if (!s) |
83 | if (!s) |
68 | return s; |
84 | return null; |
69 | |
85 | |
70 | if (const char *found = find (s)) |
86 | if (const char *found = find (s)) |
|
|
87 | { |
|
|
88 | ++refcnt (found); |
71 | return found; |
89 | return found; |
|
|
90 | } |
72 | |
91 | |
73 | int len = strlen (s); |
92 | s = makevec (s); |
74 | |
|
|
75 | int *v = (int *)malloc (sizeof (int) * 2 + len + 1); |
|
|
76 | |
|
|
77 | v [0] = len; |
|
|
78 | v [1] = 0; |
|
|
79 | |
|
|
80 | v += 2; |
|
|
81 | |
|
|
82 | memcpy (v, s, len + 1); |
|
|
83 | |
|
|
84 | ht.insert ((char *)v); |
93 | ht.insert (s); |
85 | |
94 | return s; |
86 | return (char *)v; |
|
|
87 | } |
95 | } |
88 | |
96 | |
89 | // TODO: periodically test refcounts == 0 for a few strings (e.g. one hash bucket, |
97 | // periodically test refcounts == 0 for a few strings |
90 | // exploiting the fact that iterators stay valid for unordered_set). |
98 | // this is the ONLY thing that erases stuff from ht. keep it that way. |
91 | void |
99 | void |
92 | shstr::gc () |
100 | shstr::gc () |
93 | { |
101 | { |
|
|
102 | static const char *curpos; |
|
|
103 | |
|
|
104 | HT::iterator i = curpos ? ht.find (curpos) : ht.begin (); |
|
|
105 | |
|
|
106 | if (i == ht.end ()) |
|
|
107 | i = ht.begin (); |
|
|
108 | |
|
|
109 | // go through all strings roughly once every 4 minutes |
|
|
110 | int n = ht.size () / 256 + 16; |
|
|
111 | |
|
|
112 | for (;;) |
|
|
113 | { |
|
|
114 | if (i == ht.end ()) |
|
|
115 | { |
|
|
116 | curpos = 0; |
|
|
117 | return; |
|
|
118 | } |
|
|
119 | else if (!--n) |
|
|
120 | break; |
|
|
121 | else if (!refcnt (*i)) |
|
|
122 | { |
|
|
123 | HT::iterator o = i++; |
|
|
124 | const char *s = *o; |
|
|
125 | ht.erase (o); |
|
|
126 | |
|
|
127 | //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s)); |
|
|
128 | free (-2 + (int *)s); |
|
|
129 | } |
|
|
130 | else |
|
|
131 | ++i; |
|
|
132 | } |
|
|
133 | |
|
|
134 | curpos = *i; |
94 | } |
135 | } |
95 | |
136 | |
|
|
137 | //TODO: this should of course not be here |
96 | /* buf_overflow() - we don't want to exceed the buffer size of |
138 | /* buf_overflow() - we don't want to exceed the buffer size of |
97 | * buf1 by adding on buf2! Returns true if overflow will occur. |
139 | * buf1 by adding on buf2! Returns true if overflow will occur. |
98 | */ |
140 | */ |
99 | |
141 | |
100 | int |
142 | int |