… | |
… | |
6 | #include <cstdlib> |
6 | #include <cstdlib> |
7 | |
7 | |
8 | #include <tr1/unordered_set> |
8 | #include <tr1/unordered_set> |
9 | |
9 | |
10 | #include "shstr.h" |
10 | #include "shstr.h" |
|
|
11 | |
|
|
12 | // NOTE: even with lots of stuff loaded, we do not usually have >>20000 strings. |
|
|
13 | // maybe refcounting is just overhead? |
11 | |
14 | |
12 | struct hash |
15 | struct hash |
13 | { |
16 | { |
14 | std::size_t operator ()(const char *s) const |
17 | std::size_t operator ()(const char *s) const |
15 | { |
18 | { |
… | |
… | |
49 | static HT ht; |
52 | static HT ht; |
50 | |
53 | |
51 | const char * |
54 | const char * |
52 | shstr::find (const char *s) |
55 | shstr::find (const char *s) |
53 | { |
56 | { |
|
|
57 | if (!s) |
|
|
58 | return s; |
|
|
59 | |
54 | HT::iterator i = ht.find (s); |
60 | HT::iterator i = ht.find (s); |
55 | |
61 | |
56 | return i != ht.end () |
62 | return i != ht.end () |
57 | ? (char *)*i |
63 | ? *i |
58 | : 0; |
64 | : 0; |
59 | } |
65 | } |
60 | |
66 | |
61 | const char * |
67 | const char * |
62 | shstr::intern (const char *s) |
68 | shstr::intern (const char *s) |
63 | { |
69 | { |
64 | HT::iterator i = ht.find (s); |
70 | if (!s) |
|
|
71 | return s; |
65 | |
72 | |
66 | if (i != ht.end ()) |
73 | if (const char *found = find (s)) |
67 | return (char *)*i; |
74 | { |
|
|
75 | ++refcnt (found); |
|
|
76 | return found; |
|
|
77 | } |
68 | |
78 | |
69 | int len = strlen (s); |
79 | int len = strlen (s); |
70 | |
80 | |
71 | int *v = (int *)malloc (sizeof (int) * 2 + len + 1); |
81 | const char *v = (const char *)(2 + (int *)malloc (sizeof (int) * 2 + len + 1)); |
72 | |
82 | |
73 | v [0] = len; |
83 | length (v) = len; |
74 | v [1] = 0; |
84 | refcnt (v) = 1; |
75 | |
85 | |
76 | v += 2; |
86 | memcpy ((char *)v, s, len + 1); |
77 | |
87 | |
78 | memcpy (v, s, len + 1); |
88 | ht.insert (v); |
79 | |
89 | |
80 | ht.insert ((char *)v); |
90 | return v; |
81 | |
|
|
82 | return (char *)v; |
|
|
83 | } |
91 | } |
84 | |
92 | |
85 | // TODO: periodically test refcounts == 0 for a few strings (e.g. one hash bucket, |
93 | // periodically test refcounts == 0 for a few strings |
86 | // exploiting the fact that iterators stay valid for unordered_set). |
94 | // this is the ONLY thing that erases stuff from ht. keep it that way. |
87 | void |
95 | void |
88 | shstr::gc () |
96 | shstr::gc () |
89 | { |
97 | { |
|
|
98 | static const char *curpos; |
|
|
99 | |
|
|
100 | HT::iterator i = curpos ? ht.find (curpos) : ht.begin (); |
|
|
101 | |
|
|
102 | if (i == ht.end ()) |
|
|
103 | i = ht.begin (); |
|
|
104 | |
|
|
105 | // go through all strings roughly once every 4 minutes |
|
|
106 | for (int n = ht.size () / 256 + 16; --n; ) |
|
|
107 | { |
|
|
108 | if (i == ht.end ()) |
|
|
109 | { |
|
|
110 | curpos = 0; |
|
|
111 | return; |
|
|
112 | } |
|
|
113 | |
|
|
114 | if (!refcnt (*i)) |
|
|
115 | { |
|
|
116 | HT::iterator o = i++; |
|
|
117 | const char *s = *o; |
|
|
118 | ht.erase (o); |
|
|
119 | |
|
|
120 | //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s)); |
|
|
121 | free (-2 + (int *)s); |
|
|
122 | } |
|
|
123 | else |
|
|
124 | ++i; |
|
|
125 | } |
|
|
126 | |
|
|
127 | curpos = *i; |
90 | } |
128 | } |
91 | |
129 | |
|
|
130 | //TODO: this should of course not be here |
92 | /* buf_overflow() - we don't want to exceed the buffer size of |
131 | /* buf_overflow() - we don't want to exceed the buffer size of |
93 | * buf1 by adding on buf2! Returns true if overflow will occur. |
132 | * buf1 by adding on buf2! Returns true if overflow will occur. |
94 | */ |
133 | */ |
95 | |
134 | |
96 | int |
135 | int |