--- deliantra/server/common/shstr.C 2006/09/03 00:18:40 1.3 +++ deliantra/server/common/shstr.C 2006/09/10 16:00:23 1.13 @@ -1,3 +1,4 @@ + /* * shstr.C */ @@ -5,105 +6,130 @@ #include #include +#include + #include #include "shstr.h" +#include "util.h" -struct hash -{ - std::size_t operator ()(const char *s) const - { - unsigned long hash = 0; - unsigned int i = 0; - - /* use the one-at-a-time hash function, which supposedly is - * better than the djb2-like one used by perl5.005, but - * certainly is better then the bug used here before. - * see http://burtleburtle.net/bob/hash/doobs.html - */ - while (*s) - { - hash += *s++; - hash += hash << 10; - hash ^= hash >> 6; - } - - hash += hash << 3; - hash ^= hash >> 11; - hash += hash << 15; - - return hash; - } -}; +typedef +std::tr1::unordered_set < const char *, + str_hash, + str_equal > + HT; -struct equal +static HT + ht; + +static const char * +makevec (const char *s) { - bool operator ()(const char *a, const char *b) const - { - return !strcmp (a, b); - } -}; + int + len = strlen (s); + + const char * + v = (const char *) (2 + (int *) g_slice_alloc (sizeof (int) * 2 + len + 1)); + + shstr::length (v) = len; + shstr::refcnt (v) = 1; -typedef std::tr1::unordered_set HT; + memcpy ((char *) v, s, len + 1); -static HT ht; + return v; +} + +const char * + shstr::null = makevec (""); + +// what weird misoptimisation is this again? +const shstr undead_name ("undead"); const char * shstr::find (const char *s) { + if (!s) + return s; + HT::iterator i = ht.find (s); - return i != ht.end () - ? (char *)*i - : 0; + return i != ht.end ()? *i : 0; } const char * shstr::intern (const char *s) { - HT::iterator i = ht.find (s); + if (!s) + return null; - if (i != ht.end ()) - return (char *)*i; - - int len = strlen (s); - - int *v = (int *)malloc (sizeof (int) * 2 + len + 1); + if (const char *found = find (s)) + { + ++refcnt (found); + return found; + } + + s = makevec (s); + ht.insert (s); + return s; +} - v [0] = len; - v [1] = 0; +// periodically test refcounts == 0 for a few strings +// this is the ONLY thing that erases stuff from ht. keep it that way. +void +shstr::gc () +{ + static const char *curpos; - v += 2; + HT::iterator i = curpos ? ht.find (curpos) : ht.begin (); - memcpy (v, s, len + 1); + if (i == ht.end ()) + i = ht.begin (); - ht.insert ((char *)v); + // go through all strings roughly once every 4 minutes + int n = ht.size () / 256 + 16; + + for (;;) + { + if (i == ht.end ()) + { + curpos = 0; + return; + } + else if (!--n) + break; + else if (!refcnt (*i)) + { + HT::iterator o = i++; + const char *s = *o; + + ht.erase (o); + + //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s)); + g_slice_free1 (sizeof (int) * 2 + length (s) + 1, -2 + (int *) s); + } + else + ++i; + } - return (char *)v; + curpos = *i; } -// TODO: periodically test refcounts == 0 for a few strings (e.g. one hash bucket, -// exploiting the fatc that iterators stay valid for unordered_set). -void -shstr::gc () -{ -} +//TODO: this should of course not be here /* buf_overflow() - we don't want to exceed the buffer size of * buf1 by adding on buf2! Returns true if overflow will occur. */ -int +int buf_overflow (const char *buf1, const char *buf2, int bufsize) { - int len1 = 0, len2 = 0; + int len1 = 0, len2 = 0; - if (buf1) - len1 = strlen (buf1); - if (buf2) - len2 = strlen (buf2); - if ((len1 + len2) >= bufsize) - return 1; - return 0; + if (buf1) + len1 = strlen (buf1); + if (buf2) + len2 = strlen (buf2); + if ((len1 + len2) >= bufsize) + return 1; + return 0; } -