server/common/shstr.C

/*
 * shstr.C
 */

#include <cstring>
#include <cstdlib>

#include <tr1/unordered_set>

#include "shstr.h"

// NOTE: even with lots of stuff loaded, we do not usually have >>20000 strings.
// maybe refcounting is just overhead?

// Hash functor for NUL-terminated C strings; it is the hasher of the
// string table declared further down in this file.
struct hash
{
  // Returns the hash of the characters of s up to, but not including,
  // the terminating NUL. s must not be null (it is dereferenced
  // unconditionally). The empty string hashes to 0.
  std::size_t operator ()(const char *s) const
  {
    unsigned long h = 0;

    /* use the one-at-a-time hash function, which supposedly is
     * better than the djb2-like one used by perl5.005, but
     * certainly is better than the bug used here before.
     * see http://burtleburtle.net/bob/hash/doobs.html
     */
    // per-character mixing step; each character is added as a plain char
    for (; *s; ++s)
      {
        h += *s;
        h += h << 10;
        h ^= h >>  6;
      }

    // final mixing once all characters have been consumed
    h += h <<  3;
    h ^= h >> 11;
    h += h << 15;

    return h;
  }
};

// Equality predicate for NUL-terminated C strings: two keys are equal
// when their contents match, regardless of where they are stored.
struct equal
{
  // True when a and b hold the same character sequence. Both pointers
  // are handed straight to strcmp, so neither may be null.
  bool operator ()(const char *a, const char *b) const
  {
    return strcmp (a, b) == 0;
  }
};

// Hash set of C-string pointers. Keys are hashed and compared by their
// string contents (through the hash and equal functors above), not by
// pointer value.
typedef std::tr1::unordered_set<const char *, hash, equal> HT;

// The one file-local table of interned strings: shstr::intern inserts
// into it, shstr::find looks strings up in it, shstr::gc erases from it.
static HT ht;

const char *
shstr::find (const char *s)
{
  if (!s)
    return s;

  HT::iterator i = ht.find (s);

  return i != ht.end ()
    ? *i
    : 0;
}

// Returns the table's shared copy of s, creating it when necessary.
//
// - A null s is returned unchanged.
// - If a string with the same contents is already in the table, its
//   reference count is incremented and the stored pointer is returned.
// - Otherwise a new copy is allocated with room for two ints in front
//   of the characters, its length and reference count (1) are recorded
//   via length () and refcnt (), and it is inserted into the table.
//
// NOTE(review): length () and refcnt () are declared outside this file;
// presumably they address the two ints in front of the characters -
// confirm against shstr.h.
//
// If the allocation fails the process is aborted; previously the
// unchecked result was written through.
const char *
shstr::intern (const char *s)
{
  if (!s)
    return s;

  // already known: hand out one more reference to the existing copy
  if (const char *found = find (s))
    {
      ++refcnt (found);
      return found;
    }

  int len = strlen (s);

  // single allocation: two ints of header, the characters, the NUL
  int *block = static_cast<int *>(malloc (sizeof (int) * 2 + len + 1));

  // the header is written right below, so never continue with a failed
  // allocation
  if (!block)
    abort ();

  // the string proper starts right behind the two header ints
  const char *v = reinterpret_cast<const char *>(block + 2);

  length (v) = len;
  refcnt (v) = 1;

  // copy the characters including the terminating NUL
  memcpy (const_cast<char *>(v), s, len + 1);

  ht.insert (v);

  return v;
}

// periodically test refcounts == 0 for a few strings
// this is the ONLY thing that erases stuff from ht. keep it that way.
void
shstr::gc ()
{
  static const char *curpos;

  HT::iterator i = curpos ? ht.find (curpos) : ht.begin ();

  if (i == ht.end ())
    i = ht.begin ();

  // go through all strings roughly once every 4 minutes
  int n = ht.size () / 256 + 16;

  for (;;)
    {
      if (i == ht.end ())
        {
          curpos = 0;
          return;
        }
      else if (!--n)
        break;
      else if (!refcnt (*i))
        {
          HT::iterator o = i++;
          const char *s = *o;
          ht.erase (o);

          //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s));
          free (-2 + (int *)s);
        }
      else
        ++i;
    }

  curpos = *i;
}

//TODO: this should of course not be here
/* buf_overflow() - tells whether the combined length of buf1 and buf2
 * reaches or exceeds bufsize, i.e. whether appending buf2 to buf1
 * would overflow a buffer of that size.
 *
 * A null buf1 or buf2 counts as length 0.
 * Returns 1 if overflow will occur, 0 otherwise.
 */

int
buf_overflow (const char *buf1, const char *buf2, int bufsize)
{
  const int used  = buf1 ? static_cast<int> (strlen (buf1)) : 0;
  const int extra = buf2 ? static_cast<int> (strlen (buf2)) : 0;

  // a combined length equal to bufsize already counts as overflow
  // (presumably to leave room for the terminating NUL)
  return used + extra >= bufsize ? 1 : 0;
}

Revision:	1.6
Committed:	Sun Sep 3 11:37:25 2006 UTC (17 years, 10 months ago) by root
Content type:	text/plain
Branch:	MAIN
Changes since 1.5:	+6 -3 lines
Log Message:	final cleanups
#	User	Rev	Content
1	elmex	1.1	/*
2	root	1.3	* shstr.C
3	elmex	1.1	*/
4
5	root	1.3	#include <cstring>
6			#include <cstdlib>
7
8			#include <tr1/unordered_set>
9	elmex	1.1
10			#include "shstr.h"
11
12	root	1.5	// NOTE: even with lots of stuff loaded, we do not usually have >>20000 strings.
13			// maybe refcounting is just overhead?
14
15	root	1.3	struct hash
16			{
17			std::size_t operator ()(const char *s) const
18			{
19	elmex	1.1	unsigned long hash = 0;
20	root	1.3	unsigned int i = 0;
21	elmex	1.1
22	root	1.3	/* use the one-at-a-time hash function, which supposedly is
23			* better than the djb2-like one used by perl5.005, but
24			* certainly is better then the bug used here before.
25			* see http://burtleburtle.net/bob/hash/doobs.html
26	elmex	1.1	*/
27	root	1.3	while (*s)
28			{
29			hash += *s++;
30			hash += hash << 10;
31			hash ^= hash >> 6;
32			}
33
34			hash += hash << 3;
35			hash ^= hash >> 11;
36			hash += hash << 15;
37
38			return hash;
39			}
40			};
41	elmex	1.1
42	root	1.3	struct equal
43			{
44			bool operator ()(const char *a, const char *b) const
45			{
46			return !strcmp (a, b);
47			}
48			};
49	elmex	1.1
50	root	1.3	typedef std::tr1::unordered_set<const char *, hash, equal> HT;
51	elmex	1.1
52	root	1.3	static HT ht;
53	elmex	1.1
54			const char *
55	root	1.3	shstr::find (const char *s)
56			{
57	root	1.4	if (!s)
58			return s;
59
60	root	1.3	HT::iterator i = ht.find (s);
61	elmex	1.1
62	root	1.3	return i != ht.end ()
63	root	1.5	? *i
64	root	1.3	: 0;
65	elmex	1.1	}
66
67			const char *
68	root	1.3	shstr::intern (const char *s)
69			{
70	root	1.4	if (!s)
71			return s;
72	elmex	1.1
73	root	1.4	if (const char *found = find (s))
74	root	1.5	{
75			++refcnt (found);
76			return found;
77			}
78	elmex	1.1
79	root	1.3	int len = strlen (s);
80	elmex	1.1
81	root	1.5	const char *v = (const char *)(2 + (int *)malloc (sizeof (int) * 2 + len + 1));
82	elmex	1.1
83	root	1.5	length (v) = len;
84			refcnt (v) = 1;
85	elmex	1.1
86	root	1.5	memcpy ((char *)v, s, len + 1);
87	elmex	1.1
88	root	1.5	ht.insert (v);
89	elmex	1.1
90	root	1.5	return v;
91	elmex	1.1	}
92
93	root	1.5	// periodically test refcounts == 0 for a few strings
94			// this is the ONLY thing that erases stuff from ht. keep it that way.
95	elmex	1.1	void
96	root	1.3	shstr::gc ()
97			{
98	root	1.5	static const char *curpos;
99
100			HT::iterator i = curpos ? ht.find (curpos) : ht.begin ();
101
102			if (i == ht.end ())
103			i = ht.begin ();
104
105			// go through all strings roughly once every 4 minutes
106	root	1.6	int n = ht.size () / 256 + 16;
107
108			for (;;)
109	root	1.5	{
110			if (i == ht.end ())
111			{
112			curpos = 0;
113			return;
114			}
115	root	1.6	else if (!--n)
116			break;
117			else if (!refcnt (*i))
118	root	1.5	{
119			HT::iterator o = i++;
120			const char *s = *o;
121			ht.erase (o);
122
123			//printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s));
124			free (-2 + (int *)s);
125			}
126			else
127			++i;
128			}
129
130			curpos = *i;
131	elmex	1.1	}
132
133	root	1.5	//TODO: this should of course not be here
134	elmex	1.1	/* buf_overflow() - we don't want to exceed the buffer size of
135			* buf1 by adding on buf2! Returns true if overflow will occur.
136			*/
137
138			int
139			buf_overflow (const char *buf1, const char *buf2, int bufsize)
140			{
141	root	1.3	int len1 = 0, len2 = 0;
142	elmex	1.1
143			if (buf1)
144	root	1.2	len1 = strlen (buf1);
145	elmex	1.1	if (buf2)
146	root	1.2	len2 = strlen (buf2);
147	elmex	1.1	if ((len1 + len2) >= bufsize)
148	root	1.2	return 1;
149	elmex	1.1	return 0;
150			}
151