server/common/shstr.C

/*
 * shstr.C
 */

#include <cstring>
#include <cstdlib>

#include <tr1/unordered_set>

#include "shstr.h"

// NOTE: even with lots of stuff loaded, we do not usually have >>20000 strings.
// maybe refcounting is just overhead?

// Hash functor for NUL-terminated C strings; it is the hasher of the
// HT string table defined below.
struct hash
{
  // Returns the hash of the characters of s, up to but not including the
  // terminating NUL. s must not be null (it is dereferenced unconditionally).
  std::size_t operator ()(const char *s) const
  {
    unsigned long h = 0;

    /* use the one-at-a-time hash function, which supposedly is
     * better than the djb2-like one used by perl5.005, but
     * certainly is better than the bug used here before.
     * see http://burtleburtle.net/bob/hash/doobs.html
     */
    for (; *s; ++s)
      {
        // the character is added as plain char, so where char is signed,
        // bytes >= 0x80 contribute as negative values; kept as-is so the
        // resulting hash values do not change.
        h += *s;
        h += h << 10;
        h ^= h >>  6;
      }

    // final avalanche of the one-at-a-time function
    h += h <<  3;
    h ^= h >> 11;
    h += h << 15;

    return h;
  }
};

// Equality predicate for NUL-terminated C strings: two pointers are equal
// when the strings they point to have identical contents.
struct equal
{
  // Neither a nor b may be null; both are passed straight to strcmp.
  bool operator ()(const char *a, const char *b) const
  {
    return strcmp (a, b) == 0;
  }
};

// Hash set of C-string pointers whose elements are hashed and compared by
// string content (through the hash and equal functors above), not by
// pointer value.
typedef std::tr1::unordered_set<const char *, hash, equal> HT;

// The file-local string table: shstr::intern () inserts into it,
// shstr::find () looks strings up in it and shstr::gc () erases from it.
static HT ht;

const char *
shstr::find (const char *s)
{
  if (!s)
    return s;

  HT::iterator i = ht.find (s);

  return i != ht.end ()
    ? *i
    : 0;
}

// Returns the table's copy of s, creating it if necessary.
// - a null s is returned unchanged;
// - if an equal string is already in the table, its refcount is
//   incremented and the existing pointer is returned;
// - otherwise a new copy is allocated with refcount 1, inserted into the
//   table and returned.
// Aborts the process if the copy cannot be allocated.
const char *
shstr::intern (const char *s)
{
  if (!s)
    return s;

  if (const char *found = find (s))
    {
      ++refcnt (found);
      return found;
    }

  // NOTE(review): the length is kept in an int, so strings longer than
  // INT_MAX are not supported.
  int len = strlen (s);

  // the allocation is two ints followed by the characters and the
  // terminating NUL; the returned pointer addresses the characters, and
  // shstr::gc () frees the block again via -2 + (int *)s.
  int *block = (int *)malloc (sizeof (int) * 2 + len + 1);

  // malloc returns a null pointer on failure; writing the header through
  // it would be undefined behaviour, so fail explicitly instead.
  if (!block)
    abort ();

  const char *v = (const char *)(block + 2);

  // presumably length ()/refcnt () address the two ints in front of v
  // (declared in shstr.h, not visible here)
  length (v) = len;
  refcnt (v) = 1;

  memcpy ((char *)v, s, len + 1); // len + 1 copies the terminating NUL as well

  ht.insert (v);

  return v;
}

// periodically test refcounts == 0 for a few strings
// this is the ONLY thing that erases stuff from ht. keep it that way.
//
// Each call examines at most ht.size () / 256 + 15 entries, resuming at the
// string where the previous call stopped. Entries whose refcount is zero are
// erased from the table and their memory is freed.
void
shstr::gc ()
{
  // string at which the next call resumes; 0 means "start at the beginning"
  static const char *curpos;

  HT::iterator i = curpos ? ht.find (curpos) : ht.begin ();

  if (i == ht.end ())
    i = ht.begin ();

  // go through all strings roughly once every 4 minutes
  for (int n = ht.size () / 256 + 16; --n; )
    {
      if (i == ht.end ())
        {
          // reached the end of the table: restart from the beginning next time
          curpos = 0;
          return;
        }

      if (!refcnt (*i))
        {
          // advance before erasing: erase only invalidates the erased iterator
          HT::iterator o = i++;
          const char *s = *o;
          ht.erase (o);

          //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s));
          // the allocation made by shstr::intern () starts two ints before the characters
          free (-2 + (int *)s);
        }
      else
        ++i;
    }

  // the budget can run out exactly when i reaches the end of the table;
  // dereferencing end () is undefined, so restart from the beginning then.
  curpos = i != ht.end () ? *i : 0;
}

//TODO: this should of course not be here
/* buf_overflow() - reports whether appending buf2 to buf1 would exceed a
 * buffer of bufsize bytes. A null buf1 or buf2 counts as an empty string.
 * Returns 1 when the combined string length is >= bufsize (which leaves no
 * room for the terminating NUL), 0 otherwise.
 */

int
buf_overflow (const char *buf1, const char *buf2, int bufsize)
{
    const int first = buf1 ? strlen (buf1) : 0;
    const int second = buf2 ? strlen (buf2) : 0;

    return (first + second >= bufsize) ? 1 : 0;
}

Revision:	1.5
Committed:	Sun Sep 3 09:00:05 2006 UTC (17 years, 10 months ago) by root
Content type:	text/plain
Branch:	MAIN
Changes since 1.4:	+47 -12 lines
Log Message:	everything seems to work so far
#	Content
1	/*
2	* shstr.C
3	*/
4
5	#include <cstring>
6	#include <cstdlib>
7
8	#include <tr1/unordered_set>
9
10	#include "shstr.h"
11
12	// NOTE: even with lots of stuff loaded, we do not usually have >>20000 strings.
13	// maybe refcounting is just overhead?
14
15	struct hash
16	{
17	std::size_t operator ()(const char *s) const
18	{
19	unsigned long hash = 0;
20	unsigned int i = 0;
21
22	/* use the one-at-a-time hash function, which supposedly is
23	* better than the djb2-like one used by perl5.005, but
24	* certainly is better then the bug used here before.
25	* see http://burtleburtle.net/bob/hash/doobs.html
26	*/
27	while (*s)
28	{
29	hash += *s++;
30	hash += hash << 10;
31	hash ^= hash >> 6;
32	}
33
34	hash += hash << 3;
35	hash ^= hash >> 11;
36	hash += hash << 15;
37
38	return hash;
39	}
40	};
41
42	struct equal
43	{
44	bool operator ()(const char a, const char b) const
45	{
46	return !strcmp (a, b);
47	}
48	};
49
50	typedef std::tr1::unordered_set<const char *, hash, equal> HT;
51
52	static HT ht;
53
54	const char *
55	shstr::find (const char *s)
56	{
57	if (!s)
58	return s;
59
60	HT::iterator i = ht.find (s);
61
62	return i != ht.end ()
63	? *i
64	: 0;
65	}
66
67	const char *
68	shstr::intern (const char *s)
69	{
70	if (!s)
71	return s;
72
73	if (const char *found = find (s))
74	{
75	++refcnt (found);
76	return found;
77	}
78
79	int len = strlen (s);
80
81	const char v = (const char )(2 + (int )malloc (sizeof (int) 2 + len + 1));
82
83	length (v) = len;
84	refcnt (v) = 1;
85
86	memcpy ((char *)v, s, len + 1);
87
88	ht.insert (v);
89
90	return v;
91	}
92
93	// periodically test refcounts == 0 for a few strings
94	// this is the ONLY thing that erases stuff from ht. keep it that way.
95	void
96	shstr::gc ()
97	{
98	static const char *curpos;
99
100	HT::iterator i = curpos ? ht.find (curpos) : ht.begin ();
101
102	if (i == ht.end ())
103	i = ht.begin ();
104
105	// go through all strings roughly once every 4 minutes
106	for (int n = ht.size () / 256 + 16; --n; )
107	{
108	if (i == ht.end ())
109	{
110	curpos = 0;
111	return;
112	}
113
114	if (!refcnt (*i))
115	{
116	HT::iterator o = i++;
117	const char s = o;
118	ht.erase (o);
119
120	//printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s));
121	free (-2 + (int *)s);
122	}
123	else
124	++i;
125	}
126
127	curpos = *i;
128	}
129
130	//TODO: this should of course not be here
131	/* buf_overflow() - we don't want to exceed the buffer size of
132	* buf1 by adding on buf2! Returns true if overflow will occur.
133	*/
134
135	int
136	buf_overflow (const char buf1, const char buf2, int bufsize)
137	{
138	int len1 = 0, len2 = 0;
139
140	if (buf1)
141	len1 = strlen (buf1);
142	if (buf2)
143	len2 = strlen (buf2);
144	if ((len1 + len2) >= bufsize)
145	return 1;
146	return 0;
147	}
148