server/common/shstr.C

/*
 * shstr.C
 */

#include <cstring>
#include <cstdlib>

#include <tr1/unordered_set>

#include "shstr.h"

struct hash
{
  std::size_t operator ()(const char *s) const
  {
    unsigned long hash = 0;
    unsigned int i = 0;

    /* use the one-at-a-time hash function, which supposedly is
     * better than the djb2-like one used by perl5.005, but
     * certainly is better then the bug used here before.
     * see http://burtleburtle.net/bob/hash/doobs.html
     */
    while (*s)
      {
        hash += *s++;
        hash += hash << 10;
        hash ^= hash >>  6;
      }

    hash += hash <<  3;
    hash ^= hash >> 11;
    hash += hash << 15;

    return hash;
  }
};

struct equal
{
  bool operator ()(const char *a, const char *b) const
  {
    return !strcmp (a, b);
  }
};

typedef std::tr1::unordered_set<const char *, hash, equal> HT;

static HT ht;

static const char *makevec (const char *s)
{
  int len = strlen (s);

  const char *v = (const char *)(2 + (int *)malloc (sizeof (int) * 2 + len + 1));

  shstr::length (v) = len;
  shstr::refcnt (v) = 1;

  memcpy ((char *)v, s, len + 1);

  return v;
}

const char *shstr::null = makevec ("<nil>");

// what weird misoptimisation is this again?
const shstr undead_name ("undead");

const char *
shstr::find (const char *s)
{
  if (!s)
    return s;

  HT::iterator i = ht.find (s);

  return i != ht.end ()
    ? *i
    : 0;
}

const char *
shstr::intern (const char *s)
{
  if (!s)
    return null;

  if (const char *found = find (s))
    {
      ++refcnt (found);
      return found;
    }

  s = makevec (s);
  ht.insert (s);
  return s;
}

// periodically test refcounts == 0 for a few strings
// this is the ONLY thing that erases stuff from ht. keep it that way.
void
shstr::gc ()
{
  static const char *curpos;

  HT::iterator i = curpos ? ht.find (curpos) : ht.begin ();

  if (i == ht.end ())
    i = ht.begin ();

  // go through all strings roughly once every 4 minutes
  int n = ht.size () / 256 + 16;

  for (;;)
    {
      if (i == ht.end ())
        {
          curpos = 0;
          return;
        }
      else if (!--n)
        break;
      else if (!refcnt (*i))
        {
          HT::iterator o = i++;
          const char *s = *o;
          ht.erase (o);

          //printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s));
          free (-2 + (int *)s);
        }
      else
        ++i;
    }

  curpos = *i;
}

//TODO: this should of course not be here
/* buf_overflow() - we don't want to exceed the buffer size of 
 * buf1 by adding on buf2! Returns true if overflow will occur.
 */

int 
buf_overflow (const char *buf1, const char *buf2, int bufsize)
{
    int len1 = 0, len2 = 0;

    if (buf1)
        len1 = strlen (buf1);
    if (buf2)
        len2 = strlen (buf2);
    if ((len1 + len2) >= bufsize)
        return 1;
    return 0;
}

Revision:	1.8
Committed:	Mon Sep 4 11:07:59 2006 UTC (17 years, 10 months ago) by root
Content type:	text/plain
Branch:	MAIN
Changes since 1.7:	+4 -1 lines
Log Message:	Changes... - alternative shstr representation, saves code - use glibs splice memory allocator (seems slower) - use simpler memory/lifetime management for objects, no recycling
#	User	Rev	Content
1	elmex	1.1	/*
2	root	1.3	* shstr.C
3	elmex	1.1	*/
4
5	root	1.3	#include <cstring>
6			#include <cstdlib>
7
8			#include <tr1/unordered_set>
9	elmex	1.1
10			#include "shstr.h"
11
12	root	1.3	struct hash
13			{
14			std::size_t operator ()(const char *s) const
15			{
16	elmex	1.1	unsigned long hash = 0;
17	root	1.3	unsigned int i = 0;
18	elmex	1.1
19	root	1.3	/* use the one-at-a-time hash function, which supposedly is
20			* better than the djb2-like one used by perl5.005, but
21			* certainly is better then the bug used here before.
22			* see http://burtleburtle.net/bob/hash/doobs.html
23	elmex	1.1	*/
24	root	1.3	while (*s)
25			{
26			hash += *s++;
27			hash += hash << 10;
28			hash ^= hash >> 6;
29			}
30
31			hash += hash << 3;
32			hash ^= hash >> 11;
33			hash += hash << 15;
34
35			return hash;
36			}
37			};
38	elmex	1.1
39	root	1.3	struct equal
40			{
41			bool operator ()(const char a, const char b) const
42			{
43			return !strcmp (a, b);
44			}
45			};
46	elmex	1.1
47	root	1.3	typedef std::tr1::unordered_set<const char *, hash, equal> HT;
48	elmex	1.1
49	root	1.3	static HT ht;
50	elmex	1.1
51	root	1.7	static const char makevec (const char s)
52			{
53			int len = strlen (s);
54
55			const char v = (const char )(2 + (int )malloc (sizeof (int) 2 + len + 1));
56
57			shstr::length (v) = len;
58			shstr::refcnt (v) = 1;
59
60			memcpy ((char *)v, s, len + 1);
61
62	root	1.8	return v;
63	root	1.7	}
64
65			const char *shstr::null = makevec ("<nil>");
66
67	root	1.8	// what weird misoptimisation is this again?
68			const shstr undead_name ("undead");
69
70	elmex	1.1	const char *
71	root	1.3	shstr::find (const char *s)
72			{
73	root	1.4	if (!s)
74			return s;
75
76	root	1.3	HT::iterator i = ht.find (s);
77	elmex	1.1
78	root	1.3	return i != ht.end ()
79	root	1.5	? *i
80	root	1.3	: 0;
81	elmex	1.1	}
82
83			const char *
84	root	1.3	shstr::intern (const char *s)
85			{
86	root	1.4	if (!s)
87	root	1.7	return null;
88	elmex	1.1
89	root	1.4	if (const char *found = find (s))
90	root	1.5	{
91			++refcnt (found);
92			return found;
93			}
94	elmex	1.1
95	root	1.7	s = makevec (s);
96			ht.insert (s);
97			return s;
98	elmex	1.1	}
99
100	root	1.5	// periodically test refcounts == 0 for a few strings
101			// this is the ONLY thing that erases stuff from ht. keep it that way.
102	elmex	1.1	void
103	root	1.3	shstr::gc ()
104			{
105	root	1.5	static const char *curpos;
106
107			HT::iterator i = curpos ? ht.find (curpos) : ht.begin ();
108
109			if (i == ht.end ())
110			i = ht.begin ();
111
112			// go through all strings roughly once every 4 minutes
113	root	1.6	int n = ht.size () / 256 + 16;
114
115			for (;;)
116	root	1.5	{
117			if (i == ht.end ())
118			{
119			curpos = 0;
120			return;
121			}
122	root	1.6	else if (!--n)
123			break;
124			else if (!refcnt (*i))
125	root	1.5	{
126			HT::iterator o = i++;
127			const char s = o;
128			ht.erase (o);
129
130			//printf ("GC %4d %3d %d >%s<%d\n", (int)ht.size (), n, shstr::refcnt (s), s, shstr::length (s));
131			free (-2 + (int *)s);
132			}
133			else
134			++i;
135			}
136
137			curpos = *i;
138	elmex	1.1	}
139
140	root	1.5	//TODO: this should of course not be here
141	elmex	1.1	/* buf_overflow() - we don't want to exceed the buffer size of
142			* buf1 by adding on buf2! Returns true if overflow will occur.
143			*/
144
145			int
146			buf_overflow (const char buf1, const char buf2, int bufsize)
147			{
148	root	1.3	int len1 = 0, len2 = 0;
149	elmex	1.1
150			if (buf1)
151	root	1.2	len1 = strlen (buf1);
152	elmex	1.1	if (buf2)
153	root	1.2	len2 = strlen (buf2);
154	elmex	1.1	if ((len1 + len2) >= bufsize)
155	root	1.2	return 1;
156	elmex	1.1	return 0;
157			}
158