--- deliantra/server/include/util.h	2007/01/19 22:47:57	1.35
+++ deliantra/server/include/util.h	2012/11/12 02:39:51	1.122
@@ -1,11 +1,35 @@
+/*
+ * This file is part of Deliantra, the Roguelike Realtime MMORPG.
+ *
+ * Copyright (©) 2005,2006,2007,2008,2009,2010,2011,2012 Marc Alexander Lehmann / Robin Redeker / the Deliantra team
+ *
+ * Deliantra is free software: you can redistribute it and/or modify it under
+ * the terms of the Affero GNU General Public License as published by the
+ * Free Software Foundation, either version 3 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * and the GNU General Public License along with this program. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * The authors can be reached via e-mail to <support@deliantra.net>
+ */
+
 #ifndef UTIL_H__
 #define UTIL_H__
 
-#if __GNUC__ >= 3
-# define is_constant(c) __builtin_constant_p (c)
-#else
-# define is_constant(c) 0
-#endif
+#include <compiler.h>
+
+#define DEBUG_POISON 0x00 // poison memory before freeing it if != 0
+#define DEBUG_SALLOC  0   // add a debug wrapper around all sallocs
+#define PREFER_MALLOC 0   // use malloc and not the slice allocator
+
+#include <pthread.h>
 
 #include <cstddef>
 #include <cmath>
@@ -17,15 +41,39 @@
 #include <shstr.h>
 #include <traits.h>
 
-// use a gcc extension for auto declarations until ISO C++ sanctifies them
-#define AUTODECL(var,expr) typeof(expr) var = (expr)
+#if DEBUG_SALLOC
+# define g_slice_alloc0(s) debug_slice_alloc0(s)
+# define g_slice_alloc(s) debug_slice_alloc(s)
+# define g_slice_free1(s,p) debug_slice_free1(s,p)
+void *g_slice_alloc (unsigned long size);
+void *g_slice_alloc0 (unsigned long size);
+void g_slice_free1 (unsigned long size, void *ptr);
+#elif PREFER_MALLOC
+# define g_slice_alloc0(s) calloc (1, (s))
+# define g_slice_alloc(s) malloc ((s))
+# define g_slice_free1(s,p) free ((p))
+#endif
+
+// use C++0x decltype for auto declarations until ISO C++ sanctifies them (if ever)
+#define auto(var,expr) decltype(expr) var = (expr)
 
-// very ugly macro that basicaly declares and initialises a variable
+#if cplusplus_does_not_suck
+// does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm)
+template<typename T, int N>
+static inline int array_length (const T (&arr)[N])
+{
+  return N;
+}
+#else
+#define array_length(name) (sizeof (name) / sizeof (name [0]))
+#endif
+
+// very ugly macro that basically declares and initialises a variable
 // that is in scope for the next statement only
 // works only for stuff that can be assigned 0 and converts to false
 // (note: works great for pointers)
 // most ugly macro I ever wrote
-#define declvar(type, name, value) if (type name = 0) { } else if (((name) = (value)), 1)
+#define statementvar(type, name, value) if (type name = 0) { } else if (((name) = (value)), 1)
 
 // in range including end
 #define IN_RANGE_INC(val,beg,end) \
@@ -35,17 +83,135 @@
 #define IN_RANGE_EXC(val,beg,end) \
   ((unsigned int)(val) - (unsigned int)(beg) <  (unsigned int)(end) - (unsigned int)(beg))
 
+void cleanup (const char *cause, bool make_core = false);
 void fork_abort (const char *msg);
 
 // rationale for using (U) not (T) is to reduce signed/unsigned issues,
 // as a is often a constant while b is the variable. it is still a bug, though.
-template<typename T, typename U> static inline T min (T a, U b) { return (U)a < b ? (U)a : b; }
-template<typename T, typename U> static inline T max (T a, U b) { return (U)a > b ? (U)a : b; }
+template<typename T, typename U> static inline T min (T a, U b) { return a < (T)b ? a : (T)b; }
+template<typename T, typename U> static inline T max (T a, U b) { return a > (T)b ? a : (T)b; }
 template<typename T, typename U, typename V> static inline T clamp (T v, U a, V b) { return v < (T)a ? (T)a : v >(T)b ? (T)b : v; }
 
+template<typename T, typename U> static inline void min_it (T &v, U m) { v = min (v, (T)m); }
+template<typename T, typename U> static inline void max_it (T &v, U m) { v = max (v, (T)m); }
+template<typename T, typename U, typename V> static inline void clamp_it (T &v, U a, V b) { v = clamp (v, (T)a, (T)b); }
+
 template<typename T, typename U> static inline void swap (T& a, U& b) { T t=a; a=(T)b; b=(U)t; }
 
-// this is much faster than crossfires original algorithm
+template<typename T, typename U, typename V> static inline T min (T a, U b, V c) { return min (a, min (b, c)); }
+template<typename T, typename U, typename V> static inline T max (T a, U b, V c) { return max (a, max (b, c)); }
+
+// sign returns -1 or +1
+template<typename T>
+static inline T sign (T v) { return v < 0 ? -1 : +1; }
+// relies on 2c representation
+template<>
+inline sint8  sign (sint8  v) { return 1 - (sint8  (uint8  (v) >>  7) * 2); }
+template<>
+inline sint16 sign (sint16 v) { return 1 - (sint16 (uint16 (v) >> 15) * 2); }
+template<>
+inline sint32 sign (sint32 v) { return 1 - (sint32 (uint32 (v) >> 31) * 2); }
+
+// sign0 returns -1, 0 or +1
+template<typename T>
+static inline T sign0 (T v) { return v ? sign (v) : 0; }
+
+// clashes with C++0x std::copysign; note that this one returns b if a > 0, else -b
+template<typename T, typename U>
+static inline T copysign (T a, U b) { return a > 0 ? b : -b; }
+
+// div* only work correctly for div > 0
+// div, with correct rounding (< 0.5 downwards, >=0.5 upwards)
+template<typename T> static inline T div    (T val, T div)
+{
+  return expect_false (val < 0) ? - ((-val + (div - 1) / 2) / div) : (val + div / 2) / div;
+}
+
+template<> inline float  div (float  val, float  div) { return val / div; }
+template<> inline double div (double val, double div) { return val / div; }
+
+// div, round-up
+template<typename T> static inline T div_ru (T val, T div)
+{
+  return expect_false (val < 0) ? - ((-val                ) / div) : (val + div - 1) / div;
+}
+// div, round-down
+template<typename T> static inline T div_rd (T val, T div)
+{
+  return expect_false (val < 0) ? - ((-val + (div - 1)    ) / div) : (val          ) / div;
+}
+
+// lerp* only work correctly for min_in < max_in
+// Linear intERPolate, scales val from min_in..max_in to min_out..max_out
+template<typename T>
+static inline T
+lerp (T val, T min_in, T max_in, T min_out, T max_out)
+{
+  return min_out + div   <T> ((val - min_in) * (max_out - min_out), max_in  - min_in);
+}
+
+// lerp, round-down
+template<typename T>
+static inline T
+lerp_rd (T val, T min_in, T max_in, T min_out, T max_out)
+{
+  return min_out + div_rd<T> ((val - min_in) * (max_out - min_out), max_in  - min_in);
+}
+
+// lerp, round-up
+template<typename T>
+static inline T
+lerp_ru (T val, T min_in, T max_in, T min_out, T max_out)
+{
+  return min_out + div_ru<T> ((val - min_in) * (max_out - min_out), max_in  - min_in);
+}
+
+// lots of stuff taken from FXT
+
+// Rotate c right by count bits (count is taken modulo 32); used in various places for checksumming.
+// TODO: that sucks, use a better checksum algo. Counts are masked so 0 and >= 32 never shift by the full width.
+static inline uint32_t
+rotate_right (uint32_t c, uint32_t count = 1)
+{
+  return (c << (-count & 31)) | (c >> (count & 31));
+}
+
+static inline uint32_t
+rotate_left (uint32_t c, uint32_t count = 1)
+{
+  return (c << (count & 31)) | (c >> (-count & 31)); // rotate left; counts masked so 0 and >= 32 never shift by 32
+}
+
+// Return abs (a - b), branch-free
+// Both a and b must not have the most significant bit set
+static inline uint32_t
+upos_abs_diff (uint32_t a, uint32_t b)
+{
+  int32_t d = b - a;  // fits into 31 bits + sign because of the precondition
+  int32_t m = d >> 31; // all ones if d < 0, else 0 - must be exactly 32 bits wide, "long" may be 64
+
+  return (d ^ m) - m; // negates d when it is negative, leaves it alone otherwise
+}
+
+// Both a and b must not have the most significant bit set
+static inline uint32_t
+upos_min (uint32_t a, uint32_t b)
+{
+  int32_t d = b - a;
+  d &= d >> 31;
+  return a + d;
+}
+
+// Both a and b must not have the most significant bit set
+static inline uint32_t
+upos_max (uint32_t a, uint32_t b)
+{
+  int32_t d = b - a;
+  d &= d >> 31;
+  return b - d;
+}
+
+// this is much faster than crossfire's original algorithm
 // on modern cpus
 inline int
 isqrt (int n)
@@ -53,15 +219,29 @@
   return (int)sqrtf ((float)n);
 }
 
+// this is kind of like the ^^ operator, if it existed, but without a sequence point.
+// handier than it looks, due to the implicit !! done on its arguments
+inline bool
+logical_xor (bool a, bool b)
+{
+  return a != b;
+}
+
+inline bool
+logical_implies (bool a, bool b)
+{
+  return a <= b;
+}
+
 // this is only twice as fast as naive sqrtf (dx*dy+dy*dy)
 #if 0
 // and has a max. error of 6 in the range -100..+100.
 #else
 // and has a max. error of 9 in the range -100..+100.
 #endif
-inline int 
+inline int
 idistance (int dx, int dy)
-{ 
+{
   unsigned int dx_ = abs (dx);
   unsigned int dy_ = abs (dy);
 
@@ -74,6 +254,26 @@
 #endif
 }
 
+// can be substantially faster than floor, if your value range allows for it
+template<typename T>
+inline T
+fastfloor (T x)
+{
+   return std::floor (x);
+}
+
+inline float
+fastfloor (float x)
+{
+  return sint32 (x) - (x < sint32 (x)); // truncate, step down only if that rounded up ("x < 0" mapped -2.0 to -3)
+}
+
+inline double
+fastfloor (double x)
+{
+  return sint64 (x) - (x < sint64 (x)); // truncate, step down only if that rounded up ("x < 0" mapped -2.0 to -3)
+}
+
 /*
  * absdir(int): Returns a number between 1 and 8, which represent
  * the "absolute" direction of a number (it actually takes care of
@@ -85,6 +285,58 @@
   return ((d - 1) & 7) + 1;
 }
 
+// avoid ctz name because netbsd or freebsd spams its namespace with it
+#if GCC_VERSION(3,4)
+static inline int least_significant_bit (uint32_t x)
+{
+  return __builtin_ctz (x);
+}
+#else
+int least_significant_bit (uint32_t x);
+#endif
+// loop header: runs the following statement once per set bit in mask, with idxvar = index of that bit
+#define for_all_bits_sparse_32(mask, idxvar)      \
+  for (uint32_t idxvar, mask_ = (mask);   \
+       mask_ && ((idxvar = least_significant_bit (mask_)), mask_ &= ~(uint32_t (1) << idxvar), 1);)
+
+extern ssize_t slice_alloc; // statistics
+
+void *salloc_ (int n)            throw (std::bad_alloc);
+void *salloc_ (int n, void *src) throw (std::bad_alloc);
+
+// strictly the same as g_slice_alloc, but never returns 0
+template<typename T>
+inline T *salloc (int n = 1)     throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T));              }
+
+// also copies src into the new area, like "memdup"
+// if src is 0, clears the memory
+template<typename T>
+inline T *salloc (int n, T *src) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), (void *)src); }
+
+// clears the memory
+template<typename T>
+inline T *salloc0(int n = 1)     throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), 0);           }
+
+// for symmetry
+template<typename T>
+inline void sfree (T *ptr, int n = 1) throw ()
+{
+  if (expect_true (ptr))
+    {
+      slice_alloc -= n * sizeof (T);
+      if (DEBUG_POISON) memset (ptr, DEBUG_POISON, n * sizeof (T));
+      g_slice_free1 (n * sizeof (T), (void *)ptr);
+    }
+}
+
+// nulls the pointer
+template<typename T>
+inline void sfree0 (T *&ptr, int n = 1) throw ()
+{
+  sfree<T> (ptr, n);
+  ptr = 0;
+}
+
 // makes dynamically allocated objects zero-initialised
 struct zero_initialised
 {
@@ -96,47 +348,53 @@
 
   void *operator new (size_t s)
   {
-    return g_slice_alloc0 (s);
+    return salloc0<char> (s);
   }
 
   void *operator new[] (size_t s)
   {
-    return g_slice_alloc0 (s);
+    return salloc0<char> (s);
   }
 
   void operator delete (void *p, size_t s)
   {
-    g_slice_free1 (s, p);
+    sfree ((char *)p, s);
   }
 
   void operator delete[] (void *p, size_t s)
   {
-    g_slice_free1 (s, p);
+    sfree ((char *)p, s);
   }
 };
 
-void *salloc_ (int n)            throw (std::bad_alloc);
-void *salloc_ (int n, void *src) throw (std::bad_alloc);
+// makes dynamically allocated objects use salloc/sfree (unlike zero_initialised, which uses salloc0)
+struct slice_allocated
+{
+  void *operator new (size_t s, void *p)
+  {
+    return p;
+  }
 
-// strictly the same as g_slice_alloc, but never returns 0
-template<typename T>
-inline T *salloc (int n = 1)     throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T));              }
+  void *operator new (size_t s)
+  {
+    return salloc<char> (s);
+  }
 
-// also copies src into the new area, like "memdup"
-// if src is 0, clears the memory
-template<typename T>
-inline T *salloc (int n, T *src) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), (void *)src); }
+  void *operator new[] (size_t s)
+  {
+    return salloc<char> (s);
+  }
 
-// clears the memory
-template<typename T>
-inline T *salloc0(int n = 1)     throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), 0);           }
+  void operator delete (void *p, size_t s)
+  {
+    sfree ((char *)p, s);
+  }
 
-// for symmetry
-template<typename T>
-inline void sfree (T *ptr, int n = 1) throw ()
-{
-  g_slice_free1 (n * sizeof (T), (void *)ptr);
-}
+  void operator delete[] (void *p, size_t s)
+  {
+    sfree ((char *)p, s);
+  }
+};
 
 // a STL-compatible allocator that uses g_slice
 // boy, this is verbose
@@ -151,14 +409,14 @@
   typedef const Tp &const_reference;
   typedef Tp value_type;
 
-  template <class U> 
+  template <class U>
   struct rebind
   {
     typedef slice_allocator<U> other;
   };
 
   slice_allocator () throw () { }
-  slice_allocator (const slice_allocator &o) throw () { }
+  slice_allocator (const slice_allocator &) throw () { }
   template<typename Tp2>
   slice_allocator (const slice_allocator<Tp2> &) throw () { }
 
@@ -177,7 +435,7 @@
     sfree<Tp> (p, n);
   }
 
-  size_type max_size ()const throw ()
+  size_type max_size () const throw ()
   {
     return size_t (-1) / sizeof (Tp);
   }
@@ -193,82 +451,131 @@
   }
 };
 
-// P. L'Ecuyer, “Maximally Equidistributed Combined Tausworthe Generators”, Mathematics of Computation, 65, 213 (1996), 203–213.
-// http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
-// http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
-struct tausworthe_random_generator
+// basically a memory area, but refcounted
+struct refcnt_buf
 {
-  // generator
-  uint32_t state [4];
+  char *data;
+
+  refcnt_buf (size_t size = 0);
+  refcnt_buf (void *data, size_t size);
 
-  void operator =(const tausworthe_random_generator &src)
+  refcnt_buf (const refcnt_buf &src)
   {
-    state [0] = src.state [0];
-    state [1] = src.state [1];
-    state [2] = src.state [2];
-    state [3] = src.state [3];
+    data = src.data;
+    inc ();
   }
 
-  void seed (uint32_t seed);
-  uint32_t next ();
+  ~refcnt_buf ();
 
-  // uniform distribution
-  uint32_t operator ()(uint32_t r_max)
+  refcnt_buf &operator =(const refcnt_buf &src);
+
+  operator char *()
+  {
+    return data;
+  }
+
+  size_t size () const
   {
-    return is_constant (r_max)
-             ? this->next () % r_max
-             : get_range (r_max);
+    return _size ();
   }
 
-  // return a number within (min .. max)
-  int operator () (int r_min, int r_max)
+protected:
+  enum {
+    overhead = sizeof (uint32_t) * 2
+  };
+
+  uint32_t &_size () const
   {
-    return is_constant (r_min) && is_constant (r_max)
-              ? r_min + (*this) (max (r_max - r_min + 1, 1))
-              : get_range (r_min, r_max);
+    return ((unsigned int *)data)[-2];
   }
 
-  double operator ()()
+  uint32_t &_refcnt () const
   {
-    return this->next () / (double)0xFFFFFFFFU;
+    return ((unsigned int *)data)[-1];
   }
 
-protected:
-  uint32_t get_range (uint32_t r_max);
-  int get_range (int r_min, int r_max);
+  void _alloc (uint32_t size)
+  {
+    data = ((char *)salloc<char> (size + overhead)) + overhead;
+    _size   () = size;
+    _refcnt () = 1;
+  }
+
+  void _dealloc ();
+
+  void inc ()
+  {
+    ++_refcnt ();
+  }
+
+  void dec ()
+  {
+    if (!--_refcnt ())
+      _dealloc ();
+  }
 };
 
-typedef tausworthe_random_generator rand_gen;
+INTERFACE_CLASS (attachable)
+struct refcnt_base
+{
+  typedef int refcnt_t;
+  mutable refcnt_t ACC (RW, refcnt);
+
+  MTH void refcnt_inc () const { ++refcnt; }
+  MTH void refcnt_dec () const { --refcnt; }
+
+  refcnt_base () : refcnt (0) { }
+};
 
-extern rand_gen rndm;
+// to avoid branches with more advanced compilers
+extern refcnt_base::refcnt_t refcnt_dummy;
 
 template<class T>
 struct refptr
 {
+  // address of p's refcnt if p is not null, otherwise of refcnt_dummy
+  refcnt_base::refcnt_t *refcnt_ref () { return p ? &p->refcnt : &refcnt_dummy; }
+
+  void refcnt_dec ()
+  {
+    if (!is_constant (p))
+      --*refcnt_ref ();
+    else if (p)
+      --p->refcnt;
+  }
+
+  void refcnt_inc ()
+  {
+    if (!is_constant (p))
+      ++*refcnt_ref ();
+    else if (p)
+      ++p->refcnt;
+  }
+
   T *p;
 
   refptr () : p(0) { }
-  refptr (const refptr<T> &p) : p(p.p) { if (p) p->refcnt_inc (); }
-  refptr (T *p) : p(p) { if (p) p->refcnt_inc (); }
-  ~refptr () { if (p) p->refcnt_dec (); }
+  refptr (const refptr<T> &p) : p(p.p) { refcnt_inc (); }
+  refptr (T *p) : p(p) { refcnt_inc (); }
+  ~refptr () { refcnt_dec (); }
 
   const refptr<T> &operator =(T *o)
   {
-    if (p) p->refcnt_dec ();
+    // if decrementing ever destroys we need to reverse the order here
+    refcnt_dec ();
     p = o;
-    if (p) p->refcnt_inc ();
-
+    refcnt_inc ();
     return *this;
   }
 
-  const refptr<T> &operator =(const refptr<T> o)
+  const refptr<T> &operator =(const refptr<T> &o)
   {
     *this = o.p;
     return *this;
   }
 
   T &operator * () const { return *p; }
-  T *operator ->() const { return p; }
+  T *operator ->() const { return  p; }
 
   operator T *() const { return p; }
 };
@@ -278,30 +585,48 @@
 typedef refptr<archetype> arch_ptr;
 typedef refptr<client>    client_ptr;
 typedef refptr<player>    player_ptr;
+typedef refptr<region>    region_ptr;
+
+#define STRHSH_NULL 2166136261
+
+static inline uint32_t
+strhsh (const char *s)
+{
+  // use FNV-1a hash (http://isthe.com/chongo/tech/comp/fnv/)
+  // it is about twice as fast as the one-at-a-time one,
+  // with good distribution.
+  // FNV-1a is faster on many cpus because the multiplication
+  // runs concurrently with the looping logic.
+  // we modify the hash a bit to improve its distribution
+  uint32_t hash = STRHSH_NULL;
+
+  while (*s)
+    hash = (hash ^ *s++) * 16777619U;
+
+  return hash ^ (hash >> 16);
+}
+
+static inline uint32_t
+memhsh (const char *s, size_t len)
+{
+  uint32_t hash = STRHSH_NULL;
+
+  while (len--)
+    hash = (hash ^ *s++) * 16777619U;
+
+  return hash;
+}
 
 struct str_hash
 {
   std::size_t operator ()(const char *s) const
   {
-    unsigned long hash = 0;
-
-    /* use the one-at-a-time hash function, which supposedly is
-     * better than the djb2-like one used by perl5.005, but
-     * certainly is better then the bug used here before.
-     * see http://burtleburtle.net/bob/hash/doobs.html
-     */
-    while (*s)
-      {
-        hash += *s++;
-        hash += hash << 10;
-        hash ^= hash >>  6;
-      }
-
-    hash += hash <<  3;
-    hash ^= hash >> 11;
-    hash += hash << 15;
+    return strhsh (s);
+  }
 
-    return hash;
+  std::size_t operator ()(const shstr &s) const
+  {
+    return strhsh (s);
   }
 };
 
@@ -313,6 +638,10 @@
   }
 };
 
+// Mostly the same as std::vector, but insert/erase can reorder
+// the elements, making append(=insert)/remove O(1) instead of O(n).
+//
+// NOTE: only some forms of erase are available
 template<class T>
 struct unordered_vector : std::vector<T, slice_allocator<T> >
 {
@@ -332,14 +661,46 @@
   }
 };
 
-template<class T, int T::* index>
+// This container blends advantages of linked lists
+// (efficiency) with vectors (random access) by
+// using an unordered vector and storing the vector
+// index inside the object.
+//
+// + memory-efficient on most 64 bit archs
+// + O(1) insert/remove
+// + free unique (but varying) id for inserted objects
+// + cache-friendly iteration
+// - only works for pointers to structs
+//
+// NOTE: only some forms of erase/insert are available
+typedef int object_vector_index;
+
+template<class T, object_vector_index T::*indexmember>
 struct object_vector : std::vector<T *, slice_allocator<T *> >
 {
+  typedef typename object_vector::iterator iterator;
+
+  bool contains (const T *obj) const
+  {
+    return obj->*indexmember;
+  }
+
+  iterator find (const T *obj)
+  {
+    return obj->*indexmember
+      ? this->begin () + obj->*indexmember - 1
+      : this->end ();
+  }
+
+  void push_back (T *obj)
+  {
+    std::vector<T *, slice_allocator<T *> >::push_back (obj);
+    obj->*indexmember = this->size ();
+  }
+
   void insert (T *obj)
   {
-    assert (!(obj->*index));
     push_back (obj);
-    obj->*index = this->size ();
   }
 
   void insert (T &obj)
@@ -349,14 +710,13 @@
 
   void erase (T *obj)
   {
-    assert (obj->*index);
-    int pos = obj->*index;
-    obj->*index = 0;
+    object_vector_index pos = obj->*indexmember;
+    obj->*indexmember = 0;
 
     if (pos < this->size ())
       {
         (*this)[pos - 1] = (*this)[this->size () - 1];
-        (*this)[pos - 1]->*index = pos;
+        (*this)[pos - 1]->*indexmember = pos;
       }
 
     this->pop_back ();
@@ -364,26 +724,147 @@
 
   void erase (T &obj)
   {
-    errase (&obj);
+    erase (&obj);
+  }
+};
+
+/////////////////////////////////////////////////////////////////////////////
+
+// something like a vector or stack, but without out of bounds checking:
+// a salloc'ed array of max elements of which the first size are in use
+template<typename T>
+struct fixed_stack
+{
+  T *data;
+  int size;
+  int max;
+
+  fixed_stack ()
+  : data (0), size (0), max (0) // declaration order; max must be defined, reset () and the destructor read it
+  {
+  }
+
+  fixed_stack (int max)
+  : data (0), size (0), max (max)
+  {
+    data = salloc<T> (max);
+  }
+
+  void reset (int new_max) // drops the old array and starts over with an empty one of new_max elements
+  {
+    sfree (data, max);
+    size = 0;
+    max = new_max;
+    data = salloc<T> (max);
+  }
+
+  void free () // releases the array early; nulling data keeps the destructor from freeing it again
+  {
+    sfree (data, max);
+    data = 0;
+  }
+
+  ~fixed_stack ()
+  {
+    sfree (data, max);
+  }
+
+  T &operator[](int idx) // unchecked element access
+  {
+    return data [idx];
+  }
+
+  void push (T v) // appends v, the caller guarantees size < max
+  {
+    data [size++] = v;
+  }
+
+  T &pop () // removes the top element and returns a reference to its (still allocated) slot
+  {
+    return data [--size];
+  }
+
+  T remove (int idx) // removes element idx by moving the top element into its place (order is not kept)
+  {
+    T v = data [idx];
+
+    data [idx] = data [--size];
+
+    return v;
+  }
+};
 
+/////////////////////////////////////////////////////////////////////////////
+
 // basically does what strncpy should do, but appends "..." to strings exceeding length
-void assign (char *dst, const char *src, int maxlen);
+// returns the number of bytes actually used (including \0)
+int assign (char *dst, const char *src, int maxsize);
 
 // type-safe version of assign
 template<int N>
-inline void assign (char (&dst)[N], const char *src)
+inline int assign (char (&dst)[N], const char *src)
 {
-  assign ((char *)&dst, src, N);
+  return assign ((char *)&dst, src, N);
 }
 
 typedef double tstamp;
 
-// return current time as timestampe
+// return current time as timestamp
 tstamp now ();
 
 int similar_direction (int a, int b);
 
+// like v?sprintf, but returns a "static" buffer
+char *vformat (const char *format, va_list ap);
+char *format (const char *format, ...) attribute ((format (printf, 1, 2)));
+
+// safety-check player input which will become object->msg
+bool msg_is_safe (const char *msg);
+
+/////////////////////////////////////////////////////////////////////////////
+// threads, very very thin wrappers around pthreads
+
+struct thread
+{
+  pthread_t id;
+
+  void start (void *(*start_routine)(void *), void *arg = 0);
+
+  void cancel ()
+  {
+    pthread_cancel (id);
+  }
+
+  void *join ()
+  {
+    void *ret;
+
+    if (pthread_join (id, &ret))
+      cleanup ("pthread_join failed", 1);
+
+    return ret;
+  }
+};
+
+// note that mutexes are not classes
+typedef pthread_mutex_t smutex;
+
+#if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)
+ #define SMUTEX_INITIALISER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#else
+ #define SMUTEX_INITIALISER PTHREAD_MUTEX_INITIALIZER
+#endif
+
+#define SMUTEX(name) smutex name = SMUTEX_INITIALISER
+#define SMUTEX_LOCK(name)   pthread_mutex_lock   (&(name))
+#define SMUTEX_UNLOCK(name) pthread_mutex_unlock (&(name))
+
+typedef pthread_cond_t scond;
+
+#define SCOND(name) scond name = PTHREAD_COND_INITIALIZER
+#define SCOND_SIGNAL(name)     pthread_cond_signal    (&(name))
+#define SCOND_BROADCAST(name)  pthread_cond_broadcast (&(name))
+#define SCOND_WAIT(name,mutex) pthread_cond_wait      (&(name), &(mutex))
+
 #endif