--- deliantra/server/include/util.h	2007/07/13 15:54:40	1.53
+++ deliantra/server/include/util.h	2018/12/20 04:40:15	1.132
@@ -1,48 +1,34 @@
 /*
- * This file is part of Crossfire TRT, the Roguelike Realtime MORPG.
- * 
- * Copyright (©) 2005,2006,2007 Marc Alexander Lehmann / Robin Redeker / the Crossfire TRT team
- * 
- * Crossfire TRT is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
+ * This file is part of Deliantra, the Roguelike Realtime MMORPG.
+ *
+ * Copyright (©) 2017,2018 Marc Alexander Lehmann / the Deliantra team
+ * Copyright (©) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016 Marc Alexander Lehmann / Robin Redeker / the Deliantra team
+ *
+ * Deliantra is free software: you can redistribute it and/or modify it under
+ * the terms of the Affero GNU General Public License as published by the
+ * Free Software Foundation, either version 3 of the License, or (at your
+ * option) any later version.
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- * 
- * The authors can be reached via e-mail to <crossfire@schmorp.de>
+ *
+ * You should have received a copy of the Affero GNU General Public License
+ * and the GNU General Public License along with this program. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * The authors can be reached via e-mail to <support@deliantra.net>
  */
 
 #ifndef UTIL_H__
 #define UTIL_H__
 
-//#define PREFER_MALLOC
+#define DEBUG_POISON 0x00 // poison memory before freeing it if != 0
+#define DEBUG_SALLOC  0   // add a debug wrapper around all sallocs
+#define PREFER_MALLOC 0   // use malloc and not the slice allocator
 
-#if __GNUC__ >= 3
-# define is_constant(c)             __builtin_constant_p (c)
-# define expect(expr,value)         __builtin_expect ((expr),(value))
-# define prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
-#else
-# define is_constant(c)             0
-# define expect(expr,value)         (expr)
-# define prefetch(addr,rw,locality)
-#endif
-
-#if __GNUC__ < 4 || (__GNUC__ == 4 || __GNUC_MINOR__ < 4)
-# define decltype(x) typeof(x)
-#endif
-
-// put into ifs if you are very sure that the expression
-// is mostly true or mosty false. note that these return
-// booleans, not the expression.
-#define expect_false(expr) expect ((expr) != 0, 0)
-#define expect_true(expr)  expect ((expr) != 0, 1)
+#include <pthread.h>
 
 #include <cstddef>
 #include <cmath>
@@ -51,13 +37,27 @@
 
 #include <glib.h>
 
+#include <flat_hash_map.hpp>
+
 #include <shstr.h>
 #include <traits.h>
 
-// use C0X decltype for auto declarations until ISO C++ sanctifies them (if ever)
-#define auto(var,expr) decltype(expr) var = (expr)
+#include "ecb.h"
+
+#if DEBUG_SALLOC
+# define g_slice_alloc0(s) debug_slice_alloc0(s)
+# define g_slice_alloc(s) debug_slice_alloc(s)
+# define g_slice_free1(s,p) debug_slice_free1(s,p)
+void *g_slice_alloc (unsigned long size);
+void *g_slice_alloc0 (unsigned long size);
+void g_slice_free1 (unsigned long size, void *ptr);
+#elif PREFER_MALLOC
+# define g_slice_alloc0(s) calloc (1, (s))
+# define g_slice_alloc(s) malloc ((s))
+# define g_slice_free1(s,p) free ((p))
+#endif
 
-// very ugly macro that basicaly declares and initialises a variable
+// very ugly macro that basically declares and initialises a variable
 // that is in scope for the next statement only
 // works only for stuff that can be assigned 0 and converts to false
 // (note: works great for pointers)
@@ -72,21 +72,87 @@
 #define IN_RANGE_EXC(val,beg,end) \
   ((unsigned int)(val) - (unsigned int)(beg) <  (unsigned int)(end) - (unsigned int)(beg))
 
-void fork_abort (const char *msg);
+ecb_cold void cleanup (const char *cause, bool make_core = false);
+ecb_cold void fork_abort (const char *msg);
 
 // rationale for using (U) not (T) is to reduce signed/unsigned issues,
 // as a is often a constant while b is the variable. it is still a bug, though.
-template<typename T, typename U> static inline T min (T a, U b) { return (U)a < b ? (U)a : b; }
-template<typename T, typename U> static inline T max (T a, U b) { return (U)a > b ? (U)a : b; }
+template<typename T, typename U> static inline T min (T a, U b) { return a < (T)b ? a : (T)b; }
+template<typename T, typename U> static inline T max (T a, U b) { return a > (T)b ? a : (T)b; }
 template<typename T, typename U, typename V> static inline T clamp (T v, U a, V b) { return v < (T)a ? (T)a : v >(T)b ? (T)b : v; }
 
+template<typename T, typename U> static inline void min_it (T &v, U m) { v = min (v, (T)m); }
+template<typename T, typename U> static inline void max_it (T &v, U m) { v = max (v, (T)m); }
+template<typename T, typename U, typename V> static inline void clamp_it (T &v, U a, V b) { v = clamp (v, (T)a, (T)b); }
+
 template<typename T, typename U> static inline void swap (T& a, U& b) { T t=a; a=(T)b; b=(U)t; }
 
+template<typename T, typename U, typename V> static inline T min (T a, U b, V c) { return min (a, min (b, c)); }
+template<typename T, typename U, typename V> static inline T max (T a, U b, V c) { return max (a, max (b, c)); }
+
+// sign returns -1 or +1
+template<typename T>
+static inline T sign (T v) { return v < 0 ? -1 : +1; }
+// the specialisations below rely on two's complement representation
+template<>
+inline sint8  sign (sint8  v) { return 1 - (sint8  (uint8  (v) >>  7) * 2); }
+template<>
+inline sint16 sign (sint16 v) { return 1 - (sint16 (uint16 (v) >> 15) * 2); }
+template<>
+inline sint32 sign (sint32 v) { return 1 - (sint32 (uint32 (v) >> 31) * 2); }
+
+// sign0 returns -1, 0 or +1
+template<typename T>
+static inline T sign0 (T v) { return v ? sign (v) : 0; }
+
+// NOTE: unlike std::copysign, this returns b if a > 0 and -b otherwise; the name clashes with C++0x
+template<typename T, typename U>
+static inline T copysign (T a, U b) { return a > 0 ? b : -b; }
+
+// div* only work correctly for div > 0
+// div, with correct rounding (< 0.5 downwards, >=0.5 upwards)
+template<typename T> static inline T div    (T val, T div)
+{
+  return ecb_expect_false (val < 0) ? - ((-val + (div - 1) / 2) / div) : (val + div / 2) / div;
+}
+
+template<> inline float  div (float  val, float  div) { return val / div; }
+template<> inline double div (double val, double div) { return val / div; }
+
+// div, round-up
+template<typename T> static inline T div_ru (T val, T div)
+{
+  return ecb_expect_false (val < 0) ? - ((-val                ) / div) : (val + div - 1) / div;
+}
+// div, round-down
+template<typename T> static inline T div_rd (T val, T div)
+{
+  return ecb_expect_false (val < 0) ? - ((-val + (div - 1)    ) / div) : (val          ) / div;
+}
+
+// lerp* only work correctly for min_in < max_in
+// Linear intERPolate, scales val from min_in..max_in to min_out..max_out
 template<typename T>
 static inline T
 lerp (T val, T min_in, T max_in, T min_out, T max_out)
 {
-  return (val - min_in) * (max_out - min_out) / (max_in - min_in) + min_out;
+  return min_out + div   <T> ((val - min_in) * (max_out - min_out), max_in  - min_in);
+}
+
+// lerp, round-down
+template<typename T>
+static inline T
+lerp_rd (T val, T min_in, T max_in, T min_out, T max_out)
+{
+  return min_out + div_rd<T> ((val - min_in) * (max_out - min_out), max_in  - min_in);
+}
+
+// lerp, round-up
+template<typename T>
+static inline T
+lerp_ru (T val, T min_in, T max_in, T min_out, T max_out)
+{
+  return min_out + div_ru<T> ((val - min_in) * (max_out - min_out), max_in  - min_in);
 }
 
 // lots of stuff taken from FXT
@@ -134,7 +200,7 @@
   return b - d;
 }
 
-// this is much faster than crossfires original algorithm
+// this is much faster than crossfire's original algorithm
 // on modern cpus
 inline int
 isqrt (int n)
@@ -142,15 +208,29 @@
   return (int)sqrtf ((float)n);
 }
 
+// this is like a logical ^^ operator, if it existed, but without a sequence point.
+// handier than it looks, due to the implicit !! (conversion to bool) done on its arguments
+inline bool
+logical_xor (bool a, bool b)
+{
+  return a != b;
+}
+
+inline bool
+logical_implies (bool a, bool b)
+{
+  return a <= b; // a => b: false only when a is true and b is false (false < true)
+}
+
 // this is only twice as fast as naive sqrtf (dx*dy+dy*dy)
 #if 0
 // and has a max. error of 6 in the range -100..+100.
 #else
 // and has a max. error of 9 in the range -100..+100.
 #endif
-inline int 
+inline int
 idistance (int dx, int dy)
-{ 
+{
   unsigned int dx_ = abs (dx);
   unsigned int dy_ = abs (dy);
 
@@ -163,6 +243,26 @@
 #endif
 }
 
+// can be substantially faster than floor, if your value range allows for it
+template<typename T>
+inline T
+fastfloor (T x)
+{
+   return std::floor (x);
+}
+
+inline float
+fastfloor (float x) // floor via integer truncation, x must fit into sint32
+{
+  return sint32(x) - (x < sint32(x)); // step down only if truncation rounded up, so negative integers stay exact
+}
+
+inline double
+fastfloor (double x) // floor via integer truncation, x must fit into sint64
+{
+  return sint64(x) - (x < sint64(x)); // step down only if truncation rounded up, so negative integers stay exact
+}
+
 /*
  * absdir(int): Returns a number between 1 and 8, which represent
  * the "absolute" direction of a number (it actually takes care of
@@ -174,6 +274,48 @@
   return ((d - 1) & 7) + 1;
 }
 
+#define for_all_bits_sparse_32(mask, idxvar) /* runs the body once per set bit of mask, idxvar = ecb_ctz32 of the bits not yet visited */ \
+  for (uint32_t idxvar, mask_ = (mask);   \
+       mask_ && ((idxvar = ecb_ctz32 (mask_)), mask_ &= ~((uint32_t)1 << idxvar), 1);)
+
+extern ssize_t slice_alloc; // statistics
+
+void *salloc_ (int n);
+void *salloc_ (int n, void *src);
+
+// strictly the same as g_slice_alloc, but never returns 0
+template<typename T>
+inline T *salloc (int n = 1)     { return (T *)salloc_ (n * sizeof (T));              }
+
+// also copies src into the new area, like "memdup"
+// if src is 0, clears the memory
+template<typename T>
+inline T *salloc (int n, T *src) { return (T *)salloc_ (n * sizeof (T), (void *)src); }
+
+// clears the memory
+template<typename T>
+inline T *salloc0(int n = 1)     { return (T *)salloc_ (n * sizeof (T), 0);           }
+
+// for symmetry with salloc: releases n objects of type T via g_slice_free1, null pointers are ignored
+template<typename T>
+inline void sfree (T *ptr, int n = 1) noexcept
+{
+  if (ecb_expect_true (ptr))
+    {
+      slice_alloc -= n * sizeof (T); // keep the allocation statistics up to date
+      if (DEBUG_POISON) memset (ptr, DEBUG_POISON, n * sizeof (T)); // overwrite the memory before freeing when poisoning is enabled
+      g_slice_free1 (n * sizeof (T), (void *)ptr);
+    }
+}
+
+// nulls the pointer
+template<typename T>
+inline void sfree0 (T *&ptr, int n = 1) noexcept
+{
+  sfree<T> (ptr, n);
+  ptr = 0;
+}
+
 // makes dynamically allocated objects zero-initialised
 struct zero_initialised
 {
@@ -185,183 +327,212 @@
 
   void *operator new (size_t s)
   {
-    return g_slice_alloc0 (s);
+    return salloc0<char> (s);
   }
 
   void *operator new[] (size_t s)
   {
-    return g_slice_alloc0 (s);
+    return salloc0<char> (s);
   }
 
   void operator delete (void *p, size_t s)
   {
-    g_slice_free1 (s, p);
+    sfree ((char *)p, s);
   }
 
   void operator delete[] (void *p, size_t s)
   {
-    g_slice_free1 (s, p);
+    sfree ((char *)p, s);
   }
 };
 
-void *salloc_ (int n)            throw (std::bad_alloc);
-void *salloc_ (int n, void *src) throw (std::bad_alloc);
+// makes dynamically allocated objects use the slice allocator (memory is NOT zero-initialised)
+struct slice_allocated
+{
+  void *operator new (size_t s, void *p)
+  {
+    return p;
+  }
 
-// strictly the same as g_slice_alloc, but never returns 0
-template<typename T>
-inline T *salloc (int n = 1)     throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T));              }
+  void *operator new (size_t s)
+  {
+    return salloc<char> (s);
+  }
 
-// also copies src into the new area, like "memdup"
-// if src is 0, clears the memory
-template<typename T>
-inline T *salloc (int n, T *src) throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), (void *)src); }
+  void *operator new[] (size_t s)
+  {
+    return salloc<char> (s);
+  }
 
-// clears the memory
-template<typename T>
-inline T *salloc0(int n = 1)     throw (std::bad_alloc) { return (T *)salloc_ (n * sizeof (T), 0);           }
+  void operator delete (void *p, size_t s)
+  {
+    sfree ((char *)p, s);
+  }
 
-// for symmetry
-template<typename T>
-inline void sfree (T *ptr, int n = 1) throw ()
-{
-#ifdef PREFER_MALLOC
-  free (ptr);
-#else
-  g_slice_free1 (n * sizeof (T), (void *)ptr);
-#endif
-}
+  void operator delete[] (void *p, size_t s)
+  {
+    sfree ((char *)p, s);
+  }
+};
 
 // a STL-compatible allocator that uses g_slice
-// boy, this is verbose
+// boy, this is much less verbose in newer C++ versions
 template<typename Tp>
 struct slice_allocator
 {
-  typedef size_t size_type;
-  typedef ptrdiff_t difference_type;
-  typedef Tp *pointer;
-  typedef const Tp *const_pointer;
-  typedef Tp &reference;
-  typedef const Tp &const_reference;
-  typedef Tp value_type;
-
-  template <class U> 
-  struct rebind
-  {
-    typedef slice_allocator<U> other;
-  };
+  using value_type = Tp;
 
-  slice_allocator () throw () { }
-  slice_allocator (const slice_allocator &o) throw () { }
-  template<typename Tp2>
-  slice_allocator (const slice_allocator<Tp2> &) throw () { }
+  slice_allocator () noexcept { }
+  template<class U> slice_allocator (const slice_allocator<U> &) noexcept {}
 
-  ~slice_allocator () { }
-
-  pointer address (reference x) const { return &x; }
-  const_pointer address (const_reference x) const { return &x; }
-
-  pointer allocate (size_type n, const_pointer = 0)
+  value_type *allocate (std::size_t n)
   {
     return salloc<Tp> (n);
   }
 
-  void deallocate (pointer p, size_type n)
+  void deallocate (value_type *p, std::size_t n)
   {
     sfree<Tp> (p, n);
   }
+};
+
+template<class T, class U>
+bool operator == (const slice_allocator<T> &, const slice_allocator<U> &) noexcept
+{
+    return true;
+}
+
+template<class T, class U>
+bool operator != (const slice_allocator<T> &x, const slice_allocator<U> &y) noexcept
+{
+    return !(x == y);
+}
+
+// basically a memory area, but refcounted
+struct refcnt_buf
+{
+  char *data;
 
-  size_type max_size ()const throw ()
+  refcnt_buf (size_t size = 0);
+  refcnt_buf (void *data, size_t size);
+
+  refcnt_buf (const refcnt_buf &src)
   {
-    return size_t (-1) / sizeof (Tp);
+    data = src.data;
+    inc ();
   }
 
-  void construct (pointer p, const Tp &val)
+  ~refcnt_buf ();
+
+  refcnt_buf &operator =(const refcnt_buf &src);
+
+  operator char *()
   {
-    ::new (p) Tp (val);
+    return data;
   }
 
-  void destroy (pointer p)
+  size_t size () const
   {
-    p->~Tp ();
+    return _size ();
   }
-};
 
-// P. L'Ecuyer, “Maximally Equidistributed Combined Tausworthe Generators”, Mathematics of Computation, 65, 213 (1996), 203–213.
-// http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
-// http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
-struct tausworthe_random_generator
-{
-  // generator
-  uint32_t state [4];
+protected:
+  enum {
+    overhead = sizeof (uint32_t) * 2
+  };
 
-  void operator =(const tausworthe_random_generator &src)
+  uint32_t &_size () const
   {
-    state [0] = src.state [0];
-    state [1] = src.state [1];
-    state [2] = src.state [2];
-    state [3] = src.state [3];
+    return ((unsigned int *)data)[-2];
   }
 
-  void seed (uint32_t seed);
-  uint32_t next ();
-
-  // uniform distribution
-  uint32_t operator ()(uint32_t num)
+  uint32_t &_refcnt () const
   {
-    return is_constant (num)
-             ? (next () * (uint64_t)num) >> 32U
-             : get_range (num);
+    return ((unsigned int *)data)[-1];
   }
 
-  // return a number within (min .. max)
-  int operator () (int r_min, int r_max)
+  void _alloc (uint32_t size)
   {
-    return is_constant (r_min) && is_constant (r_max) && r_min <= r_max
-              ? r_min + operator ()(r_max - r_min + 1)
-              : get_range (r_min, r_max);
+    data = ((char *)salloc<char> (size + overhead)) + overhead;
+    _size   () = size;
+    _refcnt () = 1;
   }
 
-  double operator ()()
+  void _dealloc ();
+
+  void inc ()
   {
-    return this->next () / (double)0xFFFFFFFFU;
+    ++_refcnt ();
   }
 
-protected:
-  uint32_t get_range (uint32_t r_max);
-  int get_range (int r_min, int r_max);
+  void dec ()
+  {
+    if (!--_refcnt ())
+      _dealloc ();
+  }
 };
 
-typedef tausworthe_random_generator rand_gen;
+INTERFACE_CLASS (attachable)
+struct refcnt_base
+{
+  typedef int refcnt_t;
+  mutable refcnt_t ACC (RW, refcnt);
 
-extern rand_gen rndm;
+  MTH void refcnt_inc () const { ++refcnt; }
+  MTH void refcnt_dec () const { --refcnt; }
+
+  refcnt_base () : refcnt (0) { }
+};
+
+// to avoid branches with more advanced compilers
+extern refcnt_base::refcnt_t refcnt_dummy;
 
 template<class T>
 struct refptr
 {
+  // address of p's refcnt if p is not null, otherwise of refcnt_dummy
+  refcnt_base::refcnt_t *refcnt_ref () { return p ? &p->refcnt : &refcnt_dummy; }
+
+  void refcnt_dec ()
+  {
+    if (!ecb_is_constant (p))
+      --*refcnt_ref ();
+    else if (p)
+      --p->refcnt;
+  }
+
+  void refcnt_inc ()
+  {
+    if (!ecb_is_constant (p))
+      ++*refcnt_ref ();
+    else if (p)
+      ++p->refcnt;
+  }
+
   T *p;
 
   refptr () : p(0) { }
-  refptr (const refptr<T> &p) : p(p.p) { if (p) p->refcnt_inc (); }
-  refptr (T *p) : p(p) { if (p) p->refcnt_inc (); }
-  ~refptr () { if (p) p->refcnt_dec (); }
+  refptr (const refptr<T> &p) : p(p.p) { refcnt_inc (); }
+  refptr (T *p) : p(p) { refcnt_inc (); }
+  ~refptr () { refcnt_dec (); }
 
   const refptr<T> &operator =(T *o)
   {
-    if (p) p->refcnt_dec ();
+    // if decrementing ever destroys we need to reverse the order here
+    refcnt_dec ();
     p = o;
-    if (p) p->refcnt_inc ();
-
+    refcnt_inc ();
     return *this;
   }
 
-  const refptr<T> &operator =(const refptr<T> o)
+  const refptr<T> &operator =(const refptr<T> &o)
   {
     *this = o.p;
     return *this;
   }
 
   T &operator * () const { return *p; }
-  T *operator ->() const { return p; }
+  T *operator ->() const { return  p; }
 
   operator T *() const { return p; }
 };
@@ -371,31 +542,51 @@
 typedef refptr<archetype> arch_ptr;
 typedef refptr<client>    client_ptr;
 typedef refptr<player>    player_ptr;
+typedef refptr<region>    region_ptr;
+
+#define STRHSH_NULL 2166136261
+
+static inline uint32_t
+strhsh (const char *s)
+{
+  // use FNV-1a hash (http://isthe.com/chongo/tech/comp/fnv/)
+  // it is about twice as fast as the one-at-a-time one,
+  // with good distribution.
+  // FNV-1a is faster on many cpus because the multiplication
+  // runs concurrently with the looping logic.
+  // we modify the hash a bit to improve its distribution
+  uint32_t hash = STRHSH_NULL;
+
+  while (*s)
+    hash = (hash ^ *s++) * 16777619U;
+
+  return hash ^ (hash >> 16);
+}
+
+static inline uint32_t
+memhsh (const char *s, size_t len)
+{
+  uint32_t hash = STRHSH_NULL;
+
+  while (len--)
+    hash = (hash ^ *s++) * 16777619U;
+
+  return hash;
+}
 
 struct str_hash
 {
   std::size_t operator ()(const char *s) const
   {
-    unsigned long hash = 0;
-
-    /* use the one-at-a-time hash function, which supposedly is
-     * better than the djb2-like one used by perl5.005, but
-     * certainly is better then the bug used here before.
-     * see http://burtleburtle.net/bob/hash/doobs.html
-     */
-    while (*s)
-      {
-        hash += *s++;
-        hash += hash << 10;
-        hash ^= hash >>  6;
-      }
-
-    hash += hash <<  3;
-    hash ^= hash >> 11;
-    hash += hash << 15;
+    return strhsh (s);
+  }
 
-    return hash;
+  std::size_t operator ()(const shstr &s) const
+  {
+    return strhsh (s);
   }
+
+  typedef ska::power_of_two_hash_policy hash_policy;
 };
 
 struct str_equal
@@ -431,7 +622,7 @@
 
 // This container blends advantages of linked lists
 // (efficiency) with vectors (random access) by
-// by using an unordered vector and storing the vector
+// using an unordered vector and storing the vector
 // index inside the object.
 //
 // + memory-efficient on most 64 bit archs
@@ -478,7 +669,7 @@
 
   void erase (T *obj)
   {
-    unsigned int pos = obj->*indexmember;
+    object_vector_index pos = obj->*indexmember;
     obj->*indexmember = 0;
 
     if (pos < this->size ())
@@ -496,25 +687,143 @@
   }
 };
 
+/////////////////////////////////////////////////////////////////////////////
+
+// something like a vector or stack, but without
+// out of bounds checking
+template<typename T>
+struct fixed_stack // NOTE(review): implicitly copyable, a copy would sfree the same data twice
+{
+  T *data;  // element storage, obtained from salloc<T>
+  int size; // number of elements currently stored
+  int max;  // allocated capacity, handed back to sfree together with data
+
+  fixed_stack ()
+  : data (0), size (0), max (0) // max is read by reset/free/the destructor, so it must not stay uninitialised
+  {
+  }
+
+  fixed_stack (int max)
+  : size (0), max (max)
+  {
+    data = salloc<T> (max);
+  }
+
+  void reset (int new_max) // drops all contents and reallocates for new_max elements
+  {
+    sfree (data, max);
+    size = 0;
+    max = new_max;
+    data = salloc<T> (max);
+  }
+
+  void free () // releases the storage early, the destructor then has nothing left to free
+  {
+    sfree (data, max);
+    data = 0;
+  }
+
+  ~fixed_stack ()
+  {
+    sfree (data, max);
+  }
+
+  T &operator[](int idx) // no bounds checking
+  {
+    return data [idx];
+  }
+
+  void push (T v) // no check against max
+  {
+    data [size++] = v;
+  }
+
+  T &pop () // no check for an empty stack
+  {
+    return data [--size];
+  }
+
+  T remove (int idx) // unordered removal: the last element is moved into the freed slot
+  {
+    T v = data [idx];
+
+    data [idx] = data [--size];
+
+    return v;
+  }
+};
+
+/////////////////////////////////////////////////////////////////////////////
+
 // basically does what strncpy should do, but appends "..." to strings exceeding length
-void assign (char *dst, const char *src, int maxlen);
+// returns the number of bytes actually used (including \0)
+int assign (char *dst, const char *src, int maxsize);
 
 // type-safe version of assign
 template<int N>
-inline void assign (char (&dst)[N], const char *src)
+inline int assign (char (&dst)[N], const char *src)
 {
-  assign ((char *)&dst, src, N);
+  return assign ((char *)&dst, src, N);
 }
 
 typedef double tstamp;
 
-// return current time as timestampe
+// return current time as timestamp
 tstamp now ();
 
 int similar_direction (int a, int b);
 
-// like printf, but returns a std::string
-const std::string format (const char *format, ...);
+// like v?sprintf, but returns a "static" buffer
+char *vformat (const char *format, va_list ap);
+char *format (const char *format, ...) ecb_attribute ((format (printf, 1, 2)));
+
+// safety-check player input which will become object->msg
+bool msg_is_safe (const char *msg);
+
+/////////////////////////////////////////////////////////////////////////////
+// threads, very very thin wrappers around pthreads
+
+struct thread
+{
+  pthread_t id; // presumably filled in by start (), which is defined elsewhere - TODO confirm
+
+  void start (void *(*start_routine)(void *), void *arg = 0);
+
+  void cancel () // the result of pthread_cancel is ignored
+  {
+    pthread_cancel (id);
+  }
+
+  void *join () // waits for the thread and returns the value pthread_join stored for it
+  {
+    void *ret;
+
+    if (pthread_join (id, &ret)) // non-zero means pthread_join failed
+      cleanup ("pthread_join failed", 1); // second argument is make_core
+
+    return ret;
+  }
+};
+
+// note that mutexes are not classes
+typedef pthread_mutex_t smutex;
+
+#if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)
+ #define SMUTEX_INITIALISER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#else
+ #define SMUTEX_INITIALISER PTHREAD_MUTEX_INITIALIZER
+#endif
+
+#define SMUTEX(name) smutex name = SMUTEX_INITIALISER
+#define SMUTEX_LOCK(name)   pthread_mutex_lock   (&(name))
+#define SMUTEX_UNLOCK(name) pthread_mutex_unlock (&(name))
+
+typedef pthread_cond_t scond;
+
+#define SCOND(name) scond name = PTHREAD_COND_INITIALIZER
+#define SCOND_SIGNAL(name)     pthread_cond_signal    (&(name))
+#define SCOND_BROADCAST(name)  pthread_cond_broadcast (&(name))
+#define SCOND_WAIT(name,mutex) pthread_cond_wait      (&(name), &(mutex))
 
 #endif