--- CBOR-XS/ecb.h	2021/03/19 17:30:27	1.17
+++ CBOR-XS/ecb.h	2021/10/21 01:14:58	1.18
@@ -1,7 +1,7 @@
 /*
  * libecb - http://software.schmorp.de/pkg/libecb
  *
- * Copyright (©) 2009-2015,2018-2020 Marc Alexander Lehmann <libecb@schmorp.de>
+ * Copyright (©) 2009-2015,2018-2021 Marc Alexander Lehmann <libecb@schmorp.de>
  * Copyright (©) 2011 Emanuele Giaquinta
  * All rights reserved.
  *
@@ -42,7 +42,7 @@
 #define ECB_H
 
 /* 16 bits major, 16 bits minor */
-#define ECB_VERSION 0x00010008
+#define ECB_VERSION 0x00010009
 
 #include <string.h> /* for memcpy */
 
@@ -106,6 +106,12 @@
   #endif
 #endif
 
+#if ECB_PTRSIZE >= 8 || ECB_AMD64_X32 /* NOTE(review): presumably 8 byte pointers or the amd64 x32 ABI - both macros are defined elsewhere, confirm */
+  #define ECB_64BIT_NATIVE 1 /* selects the 64 bit code paths of the ecb_i2a_* functions */
+#else
+  #define ECB_64BIT_NATIVE 0 /* selects the code paths that mostly use 32 bit arithmetic */
+#endif
+
 /* many compilers define _GNUC_ to some versions but then only implement
  * what their idiot authors think are the "more important" extensions,
  * causing enormous grief in return for some better fake benchmark numbers.
@@ -244,6 +250,7 @@
     #define ECB_MEMORY_FENCE         __atomic_thread_fence (__ATOMIC_SEQ_CST)
     #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE)
     #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE)
+    #undef ECB_MEMORY_FENCE_RELAXED
     #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED)
 
   #elif ECB_CLANG_EXTENSION(c_atomic)
@@ -251,6 +258,7 @@
     #define ECB_MEMORY_FENCE         __c11_atomic_thread_fence (__ATOMIC_SEQ_CST)
     #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE)
     #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE)
+    #undef ECB_MEMORY_FENCE_RELAXED
     #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED)
 
   #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__
@@ -940,6 +948,228 @@
 }
 
 /*******************************************************************************/
+/* fast integer to ascii */
+
+/*
+ * This code is pretty complicated because it is general. The idea behind it,
+ * however, is pretty simple: first, the number is multiplied with a scaling
+ * factor (2**bits / 10**(digits-1)) to convert the integer into a fixed-point
+ * number with the first digit in the upper bits.
+ * Then this digit is converted to text and masked out. The resulting number
+ * is then multiplied by 10, by multiplying the fixed point representation
+ * by 5 and shifting the (binary) decimal point one to the right, so a 4.28
+ * format becomes 5.27, 6.26 and so on.
+ * The rest involves only advancing the pointer if we already generated a
+ * non-zero digit, so leading zeroes are overwritten.
+ */
+
+// yields a value of type "type" with the lowest "bits" bits set, "bits" must be smaller than the bit width of "type"
+#define ecb_i2a_mask(type,bits) ((((type)1) << (bits)) - 1)
+
+// output a single digit. maskvalue is 10**digitidx. uses and updates x, ptr and nz from the expanding scope
+#define ecb_i2a_digit(type,bits,digitmask,maskvalue,digitidx) \
+  if (digitmask >= maskvalue) /* both constant, skips digit positions beyond the requested number of digits */ \
+    { \
+      char digit = x >> (bits - digitidx); /* the integer part of the fixed-point value is the topmost digit */ \
+      *ptr = digit + '0'; /* always stored, but only kept if ptr is advanced below */ \
+      nz = (digitmask == maskvalue) || nz || digit; /* first term == always output last digit */ \
+      ptr += nz; /* advance only once a non-zero digit was seen, so leading zeroes get overwritten */ \
+      x = (x & ecb_i2a_mask (type, bits - digitidx)) * 5; /* mask out the digit, then *10: *5 here, and the next shift is one bit smaller */ \
+    }
+
+// defines ecb_i2a_<suffix> (ptr, u): converts u to fixed point format and multiplies out digits, highest first, returns the advanced ptr
+// requires magic constants: digitmask = 10**(max. digits - 1), bits = number of bits after the decimal point, lz = 1 to keep leading zeroes
+#define ecb_i2a_def(suffix,ptr,v,type,bits,digitmask,lz) \
+ecb_inline char *ecb_i2a_ ## suffix (char *ptr, uint32_t u) /* the v macro argument is unused, the value is always called u */ \
+{ \
+  char nz = lz; /* non-zero digit seen? starting with 1 keeps leading zeroes */ \
+  /* convert to x.bits fixed-point, the scale factor is 2**bits / digitmask, rounded up */ \
+  type x = u * ((ecb_i2a_mask (type, bits) + digitmask) / digitmask); \
+  /* output up to 10 digits, positions above digitmask are skipped by the constant test in ecb_i2a_digit */ \
+  ecb_i2a_digit (type,bits,digitmask,          1, 0); \
+  ecb_i2a_digit (type,bits,digitmask,         10, 1); \
+  ecb_i2a_digit (type,bits,digitmask,        100, 2); \
+  ecb_i2a_digit (type,bits,digitmask,       1000, 3); \
+  ecb_i2a_digit (type,bits,digitmask,      10000, 4); \
+  ecb_i2a_digit (type,bits,digitmask,     100000, 5); \
+  ecb_i2a_digit (type,bits,digitmask,    1000000, 6); \
+  ecb_i2a_digit (type,bits,digitmask,   10000000, 7); \
+  ecb_i2a_digit (type,bits,digitmask,  100000000, 8); \
+  ecb_i2a_digit (type,bits,digitmask, 1000000000, 9); \
+  return ptr; \
+}
+
+// predefined versions of the above, for various digits
+// ecb_i2a_xN = almost N digits, largest accepted value is ECB_I2A_MAX_XN
+// ecb_i2a_N = up to N digits, leading zeroes suppressed
+// ecb_i2a_0N = exactly N digits, including leading zeroes
+
+// non-leading-zero versions, limited range. arguments are: suffix, ptr, v, type, bits, digitmask, lz
+#define ECB_I2A_MAX_X5       59074 // largest value (inclusive) that may be passed to ecb_i2a_x5
+#define ECB_I2A_MAX_X10 2932500665 // largest value (inclusive) that may be passed to ecb_i2a_x10
+ecb_i2a_def ( x5, ptr, v, uint32_t, 26,      10000, 0)
+ecb_i2a_def (x10, ptr, v, uint64_t, 60, 1000000000, 0)
+
+// non-leading zero versions, all digits, 4 and 9 are optimal for 32/64 bit
+ecb_i2a_def ( 2, ptr, v, uint32_t, 10,          10, 0)
+ecb_i2a_def ( 3, ptr, v, uint32_t, 12,         100, 0)
+ecb_i2a_def ( 4, ptr, v, uint32_t, 26,        1000, 0)
+ecb_i2a_def ( 5, ptr, v, uint64_t, 30,       10000, 0)
+ecb_i2a_def ( 6, ptr, v, uint64_t, 36,      100000, 0)
+ecb_i2a_def ( 7, ptr, v, uint64_t, 44,     1000000, 0)
+ecb_i2a_def ( 8, ptr, v, uint64_t, 50,    10000000, 0)
+ecb_i2a_def ( 9, ptr, v, uint64_t, 56,   100000000, 0)
+
+// leading-zero versions, all digits, 04 and 09 are optimal for 32/64 bit
+ecb_i2a_def (02, ptr, v, uint32_t, 10,          10, 1)
+ecb_i2a_def (03, ptr, v, uint32_t, 12,         100, 1)
+ecb_i2a_def (04, ptr, v, uint32_t, 26,        1000, 1)
+ecb_i2a_def (05, ptr, v, uint64_t, 30,       10000, 1)
+ecb_i2a_def (06, ptr, v, uint64_t, 36,      100000, 1)
+ecb_i2a_def (07, ptr, v, uint64_t, 44,     1000000, 1)
+ecb_i2a_def (08, ptr, v, uint64_t, 50,    10000000, 1)
+ecb_i2a_def (09, ptr, v, uint64_t, 56,   100000000, 1)
+
+#define ECB_I2A_I32_DIGITS 11 // sign plus 10 digits (-2147483648)
+#define ECB_I2A_U32_DIGITS 10 // 10 digits (4294967295)
+#define ECB_I2A_I64_DIGITS 20 // sign plus 19 digits (-9223372036854775808)
+#define ECB_I2A_U64_DIGITS 21 // NOTE(review): 18446744073709551615 has only 20 digits, so this leaves one spare - confirm intent
+#define ECB_I2A_MAX_DIGITS 21 // the largest of the values above
+
+ecb_inline char * // writes u in decimal at ptr (no terminating 0 is stored), returns the position after the last digit
+ecb_i2a_u32 (char *ptr, uint32_t u)
+{
+  #if ECB_64BIT_NATIVE
+    if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
+      ptr = ecb_i2a_x10 (ptr, u);
+    else // x10 almost, but not fully, covers 32 bit
+      {
+        uint32_t u1 = u % 1000000000; // low 9 digits
+        uint32_t u2 = u / 1000000000; // u > ECB_I2A_MAX_X10 here, so this is a single digit (2..4)
+
+        *ptr++ = u2 + '0';
+        ptr = ecb_i2a_09 (ptr, u1);
+      }
+  #else
+    if (ecb_expect_true (u <= ECB_I2A_MAX_X5))
+      ptr = ecb_i2a_x5 (ptr, u); // the result must be kept, otherwise the unadvanced ptr would be returned
+    else if (ecb_expect_true (u <= ECB_I2A_MAX_X5 * 10000))
+      {
+        uint32_t u1 = u % 10000; // low 4 digits
+        uint32_t u2 = u / 10000; // high part, at most ECB_I2A_MAX_X5
+
+        ptr = ecb_i2a_x5 (ptr, u2);
+        ptr = ecb_i2a_04 (ptr, u1);
+      }
+    else
+      {
+        uint32_t u1 = u  % 10000; // low 4 digits
+        uint32_t ua = u  / 10000;
+        uint32_t u2 = ua % 10000; // middle 4 digits
+        uint32_t u3 = ua / 10000; // top part, at most two digits for a 32 bit value
+
+        ptr = ecb_i2a_2  (ptr, u3);
+        ptr = ecb_i2a_04 (ptr, u2);
+        ptr = ecb_i2a_04 (ptr, u1);
+      }
+  #endif
+
+  return ptr;
+}
+
+ecb_inline char * // writes v in decimal at ptr, preceded by '-' if negative, returns the position after the last digit
+ecb_i2a_i32 (char *ptr, int32_t v)
+{
+  *ptr = '-'; ptr += v < 0; // always store the sign, but only keep it (advance) for negative values
+  uint32_t u = v < 0 ? -(uint32_t)v : v; // magnitude, negated in unsigned arithmetic so the most negative value works, too
+
+  #if ECB_64BIT_NATIVE
+    ptr = ecb_i2a_x10 (ptr, u); // x10 fully covers 31 bit: u <= 2**31 here, which is below ECB_I2A_MAX_X10
+  #else
+    ptr = ecb_i2a_u32 (ptr, u);
+  #endif
+
+  return ptr;
+}
+
+ecb_inline char * // writes u in decimal at ptr (no terminating 0 is stored), returns the position after the last digit
+ecb_i2a_u64 (char *ptr, uint64_t u)
+{
+  #if ECB_64BIT_NATIVE
+    if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
+      ptr = ecb_i2a_x10 (ptr, u);
+    else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000))
+      {
+        uint64_t u1 = u % 1000000000; // low 9 digits
+        uint64_t u2 = u / 1000000000; // high part, at most ECB_I2A_MAX_X10
+
+        ptr = ecb_i2a_x10 (ptr, u2);
+        ptr = ecb_i2a_09  (ptr, u1);
+      }
+    else
+      {
+        uint64_t u1 = u  % 1000000000; // low 9 digits
+        uint64_t ua = u  / 1000000000;
+        uint64_t u2 = ua % 1000000000; // middle 9 digits
+        uint64_t u3 = ua / 1000000000; // top part, at most two digits for a 64 bit value
+
+        ptr = ecb_i2a_2  (ptr, u3);
+        ptr = ecb_i2a_09 (ptr, u2);
+        ptr = ecb_i2a_09 (ptr, u1);
+      }
+  #else
+    if (ecb_expect_true (u <= ECB_I2A_MAX_X5))
+      ptr = ecb_i2a_x5 (ptr, u);
+    else
+      {
+        uint64_t u1 = u % 10000; // low 4 digits
+        uint64_t u2 = u / 10000; // everything above the low 4 digits
+
+        ptr = ecb_i2a_u64 (ptr, u2); // recurse, so the higher digits are output first
+        ptr = ecb_i2a_04 (ptr, u1);
+      }
+  #endif
+
+  return ptr;
+}
+
+ecb_inline char * // writes v in decimal at ptr, preceded by '-' if negative, returns the position after the last digit
+ecb_i2a_i64 (char *ptr, int64_t v)
+{
+  *ptr = '-'; ptr += v < 0; // always store the sign, but only keep it (advance) for negative values
+  uint64_t u = v < 0 ? -(uint64_t)v : v; // magnitude, negated in unsigned arithmetic so the most negative value works, too
+
+  #if ECB_64BIT_NATIVE
+    if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
+      ptr = ecb_i2a_x10 (ptr, u);
+    else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000))
+      {
+        uint64_t u1 = u % 1000000000; // low 9 digits
+        uint64_t u2 = u / 1000000000; // high part, at most ECB_I2A_MAX_X10
+
+        ptr = ecb_i2a_x10 (ptr, u2);
+        ptr = ecb_i2a_09  (ptr, u1);
+      }
+    else
+      {
+        uint64_t u1 = u  % 1000000000; // low 9 digits
+        uint64_t ua = u  / 1000000000;
+        uint64_t u2 = ua % 1000000000; // middle 9 digits
+        uint64_t u3 = ua / 1000000000; // top part
+
+        // 2**63 is 19 digits, so the top is exactly one digit
+        *ptr++ = u3 + '0';
+        ptr = ecb_i2a_09 (ptr, u2);
+        ptr = ecb_i2a_09 (ptr, u1);
+      }
+  #else
+    ptr = ecb_i2a_u64 (ptr, u);
+  #endif
+
+  return ptr;
+}
+
+/*******************************************************************************/
 /* floating point stuff, can be disabled by defining ECB_NO_LIBM */
 
 /* basically, everything uses "ieee pure-endian" floating point numbers */