JSON-XS/XS.xs

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "assert.h"
#include "string.h"
#include "stdlib.h"

// option flags; they are OR'ed into the UV that SvJSON () hands out
#define F_ASCII        0x00000001
#define F_UTF8         0x00000002
#define F_INDENT       0x00000004
#define F_CANONICAL    0x00000008
#define F_SPACE_BEFORE 0x00000010
#define F_SPACE_AFTER  0x00000020
#define F_JSON_RPC     0x00000040
#define F_ALLOW_NONREF 0x00000080
#define F_SHRINK       0x00000100

// parenthesised so the expansion stays a single operand when it is used
// inside a larger expression such as (flags & F_PRETTY) or ~F_PRETTY
#define F_PRETTY    (F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER)
#define F_DEFAULT   0

#define INIT_SIZE   32 // initial scalar size to be allocated

// brackets for multi-statement macros: SB ... SE expands to do { ... } while (0)
#define SB do {
#define SE } while (0)

// stash of the JSON::XS package; assigned in BOOT, compared against in SvJSON ()
static HV *json_stash;

// structure used for encoding JSON
// one instance is set up by encode_json () and handed to every encode_* helper
typedef struct
{
  char *cur;  // next byte to be written inside the buffer of sv
  STRLEN len; // SvLEN (sv) -- NOTE(review): no function visible here reads or writes this field
  char *end;  // SvEND (sv) initially; need () resets it to SvPVX (sv) + SvLEN (sv)
  SV *sv;     // scalar that accumulates the JSON text
  UV flags;   // F_* option bits
  int max_recurse; // decremented for every reference encode_sv () follows; it croaks when this hits zero
  int indent; // current nesting level, INDENT emits three spaces per level
} enc_t;

// structure used for decoding JSON
// one instance is set up by decode_json () and handed to every decode_* helper
typedef struct
{
  char *cur;       // current read position in the input string
  char *end;       // SvEND of the input string
  const char *err; // message stored by the ERR macro before jumping to "fail"
  UV flags;        // F_* option bits
} dec_t;

// Check that sv is a reference to an object blessed into json_stash and
// return a pointer to the UV slot of the referenced scalar (the flag word).
// Croaks with "object is not of type JSON::XS" for anything else.
static UV *
SvJSON (SV *sv)
{
  SV *obj;

  if (!SvROK (sv))
    croak ("object is not of type JSON::XS");

  obj = SvRV (sv);

  if (!SvOBJECT (obj) || SvSTASH (obj) != json_stash)
    croak ("object is not of type JSON::XS");

  return &SvUVX (obj);
}

// Try to make sv smaller: attempt a UTF-8 downgrade (second argument 1, so
// a failure is tolerated) and, where the perl headers provide
// SvPV_shrink_to_cur, shrink the string buffer as well.
static void
shrink (SV *sv)
{
  sv_utf8_downgrade (sv, 1);
#ifdef SvPV_shrink_to_cur
  SvPV_shrink_to_cur (sv);
#endif
}

/////////////////////////////////////////////////////////////////////////////

// Make sure strictly more than len bytes are available at enc->cur,
// growing enc->sv when necessary. Growing may move the buffer, so cur
// and end are recomputed from the (possibly new) string pointer.
static void
need (enc_t *enc, STRLEN len)
{
  STRLEN used;

  if (enc->cur + len < enc->end)
    return; // still enough room

  used = enc->cur - SvPVX (enc->sv);

  SvGROW (enc->sv, used + len + 1);

  enc->cur = SvPVX (enc->sv) + used;
  enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
}

// Append the single byte ch to the output, growing the buffer if needed.
static void
encode_ch (enc_t *enc, char ch)
{
  need (enc, 1);
  *enc->cur++ = ch;
}

// Append str/len to the output as the *contents* of a JSON string; the
// callers emit the surrounding quotes themselves.
//
//   enc      encoder state
//   str/len  bytes to encode
//   is_utf8  non-zero when str holds UTF-8 encoded characters, zero when
//            every byte is a character of its own
//
// '"' and '\\' are backslash-escaped, \b \t \n \f \r use their short
// escapes, all other characters below 0x80 that are not plain printable
// become \uXXXX. Characters >= 0x80 become \uXXXX (a surrogate pair
// above 0xFFFF) when F_ASCII is set and are written as UTF-8 otherwise.
// Croaks on malformed UTF-8 input and on codepoints above 0x10FFFF.
static void
encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
{
  char *end = str + len;

  // reserve one output byte per input byte up front; every branch that
  // writes more than that bumps len and calls need () again
  need (enc, len);

  while (str < end)
    {
      unsigned char ch = *(unsigned char *)str;

      if (ch >= 0x20 && ch < 0x80) // most common case
        {
          if (ch == '"') // but with slow exceptions
            {
              need (enc, len += 1);
              *enc->cur++ = '\\';
              *enc->cur++ = '"';
            }
          else if (ch == '\\')
            {
              need (enc, len += 1);
              *enc->cur++ = '\\';
              *enc->cur++ = '\\';
            }
          else
            *enc->cur++ = ch;

          ++str;
        }
      else
        {
          switch (ch)
            {
              case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
              case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
              case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
              case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
              case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;

              default:
                {
                  STRLEN clen;
                  UV uch;

                  if (is_utf8)
                    {
                      uch = utf8n_to_uvuni ((U8 *)str, end - str, &clen, UTF8_CHECK_ONLY);
                      if (clen == (STRLEN)-1)
                        croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
                    }
                  else
                    {
                      uch = ch;
                      clen = 1;
                    }

                  if (uch > 0x10FFFFUL)
                    croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);

                  if (uch < 0x80 || enc->flags & F_ASCII)
                    {
                      if (uch > 0xFFFFUL)
                        {
                          // 12 characters of output plus the NUL sprintf appends
                          need (enc, len += 11);
                          // %04x consumes an unsigned int, a UV may be wider:
                          // convert explicitly so format and arguments agree
                          sprintf (enc->cur, "\\u%04x\\u%04x",
                                   (unsigned int)((uch - 0x10000) / 0x400 + 0xD800),
                                   (unsigned int)((uch - 0x10000) % 0x400 + 0xDC00));
                          enc->cur += 12;
                        }
                      else
                        {
                          static const char hexdigit [16] = "0123456789abcdef";
                          need (enc, len += 5);
                          *enc->cur++ = '\\';
                          *enc->cur++ = 'u';
                          *enc->cur++ = hexdigit [ uch >> 12      ];
                          *enc->cur++ = hexdigit [(uch >>  8) & 15];
                          *enc->cur++ = hexdigit [(uch >>  4) & 15];
                          *enc->cur++ = hexdigit [(uch >>  0) & 15];
                        }

                      str += clen;
                    }
                  else if (is_utf8)
                    {
                      // input is already UTF-8: copy the validated sequence verbatim
                      need (enc, len += clen);
                      do
                        {
                          *enc->cur++ = *str++;
                        }
                      while (--clen);
                    }
                  else
                    {
                      // single byte >= 0x80 in a non-UTF-8 string: write it out as UTF-8
                      need (enc, len += 10); // never more than 11 bytes needed
                      enc->cur = (char *)uvuni_to_utf8_flags ((U8 *)enc->cur, uch, 0);
                      ++str;
                    }
                }
            }
        }

      --len;
    }
}

// Output formatting helpers. All of them expect a local variable `enc'
// (enc_t *) to be in scope at the point of use.

// when F_INDENT is set, emit enc->indent * 3 spaces
#define INDENT SB \
  if (enc->flags & F_INDENT)            \
    {                                   \
      int i_;                           \
      need (enc, enc->indent);          \
      for (i_ = enc->indent * 3; i_--; )\
        encode_ch (enc, ' ');           \
    }                                   \
  SE

// emit a single space
#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE
// emit a newline, but only when F_INDENT is set
#define NL    SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE
// emit the element separator: ',' followed by a newline when indenting,
// otherwise by a space when F_SPACE_AFTER is set
#define COMMA SB \
  encode_ch (enc, ',');                 \
  if (enc->flags & F_INDENT)            \
    NL;                                 \
  else if (enc->flags & F_SPACE_AFTER)  \
    SPACE;                              \
  SE

// forward declaration: encode_av () and encode_he () call back into encode_sv ()
static void encode_sv (enc_t *enc, SV *sv);

// Encode the array av as a JSON array, honouring the indentation flags.
// Elements that do not exist in the array (av_fetch returns NULL for
// them, e.g. the holes of a sparse array) are written as null instead
// of being dereferenced.
static void
encode_av (enc_t *enc, AV *av)
{
  int i, len = av_len (av); // av_len yields the highest index, -1 when empty

  encode_ch (enc, '['); NL;
  ++enc->indent;

  for (i = 0; i <= len; ++i)
    {
      SV **svp = av_fetch (av, i, 0);

      INDENT;

      if (svp)
        encode_sv (enc, *svp);
      else
        encode_str (enc, "null", 4, 0);

      if (i < len)
        COMMA;
    }

  NL;

  --enc->indent;
  INDENT; encode_ch (enc, ']');
}

// Encode one hash entry as "key":value. The key is taken from the key SV
// when the entry stores one (HEf_SVKEY), from the raw key bytes otherwise;
// F_SPACE_BEFORE / F_SPACE_AFTER put a space around the colon.
static void
encode_he (enc_t *enc, HE *he)
{
  encode_ch (enc, '"');

  if (HeKLEN (he) != HEf_SVKEY)
    encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
  else
    {
      SV *keysv = HeSVKEY (he);
      STRLEN klen;
      char *kstr;

      SvGETMAGIC (keysv);
      kstr = SvPV (keysv, klen);

      encode_str (enc, kstr, klen, SvUTF8 (keysv));
    }

  encode_ch (enc, '"');

  if (enc->flags & F_SPACE_BEFORE)
    SPACE;

  encode_ch (enc, ':');

  if (enc->flags & F_SPACE_AFTER)
    SPACE;

  encode_sv (enc, HeVAL (he));
}

// compare hash entries, used when all keys are bytestrings
// qsort () callback: a_ and b_ point at HE * array slots. Keys are ordered
// by memcmp over their common prefix, ties are broken by key length.
static int
he_cmp_fast (const void *a_, const void *b_)
{
  int cmp;

  HE *a = *(HE **)a_;
  HE *b = *(HE **)b_;

  STRLEN la = HeKLEN (a);
  STRLEN lb = HeKLEN (b);

  // this has to be an assignment: with "==" cmp was read uninitialised
  // and the memcmp result was thrown away
  if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
    cmp = la < lb ? -1 : la == lb ? 0 : 1;

  return cmp;
}

// compare hash entries, used when some keys are sv's or utf-x
static int
he_cmp_slow (const void *a, const void *b)
{
  return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
}

// Encode the hash hv as a JSON object.
//
// With F_CANONICAL all entries are collected first and sorted by key:
// he_cmp_fast is used when every key is a plain byte string, he_cmp_slow
// (sv_cmp based) as soon as one key is an SV key or flagged UTF-8.
// Without F_CANONICAL the entries are written in iteration order.
static void
encode_hv (enc_t *enc, HV *hv)
{
  int count, i;

  encode_ch (enc, '{'); NL; ++enc->indent;

  if ((count = hv_iterinit (hv)))
    {
      // for canonical output we have to sort by keys first,
      // mostly because hash ordering is randomised since
      // somewhere in 5.8.x
      if (enc->flags & F_CANONICAL)
        {
          HE *he, *hes [count];
          int fast = 1;

          i = 0;
          while ((he = hv_iternext (hv)))
            {
              hes [i++] = he;
              // a negative key length marks an SV key (HEf_SVKEY)
              if (HeKLEN (he) < 0 || HeKUTF8 (he))
                fast = 0;
            }

          assert (i == count);

          if (fast)
            qsort (hes, count, sizeof (HE *), he_cmp_fast);
          else
            {
              // hack to forcefully disable "use bytes"
              COP cop = *PL_curcop;
              cop.op_private = 0;

              ENTER;
              SAVETMPS;

              // PL_curcop is restored by LEAVE
              SAVEVPTR (PL_curcop);
              PL_curcop = &cop;

              qsort (hes, count, sizeof (HE *), he_cmp_slow);

              FREETMPS;
              LEAVE;
            }

          for (i = 0; i < count; ++i)
            {
              INDENT;
              encode_he (enc, hes [i]);

              if (i < count - 1)
                COMMA;
            }

          NL;
        }
      else
        {
          HE *he = hv_iternext (hv);

          for (;;)
            {
              INDENT;
              encode_he (enc, he);

              if (!(he = hv_iternext (hv)))
                break;

              COMMA;
            }

          NL;
        }
    }

  --enc->indent; INDENT; encode_ch (enc, '}');
}

// Encode an arbitrary scalar. The private flags are tested in this order:
// string, floating point, integer, reference, undefined. Array and hash
// references recurse via encode_av / encode_hv; everything else croaks.
static void
encode_sv (enc_t *enc, SV *sv)
{
  SvGETMAGIC (sv);

  if (SvPOKp (sv))
    {
      STRLEN slen;
      char *pv = SvPV (sv, slen);

      encode_ch (enc, '"');
      encode_str (enc, pv, slen, SvUTF8 (sv));
      encode_ch (enc, '"');
      return;
    }

  if (SvNOKp (sv))
    {
      need (enc, NV_DIG + 32);
      Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
      enc->cur += strlen (enc->cur);
      return;
    }

  if (SvIOKp (sv))
    {
      int written;

      need (enc, 64);

      if (SvIsUV(sv))
        written = snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv));
      else
        written = snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));

      enc->cur += written;
      return;
    }

  if (SvROK (sv))
    {
      SV *target = SvRV (sv);

      if (!--enc->max_recurse)
        croak ("data structure too deep (hit recursion limit)");

      if (SvTYPE (target) == SVt_PVAV)
        encode_av (enc, (AV *)target);
      else if (SvTYPE (target) == SVt_PVHV)
        encode_hv (enc, (HV *)target);
      else
        croak ("encountered %s, but JSON can only represent references to arrays or hashes",
               SvPV_nolen (sv));

      return;
    }

  if (!SvOK (sv))
    {
      encode_str (enc, "null", 4, 0);
      return;
    }

  croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
         SvPV_nolen (sv), SvFLAGS (sv));
}

// Encode scalar as JSON text and return it in a new mortal SV.
//
//   scalar  value to encode; must be a reference unless F_ALLOW_NONREF is set
//   flags   F_* option bits
//
// The result is flagged UTF-8 unless F_ASCII or F_UTF8 was requested, and
// is run through shrink () when F_SHRINK is set. Croaks on unsupported input.
static SV *
encode_json (SV *scalar, UV flags)
{
  if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
    croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");

  enc_t enc;
  enc.flags       = flags;
  enc.sv          = sv_2mortal (NEWSV (0, INIT_SIZE));
  enc.cur         = SvPVX (enc.sv);
  enc.end         = SvEND (enc.sv);
  enc.max_recurse = 0;
  enc.indent      = 0;

  SvPOK_only (enc.sv);
  encode_sv (&enc, scalar);

  if (!(flags & (F_ASCII | F_UTF8)))
    SvUTF8_on (enc.sv);

  // make room for the terminator before the length is fixed
  need (&enc, 1);
  SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
  // string buffers are expected to carry a trailing NUL; the encoder
  // itself never wrote one after the last character
  *SvEND (enc.sv) = 0;

  if (enc.flags & F_SHRINK)
    shrink (enc.sv);

  return enc.sv;
}

/////////////////////////////////////////////////////////////////////////////

// Decoder helper macros. All of them expect a local variable `dec'
// (dec_t *); ERR and EXPECT_CH additionally need a `fail:' label in the
// enclosing function.

// skip over space, newline, carriage return and tab at dec->cur
#define WS \
  for (;;)                              \
    {                                   \
      char ch = *dec->cur;              \
      if (ch > 0x20                     \
          || (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09)) \
        break;                          \
      ++dec->cur;                       \
    }

// record the error message and jump to the enclosing function's fail label
#define ERR(reason) SB dec->err = reason; goto fail; SE
// consume the character ch or fail with "<ch> expected"
#define EXPECT_CH(ch) SB \
  if (*dec->cur != ch)          \
    ERR (# ch " expected");     \
  ++dec->cur;                   \
  SE

// forward declaration: decode_av () and decode_hv () call back into decode_sv ()
static SV *decode_sv (dec_t *dec);

// value of each byte when read as a hex digit, negative for every byte
// that is not one; the table is filled in by the BOOT section
static signed char decode_hexdigit[256];

// Read exactly four hexadecimal digits at dec->cur and return their value,
// advancing dec->cur past them. On the first non-digit dec->err is set,
// dec->cur is left untouched and (UV)-1 is returned.
static UV
decode_4hex (dec_t *dec)
{
  unsigned char *src = (unsigned char *)dec->cur;
  UV value = 0;
  int i;

  for (i = 0; i < 4; ++i)
    {
      signed char digit = decode_hexdigit [src [i]];

      if (digit < 0)
        ERR ("four hexadecimal digits expected");

      value = value << 4 | (UV)digit;
    }

  dec->cur += 4;

  return value;

fail:
  return (UV)-1;
}

// Output helpers for decode_str (); they expect the locals `sv', `cur'
// and `end' of that function to be in scope.

// make sure n more bytes (plus one spare) fit at cur, growing sv and
// re-deriving cur/end when the buffer may have moved.
// NOTE(review): end is refreshed from SvEND (sv), i.e. SvPVX + SvCUR, and
// decode_str only sets SvCUR after its loop -- so the "cur + n >= end"
// test looks like it is true on every call and SvGROW decides alone.
#define APPEND_GROW(n) SB \
  if (cur + (n) >= end)                         \
    {                                           \
      STRLEN ofs = cur - SvPVX (sv);            \
      SvGROW (sv, ofs + (n) + 1);               \
      cur = SvPVX (sv) + ofs;                   \
      end = SvEND (sv);                         \
    }                                           \
  SE

// append the single byte ch
#define APPEND_CH(ch) SB \
  APPEND_GROW (1);      \
  *cur++ = (ch);        \
  SE

// Decode the body of a JSON string. The callers have already consumed the
// opening quote; on success dec->cur ends up just past the closing quote
// and a new SV holding the decoded text is returned (flagged UTF-8 when a
// character >= 0x80 was appended, shrunk when F_SHRINK is set).
// On failure dec->err is set, the partial SV is released and 0 is returned.
static SV *
decode_str (dec_t *dec)
{
  SV *sv = NEWSV (0,2);
  int utf8 = 0; // becomes 1 once a character >= 0x80 has been appended
  char *cur = SvPVX (sv); // write position, maintained by the APPEND_* macros
  char *end = SvEND (sv);

  for (;;)
    {
      unsigned char ch = *(unsigned char *)dec->cur;

      if (ch == '"')
        break;
      else if (ch == '\\')
        {
          switch (*++dec->cur)
            {
              // these three escapes stand for themselves
              case '\\':
              case '/':
              case '"': APPEND_CH (*dec->cur++); break;

              case 'b': APPEND_CH ('\010'); ++dec->cur; break;
              case 't': APPEND_CH ('\011'); ++dec->cur; break;
              case 'n': APPEND_CH ('\012'); ++dec->cur; break;
              case 'f': APPEND_CH ('\014'); ++dec->cur; break;
              case 'r': APPEND_CH ('\015'); ++dec->cur; break;

              case 'u':
                {
                  UV lo, hi;
                  ++dec->cur;

                  // decode_4hex has already set dec->err when it returns -1
                  hi = decode_4hex (dec);
                  if (hi == (UV)-1)
                    goto fail;

                  // possibly a surrogate pair
                  if (hi >= 0xd800 && hi < 0xdc00)
                    {
                      if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
                        ERR ("missing low surrogate character in surrogate pair");

                      dec->cur += 2;

                      lo = decode_4hex (dec);
                      if (lo == (UV)-1)
                        goto fail;

                      if (lo < 0xdc00 || lo >= 0xe000)
                        ERR ("surrogate pair expected");

                      // combine both halves into the real codepoint
                      hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
                    }
                  else if (hi >= 0xdc00 && hi < 0xe000)
                    ERR ("missing high surrogate character in surrogate pair");

                  if (hi >= 0x80)
                    {
                      utf8 = 1;

                      APPEND_GROW (4); // at most 4 bytes for 21 bits
                      cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
                    }
                  else
                    APPEND_CH (hi);
                }
                break;

              default:
                // step back onto the backslash before reporting
                --dec->cur;
                ERR ("illegal backslash escape sequence in string");
            }
        }
      else if (ch >= 0x20 && ch <= 0x7f)
        APPEND_CH (*dec->cur++);
      else if (ch >= 0x80)
        {
          // validate one UTF-8 sequence, then copy its bytes unchanged
          STRLEN clen;
          UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
          if (clen == (STRLEN)-1)
            ERR ("malformed UTF-8 character in JSON string");

          APPEND_GROW (clen);
          do
            {
              *cur++ = *dec->cur++;
            }
          while (--clen);

          utf8 = 1;
        }
      else if (dec->cur == dec->end)
        ERR ("unexpected end of string while parsing json string");
      else
        // remaining case: a byte below 0x20 inside the string
        ERR ("invalid character encountered");
    }

  // skip the closing quote
  ++dec->cur;

  SvCUR_set (sv, cur - SvPVX (sv));

  SvPOK_only (sv);
  *SvEND (sv) = 0;

  if (utf8)
    SvUTF8_on (sv);

  if (dec->flags & F_SHRINK)
    shrink (sv);

  return sv;

fail:
  SvREFCNT_dec (sv);
  return 0;
}

// Parse a JSON number starting at dec->cur: optional minus, integer part
// without superfluous leading zero, optional fraction, optional exponent.
// Plain integers that fit become IV/UV scalars, everything else is
// converted with Atof and returned as NV. Returns 0 with dec->err set
// when the text is not a well-formed number.
static SV *
decode_num (dec_t *dec)
{
  char *start = dec->cur;
  int is_nv = 0;

  // [minus]
  if (*dec->cur == '-')
    ++dec->cur;

  // [int]
  if (*dec->cur == '0')
    {
      ++dec->cur;

      if (*dec->cur >= '0' && *dec->cur <= '9')
         ERR ("malformed number (leading zero must not be followed by another digit)");
    }
  else
    {
      if (*dec->cur < '0' || *dec->cur > '9')
        ERR ("malformed number (no digits after initial minus)");

      while (*dec->cur >= '0' && *dec->cur <= '9')
        ++dec->cur;
    }

  // [frac]
  if (*dec->cur == '.')
    {
      ++dec->cur;

      if (*dec->cur < '0' || *dec->cur > '9')
        ERR ("malformed number (no digits after decimal point)");

      while (*dec->cur >= '0' && *dec->cur <= '9')
        ++dec->cur;

      is_nv = 1;
    }

  // [exp]
  if (*dec->cur == 'e' || *dec->cur == 'E')
    {
      ++dec->cur;

      if (*dec->cur == '-' || *dec->cur == '+')
        ++dec->cur;

      if (*dec->cur < '0' || *dec->cur > '9')
        ERR ("malformed number (no digits after exp sign)");

      while (*dec->cur >= '0' && *dec->cur <= '9')
        ++dec->cur;

      is_nv = 1;
    }

  if (!is_nv)
    {
      UV uv;
      int numtype = grok_number (start, dec->cur - start, &uv);

      if (numtype & IS_NUMBER_IN_UV)
        {
          if (!(numtype & IS_NUMBER_NEG))
            return newSVuv (uv);

          // negative values only fit an IV below this magnitude
          if (uv < (UV)IV_MIN)
            return newSViv (-(IV)uv);
        }
    }

  // fractions, exponents and integers that did not fit
  return newSVnv (Atof (start));

fail:
  return 0;
}

// Decode a JSON array whose opening '[' has already been consumed.
// Returns a new reference to the array, or 0 with dec->err set; on
// failure the partially filled array is released.
static SV *
decode_av (dec_t *dec)
{
  AV *av = newAV ();

  WS;

  if (*dec->cur == ']')
    {
      // empty array
      ++dec->cur;
      return newRV_noinc ((SV *)av);
    }

  for (;;)
    {
      SV *elem = decode_sv (dec);

      if (!elem)
        goto fail;

      av_push (av, elem);

      WS;

      if (*dec->cur == ']')
        {
          ++dec->cur;
          break;
        }

      if (*dec->cur != ',')
        ERR (", or ] expected while parsing array");

      ++dec->cur;
    }

  return newRV_noinc ((SV *)av);

fail:
  SvREFCNT_dec (av);
  return 0;
}

// Decode a JSON object whose opening '{' has already been consumed.
// Returns a new reference to the hash, or 0 with dec->err set; on failure
// the partially filled hash is released.
//
// The key SV produced by decode_str () is only borrowed by hv_store_ent (),
// so it is released on every path (it used to leak once per pair).
static SV *
decode_hv (dec_t *dec)
{
  HV *hv = newHV ();

  WS;
  if (*dec->cur == '}')
    ++dec->cur;
  else
    for (;;)
      {
        SV *key, *value;

        WS; EXPECT_CH ('"');

        key = decode_str (dec);
        if (!key)
          goto fail;

        // spelled out instead of EXPECT_CH (':') so that the key can be
        // released before bailing out; the message is the one EXPECT_CH builds
        WS;
        if (*dec->cur != ':')
          {
            SvREFCNT_dec (key);
            ERR ("':' expected");
          }
        ++dec->cur;

        value = decode_sv (dec);
        if (!value)
          {
            SvREFCNT_dec (key);
            goto fail;
          }

        //TODO: optimise
        // a NULL result means the value was not stored and is still ours
        if (!hv_store_ent (hv, key, value, 0))
          SvREFCNT_dec (value);

        SvREFCNT_dec (key);

        WS;

        if (*dec->cur == '}')
          {
            ++dec->cur;
            break;
          }

        if (*dec->cur != ',')
          ERR (", or } expected while parsing object/hash");

        ++dec->cur;
      }

  return newRV_noinc ((SV *)hv);

fail:
  SvREFCNT_dec (hv);
  return 0;
}

// Decode the next JSON value at dec->cur (after skipping whitespace) and
// dispatch on its first character: string, array, object, number or one
// of the atoms true / false / null. Returns the new SV, or 0 with
// dec->err set.
static SV *
decode_sv (dec_t *dec)
{
  WS;

  switch (*dec->cur)
    {
      case '"':
        ++dec->cur;
        return decode_str (dec);

      case '[':
        ++dec->cur;
        return decode_av (dec);

      case '{':
        ++dec->cur;
        return decode_hv (dec);

      case '-':
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        return decode_num (dec);

      case 't':
        if (dec->end - dec->cur < 4 || memcmp (dec->cur, "true", 4))
          ERR ("'true' expected");

        dec->cur += 4;
        return newSViv (1);

      case 'f':
        if (dec->end - dec->cur < 5 || memcmp (dec->cur, "false", 5))
          ERR ("'false' expected");

        dec->cur += 5;
        return newSViv (0);

      case 'n':
        if (dec->end - dec->cur < 4 || memcmp (dec->cur, "null", 4))
          ERR ("'null' expected");

        dec->cur += 4;
        return newSVsv (&PL_sv_undef);

      default:
        ERR ("malformed json string, neither array, object, number, string or atom");
    }

fail:
  return 0;
}

// Decode the JSON text in string and return the result as a mortal SV.
//
//   string  JSON text; it is downgraded (F_UTF8) or upgraded (otherwise)
//           in place before parsing
//   flags   F_* option bits
//
// Croaks with the parser's message, the offset and an excerpt of the
// remaining input when parsing fails, and when the top-level value is
// not a reference while F_ALLOW_NONREF is unset.
static SV *
decode_json (SV *string, UV flags)
{
  SV *sv;

  if (flags & F_UTF8)
    sv_utf8_downgrade (string, 0);
  else
    sv_utf8_upgrade (string);

  SvGROW (string, SvCUR (string) + 1); // should basically be a NOP

  dec_t dec;
  dec.flags = flags;
  dec.cur   = SvPVX (string);
  dec.end   = SvEND (string);
  dec.err   = 0;

  sv = decode_sv (&dec);

  if (!sv)
    {
      // byte offset for F_UTF8 input, utf8_distance from the string start otherwise
      IV offset = dec.flags & F_UTF8
                  ? dec.cur - SvPVX (string)
                  : utf8_distance (dec.cur, SvPVX (string));
      SV *uni = sv_newmortal ();

      // horrible hack to silence warning inside pv_uni_display
      COP cop = *PL_curcop;
      cop.cop_warnings = pWARN_NONE;
      ENTER;
      SAVEVPTR (PL_curcop);
      PL_curcop = &cop;
      // render up to 20 units of the unparsed rest for the error message
      pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
      LEAVE;

      croak ("%s, at character offset %d (%s)",
             dec.err,
             (int)offset,
             dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
    }

  // mortalise first so the value is freed if the croak below fires
  sv = sv_2mortal (sv);

  if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
    croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");

  return sv;
}

MODULE = JSON::XS               PACKAGE = JSON::XS

BOOT:
{
        int i;

        // build the hex digit table: every entry starts out as -1 (0xff in
        // a signed char), meaning "not a hex digit"
        memset (decode_hexdigit, 0xff, 256);
        for (i = 10; i--; )
          decode_hexdigit ['0' + i] = i;

        // exactly six letters, a-f and A-F; counting down from 7 also
        // registered 'g' and 'G' with the value 16
        for (i = 6; i--; )
          {
            decode_hexdigit ['a' + i] = 10 + i;
            decode_hexdigit ['A' + i] = 10 + i;
          }

        // look up (creating it if necessary) the stash that SvJSON () checks against
        json_stash = gv_stashpv ("JSON::XS", 1);
}

PROTOTYPES: DISABLE

SV *new (char *dummy)
        CODE:
        /* constructor: a reference to a fresh UV scalar holding F_DEFAULT,
           blessed into json_stash; the dummy argument is not used */
        RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
        OUTPUT:
        RETVAL

SV *ascii (SV *self, int enable = 1)
        ALIAS:
        ascii        = F_ASCII
        utf8         = F_UTF8
        indent       = F_INDENT
        canonical    = F_CANONICAL
        space_before = F_SPACE_BEFORE
        space_after  = F_SPACE_AFTER
        json_rpc     = F_JSON_RPC
        pretty       = F_PRETTY
        allow_nonref = F_ALLOW_NONREF
        shrink       = F_SHRINK
        CODE:
{
        /* one body shared by all option setters: ix carries the flag mask
           listed in ALIAS for the name the sub was called under */
        UV *uv = SvJSON (self);
        if (enable)
          *uv |=  ix;
        else
          *uv &= ~ix;

        /* return a copy of the invocant */
        RETVAL = newSVsv (self);
}
        OUTPUT:
        RETVAL

void encode (SV *self, SV *scalar)
        PPCODE:
        /* method form: encode scalar using the flags stored in self */
        XPUSHs (encode_json (scalar, *SvJSON (self)));

void decode (SV *self, SV *jsonstr)
        PPCODE:
        /* method form: decode jsonstr using the flags stored in self */
        XPUSHs (decode_json (jsonstr, *SvJSON (self)));

PROTOTYPES: ENABLE

void to_json (SV *scalar)
        PPCODE:
        /* function form: encode with F_UTF8 as the only flag */
        XPUSHs (encode_json (scalar, F_UTF8));

void from_json (SV *jsonstr)
        PPCODE:
        /* function form: decode with F_UTF8 as the only flag */
        XPUSHs (decode_json (jsonstr, F_UTF8));

Revision:	1.9
Committed:	Fri Mar 23 17:40:29 2007 UTC (17 years, 3 months ago) by root
Branch:	MAIN
CVS Tags:	rel-0_3
Changes since 1.8:	+15 -10 lines
Log Message:	*** empty log message ***
#	Content
1	#include "EXTERN.h"
2	#include "perl.h"
3	#include "XSUB.h"
4
5	#include "assert.h"
6	#include "string.h"
7	#include "stdlib.h"
8
9	#define F_ASCII 0x00000001
10	#define F_UTF8 0x00000002
11	#define F_INDENT 0x00000004
12	#define F_CANONICAL 0x00000008
13	#define F_SPACE_BEFORE 0x00000010
14	#define F_SPACE_AFTER 0x00000020
15	#define F_JSON_RPC 0x00000040
16	#define F_ALLOW_NONREF 0x00000080
17	#define F_SHRINK 0x00000100
18
19	#define F_PRETTY F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER
20	#define F_DEFAULT 0
21
22	#define INIT_SIZE 32 // initial scalar size to be allocated
23
24	#define SB do {
25	#define SE } while (0)
26
27	static HV *json_stash;
28
29	// structure used for encoding JSON
30	typedef struct
31	{
32	char *cur;
33	STRLEN len; // SvLEN (sv)
34	char *end; // SvEND (sv)
35	SV *sv;
36	UV flags;
37	int max_recurse;
38	int indent;
39	} enc_t;
40
41	// structure used for decoding JSON
42	typedef struct
43	{
44	char *cur;
45	char *end;
46	const char *err;
47	UV flags;
48	} dec_t;
49
50	static UV *
51	SvJSON (SV *sv)
52	{
53	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
54	croak ("object is not of type JSON::XS");
55
56	return &SvUVX (SvRV (sv));
57	}
58
59	static void
60	shrink (SV *sv)
61	{
62	sv_utf8_downgrade (sv, 1);
63	#ifdef SvPV_shrink_to_cur
64	SvPV_shrink_to_cur (sv);
65	#endif
66	}
67
68	/////////////////////////////////////////////////////////////////////////////
69
70	static void
71	need (enc_t *enc, STRLEN len)
72	{
73	if (enc->cur + len >= enc->end)
74	{
75	STRLEN cur = enc->cur - SvPVX (enc->sv);
76	SvGROW (enc->sv, cur + len + 1);
77	enc->cur = SvPVX (enc->sv) + cur;
78	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
79	}
80	}
81
82	static void
83	encode_ch (enc_t *enc, char ch)
84	{
85	need (enc, 1);
86	*enc->cur++ = ch;
87	}
88
89	static void
90	encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
91	{
92	char *end = str + len;
93
94	need (enc, len);
95
96	while (str < end)
97	{
98	unsigned char ch = *(unsigned char *)str;
99
100	if (ch >= 0x20 && ch < 0x80) // most common case
101	{
102	if (ch == '"') // but with slow exceptions
103	{
104	need (enc, len += 1);
105	*enc->cur++ = '\\';
106	*enc->cur++ = '"';
107	}
108	else if (ch == '\\')
109	{
110	need (enc, len += 1);
111	*enc->cur++ = '\\';
112	*enc->cur++ = '\\';
113	}
114	else
115	*enc->cur++ = ch;
116
117	++str;
118	}
119	else
120	{
121	switch (ch)
122	{
123	case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
124	case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
125	case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
126	case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
127	case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;
128
129	default:
130	{
131	STRLEN clen;
132	UV uch;
133
134	if (is_utf8)
135	{
136	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
137	if (clen == (STRLEN)-1)
138	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
139	}
140	else
141	{
142	uch = ch;
143	clen = 1;
144	}
145
146	if (uch > 0x10FFFFUL)
147	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
148
149	if (uch < 0x80 || enc->flags & F_ASCII)
150	{
151	if (uch > 0xFFFFUL)
152	{
153	need (enc, len += 11);
154	sprintf (enc->cur, "\\u%04x\\u%04x",
155	(uch - 0x10000) / 0x400 + 0xD800,
156	(uch - 0x10000) % 0x400 + 0xDC00);
157	enc->cur += 12;
158	}
159	else
160	{
161	static char hexdigit [16] = "0123456789abcdef";
162	need (enc, len += 5);
163	*enc->cur++ = '\\';
164	*enc->cur++ = 'u';
165	*enc->cur++ = hexdigit [ uch >> 12 ];
166	*enc->cur++ = hexdigit [(uch >> 8) & 15];
167	*enc->cur++ = hexdigit [(uch >> 4) & 15];
168	*enc->cur++ = hexdigit [(uch >> 0) & 15];
169	}
170
171	str += clen;
172	}
173	else if (is_utf8)
174	{
175	need (enc, len += clen);
176	do
177	{
178	*enc->cur++ = *str++;
179	}
180	while (--clen);
181	}
182	else
183	{
184	need (enc, len += 10); // never more than 11 bytes needed
185	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
186	++str;
187	}
188	}
189	}
190	}
191
192	--len;
193	}
194	}
195
196	#define INDENT SB \
197	if (enc->flags & F_INDENT) \
198	{ \
199	int i_; \
200	need (enc, enc->indent); \
201	for (i_ = enc->indent * 3; i_--; )\
202	encode_ch (enc, ' '); \
203	} \
204	SE
205
206	#define SPACE SB need (enc, 1); encode_ch (enc, ' '); SE
207	#define NL SB if (enc->flags & F_INDENT) { need (enc, 1); encode_ch (enc, '\n'); } SE
208	#define COMMA SB \
209	encode_ch (enc, ','); \
210	if (enc->flags & F_INDENT) \
211	NL; \
212	else if (enc->flags & F_SPACE_AFTER) \
213	SPACE; \
214	SE
215
216	static void encode_sv (enc_t *enc, SV *sv);
217
218	static void
219	encode_av (enc_t *enc, AV *av)
220	{
221	int i, len = av_len (av);
222
223	encode_ch (enc, '['); NL;
224	++enc->indent;
225
226	for (i = 0; i <= len; ++i)
227	{
228	INDENT;
229	encode_sv (enc, *av_fetch (av, i, 0));
230
231	if (i < len)
232	COMMA;
233	}
234
235	NL;
236
237	--enc->indent;
238	INDENT; encode_ch (enc, ']');
239	}
240
241	static void
242	encode_he (enc_t *enc, HE *he)
243	{
244	encode_ch (enc, '"');
245
246	if (HeKLEN (he) == HEf_SVKEY)
247	{
248	SV *sv = HeSVKEY (he);
249	STRLEN len;
250	char *str;
251
252	SvGETMAGIC (sv);
253	str = SvPV (sv, len);
254
255	encode_str (enc, str, len, SvUTF8 (sv));
256	}
257	else
258	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
259
260	encode_ch (enc, '"');
261
262	if (enc->flags & F_SPACE_BEFORE) SPACE;
263	encode_ch (enc, ':');
264	if (enc->flags & F_SPACE_AFTER ) SPACE;
265	encode_sv (enc, HeVAL (he));
266	}
267
268	// compare hash entries, used when all keys are bytestrings
269	static int
270	he_cmp_fast (const void *a_, const void *b_)
271	{
272	int cmp;
273
274	HE *a = *(HE **)a_;
275	HE *b = *(HE **)b_;
276
277	STRLEN la = HeKLEN (a);
278	STRLEN lb = HeKLEN (b);
279
280	if (!(cmp == memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
281	cmp = la < lb ? -1 : la == lb ? 0 : 1;
282
283	return cmp;
284	}
285
286	// compare hash entries, used when some keys are sv's or utf-x
287	static int
288	he_cmp_slow (const void *a, const void *b)
289	{
290	return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
291	}
292
293	static void
294	encode_hv (enc_t *enc, HV *hv)
295	{
296	int count, i;
297
298	encode_ch (enc, '{'); NL; ++enc->indent;
299
300	if ((count = hv_iterinit (hv)))
301	{
302	// for canonical output we have to sort by keys first
303	// actually, this is mostly due to the stupid so-called
304	// security workaround added somewhere in 5.8.x.
305	// that randomises hash orderings
306	if (enc->flags & F_CANONICAL)
307	{
308	HE *he, *hes [count];
309	int fast = 1;
310
311	i = 0;
312	while ((he = hv_iternext (hv)))
313	{
314	hes [i++] = he;
315	if (HeKLEN (he) < 0 || HeKUTF8 (he))
316	fast = 0;
317	}
318
319	assert (i == count);
320
321	if (fast)
322	qsort (hes, count, sizeof (HE *), he_cmp_fast);
323	else
324	{
325	// hack to forcefully disable "use bytes"
326	COP cop = *PL_curcop;
327	cop.op_private = 0;
328
329	ENTER;
330	SAVETMPS;
331
332	SAVEVPTR (PL_curcop);
333	PL_curcop = &cop;
334
335	qsort (hes, count, sizeof (HE *), he_cmp_slow);
336
337	FREETMPS;
338	LEAVE;
339	}
340
341	for (i = 0; i < count; ++i)
342	{
343	INDENT;
344	encode_he (enc, hes [i]);
345
346	if (i < count - 1)
347	COMMA;
348	}
349
350	NL;
351	}
352	else
353	{
354	SV *sv;
355	HE *he = hv_iternext (hv);
356
357	for (;;)
358	{
359	INDENT;
360	encode_he (enc, he);
361
362	if (!(he = hv_iternext (hv)))
363	break;
364
365	COMMA;
366	}
367
368	NL;
369	}
370	}
371
372	--enc->indent; INDENT; encode_ch (enc, '}');
373	}
374
375	static void
376	encode_sv (enc_t *enc, SV *sv)
377	{
378	SvGETMAGIC (sv);
379
380	if (SvPOKp (sv))
381	{
382	STRLEN len;
383	char *str = SvPV (sv, len);
384	encode_ch (enc, '"');
385	encode_str (enc, str, len, SvUTF8 (sv));
386	encode_ch (enc, '"');
387	}
388	else if (SvNOKp (sv))
389	{
390	need (enc, NV_DIG + 32);
391	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
392	enc->cur += strlen (enc->cur);
393	}
394	else if (SvIOKp (sv))
395	{
396	need (enc, 64);
397	enc->cur +=
398	SvIsUV(sv)
399	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
400	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
401	}
402	else if (SvROK (sv))
403	{
404	SV *rv = SvRV (sv);
405
406	if (!--enc->max_recurse)
407	croak ("data structure too deep (hit recursion limit)");
408
409	switch (SvTYPE (rv))
410	{
411	case SVt_PVAV: encode_av (enc, (AV *)rv); break;
412	case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
413
414	default:
415	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
416	SvPV_nolen (sv));
417	}
418	}
419	else if (!SvOK (sv))
420	encode_str (enc, "null", 4, 0);
421	else
422	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
423	SvPV_nolen (sv), SvFLAGS (sv));
424	}
425
426	static SV *
427	encode_json (SV *scalar, UV flags)
428	{
429	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
430	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
431
432	enc_t enc;
433	enc.flags = flags;
434	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
435	enc.cur = SvPVX (enc.sv);
436	enc.end = SvEND (enc.sv);
437	enc.max_recurse = 0;
438	enc.indent = 0;
439
440	SvPOK_only (enc.sv);
441	encode_sv (&enc, scalar);
442
443	if (!(flags & (F_ASCII \| F_UTF8)))
444	SvUTF8_on (enc.sv);
445
446	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
447
448	if (enc.flags & F_SHRINK)
449	shrink (enc.sv);
450
451	return enc.sv;
452	}
453
454	/////////////////////////////////////////////////////////////////////////////
455
// the decoder macros below expect a "dec_t *dec" in scope, and ERR/EXPECT_CH
// additionally a "fail" label in the enclosing function.

// skip over JSON whitespace (space, LF, CR, TAB) at the current position
#define WS \
  while (*dec->cur == 0x20 \
         || *dec->cur == 0x0a \
         || *dec->cur == 0x0d \
         || *dec->cur == 0x09) \
    { \
      ++dec->cur; \
    }

// remember the error message and bail out to the "fail" label
#define ERR(reason) SB dec->err = (reason); goto fail; SE

// step over the character ch, or fail with "<ch> expected" if it is not next
#define EXPECT_CH(ch) SB \
  if (*dec->cur == ch) \
    ++dec->cur; \
  else \
    ERR (# ch " expected"); \
  SE
472
473	static SV decode_sv (dec_t dec);
474
475	static signed char decode_hexdigit[256];
476
477	static UV
478	decode_4hex (dec_t *dec)
479	{
480	signed char d1, d2, d3, d4;
481
482	d1 = decode_hexdigit [((unsigned char *)dec->cur) [0]];
483	if (d1 < 0) ERR ("four hexadecimal digits expected");
484	d2 = decode_hexdigit [((unsigned char *)dec->cur) [1]];
485	if (d2 < 0) ERR ("four hexadecimal digits expected");
486	d3 = decode_hexdigit [((unsigned char *)dec->cur) [2]];
487	if (d3 < 0) ERR ("four hexadecimal digits expected");
488	d4 = decode_hexdigit [((unsigned char *)dec->cur) [3]];
489	if (d4 < 0) ERR ("four hexadecimal digits expected");
490
491	dec->cur += 4;
492
493	return ((UV)d1) << 12
494	\| ((UV)d2) << 8
495	\| ((UV)d3) << 4
496	\| ((UV)d4);
497
498	fail:
499	return (UV)-1;
500	}
501
// output helpers for decode_str. they expect the locals "sv" (the string
// being built), "cur" (write position) and "end" (end of the allocation).

// make sure n more bytes plus a trailing NUL fit behind cur. the length of
// sv is only set once decoding is complete, so the end of the buffer has to
// be derived from the allocated size (SvLEN) - deriving it from the current
// length would make the capacity test fail on every single append.
#define APPEND_GROW(n) SB \
  if (cur + (n) >= end) \
    { \
      STRLEN ofs = cur - SvPVX (sv); \
      SvGROW (sv, ofs + (n) + 1); \
      cur = SvPVX (sv) + ofs; \
      end = SvPVX (sv) + SvLEN (sv); \
    } \
  SE

// append the single byte ch, growing the buffer first if required
#define APPEND_CH(ch) SB \
  APPEND_GROW (1); \
  *cur++ = (ch); \
  SE
516
517	static SV *
518	decode_str (dec_t *dec)
519	{
520	SV *sv = NEWSV (0,2);
521	int utf8 = 0;
522	char *cur = SvPVX (sv);
523	char *end = SvEND (sv);
524
525	for (;;)
526	{
527	unsigned char ch = (unsigned char )dec->cur;
528
529	if (ch == '"')
530	break;
531	else if (ch == '\\')
532	{
533	switch (*++dec->cur)
534	{
535	case '\\':
536	case '/':
537	case '"': APPEND_CH (*dec->cur++); break;
538
539	case 'b': APPEND_CH ('\010'); ++dec->cur; break;
540	case 't': APPEND_CH ('\011'); ++dec->cur; break;
541	case 'n': APPEND_CH ('\012'); ++dec->cur; break;
542	case 'f': APPEND_CH ('\014'); ++dec->cur; break;
543	case 'r': APPEND_CH ('\015'); ++dec->cur; break;
544
545	case 'u':
546	{
547	UV lo, hi;
548	++dec->cur;
549
550	hi = decode_4hex (dec);
551	if (hi == (UV)-1)
552	goto fail;
553
554	// possibly a surrogate pair
555	if (hi >= 0xd800 && hi < 0xdc00)
556	{
557	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')
558	ERR ("missing low surrogate character in surrogate pair");
559
560	dec->cur += 2;
561
562	lo = decode_4hex (dec);
563	if (lo == (UV)-1)
564	goto fail;
565
566	if (lo < 0xdc00 \|\| lo >= 0xe000)
567	ERR ("surrogate pair expected");
568
569	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
570	}
571	else if (hi >= 0xdc00 && hi < 0xe000)
572	ERR ("missing high surrogate character in surrogate pair");
573
574	if (hi >= 0x80)
575	{
576	utf8 = 1;
577
578	APPEND_GROW (4); // at most 4 bytes for 21 bits
579	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
580	}
581	else
582	APPEND_CH (hi);
583	}
584	break;
585
586	default:
587	--dec->cur;
588	ERR ("illegal backslash escape sequence in string");
589	}
590	}
591	else if (ch >= 0x20 && ch <= 0x7f)
592	APPEND_CH (*dec->cur++);
593	else if (ch >= 0x80)
594	{
595	STRLEN clen;
596	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
597	if (clen == (STRLEN)-1)
598	ERR ("malformed UTF-8 character in JSON string");
599
600	APPEND_GROW (clen);
601	do
602	{
603	cur++ = dec->cur++;
604	}
605	while (--clen);
606
607	utf8 = 1;
608	}
609	else if (dec->cur == dec->end)
610	ERR ("unexpected end of string while parsing json string");
611	else
612	ERR ("invalid character encountered");
613	}
614
615	++dec->cur;
616
617	SvCUR_set (sv, cur - SvPVX (sv));
618
619	SvPOK_only (sv);
620	*SvEND (sv) = 0;
621
622	if (utf8)
623	SvUTF8_on (sv);
624
625	if (dec->flags & F_SHRINK)
626	shrink (sv);
627
628	return sv;
629
630	fail:
631	SvREFCNT_dec (sv);
632	return 0;
633	}
634
635	static SV *
636	decode_num (dec_t *dec)
637	{
638	int is_nv = 0;
639	char *start = dec->cur;
640
641	// [minus]
642	if (*dec->cur == '-')
643	++dec->cur;
644
645	if (*dec->cur == '0')
646	{
647	++dec->cur;
648	if (dec->cur >= '0' && dec->cur <= '9')
649	ERR ("malformed number (leading zero must not be followed by another digit)");
650	}
651	else if (dec->cur < '0' \|\| dec->cur > '9')
652	ERR ("malformed number (no digits after initial minus)");
653	else
654	do
655	{
656	++dec->cur;
657	}
658	while (dec->cur >= '0' && dec->cur <= '9');
659
660	// [frac]
661	if (*dec->cur == '.')
662	{
663	++dec->cur;
664
665	if (dec->cur < '0' \|\| dec->cur > '9')
666	ERR ("malformed number (no digits after decimal point)");
667
668	do
669	{
670	++dec->cur;
671	}
672	while (dec->cur >= '0' && dec->cur <= '9');
673
674	is_nv = 1;
675	}
676
677	// [exp]
678	if (dec->cur == 'e' \|\| dec->cur == 'E')
679	{
680	++dec->cur;
681
682	if (dec->cur == '-' \|\| dec->cur == '+')
683	++dec->cur;
684
685	if (dec->cur < '0' \|\| dec->cur > '9')
686	ERR ("malformed number (no digits after exp sign)");
687
688	do
689	{
690	++dec->cur;
691	}
692	while (dec->cur >= '0' && dec->cur <= '9');
693
694	is_nv = 1;
695	}
696
697	if (!is_nv)
698	{
699	UV uv;
700	int numtype = grok_number (start, dec->cur - start, &uv);
701	if (numtype & IS_NUMBER_IN_UV)
702	if (numtype & IS_NUMBER_NEG)
703	{
704	if (uv < (UV)IV_MIN)
705	return newSViv (-(IV)uv);
706	}
707	else
708	return newSVuv (uv);
709	}
710
711	return newSVnv (Atof (start));
712
713	fail:
714	return 0;
715	}
716
717	static SV *
718	decode_av (dec_t *dec)
719	{
720	AV *av = newAV ();
721
722	WS;
723	if (*dec->cur == ']')
724	++dec->cur;
725	else
726	for (;;)
727	{
728	SV *value;
729
730	value = decode_sv (dec);
731	if (!value)
732	goto fail;
733
734	av_push (av, value);
735
736	WS;
737
738	if (*dec->cur == ']')
739	{
740	++dec->cur;
741	break;
742	}
743
744	if (*dec->cur != ',')
745	ERR (", or ] expected while parsing array");
746
747	++dec->cur;
748	}
749
750	return newRV_noinc ((SV *)av);
751
752	fail:
753	SvREFCNT_dec (av);
754	return 0;
755	}
756
757	static SV *
758	decode_hv (dec_t *dec)
759	{
760	HV *hv = newHV ();
761
762	WS;
763	if (*dec->cur == '}')
764	++dec->cur;
765	else
766	for (;;)
767	{
768	SV key, value;
769
770	WS; EXPECT_CH ('"');
771
772	key = decode_str (dec);
773	if (!key)
774	goto fail;
775
776	WS; EXPECT_CH (':');
777
778	value = decode_sv (dec);
779	if (!value)
780	{
781	SvREFCNT_dec (key);
782	goto fail;
783	}
784
785	//TODO: optimise
786	hv_store_ent (hv, key, value, 0);
787
788	WS;
789
790	if (*dec->cur == '}')
791	{
792	++dec->cur;
793	break;
794	}
795
796	if (*dec->cur != ',')
797	ERR (", or } expected while parsing object/hash");
798
799	++dec->cur;
800	}
801
802	return newRV_noinc ((SV *)hv);
803
804	fail:
805	SvREFCNT_dec (hv);
806	return 0;
807	}
808
809	static SV *
810	decode_sv (dec_t *dec)
811	{
812	WS;
813	switch (*dec->cur)
814	{
815	case '"': ++dec->cur; return decode_str (dec);
816	case '[': ++dec->cur; return decode_av (dec);
817	case '{': ++dec->cur; return decode_hv (dec);
818
819	case '-':
820	case '0': case '1': case '2': case '3': case '4':
821	case '5': case '6': case '7': case '8': case '9':
822	return decode_num (dec);
823
824	case 't':
825	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
826	{
827	dec->cur += 4;
828	return newSViv (1);
829	}
830	else
831	ERR ("'true' expected");
832
833	break;
834
835	case 'f':
836	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
837	{
838	dec->cur += 5;
839	return newSViv (0);
840	}
841	else
842	ERR ("'false' expected");
843
844	break;
845
846	case 'n':
847	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
848	{
849	dec->cur += 4;
850	return newSVsv (&PL_sv_undef);
851	}
852	else
853	ERR ("'null' expected");
854
855	break;
856
857	default:
858	ERR ("malformed json string, neither array, object, number, string or atom");
859	break;
860	}
861
862	fail:
863	return 0;
864	}
865
866	static SV *
867	decode_json (SV *string, UV flags)
868	{
869	SV *sv;
870
871	if (flags & F_UTF8)
872	sv_utf8_downgrade (string, 0);
873	else
874	sv_utf8_upgrade (string);
875
876	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
877
878	dec_t dec;
879	dec.flags = flags;
880	dec.cur = SvPVX (string);
881	dec.end = SvEND (string);
882	dec.err = 0;
883
884	sv = decode_sv (&dec);
885
886	if (!sv)
887	{
888	IV offset = dec.flags & F_UTF8
889	? dec.cur - SvPVX (string)
890	: utf8_distance (dec.cur, SvPVX (string));
891	SV *uni = sv_newmortal ();
892
893	// horrible hack to silence warning inside pv_uni_display
894	COP cop = *PL_curcop;
895	cop.cop_warnings = pWARN_NONE;
896	ENTER;
897	SAVEVPTR (PL_curcop);
898	PL_curcop = &cop;
899	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
900	LEAVE;
901
902	croak ("%s, at character offset %d (%s)",
903	dec.err,
904	(int)offset,
905	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
906	}
907
908	sv = sv_2mortal (sv);
909
910	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
911	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
912
913	return sv;
914	}
915
916	MODULE = JSON::XS PACKAGE = JSON::XS
917
BOOT:
{
	int i;

	// start with "not a hex digit" everywhere: all bits set reads back
	// as a negative value from the signed char table
	memset (decode_hexdigit, 0xff, 256);

	for (i = 10; i--; )
	  decode_hexdigit ['0' + i] = i;

	// six letters only (a-f and A-F); a seventh iteration would make
	// 'g' and 'G' valid digits with the value 16
	for (i = 6; i--; )
	  {
	    decode_hexdigit ['a' + i] = 10 + i;
	    decode_hexdigit ['A' + i] = 10 + i;
	  }

	json_stash = gv_stashpv ("JSON::XS", 1);
}
934
935	PROTOTYPES: DISABLE
936
# constructor: creates an unsigned integer scalar holding the option flags
# (initially F_DEFAULT) and returns a reference to it, blessed into the
# JSON::XS stash. the class name argument itself is not used.
SV *new (char *dummy)
	CODE:
	RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
	OUTPUT:
	RETVAL
942
# shared option setter: every alias carries its flag bit(s) in ix. the bits
# are set when enable is true (the default) and cleared otherwise, and a
# copy of the invocant is returned.
SV *ascii (SV *self, int enable = 1)
	ALIAS:
	ascii        = F_ASCII
	utf8         = F_UTF8
	indent       = F_INDENT
	canonical    = F_CANONICAL
	space_before = F_SPACE_BEFORE
	space_after  = F_SPACE_AFTER
	json_rpc     = F_JSON_RPC
	pretty       = F_PRETTY
	allow_nonref = F_ALLOW_NONREF
	shrink       = F_SHRINK
	CODE:
{
	UV *uv = SvJSON (self);

	if (enable)
	  *uv |= ix;
	else
	  *uv &= ~ix;

	RETVAL = newSVsv (self);
}
	OUTPUT:
	RETVAL
967
# method: encodes scalar with the flags stored in the object and returns
# the JSON text.
void encode (SV *self, SV *scalar)
	PPCODE:
	// the encoder processes get-magic and may therefore run perl code that
	// reallocates the stack: publish SP before the call, reload it after
	PUTBACK;
	scalar = encode_json (scalar, *SvJSON (self));
	SPAGAIN;
	XPUSHs (scalar);
971
# method: decodes jsonstr with the flags stored in the object and returns
# the resulting perl value.
void decode (SV *self, SV *jsonstr)
	PPCODE:
	// keep the local stack pointer in sync around the call into the decoder
	PUTBACK;
	jsonstr = decode_json (jsonstr, *SvJSON (self));
	SPAGAIN;
	XPUSHs (jsonstr);
975
976	PROTOTYPES: ENABLE
977
# function: encodes scalar with just the F_UTF8 flag set and returns the
# JSON text.
void to_json (SV *scalar)
	PPCODE:
	// the encoder processes get-magic and may therefore run perl code that
	// reallocates the stack: publish SP before the call, reload it after
	PUTBACK;
	scalar = encode_json (scalar, F_UTF8);
	SPAGAIN;
	XPUSHs (scalar);
981
# function: decodes jsonstr with just the F_UTF8 flag set and returns the
# resulting perl value.
void from_json (SV *jsonstr)
	PPCODE:
	// keep the local stack pointer in sync around the call into the decoder
	PUTBACK;
	jsonstr = decode_json (jsonstr, F_UTF8);
	SPAGAIN;
	XPUSHs (jsonstr);
985