JSON-XS/XS.xs

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include "assert.h"
#include "string.h"
#include "stdlib.h"

// option bits; a combination of these is kept in the UV behind every
// JSON::XS object (see SvJSON) and passed to the encoder/decoder
#define F_ASCII        0x00000001
#define F_UTF8         0x00000002
#define F_INDENT       0x00000004
#define F_CANONICAL    0x00000008
#define F_SPACE_BEFORE 0x00000010
#define F_SPACE_AFTER  0x00000020
#define F_ALLOW_NONREF 0x00000080
#define F_SHRINK       0x00000100

// parenthesised so the expansion stays a single operand when it is used
// next to operators that bind tighter than | (e.g. flags & F_PRETTY, ~F_PRETTY)
#define F_PRETTY    (F_INDENT | F_SPACE_BEFORE | F_SPACE_AFTER)
#define F_DEFAULT   0

#define INIT_SIZE   32 // initial scalar size to be allocated
#define INDENT_STEP 3  // spaces per indentation level

// for perls UTF-X: max. number of octets per character
// (13, the value of UTF8_MAXBYTES in perl's utf8.h)
#define UTF8_MAX_LEN      13
#define SHORT_STRING_LEN 256 // special-case strings of up to this size

// statement-begin/-end wrappers for multi-statement macros
#define SB do {
#define SE } while (0)

// stash of the JSON::XS package: looked up once in the BOOT section,
// used by SvJSON to type-check objects and by new to bless them
static HV *json_stash; // JSON::XS::

/////////////////////////////////////////////////////////////////////////////
// utility functions

// return a pointer to the flags word (the UV slot of the referenced
// scalar) of a JSON::XS object; croaks unless sv is a reference to an
// object blessed into exactly the JSON::XS stash
static UV *
SvJSON (SV *sv)
{
  SV *obj;

  if (!SvROK (sv))
    croak ("object is not of type JSON::XS");

  obj = SvRV (sv);

  if (!SvOBJECT (obj) || SvSTASH (obj) != json_stash)
    croak ("object is not of type JSON::XS");

  return &SvUVX (obj);
}

// make sv as compact as possible: try to drop the utf-8 encoding
// (failure is tolerated) and give back unused buffer space when the
// running perl offers a macro for that
static void
shrink (SV *sv)
{
  sv_utf8_downgrade (sv, 1);

  // buffer already holds just the string plus one spare octet
  if (SvLEN (sv) <= SvCUR (sv) + 1)
    return;

#ifdef SvPV_shrink_to_cur
  SvPV_shrink_to_cur (sv);
#elif defined (SvPV_renew)
  SvPV_renew (sv, SvCUR (sv) + 1);
#endif
}

/////////////////////////////////////////////////////////////////////////////
// encoder

// structure used for encoding JSON
// cur and end point into the buffer of sv and are recomputed by need ()
// whenever that buffer is grown
typedef struct
{
  char *cur;  // SvPVX (sv) + current output position
  char *end;  // SvEND (sv) right after setup, SvPVX (sv) + SvLEN (sv) once need () has run
  SV *sv;     // result scalar
  UV flags;   // F_*
  int indent; // indentation level; also compared against max_depth as the nesting depth
  int max_depth; // max. recursion level
} enc_t;

// make sure that more than len octets are available at enc->cur, i.e.
// room for len octets of output plus one spare octet.
// may reallocate the result scalar, so enc->cur and enc->end are
// re-derived afterwards.
static void
need (enc_t *enc, STRLEN len)
{
  // compare distances instead of computing enc->cur + len, which could
  // point far beyond the buffer
  if ((STRLEN)(enc->end - enc->cur) <= len)
    {
      STRLEN cur  = enc->cur - SvPVX (enc->sv);
      STRLEN grow = cur >> 2;

      // grow by at least a quarter of what has been written so far, so
      // that long runs of small appends do not reallocate every time
      if (grow < len)
        grow = len;

      SvGROW (enc->sv, cur + grow + 1);
      enc->cur = SvPVX (enc->sv) + cur;
      enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
    }
}

// append the single octet ch to the output
static void
encode_ch (enc_t *enc, char ch)
{
  need (enc, 1);

  enc->cur [0] = ch;
  enc->cur += 1;
}

// append str/len to the output as the contents of a JSON string; the
// surrounding quotes are written by the callers. is_utf8 tells whether
// str is utf-8 encoded or holds one character per octet.
//
// croaks on malformed utf-8 input and on codepoints above 0x10FFFF.
//
// len doubles as a running upper bound on the output octets still
// required: it starts as the number of input octets, is increased by the
// extra octets an escape needs right before need () is called, and is
// decremented once per loop iteration.
static void
encode_str (enc_t *enc, char *str, STRLEN len, int is_utf8)
{
  char *end = str + len;

  // enough for the case that nothing has to be escaped
  need (enc, len);

  while (str < end)
    {
      unsigned char ch = *(unsigned char *)str;

      if (ch >= 0x20 && ch < 0x80) // most common case
        {
          if (ch == '"') // but with slow exceptions
            {
              need (enc, len += 1);
              *enc->cur++ = '\\';
              *enc->cur++ = '"';
            }
          else if (ch == '\\')
            {
              need (enc, len += 1);
              *enc->cur++ = '\\';
              *enc->cur++ = '\\';
            }
          else
            *enc->cur++ = ch;

          ++str;
        }
      else
        {
          // control characters and everything >= 0x80
          switch (ch)
            {
              // the control characters that have a two-character escape
              case '\010': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'b'; ++str; break;
              case '\011': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 't'; ++str; break;
              case '\012': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'n'; ++str; break;
              case '\014': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'f'; ++str; break;
              case '\015': need (enc, len += 1); *enc->cur++ = '\\'; *enc->cur++ = 'r'; ++str; break;

              default:
                {
                  STRLEN clen; // number of input octets forming this character
                  UV uch;      // its codepoint

                  if (is_utf8)
                    {
                      // with UTF8_CHECK_ONLY a malformed sequence is reported via clen == -1
                      uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
                      if (clen == (STRLEN)-1)
                        croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
                    }
                  else
                    {
                      uch = ch;
                      clen = 1;
                    }

                  if (uch > 0x10FFFFUL)
                    croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);

                  // remaining control characters are always \u-escaped,
                  // everything else only when ascii output was requested
                  if (uch < 0x80 || enc->flags & F_ASCII)
                    {
                      if (uch > 0xFFFFUL)
                        {
                          // outside the BMP: emit a utf-16 surrogate pair, 12 octets
                          // (sprintf additionally writes a trailing NUL)
                          need (enc, len += 11);
                          sprintf (enc->cur, "\\u%04x\\u%04x",
                                   (int)((uch - 0x10000) / 0x400 + 0xD800),
                                   (int)((uch - 0x10000) % 0x400 + 0xDC00));
                          enc->cur += 12;
                        }
                      else
                        {
                          // single \uXXXX escape, 6 octets
                          static char hexdigit [16] = "0123456789abcdef";
                          need (enc, len += 5);
                          *enc->cur++ = '\\';
                          *enc->cur++ = 'u';
                          *enc->cur++ = hexdigit [ uch >> 12      ];
                          *enc->cur++ = hexdigit [(uch >>  8) & 15];
                          *enc->cur++ = hexdigit [(uch >>  4) & 15];
                          *enc->cur++ = hexdigit [(uch >>  0) & 15];
                        }

                      str += clen;
                    }
                  else if (is_utf8)
                    {
                      // input is utf-8 already: copy the validated sequence as is
                      need (enc, len += clen);
                      do
                        {
                          *enc->cur++ = *str++;
                        }
                      while (--clen);
                    }
                  else
                    {
                      // one-octet-per-character input: encode the character as utf-8
                      need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed
                      enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
                      ++str;
                    }
                }
            }
        }

      // one iteration accounted for (see the note on len above)
      --len;
    }
}

// when indenting is enabled, append INDENT_STEP spaces per current
// indentation level
static void
encode_indent (enc_t *enc)
{
  int spaces;

  if (!(enc->flags & F_INDENT))
    return;

  spaces = enc->indent * INDENT_STEP;

  need (enc, spaces);
  memset (enc->cur, ' ', spaces);
  enc->cur += spaces;
}

// append a single space (callers decide whether one is wanted)
static void
encode_space (enc_t *enc)
{
  need (enc, 1);
  *enc->cur++ = ' ';
}

// append a newline, but only when indenting is enabled
static void
encode_nl (enc_t *enc)
{
  if (!(enc->flags & F_INDENT))
    return;

  need (enc, 1);
  *enc->cur++ = '\n';
}

// append the separator between two array elements or object members:
// a comma followed by a newline (indenting) or a space (space_after)
static void
encode_comma (enc_t *enc)
{
  encode_ch (enc, ',');

  if (!(enc->flags & F_INDENT))
    {
      if (enc->flags & F_SPACE_AFTER)
        encode_space (enc);

      return;
    }

  encode_nl (enc);
}

// forward declaration: encode_av and encode_he call encode_sv, which in
// turn calls encode_av and encode_hv
static void encode_sv (enc_t *enc, SV *sv);

// encode the perl array av as a JSON array.
// elements that do not exist (av_fetch returns a null pointer for them,
// e.g. holes in a sparse array) are written as null.
static void
encode_av (enc_t *enc, AV *av)
{
  int i, len = av_len (av); // index of the last element, -1 if empty

  encode_ch (enc, '['); encode_nl (enc);
  ++enc->indent;

  for (i = 0; i <= len; ++i)
    {
      SV **svp;

      encode_indent (enc);

      svp = av_fetch (av, i, 0);

      if (svp)
        encode_sv (enc, *svp);
      else
        encode_str (enc, "null", 4, 0);

      if (i < len)
        encode_comma (enc);
    }

  encode_nl (enc);

  --enc->indent;
  encode_indent (enc); encode_ch (enc, ']');
}

// encode one hash entry as a JSON object member: "key":value, with
// optional spaces around the colon (space_before / space_after)
static void
encode_he (enc_t *enc, HE *he)
{
  char *key;
  STRLEN klen;
  int key_utf8;

  encode_ch (enc, '"');

  if (HeKLEN (he) != HEf_SVKEY)
    {
      // ordinary key stored inline in the entry
      key      = HeKEY (he);
      klen     = HeKLEN (he);
      key_utf8 = HeKUTF8 (he);
    }
  else
    {
      // the key is a full scalar
      SV *ksv = HeSVKEY (he);

      SvGETMAGIC (ksv);
      key      = SvPV (ksv, klen);
      key_utf8 = SvUTF8 (ksv);
    }

  encode_str (enc, key, klen, key_utf8);

  encode_ch (enc, '"');

  if (enc->flags & F_SPACE_BEFORE)
    encode_space (enc);

  encode_ch (enc, ':');

  if (enc->flags & F_SPACE_AFTER)
    encode_space (enc);

  encode_sv (enc, HeVAL (he));
}

// compare hash entries, used when all keys are bytestrings
static int
he_cmp_fast (const void *a_, const void *b_)
{
  int cmp;

  HE *a = *(HE **)a_;
  HE *b = *(HE **)b_;

  STRLEN la = HeKLEN (a);
  STRLEN lb = HeKLEN (b);

  if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
    cmp = la - lb;

  return cmp;
}

// compare hash entries, used when some keys are sv's or utf-x
static int
he_cmp_slow (const void *a, const void *b)
{
  return sv_cmp (HeSVKEY_force (*(HE **)a), HeSVKEY_force (*(HE **)b));
}

// encode the perl hash hv as a JSON object. with F_CANONICAL the members
// are written sorted by key, otherwise in iteration order.
//
// the value returned by hv_iterinit is only used as an upper bound for
// the sort vector: the number of entries actually delivered by
// hv_iternext is what gets encoded, as the two are not guaranteed to
// agree (perlapi calls the return value meaningful only for hashes
// without tie magic).
static void
encode_hv (enc_t *enc, HV *hv)
{
  int count, i;

  encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;

  if ((count = hv_iterinit (hv)))
    {
      // for canonical output we have to sort by keys first
      // actually, this is mostly due to the stupid so-called
      // security workaround added somewhere in 5.8.x.
      // that randomises hash orderings
      if (enc->flags & F_CANONICAL)
        {
          HE *he, *hes [count]; // if your compiler dies here, you need to enable C99 mode
          int fast = 1;

          i = 0;
          while ((he = hv_iternext (hv)))
            {
              // hes has room for count entries only
              if (i >= count)
                croak ("hash returned more entries than announced, unable to sort keys for canonical JSON");

              hes [i++] = he;
              if (HeKLEN (he) < 0 || HeKUTF8 (he))
                fast = 0;
            }

          // only the entries really collected are initialised
          count = i;

          if (fast)
            qsort (hes, count, sizeof (HE *), he_cmp_fast);
          else
            {
              // hack to forcefully disable "use bytes"
              COP cop = *PL_curcop;
              cop.op_private = 0;

              ENTER;
              SAVETMPS;

              SAVEVPTR (PL_curcop);
              PL_curcop = &cop;

              qsort (hes, count, sizeof (HE *), he_cmp_slow);

              FREETMPS;
              LEAVE;
            }

          for (i = 0; i < count; ++i)
            {
              encode_indent (enc);
              encode_he (enc, hes [i]);

              if (i < count - 1)
                encode_comma (enc);
            }

          // nothing was written for an effectively empty hash
          if (count)
            encode_nl (enc);
        }
      else
        {
          HE *he = hv_iternext (hv);

          // the iterator may be exhausted right away despite count != 0
          if (he)
            {
              for (;;)
                {
                  encode_indent (enc);
                  encode_he (enc, he);

                  if (!(he = hv_iternext (hv)))
                    break;

                  encode_comma (enc);
                }

              encode_nl (enc);
            }
        }
    }

  --enc->indent; encode_indent (enc); encode_ch (enc, '}');
}

// encode an arbitrary perl scalar. the checks run in this order: string,
// floating point number, integer, reference (array or hash only), undef.
// croaks for anything else and when the nesting limit is reached.
static void
encode_sv (enc_t *enc, SV *sv)
{
  SvGETMAGIC (sv);

  if (SvPOKp (sv))
    {
      STRLEN len;
      char *str = SvPV (sv, len);

      encode_ch (enc, '"');
      encode_str (enc, str, len, SvUTF8 (sv));
      encode_ch (enc, '"');
      return;
    }

  if (SvNOKp (sv))
    {
      // Gconvert writes a NUL-terminated number straight into the output buffer
      need (enc, NV_DIG + 32);
      Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
      enc->cur += strlen (enc->cur);
      return;
    }

  if (SvIOKp (sv))
    {
      int written;

      need (enc, 64);

      if (SvIsUV(sv))
        written = snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv));
      else
        written = snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));

      enc->cur += written;
      return;
    }

  if (SvROK (sv))
    {
      SV *rv = SvRV (sv);

      // enc->indent counts the containers currently being encoded
      if (enc->indent >= enc->max_depth)
        croak ("data structure too deep (hit recursion limit)");

      if (SvTYPE (rv) == SVt_PVAV)
        encode_av (enc, (AV *)rv);
      else if (SvTYPE (rv) == SVt_PVHV)
        encode_hv (enc, (HV *)rv);
      else
        croak ("encountered %s, but JSON can only represent references to arrays or hashes",
               SvPV_nolen (sv));

      return;
    }

  if (SvOK (sv))
    croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
           SvPV_nolen (sv), SvFLAGS (sv));

  encode_str (enc, "null", 4, 0);
}

// encode scalar as JSON text according to flags (F_*) and return the
// text in a new mortal scalar.
// croaks unless scalar is a reference or F_ALLOW_NONREF is set, and for
// everything encode_sv croaks on.
// the result is flagged as utf-8 character string unless F_ASCII or
// F_UTF8 was requested, and is always NUL-terminated.
static SV *
encode_json (SV *scalar, UV flags)
{
  if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
    croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");

  enc_t enc;
  enc.flags     = flags;
  enc.sv        = sv_2mortal (NEWSV (0, INIT_SIZE));
  enc.cur       = SvPVX (enc.sv);
  enc.end       = SvEND (enc.sv);
  enc.indent    = 0;
  enc.max_depth = 0x7fffffffUL;

  SvPOK_only (enc.sv);
  encode_sv (&enc, scalar);

  if (!(flags & (F_ASCII | F_UTF8)))
    SvUTF8_on (enc.sv);

  // the encoder only appends raw octets, so terminate the string here:
  // consumers of perl strings commonly rely on the trailing NUL
  need (&enc, 1);
  SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
  *SvEND (enc.sv) = 0;

  if (enc.flags & F_SHRINK)
    shrink (enc.sv);

  return enc.sv;
}

/////////////////////////////////////////////////////////////////////////////
// decoder

// structure used for decoding JSON
// the decode_* functions advance cur; on failure they store a message in
// err (via the ERR macro) and return 0, and decode_json reports err
// together with the position cur
typedef struct
{
  char *cur; // current parser pointer
  char *end; // end of input string
  const char *err; // parse error, if != 0
  UV flags;  // F_*
} dec_t;

// skip JSON whitespace (space, tab, line feed, carriage return) at the
// parse position
static void
decode_ws (dec_t *dec)
{
  char *p = dec->cur;

  while (*p == 0x20 || *p == 0x09 || *p == 0x0a || *p == 0x0d)
    ++p;

  dec->cur = p;
}

// record reason in dec->err and jump to the fail: label of the
// enclosing function; requires a dec_t *dec in scope
#define ERR(reason) SB dec->err = reason; goto fail; SE
// consume the character ch at the parse position, or fail with an error
// message built from the spelling of ch; the argument is parenthesised
// in the comparison so that any expression may be passed
#define EXPECT_CH(ch) SB \
  if (*dec->cur != (ch))        \
    ERR (# ch " expected");     \
  ++dec->cur;                   \
  SE

// forward declaration: the container decoders recurse through decode_sv
static SV *decode_sv (dec_t *dec);

// lookup table from input octet to the value of that hex digit; filled in
// by the BOOT section, decode_4hex rejects octets whose entry is negative
static signed char decode_hexdigit[256];

// parse exactly four hex digits at the parse position and return their
// value, advancing past them. on the first non-digit dec->err is set,
// the position is left untouched and (UV)-1 is returned.
static UV
decode_4hex (dec_t *dec)
{
  unsigned char *digits = (unsigned char *)dec->cur;
  UV value = 0;
  int i;

  for (i = 0; i < 4; ++i)
    {
      signed char d = decode_hexdigit [digits [i]];

      if (d < 0)
        ERR ("four hexadecimal digits expected");

      value = value << 4 | (UV)d;
    }

  dec->cur += 4;

  return value;

fail:
  return (UV)-1;
}

// decode a JSON string; the parse position must be just behind the
// opening quote and ends up just behind the closing one.
// returns a new scalar (flagged utf-8 when it contains characters
// >= 0x80), or 0 after setting dec->err. on failure nothing is leaked
// and the parse position is at the offending input.
//
// the string is assembled in chunks in a stack buffer: the inner loop
// stops once SHORT_STRING_LEN octets are buffered, the extra
// UTF8_MAX_LEN octets leave room for the character being written when
// that limit is crossed.
static SV *
decode_str (dec_t *dec)
{
  SV *sv = 0;
  int utf8 = 0;

  do
    {
      char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];
      char *cur = buf;

      do
        {
          unsigned char ch = *(unsigned char *)dec->cur++;

          if (ch == '"')
            {
              // leave the quote for the outer loop condition
              --dec->cur;
              break;
            }
          else if (ch == '\\')
            {
              switch (*dec->cur)
                {
                  case '\\':
                  case '/':
                  case '"': *cur++ = *dec->cur++; break;

                  case 'b': ++dec->cur; *cur++ = '\010'; break;
                  case 't': ++dec->cur; *cur++ = '\011'; break;
                  case 'n': ++dec->cur; *cur++ = '\012'; break;
                  case 'f': ++dec->cur; *cur++ = '\014'; break;
                  case 'r': ++dec->cur; *cur++ = '\015'; break;

                  case 'u':
                    {
                      UV lo, hi;
                      ++dec->cur;

                      hi = decode_4hex (dec);
                      if (hi == (UV)-1)
                        goto fail;

                      // possibly a surrogate pair
                      if (hi >= 0xd800)
                        {
                          if (hi < 0xdc00)
                            {
                              // high surrogate, must be followed by \u + low surrogate
                              if (dec->cur [0] != '\\' || dec->cur [1] != 'u')
                                ERR ("missing low surrogate character in surrogate pair");

                              dec->cur += 2;

                              lo = decode_4hex (dec);
                              if (lo == (UV)-1)
                                goto fail;

                              if (lo < 0xdc00 || lo >= 0xe000)
                                ERR ("surrogate pair expected");

                              hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
                            }
                          else if (hi < 0xe000)
                            ERR ("missing high surrogate character in surrogate pair");
                        }

                      if (hi >= 0x80)
                        {
                          utf8 = 1;

                          cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
                        }
                      else
                        *cur++ = hi;
                    }
                    break;

                  default:
                    --dec->cur;
                    ERR ("illegal backslash escape sequence in string");
                }
            }
          else if (ch >= 0x20 && ch <= 0x7f)
            *cur++ = ch;
          else if (ch >= 0x80)
            {
              --dec->cur;

              // validate the sequence, then copy it unchanged
              STRLEN clen;
              UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
              if (clen == (STRLEN)-1)
                ERR ("malformed UTF-8 character in JSON string");

              do
                {
                  *cur++ = *dec->cur++;
                }
              while (--clen);

              utf8 = 1;
            }
          else
            {
              // step back onto the offending octet, so that the reported
              // position is correct and never lies behind the terminating NUL
              --dec->cur;

              if (!ch)
                ERR ("unexpected end of string while parsing json string");
              else
                ERR ("invalid character encountered");
            }

        }
      while (cur < buf + SHORT_STRING_LEN);

      STRLEN len = cur - buf;

      if (sv)
        {
          SvGROW (sv, SvCUR (sv) + len + 1);
          memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
          SvCUR_set (sv, SvCUR (sv) + len);
        }
      else
        sv = newSVpvn (buf, len);
    }
  while (*dec->cur != '"');

  ++dec->cur;

  // the loop body ran at least once, so sv exists here
  SvPOK_only (sv);
  *SvEND (sv) = 0;

  if (utf8)
    SvUTF8_on (sv);

  return sv;

fail:
  // release what was assembled from earlier chunks, if anything
  if (sv)
    SvREFCNT_dec (sv);

  return 0;
}

// decode a JSON number at the parse position.
// numbers without fraction and exponent become an integer scalar (IV or
// UV) when grok_number says they fit, everything else becomes an NV.
// returns 0 after setting dec->err on malformed input.
static SV *
decode_num (dec_t *dec)
{
  char *start = dec->cur;
  int is_float = 0;

  // [minus]
  if (*dec->cur == '-')
    ++dec->cur;

  // [int]
  if (*dec->cur == '0')
    {
      ++dec->cur;

      if (*dec->cur >= '0' && *dec->cur <= '9')
         ERR ("malformed number (leading zero must not be followed by another digit)");
    }
  else
    {
      if (*dec->cur < '0' || *dec->cur > '9')
        ERR ("malformed number (no digits after initial minus)");

      while (*dec->cur >= '0' && *dec->cur <= '9')
        ++dec->cur;
    }

  // [frac]
  if (*dec->cur == '.')
    {
      ++dec->cur;

      if (*dec->cur < '0' || *dec->cur > '9')
        ERR ("malformed number (no digits after decimal point)");

      while (*dec->cur >= '0' && *dec->cur <= '9')
        ++dec->cur;

      is_float = 1;
    }

  // [exp]
  if (*dec->cur == 'e' || *dec->cur == 'E')
    {
      ++dec->cur;

      if (*dec->cur == '-' || *dec->cur == '+')
        ++dec->cur;

      if (*dec->cur < '0' || *dec->cur > '9')
        ERR ("malformed number (no digits after exp sign)");

      while (*dec->cur >= '0' && *dec->cur <= '9')
        ++dec->cur;

      is_float = 1;
    }

  if (!is_float)
    {
      UV uv;
      int numtype = grok_number (start, dec->cur - start, &uv);

      if (numtype & IS_NUMBER_IN_UV)
        {
          if (!(numtype & IS_NUMBER_NEG))
            return newSVuv (uv);

          // negative: only when the magnitude can be negated within an IV
          if (uv < (UV)IV_MIN)
            return newSViv (-(IV)uv);
        }
    }

  // fraction, exponent or out of integer range
  return newSVnv (Atof (start));

fail:
  return 0;
}

// decode a JSON array; the parse position must be just behind the
// opening bracket. returns a reference to a new array, or 0 (with
// dec->err set and the partial array released) on failure.
static SV *
decode_av (dec_t *dec)
{
  AV *av = newAV ();

  decode_ws (dec);

  if (*dec->cur == ']')
    {
      // empty array
      ++dec->cur;
      return newRV_noinc ((SV *)av);
    }

  for (;;)
    {
      SV *elem = decode_sv (dec);

      if (!elem)
        goto fail;

      av_push (av, elem);

      decode_ws (dec);

      if (*dec->cur == ',')
        {
          ++dec->cur;
          continue;
        }

      if (*dec->cur != ']')
        ERR (", or ] expected while parsing array");

      ++dec->cur;
      break;
    }

  return newRV_noinc ((SV *)av);

fail:
  SvREFCNT_dec (av);
  return 0;
}

// decode a JSON object; the parse position must be just behind the
// opening brace. returns a reference to a new hash, or 0 (with dec->err
// set) on failure. neither keys nor the partial hash are leaked.
static SV *
decode_hv (dec_t *dec)
{
  HV *hv = newHV ();

  decode_ws (dec);
  if (*dec->cur == '}')
    ++dec->cur;
  else
    for (;;)
      {
        SV *key, *value;

        decode_ws (dec); EXPECT_CH ('"');

        key = decode_str (dec);
        if (!key)
          goto fail;

        decode_ws (dec);

        // same check and message as EXPECT_CH (':'), but the key has to
        // be released before bailing out
        if (*dec->cur != ':')
          {
            SvREFCNT_dec (key);
            ERR ("':' expected");
          }

        ++dec->cur;

        value = decode_sv (dec);
        if (!value)
          {
            SvREFCNT_dec (key);
            goto fail;
          }

        //TODO: optimise
        hv_store_ent (hv, key, value, 0);
        // hv_store_ent takes over value but not key, so drop our
        // reference to the key scalar
        SvREFCNT_dec (key);

        decode_ws (dec);

        if (*dec->cur == '}')
          {
            ++dec->cur;
            break;
          }

        if (*dec->cur != ',')
          ERR (", or } expected while parsing object/hash");

        ++dec->cur;
      }

  return newRV_noinc ((SV *)hv);

fail:
  SvREFCNT_dec (hv);
  return 0;
}

// decode any JSON value at the parse position (leading whitespace is
// skipped) by dispatching on its first character.
// true/false become the integers 1/0 and null becomes undef.
// returns 0 with dec->err set on failure.
static SV *
decode_sv (dec_t *dec)
{
  decode_ws (dec);

  switch (*dec->cur)
    {
      case '"':
        ++dec->cur;
        return decode_str (dec);

      case '[':
        ++dec->cur;
        return decode_av (dec);

      case '{':
        ++dec->cur;
        return decode_hv (dec);

      case '-':
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        return decode_num (dec);

      case 't':
        if (dec->end - dec->cur < 4 || memcmp (dec->cur, "true", 4))
          ERR ("'true' expected");

        dec->cur += 4;
        return newSViv (1);

      case 'f':
        if (dec->end - dec->cur < 5 || memcmp (dec->cur, "false", 5))
          ERR ("'false' expected");

        dec->cur += 5;
        return newSViv (0);

      case 'n':
        if (dec->end - dec->cur < 4 || memcmp (dec->cur, "null", 4))
          ERR ("'null' expected");

        dec->cur += 4;
        return newSVsv (&PL_sv_undef);

      default:
        ERR ("malformed json string, neither array, object, number, string or atom");
    }

fail:
  return 0;
}

// decode the JSON text in string according to flags (F_*) and return the
// resulting mortal scalar.
// croaks with the parse error, the character offset and an excerpt of
// the offending input when the text is malformed or is followed by
// anything but whitespace, and when the text is not an object or array
// while F_ALLOW_NONREF is unset.
// a plain string argument is up-/downgraded in place; anything else
// (numbers, undef, magical values, ...) is decoded from a stringified copy.
static SV *
decode_json (SV *string, UV flags)
{
  SV *sv;

  // the code below accesses the string buffer directly, which is only
  // valid for real strings: work on a private copy of everything else
  if (SvMAGICAL (string) || !SvPOK (string))
    {
      STRLEN len;
      char *pv = SvPV (string, len);
      SV *copy = sv_2mortal (newSVpvn (pv, len));

      if (SvUTF8 (string))
        SvUTF8_on (copy);

      string = copy;
    }

  if (flags & F_UTF8)
    sv_utf8_downgrade (string, 0);
  else
    sv_utf8_upgrade (string);

  SvGROW (string, SvCUR (string) + 1); // should basically be a NOP

  dec_t dec;
  dec.flags = flags;
  dec.cur   = SvPVX (string);
  dec.end   = SvEND (string);
  dec.err   = 0;

  // the parser stops at the terminating NUL, so make sure it is there
  *dec.end = 0;

  sv = decode_sv (&dec);

  if (sv)
    {
      // a JSON text is exactly one value, check for trailing garbage
      decode_ws (&dec);

      if (dec.cur != dec.end)
        {
          dec.err = "garbage after JSON object";
          SvREFCNT_dec (sv);
          sv = 0;
        }
    }

  if (!sv)
    {
      // never report (or display from) a position behind the input
      if (dec.cur > dec.end)
        dec.cur = dec.end;

      IV offset = dec.flags & F_UTF8
                  ? dec.cur - SvPVX (string)
                  : utf8_distance (dec.cur, SvPVX (string));
      SV *uni = sv_newmortal ();

      // horrible hack to silence warning inside pv_uni_display
      COP cop = *PL_curcop;
      cop.cop_warnings = pWARN_NONE;
      ENTER;
      SAVEVPTR (PL_curcop);
      PL_curcop = &cop;
      pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
      LEAVE;

      croak ("%s, at character offset %d (%s)",
             dec.err,
             (int)offset,
             dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
    }

  sv = sv_2mortal (sv);

  if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
    croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");

  return sv;
}

/////////////////////////////////////////////////////////////////////////////
// XS interface functions

MODULE = JSON::XS               PACKAGE = JSON::XS

BOOT:
{
        int i;

        // build the hex digit lookup table used by decode_4hex:
        // every octet starts out as -1 (0xff as signed char), not a hex digit
        memset (decode_hexdigit, 0xff, 256);
        for (i = 10; i--; )
          decode_hexdigit ['0' + i] = i;

        // exactly the six letters a-f and A-F; one more iteration would
        // make 'g' and 'G' pass as the digit 16
        for (i = 6; i--; )
          {
            decode_hexdigit ['a' + i] = 10 + i;
            decode_hexdigit ['A' + i] = 10 + i;
          }

        // fetch the package stash (created if missing) that SvJSON
        // compares against and that new blesses into
        json_stash = gv_stashpv ("JSON::XS", 1);
}

PROTOTYPES: DISABLE

SV *new (char *dummy)
        CODE:
        // constructor: the object is a reference to a UV holding the F_* flags
        // (initially F_DEFAULT), blessed into the JSON::XS stash.
        // dummy is not used; presumably it receives the class name of the method call
        RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
        OUTPUT:
        RETVAL

SV *ascii (SV *self, int enable = 1)
        ALIAS:
        ascii        = F_ASCII
        utf8         = F_UTF8
        indent       = F_INDENT
        canonical    = F_CANONICAL
        space_before = F_SPACE_BEFORE
        space_after  = F_SPACE_AFTER
        pretty       = F_PRETTY
        allow_nonref = F_ALLOW_NONREF
        shrink       = F_SHRINK
        CODE:
{
        // one xsub serves all option setters: ix holds the F_* value listed
        // in the ALIAS section for the name the method was called under.
        // SvJSON croaks unless self is a JSON::XS object.
        UV *uv = SvJSON (self);
        // set or clear the selected bit(s) in the object's flags word
        if (enable)
          *uv |=  ix;
        else
          *uv &= ~ix;

        // return a copy of the object reference
        RETVAL = newSVsv (self);
}
        OUTPUT:
        RETVAL

void encode (SV *self, SV *scalar)
        PPCODE:
        // encode scalar using the flags stored in self and push the result,
        // which encode_json already made mortal, onto the stack
        XPUSHs (encode_json (scalar, *SvJSON (self)));

void decode (SV *self, SV *jsonstr)
        PPCODE:
        // decode jsonstr using the flags stored in self and push the result,
        // which decode_json already made mortal, onto the stack
        XPUSHs (decode_json (jsonstr, *SvJSON (self)));

PROTOTYPES: ENABLE

void to_json (SV *scalar)
        PPCODE:
        // functional interface: encode with F_UTF8 as the only option, no object needed
        XPUSHs (encode_json (scalar, F_UTF8));

void from_json (SV *jsonstr)
        PPCODE:
        // functional interface: decode with F_UTF8 as the only option, no object needed
        XPUSHs (decode_json (jsonstr, F_UTF8));

Revision:	1.12
Committed:	Sat Mar 24 22:10:08 2007 UTC (17 years, 1 month ago) by root
Branch:	MAIN
Changes since 1.11:	+226 -181 lines
Log Message:	* empty log message *
#	Content
1	#include "EXTERN.h"
2	#include "perl.h"
3	#include "XSUB.h"
4
5	#include "assert.h"
6	#include "string.h"
7	#include "stdlib.h"
8
9	#define F_ASCII 0x00000001
10	#define F_UTF8 0x00000002
11	#define F_INDENT 0x00000004
12	#define F_CANONICAL 0x00000008
13	#define F_SPACE_BEFORE 0x00000010
14	#define F_SPACE_AFTER 0x00000020
15	#define F_ALLOW_NONREF 0x00000080
16	#define F_SHRINK 0x00000100
17
18	#define F_PRETTY F_INDENT \| F_SPACE_BEFORE \| F_SPACE_AFTER
19	#define F_DEFAULT 0
20
21	#define INIT_SIZE 32 // initial scalar size to be allocated
22	#define INDENT_STEP 3 // spaces per indentation level
23
24	#define UTF8_MAX_LEN 11 // for perls UTF-X: max. number of octets per character
25	#define SHORT_STRING_LEN 256 // special-case strings of up to this size
26
27	#define SB do {
28	#define SE } while (0)
29
30	static HV *json_stash; // JSON::XS::
31
32	/////////////////////////////////////////////////////////////////////////////
33	// utility functions
34
35	static UV *
36	SvJSON (SV *sv)
37	{
38	if (!(SvROK (sv) && SvOBJECT (SvRV (sv)) && SvSTASH (SvRV (sv)) == json_stash))
39	croak ("object is not of type JSON::XS");
40
41	return &SvUVX (SvRV (sv));
42	}
43
44	static void
45	shrink (SV *sv)
46	{
47	sv_utf8_downgrade (sv, 1);
48	if (SvLEN (sv) > SvCUR (sv) + 1)
49	{
50	#ifdef SvPV_shrink_to_cur
51	SvPV_shrink_to_cur (sv);
52	#elif defined (SvPV_renew)
53	SvPV_renew (sv, SvCUR (sv) + 1);
54	#endif
55	}
56	}
57
58	/////////////////////////////////////////////////////////////////////////////
59	// encoder
60
61	// structure used for encoding JSON
62	typedef struct
63	{
64	char *cur; // SvPVX (sv) + current output position
65	char *end; // SvEND (sv)
66	SV *sv; // result scalar
67	UV flags; // F_*
68	int indent; // indentation level
69	int max_depth; // max. recursion level
70	} enc_t;
71
72	static void
73	need (enc_t *enc, STRLEN len)
74	{
75	if (enc->cur + len >= enc->end)
76	{
77	STRLEN cur = enc->cur - SvPVX (enc->sv);
78	SvGROW (enc->sv, cur + len + 1);
79	enc->cur = SvPVX (enc->sv) + cur;
80	enc->end = SvPVX (enc->sv) + SvLEN (enc->sv);
81	}
82	}
83
84	static void
85	encode_ch (enc_t *enc, char ch)
86	{
87	need (enc, 1);
88	*enc->cur++ = ch;
89	}
90
91	static void
92	encode_str (enc_t enc, char str, STRLEN len, int is_utf8)
93	{
94	char *end = str + len;
95
96	need (enc, len);
97
98	while (str < end)
99	{
100	unsigned char ch = (unsigned char )str;
101
102	if (ch >= 0x20 && ch < 0x80) // most common case
103	{
104	if (ch == '"') // but with slow exceptions
105	{
106	need (enc, len += 1);
107	*enc->cur++ = '\\';
108	*enc->cur++ = '"';
109	}
110	else if (ch == '\\')
111	{
112	need (enc, len += 1);
113	*enc->cur++ = '\\';
114	*enc->cur++ = '\\';
115	}
116	else
117	*enc->cur++ = ch;
118
119	++str;
120	}
121	else
122	{
123	switch (ch)
124	{
125	case '\010': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'b'; ++str; break;
126	case '\011': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 't'; ++str; break;
127	case '\012': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'n'; ++str; break;
128	case '\014': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'f'; ++str; break;
129	case '\015': need (enc, len += 1); enc->cur++ = '\\'; enc->cur++ = 'r'; ++str; break;
130
131	default:
132	{
133	STRLEN clen;
134	UV uch;
135
136	if (is_utf8)
137	{
138	uch = utf8n_to_uvuni (str, end - str, &clen, UTF8_CHECK_ONLY);
139	if (clen == (STRLEN)-1)
140	croak ("malformed or illegal unicode character in string [%.11s], cannot convert to JSON", str);
141	}
142	else
143	{
144	uch = ch;
145	clen = 1;
146	}
147
148	if (uch > 0x10FFFFUL)
149	croak ("out of range codepoint (0x%lx) encountered, unrepresentable in JSON", (unsigned long)uch);
150
151	if (uch < 0x80 \|\| enc->flags & F_ASCII)
152	{
153	if (uch > 0xFFFFUL)
154	{
155	need (enc, len += 11);
156	sprintf (enc->cur, "\\u%04x\\u%04x",
157	(int)((uch - 0x10000) / 0x400 + 0xD800),
158	(int)((uch - 0x10000) % 0x400 + 0xDC00));
159	enc->cur += 12;
160	}
161	else
162	{
163	static char hexdigit [16] = "0123456789abcdef";
164	need (enc, len += 5);
165	*enc->cur++ = '\\';
166	*enc->cur++ = 'u';
167	*enc->cur++ = hexdigit [ uch >> 12 ];
168	*enc->cur++ = hexdigit [(uch >> 8) & 15];
169	*enc->cur++ = hexdigit [(uch >> 4) & 15];
170	*enc->cur++ = hexdigit [(uch >> 0) & 15];
171	}
172
173	str += clen;
174	}
175	else if (is_utf8)
176	{
177	need (enc, len += clen);
178	do
179	{
180	enc->cur++ = str++;
181	}
182	while (--clen);
183	}
184	else
185	{
186	need (enc, len += UTF8_MAX_LEN - 1); // never more than 11 bytes needed
187	enc->cur = uvuni_to_utf8_flags (enc->cur, uch, 0);
188	++str;
189	}
190	}
191	}
192	}
193
194	--len;
195	}
196	}
197
198	static void
199	encode_indent (enc_t *enc)
200	{
201	if (enc->flags & F_INDENT)
202	{
203	int spaces = enc->indent * INDENT_STEP;
204
205	need (enc, spaces);
206	memset (enc->cur, ' ', spaces);
207	enc->cur += spaces;
208	}
209	}
210
211	static void
212	encode_space (enc_t *enc)
213	{
214	need (enc, 1);
215	encode_ch (enc, ' ');
216	}
217
218	static void
219	encode_nl (enc_t *enc)
220	{
221	if (enc->flags & F_INDENT)
222	{
223	need (enc, 1);
224	encode_ch (enc, '\n');
225	}
226	}
227
228	static void
229	encode_comma (enc_t *enc)
230	{
231	encode_ch (enc, ',');
232
233	if (enc->flags & F_INDENT)
234	encode_nl (enc);
235	else if (enc->flags & F_SPACE_AFTER)
236	encode_space (enc);
237	}
238
239	static void encode_sv (enc_t enc, SV sv);
240
241	static void
242	encode_av (enc_t enc, AV av)
243	{
244	int i, len = av_len (av);
245
246	encode_ch (enc, '['); encode_nl (enc);
247	++enc->indent;
248
249	for (i = 0; i <= len; ++i)
250	{
251	encode_indent (enc);
252	encode_sv (enc, *av_fetch (av, i, 0));
253
254	if (i < len)
255	encode_comma (enc);
256	}
257
258	encode_nl (enc);
259
260	--enc->indent;
261	encode_indent (enc); encode_ch (enc, ']');
262	}
263
264	static void
265	encode_he (enc_t enc, HE he)
266	{
267	encode_ch (enc, '"');
268
269	if (HeKLEN (he) == HEf_SVKEY)
270	{
271	SV *sv = HeSVKEY (he);
272	STRLEN len;
273	char *str;
274
275	SvGETMAGIC (sv);
276	str = SvPV (sv, len);
277
278	encode_str (enc, str, len, SvUTF8 (sv));
279	}
280	else
281	encode_str (enc, HeKEY (he), HeKLEN (he), HeKUTF8 (he));
282
283	encode_ch (enc, '"');
284
285	if (enc->flags & F_SPACE_BEFORE) encode_space (enc);
286	encode_ch (enc, ':');
287	if (enc->flags & F_SPACE_AFTER ) encode_space (enc);
288	encode_sv (enc, HeVAL (he));
289	}
290
291	// compare hash entries, used when all keys are bytestrings
292	static int
293	he_cmp_fast (const void a_, const void b_)
294	{
295	int cmp;
296
297	HE a = (HE **)a_;
298	HE b = (HE **)b_;
299
300	STRLEN la = HeKLEN (a);
301	STRLEN lb = HeKLEN (b);
302
303	if (!(cmp = memcmp (HeKEY (a), HeKEY (b), la < lb ? la : lb)))
304	cmp = la - lb;
305
306	return cmp;
307	}
308
309	// compare hash entries, used when some keys are sv's or utf-x
310	static int
311	he_cmp_slow (const void a, const void b)
312	{
313	return sv_cmp (HeSVKEY_force ((HE )a), HeSVKEY_force ((HE **)b));
314	}
315
316	static void
317	encode_hv (enc_t enc, HV hv)
318	{
319	int count, i;
320
321	encode_ch (enc, '{'); encode_nl (enc); ++enc->indent;
322
323	if ((count = hv_iterinit (hv)))
324	{
325	// for canonical output we have to sort by keys first
326	// actually, this is mostly due to the stupid so-called
327	// security workaround added somewhere in 5.8.x.
328	// that randomises hash orderings
329	if (enc->flags & F_CANONICAL)
330	{
331	HE he, hes [count]; // if your compiler dies here, you need to enable C99 mode
332	int fast = 1;
333
334	i = 0;
335	while ((he = hv_iternext (hv)))
336	{
337	hes [i++] = he;
338	if (HeKLEN (he) < 0 \|\| HeKUTF8 (he))
339	fast = 0;
340	}
341
342	assert (i == count);
343
344	if (fast)
345	qsort (hes, count, sizeof (HE *), he_cmp_fast);
346	else
347	{
348	// hack to forcefully disable "use bytes"
349	COP cop = *PL_curcop;
350	cop.op_private = 0;
351
352	ENTER;
353	SAVETMPS;
354
355	SAVEVPTR (PL_curcop);
356	PL_curcop = &cop;
357
358	qsort (hes, count, sizeof (HE *), he_cmp_slow);
359
360	FREETMPS;
361	LEAVE;
362	}
363
364	for (i = 0; i < count; ++i)
365	{
366	encode_indent (enc);
367	encode_he (enc, hes [i]);
368
369	if (i < count - 1)
370	encode_comma (enc);
371	}
372
373	encode_nl (enc);
374	}
375	else
376	{
377	SV *sv;
378	HE *he = hv_iternext (hv);
379
380	for (;;)
381	{
382	encode_indent (enc);
383	encode_he (enc, he);
384
385	if (!(he = hv_iternext (hv)))
386	break;
387
388	encode_comma (enc);
389	}
390
391	encode_nl (enc);
392	}
393	}
394
395	--enc->indent; encode_indent (enc); encode_ch (enc, '}');
396	}
397
398	static void
399	encode_sv (enc_t enc, SV sv)
400	{
401	SvGETMAGIC (sv);
402
403	if (SvPOKp (sv))
404	{
405	STRLEN len;
406	char *str = SvPV (sv, len);
407	encode_ch (enc, '"');
408	encode_str (enc, str, len, SvUTF8 (sv));
409	encode_ch (enc, '"');
410	}
411	else if (SvNOKp (sv))
412	{
413	need (enc, NV_DIG + 32);
414	Gconvert (SvNVX (sv), NV_DIG, 0, enc->cur);
415	enc->cur += strlen (enc->cur);
416	}
417	else if (SvIOKp (sv))
418	{
419	need (enc, 64);
420	enc->cur +=
421	SvIsUV(sv)
422	? snprintf (enc->cur, 64, "%"UVuf, (UV)SvUVX (sv))
423	: snprintf (enc->cur, 64, "%"IVdf, (IV)SvIVX (sv));
424	}
425	else if (SvROK (sv))
426	{
427	SV *rv = SvRV (sv);
428
429	if (enc->indent >= enc->max_depth)
430	croak ("data structure too deep (hit recursion limit)");
431
432	switch (SvTYPE (rv))
433	{
434	case SVt_PVAV: encode_av (enc, (AV *)rv); break;
435	case SVt_PVHV: encode_hv (enc, (HV *)rv); break;
436
437	default:
438	croak ("encountered %s, but JSON can only represent references to arrays or hashes",
439	SvPV_nolen (sv));
440	}
441	}
442	else if (!SvOK (sv))
443	encode_str (enc, "null", 4, 0);
444	else
445	croak ("encountered perl type (%s,0x%x) that JSON cannot handle, you might want to report this",
446	SvPV_nolen (sv), SvFLAGS (sv));
447	}
448
449	static SV *
450	encode_json (SV *scalar, UV flags)
451	{
452	if (!(flags & F_ALLOW_NONREF) && !SvROK (scalar))
453	croak ("hash- or arrayref expected (not a simple scalar, use allow_nonref to allow this)");
454
455	enc_t enc;
456	enc.flags = flags;
457	enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
458	enc.cur = SvPVX (enc.sv);
459	enc.end = SvEND (enc.sv);
460	enc.indent = 0;
461	enc.max_depth = 0x7fffffffUL;
462
463	SvPOK_only (enc.sv);
464	encode_sv (&enc, scalar);
465
466	if (!(flags & (F_ASCII \| F_UTF8)))
467	SvUTF8_on (enc.sv);
468
469	SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
470
471	if (enc.flags & F_SHRINK)
472	shrink (enc.sv);
473
474	return enc.sv;
475	}
476
477	/////////////////////////////////////////////////////////////////////////////
478	// decoder
479
480	// structure used for decoding JSON
481	typedef struct
482	{
483	char *cur; // current parser pointer
484	char *end; // end of input string
485	const char *err; // parse error, if != 0
486	UV flags; // F_*
487	} dec_t;
488
489	static void
490	decode_ws (dec_t *dec)
491	{
492	for (;;)
493	{
494	char ch = *dec->cur;
495
496	if (ch > 0x20
497	\|\| (ch != 0x20 && ch != 0x0a && ch != 0x0d && ch != 0x09))
498	break;
499
500	++dec->cur;
501	}
502	}
503
504	#define ERR(reason) SB dec->err = reason; goto fail; SE
505	#define EXPECT_CH(ch) SB \
506	if (*dec->cur != ch) \
507	ERR (# ch " expected"); \
508	++dec->cur; \
509	SE
510
511	static SV decode_sv (dec_t dec);
512
513	static signed char decode_hexdigit[256];
514
515	static UV
516	decode_4hex (dec_t *dec)
517	{
518	signed char d1, d2, d3, d4;
519	unsigned char cur = (unsigned char )dec->cur;
520
521	d1 = decode_hexdigit [cur [0]]; if (d1 < 0) ERR ("four hexadecimal digits expected");
522	d2 = decode_hexdigit [cur [1]]; if (d2 < 0) ERR ("four hexadecimal digits expected");
523	d3 = decode_hexdigit [cur [2]]; if (d3 < 0) ERR ("four hexadecimal digits expected");
524	d4 = decode_hexdigit [cur [3]]; if (d4 < 0) ERR ("four hexadecimal digits expected");
525
526	dec->cur += 4;
527
528	return ((UV)d1) << 12
529	\| ((UV)d2) << 8
530	\| ((UV)d3) << 4
531	\| ((UV)d4);
532
533	fail:
534	return (UV)-1;
535	}
536
537	static SV *
538	decode_str (dec_t *dec)
539	{
540	SV *sv = 0;
541	int utf8 = 0;
542
543	do
544	{
545	char buf [SHORT_STRING_LEN + UTF8_MAX_LEN];
546	char *cur = buf;
547
548	do
549	{
550	unsigned char ch = (unsigned char )dec->cur++;
551
552	if (ch == '"')
553	{
554	--dec->cur;
555	break;
556	}
557	else if (ch == '\\')
558	{
559	switch (*dec->cur)
560	{
561	case '\\':
562	case '/':
563	case '"': cur++ = dec->cur++; break;
564
565	case 'b': ++dec->cur; *cur++ = '\010'; break;
566	case 't': ++dec->cur; *cur++ = '\011'; break;
567	case 'n': ++dec->cur; *cur++ = '\012'; break;
568	case 'f': ++dec->cur; *cur++ = '\014'; break;
569	case 'r': ++dec->cur; *cur++ = '\015'; break;
570
571	case 'u':
572	{
573	UV lo, hi;
574	++dec->cur;
575
576	hi = decode_4hex (dec);
577	if (hi == (UV)-1)
578	goto fail;
579
580	// possibly a surrogate pair
581	if (hi >= 0xd800)
582	if (hi < 0xdc00)
583	{
584	if (dec->cur [0] != '\\' \|\| dec->cur [1] != 'u')
585	ERR ("missing low surrogate character in surrogate pair");
586
587	dec->cur += 2;
588
589	lo = decode_4hex (dec);
590	if (lo == (UV)-1)
591	goto fail;
592
593	if (lo < 0xdc00 \|\| lo >= 0xe000)
594	ERR ("surrogate pair expected");
595
596	hi = (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000;
597	}
598	else if (hi < 0xe000)
599	ERR ("missing high surrogate character in surrogate pair");
600
601	if (hi >= 0x80)
602	{
603	utf8 = 1;
604
605	cur = (char *)uvuni_to_utf8_flags (cur, hi, 0);
606	}
607	else
608	*cur++ = hi;
609	}
610	break;
611
612	default:
613	--dec->cur;
614	ERR ("illegal backslash escape sequence in string");
615	}
616	}
617	else if (ch >= 0x20 && ch <= 0x7f)
618	*cur++ = ch;
619	else if (ch >= 0x80)
620	{
621	--dec->cur;
622
623	STRLEN clen;
624	UV uch = utf8n_to_uvuni (dec->cur, dec->end - dec->cur, &clen, UTF8_CHECK_ONLY);
625	if (clen == (STRLEN)-1)
626	ERR ("malformed UTF-8 character in JSON string");
627
628	do
629	{
630	cur++ = dec->cur++;
631	}
632	while (--clen);
633
634	utf8 = 1;
635	}
636	else if (!ch)
637	ERR ("unexpected end of string while parsing json string");
638	else
639	ERR ("invalid character encountered");
640
641	}
642	while (cur < buf + SHORT_STRING_LEN);
643
644	STRLEN len = cur - buf;
645
646	if (sv)
647	{
648	SvGROW (sv, SvCUR (sv) + len + 1);
649	memcpy (SvPVX (sv) + SvCUR (sv), buf, len);
650	SvCUR_set (sv, SvCUR (sv) + len);
651	}
652	else
653	sv = newSVpvn (buf, len);
654	}
655	while (*dec->cur != '"');
656
657	++dec->cur;
658
659	if (sv)
660	{
661	SvPOK_only (sv);
662	*SvEND (sv) = 0;
663
664	if (utf8)
665	SvUTF8_on (sv);
666	}
667	else
668	sv = newSVpvn ("", 0);
669
670	return sv;
671
672	fail:
673	return 0;
674	}
675
676	static SV *
677	decode_num (dec_t *dec)
678	{
679	int is_nv = 0;
680	char *start = dec->cur;
681
682	// [minus]
683	if (*dec->cur == '-')
684	++dec->cur;
685
686	if (*dec->cur == '0')
687	{
688	++dec->cur;
689	if (dec->cur >= '0' && dec->cur <= '9')
690	ERR ("malformed number (leading zero must not be followed by another digit)");
691	}
692	else if (dec->cur < '0' \|\| dec->cur > '9')
693	ERR ("malformed number (no digits after initial minus)");
694	else
695	do
696	{
697	++dec->cur;
698	}
699	while (dec->cur >= '0' && dec->cur <= '9');
700
701	// [frac]
702	if (*dec->cur == '.')
703	{
704	++dec->cur;
705
706	if (dec->cur < '0' \|\| dec->cur > '9')
707	ERR ("malformed number (no digits after decimal point)");
708
709	do
710	{
711	++dec->cur;
712	}
713	while (dec->cur >= '0' && dec->cur <= '9');
714
715	is_nv = 1;
716	}
717
718	// [exp]
719	if (dec->cur == 'e' \|\| dec->cur == 'E')
720	{
721	++dec->cur;
722
723	if (dec->cur == '-' \|\| dec->cur == '+')
724	++dec->cur;
725
726	if (dec->cur < '0' \|\| dec->cur > '9')
727	ERR ("malformed number (no digits after exp sign)");
728
729	do
730	{
731	++dec->cur;
732	}
733	while (dec->cur >= '0' && dec->cur <= '9');
734
735	is_nv = 1;
736	}
737
738	if (!is_nv)
739	{
740	UV uv;
741	int numtype = grok_number (start, dec->cur - start, &uv);
742	if (numtype & IS_NUMBER_IN_UV)
743	if (numtype & IS_NUMBER_NEG)
744	{
745	if (uv < (UV)IV_MIN)
746	return newSViv (-(IV)uv);
747	}
748	else
749	return newSVuv (uv);
750	}
751
752	return newSVnv (Atof (start));
753
754	fail:
755	return 0;
756	}
757
758	static SV *
759	decode_av (dec_t *dec)
760	{
761	AV *av = newAV ();
762
763	decode_ws (dec);
764	if (*dec->cur == ']')
765	++dec->cur;
766	else
767	for (;;)
768	{
769	SV *value;
770
771	value = decode_sv (dec);
772	if (!value)
773	goto fail;
774
775	av_push (av, value);
776
777	decode_ws (dec);
778
779	if (*dec->cur == ']')
780	{
781	++dec->cur;
782	break;
783	}
784
785	if (*dec->cur != ',')
786	ERR (", or ] expected while parsing array");
787
788	++dec->cur;
789	}
790
791	return newRV_noinc ((SV *)av);
792
793	fail:
794	SvREFCNT_dec (av);
795	return 0;
796	}
797
798	static SV *
799	decode_hv (dec_t *dec)
800	{
801	HV *hv = newHV ();
802
803	decode_ws (dec);
804	if (*dec->cur == '}')
805	++dec->cur;
806	else
807	for (;;)
808	{
809	SV key, value;
810
811	decode_ws (dec); EXPECT_CH ('"');
812
813	key = decode_str (dec);
814	if (!key)
815	goto fail;
816
817	decode_ws (dec); EXPECT_CH (':');
818
819	value = decode_sv (dec);
820	if (!value)
821	{
822	SvREFCNT_dec (key);
823	goto fail;
824	}
825
826	//TODO: optimise
827	hv_store_ent (hv, key, value, 0);
828
829	decode_ws (dec);
830
831	if (*dec->cur == '}')
832	{
833	++dec->cur;
834	break;
835	}
836
837	if (*dec->cur != ',')
838	ERR (", or } expected while parsing object/hash");
839
840	++dec->cur;
841	}
842
843	return newRV_noinc ((SV *)hv);
844
845	fail:
846	SvREFCNT_dec (hv);
847	return 0;
848	}
849
850	static SV *
851	decode_sv (dec_t *dec)
852	{
853	decode_ws (dec);
854	switch (*dec->cur)
855	{
856	case '"': ++dec->cur; return decode_str (dec);
857	case '[': ++dec->cur; return decode_av (dec);
858	case '{': ++dec->cur; return decode_hv (dec);
859
860	case '-':
861	case '0': case '1': case '2': case '3': case '4':
862	case '5': case '6': case '7': case '8': case '9':
863	return decode_num (dec);
864
865	case 't':
866	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
867	{
868	dec->cur += 4;
869	return newSViv (1);
870	}
871	else
872	ERR ("'true' expected");
873
874	break;
875
876	case 'f':
877	if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
878	{
879	dec->cur += 5;
880	return newSViv (0);
881	}
882	else
883	ERR ("'false' expected");
884
885	break;
886
887	case 'n':
888	if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
889	{
890	dec->cur += 4;
891	return newSVsv (&PL_sv_undef);
892	}
893	else
894	ERR ("'null' expected");
895
896	break;
897
898	default:
899	ERR ("malformed json string, neither array, object, number, string or atom");
900	break;
901	}
902
903	fail:
904	return 0;
905	}
906
907	static SV *
908	decode_json (SV *string, UV flags)
909	{
910	SV *sv;
911
912	if (flags & F_UTF8)
913	sv_utf8_downgrade (string, 0);
914	else
915	sv_utf8_upgrade (string);
916
917	SvGROW (string, SvCUR (string) + 1); // should basically be a NOP
918
919	dec_t dec;
920	dec.flags = flags;
921	dec.cur = SvPVX (string);
922	dec.end = SvEND (string);
923	dec.err = 0;
924
925	sv = decode_sv (&dec);
926
927	if (!sv)
928	{
929	IV offset = dec.flags & F_UTF8
930	? dec.cur - SvPVX (string)
931	: utf8_distance (dec.cur, SvPVX (string));
932	SV *uni = sv_newmortal ();
933
934	// horrible hack to silence warning inside pv_uni_display
935	COP cop = *PL_curcop;
936	cop.cop_warnings = pWARN_NONE;
937	ENTER;
938	SAVEVPTR (PL_curcop);
939	PL_curcop = &cop;
940	pv_uni_display (uni, dec.cur, dec.end - dec.cur, 20, UNI_DISPLAY_QQ);
941	LEAVE;
942
943	croak ("%s, at character offset %d (%s)",
944	dec.err,
945	(int)offset,
946	dec.cur != dec.end ? SvPV_nolen (uni) : "(end of string)");
947	}
948
949	sv = sv_2mortal (sv);
950
951	if (!(dec.flags & F_ALLOW_NONREF) && !SvROK (sv))
952	croak ("JSON text must be an object or array (but found number, string, true, false or null, use allow_nonref to allow this)");
953
954	return sv;
955	}
956
957	/////////////////////////////////////////////////////////////////////////////
958	// XS interface functions
959
960	MODULE = JSON::XS PACKAGE = JSON::XS
961
BOOT:
{
	int i;

	/* every byte that is not a hexadecimal digit maps to -1 */
	memset (decode_hexdigit, 0xff, 256);

	for (i = 10; i--; )
	  decode_hexdigit ['0' + i] = i;

	/* only a-f and A-F are hex digits: six letters, 'g' must stay invalid */
	for (i = 6; i--; )
	  {
	    decode_hexdigit ['a' + i] = 10 + i;
	    decode_hexdigit ['A' + i] = 10 + i;
	  }

	json_stash = gv_stashpv ("JSON::XS", 1);
}
978
979	PROTOTYPES: DISABLE
980
SV *new (char *dummy)
	CODE:
	/* the object is a blessed reference to a UV holding the F_* flags */
	RETVAL = sv_bless (newRV_noinc (newSVuv (F_DEFAULT)), json_stash);
	OUTPUT:
	RETVAL
986
SV *ascii (SV *self, int enable = 1)
	ALIAS:
	ascii        = F_ASCII
	utf8         = F_UTF8
	indent       = F_INDENT
	canonical    = F_CANONICAL
	space_before = F_SPACE_BEFORE
	space_after  = F_SPACE_AFTER
	pretty       = F_PRETTY
	allow_nonref = F_ALLOW_NONREF
	shrink       = F_SHRINK
	CODE:
	{
	  /* ix holds the flag bit(s) of the alias this xsub was called as */
	  UV *uv = SvJSON (self);
	  if (enable)
	    *uv |=  ix;
	  else
	    *uv &= ~ix;

	  /* hand back a copy of the object reference */
	  RETVAL = newSVsv (self);
	}
	OUTPUT:
	RETVAL
1010
void encode (SV *self, SV *scalar)
	PPCODE:
	{
	  UV flags = *SvJSON (self);
	  /* encoding runs get-magic, which may execute perl code and so move
	     the perl stack: publish SP before the call, re-fetch it after */
	  PUTBACK; scalar = encode_json (scalar, flags); SPAGAIN;
	  XPUSHs (scalar);
	}
1014
void decode (SV *self, SV *jsonstr)
	PPCODE:
	{
	  UV flags = *SvJSON (self);
	  /* decoding may execute perl code (e.g. magic on the input string)
	     and so move the perl stack: publish SP before the call, re-fetch
	     it after */
	  PUTBACK; jsonstr = decode_json (jsonstr, flags); SPAGAIN;
	  XPUSHs (jsonstr);
	}
1018
1019	PROTOTYPES: ENABLE
1020
void to_json (SV *scalar)
	PPCODE:
	/* encoding runs get-magic, which may execute perl code and so move
	   the perl stack: publish SP before the call, re-fetch it after */
	PUTBACK; scalar = encode_json (scalar, F_UTF8); SPAGAIN;
	XPUSHs (scalar);
1024
void from_json (SV *jsonstr)
	PPCODE:
	/* decoding may execute perl code (e.g. magic on the input string) and
	   so move the perl stack: publish SP before the call, re-fetch it after */
	PUTBACK; jsonstr = decode_json (jsonstr, F_UTF8); SPAGAIN;
	XPUSHs (jsonstr);
1028