ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.15 by root, Tue Oct 29 18:37:31 2013 UTC vs.
Revision 1.18 by root, Wed Nov 20 01:09:46 2013 UTC

24 24
25// known tags 25// known tags
26enum cbor_tag 26enum cbor_tag
27{ 27{
28 // inofficial extensions (pending iana registration) 28 // inofficial extensions (pending iana registration)
29 CBOR_TAG_PERL_OBJECT = 256, 29 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30 CBOR_TAG_GENERIC_OBJECT = 257, 30 CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 CBOR_TAG_VALUE_SHARABLE = 26, // http://cbor.schmorp.de/value-sharing
32 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33 CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34 CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
31 36
32 // rfc7049 37 // rfc7049
33 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 38 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
34 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any 39 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
35 CBOR_TAG_POS_BIGNUM = 2, // byte string 40 CBOR_TAG_POS_BIGNUM = 2, // byte string
49 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 54 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
50 55
51 CBOR_TAG_MAGIC = 55799 // self-describe cbor 56 CBOR_TAG_MAGIC = 55799 // self-describe cbor
52}; 57};
53 58
54#define F_SHRINK 0x00000200UL 59#define F_SHRINK 0x00000001UL
55#define F_ALLOW_UNKNOWN 0x00002000UL 60#define F_ALLOW_UNKNOWN 0x00000002UL
61#define F_ALLOW_SHARING 0x00000004UL //TODO
62#define F_DEDUP_STRINGS 0x00000008UL //TODO
63#define F_DEDUP_KEYS 0x00000010UL //TODO
56 64
57#define INIT_SIZE 32 // initial scalar size to be allocated 65#define INIT_SIZE 32 // initial scalar size to be allocated
58 66
59#define SB do { 67#define SB do {
60#define SE } while (0) 68#define SE } while (0)
117#endif 125#endif
118 } 126 }
119} 127}
120 128
121///////////////////////////////////////////////////////////////////////////// 129/////////////////////////////////////////////////////////////////////////////
122// fp hell
123
124//TODO
125
126/////////////////////////////////////////////////////////////////////////////
127// encoder 130// encoder
128 131
129// structure used for encoding CBOR 132// structure used for encoding CBOR
130typedef struct 133typedef struct
131{ 134{
132 char *cur; // SvPVX (sv) + current output position 135 char *cur; // SvPVX (sv) + current output position
133 char *end; // SvEND (sv) 136 char *end; // SvEND (sv)
134 SV *sv; // result scalar 137 SV *sv; // result scalar
135 CBOR cbor; 138 CBOR cbor;
136 U32 depth; // recursion level 139 U32 depth; // recursion level
140 HV *stringref; // string => index, or 0
141 HV *sharable; // ptr => index, or 0
142 HV *sharable_idx;
137} enc_t; 143} enc_t;
138 144
139ecb_inline void 145ecb_inline void
140need (enc_t *enc, STRLEN len) 146need (enc_t *enc, STRLEN len)
141{ 147{
202 need (enc, len); 208 need (enc, len);
203 memcpy (enc->cur, str, len); 209 memcpy (enc->cur, str, len);
204 enc->cur += len; 210 enc->cur += len;
205} 211}
206 212
213ecb_inline void
214encode_tag (enc_t *enc, UV tag)
215{
216 encode_uint (enc, 0xc0, tag);
217}
218
219static int
220encode_sharable2 (enc_t *enc, SV *sv)
221{
222 if (!enc->sharable)
223 enc->sharable = (HV *)sv_2mortal ((SV *)newHV ());
224
225 SV **svp = hv_fetch (enc->sharable, &sv, sizeof (sv), 1);
226
227 if (SvOK (*svp))
228 {
229 encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
230 encode_uint (enc, 0x00, SvUV (*svp));
231
232 return 1;
233 }
234 else
235 {
236 sv_setuv (*svp, enc->sharable_idx++);
237 encode_tag (enc, CBOR_TAG_VALUE_SHARABLE);
238
239 return 0;
240 }
241}
242
243ecb_inline int
244encode_sharable (enc_t *enc, SV *sv)
245{
246 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
247 && ecb_expect_false (SvREFCNT (sv) > 1))
248 return encode_sharable2 (enc, sv);
249
250 return 0;
251}
252
207static void encode_sv (enc_t *enc, SV *sv); 253static void encode_sv (enc_t *enc, SV *sv);
208 254
209static void 255static void
210encode_av (enc_t *enc, AV *av) 256encode_av (enc_t *enc, AV *av)
211{ 257{
267{ 313{
268 svtype svt; 314 svtype svt;
269 315
270 SvGETMAGIC (sv); 316 SvGETMAGIC (sv);
271 svt = SvTYPE (sv); 317 svt = SvTYPE (sv);
318
319 if (encode_sharable (enc, sv))
320 return;
272 321
273 if (ecb_expect_false (SvOBJECT (sv))) 322 if (ecb_expect_false (SvOBJECT (sv)))
274 { 323 {
275 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash 324 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
276 ? types_boolean_stash 325 ? types_boolean_stash
336 385
337 // catch this surprisingly common error 386 // catch this surprisingly common error
338 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 387 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
339 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 388 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
340 389
341 encode_uint (enc, 0xc0, CBOR_TAG_PERL_OBJECT); 390 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
342 encode_uint (enc, 0x80, count + 1); 391 encode_uint (enc, 0x80, count + 1);
343 encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 392 encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
344 393
345 while (count) 394 while (count)
346 encode_sv (enc, SP[1 - count--]); 395 encode_sv (enc, SP[1 - count--]);
355 } 404 }
356 else if (svt == SVt_PVHV) 405 else if (svt == SVt_PVHV)
357 encode_hv (enc, (HV *)sv); 406 encode_hv (enc, (HV *)sv);
358 else if (svt == SVt_PVAV) 407 else if (svt == SVt_PVAV)
359 encode_av (enc, (AV *)sv); 408 encode_av (enc, (AV *)sv);
360 else if (svt < SVt_PVAV)
361 {
362 STRLEN len = 0;
363 char *pv = svt ? SvPV (sv, len) : 0;
364
365 if (len == 1 && *pv == '1')
366 encode_ch (enc, 0xe0 | 21);
367 else if (len == 1 && *pv == '0')
368 encode_ch (enc, 0xe0 | 20);
369 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
370 encode_ch (enc, 0xe0 | 23);
371 else
372 croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
373 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
374 }
375 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
376 encode_ch (enc, 0xe0 | 23);
377 else 409 else
378 croak ("encountered %s, but CBOR can only represent references to arrays or hashes", 410 {
379 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 411 encode_tag (enc, CBOR_TAG_INDIRECTION);
412 encode_sv (enc, sv);
413 }
380} 414}
381 415
382static void 416static void
383encode_nv (enc_t *enc, SV *sv) 417encode_nv (enc_t *enc, SV *sv)
384{ 418{
417 451
418static void 452static void
419encode_sv (enc_t *enc, SV *sv) 453encode_sv (enc_t *enc, SV *sv)
420{ 454{
421 SvGETMAGIC (sv); 455 SvGETMAGIC (sv);
456
457 if (encode_sharable (enc, sv))
458 return;
422 459
423 if (SvPOKp (sv)) 460 if (SvPOKp (sv))
424 { 461 {
425 STRLEN len; 462 STRLEN len;
426 char *str = SvPV (sv, len); 463 char *str = SvPV (sv, len);
449} 486}
450 487
451static SV * 488static SV *
452encode_cbor (SV *scalar, CBOR *cbor) 489encode_cbor (SV *scalar, CBOR *cbor)
453{ 490{
454 enc_t enc; 491 enc_t enc = { };
455 492
456 enc.cbor = *cbor; 493 enc.cbor = *cbor;
457 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 494 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
458 enc.cur = SvPVX (enc.sv); 495 enc.cur = SvPVX (enc.sv);
459 enc.end = SvEND (enc.sv); 496 enc.end = SvEND (enc.sv);
460 enc.depth = 0;
461 497
462 SvPOK_only (enc.sv); 498 SvPOK_only (enc.sv);
463 encode_sv (&enc, scalar); 499 encode_sv (&enc, scalar);
464 500
465 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 501 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
481 U8 *end; // end of input string 517 U8 *end; // end of input string
482 const char *err; // parse error, if != 0 518 const char *err; // parse error, if != 0
483 CBOR cbor; 519 CBOR cbor;
484 U32 depth; // recursion depth 520 U32 depth; // recursion depth
485 U32 maxdepth; // recursion depth limit 521 U32 maxdepth; // recursion depth limit
522 AV *sharable;
486} dec_t; 523} dec_t;
487 524
488#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 525#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
489 526
490#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 527#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
584 SvREFCNT_dec (av); 621 SvREFCNT_dec (av);
585 DEC_DEC_DEPTH; 622 DEC_DEC_DEPTH;
586 return &PL_sv_undef; 623 return &PL_sv_undef;
587} 624}
588 625
626static void
627decode_he (dec_t *dec, HV *hv)
628{
629 // for speed reasons, we specialcase single-string
630 // byte or utf-8 strings as keys.
631
632 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
633 {
634 I32 len = decode_uint (dec);
635 char *key = (char *)dec->cur;
636
637 dec->cur += len;
638
639 hv_store (hv, key, len, decode_sv (dec), 0);
640 }
641 else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
642 {
643 I32 len = decode_uint (dec);
644 char *key = (char *)dec->cur;
645
646 dec->cur += len;
647
648 hv_store (hv, key, -len, decode_sv (dec), 0);
649 }
650 else
651 {
652 SV *k = decode_sv (dec);
653 SV *v = decode_sv (dec);
654
655 hv_store_ent (hv, k, v, 0);
656 SvREFCNT_dec (k);
657 }
658}
659
589static SV * 660static SV *
590decode_hv (dec_t *dec) 661decode_hv (dec_t *dec)
591{ 662{
592 HV *hv = newHV (); 663 HV *hv = newHV ();
593 664
605 { 676 {
606 ++dec->cur; 677 ++dec->cur;
607 break; 678 break;
608 } 679 }
609 680
610 SV *k = decode_sv (dec); 681 decode_he (dec, hv);
611 SV *v = decode_sv (dec);
612
613 hv_store_ent (hv, k, v, 0);
614 SvREFCNT_dec (k);
615 } 682 }
616 } 683 }
617 else 684 else
618 { 685 {
619 int len = decode_uint (dec); 686 int pairs = decode_uint (dec);
620 687
621 while (len--) 688 while (pairs--)
622 { 689 decode_he (dec, hv);
623 SV *k = decode_sv (dec);
624 SV *v = decode_sv (dec);
625
626 hv_store_ent (hv, k, v, 0);
627 SvREFCNT_dec (k);
628 }
629 } 690 }
630 691
631 DEC_DEC_DEPTH; 692 DEC_DEC_DEPTH;
632 return newRV_noinc ((SV *)hv); 693 return newRV_noinc ((SV *)hv);
633 694
685decode_tagged (dec_t *dec) 746decode_tagged (dec_t *dec)
686{ 747{
687 UV tag = decode_uint (dec); 748 UV tag = decode_uint (dec);
688 SV *sv = decode_sv (dec); 749 SV *sv = decode_sv (dec);
689 750
690 if (tag == CBOR_TAG_MAGIC) 751 switch (tag)
752 {
753 case CBOR_TAG_MAGIC:
691 return sv; 754 return sv;
692 else if (tag == CBOR_TAG_PERL_OBJECT) 755
693 { 756 case CBOR_TAG_INDIRECTION:
757 return newRV_noinc (sv);
758
759 case CBOR_TAG_VALUE_SHARABLE:
760 if (ecb_expect_false (!dec->sharable))
761 dec->sharable = (AV *)sv_2mortal ((SV *)newAV ());
762
763 av_push (dec->sharable, SvREFCNT_inc_NN (sv));
764
765 return sv;
766
767 case CBOR_TAG_VALUE_SHAREDREF:
768 {
769 // TODO: should verify that the sv actually was a CBOR unsigned integer
770 UV idx = SvUV (sv);
771
772 if (!dec->sharable || idx > AvFILLp (dec->sharable))
773 ERR ("corrupted CBOR data (sharedref index out of bounds)");
774
775 SvREFCNT_dec (sv);
776
777 return SvREFCNT_inc_NN (AvARRAY (dec->sharable)[idx]);
778 }
779
780 case CBOR_TAG_PERL_OBJECT:
781 {
694 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV) 782 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
695 ERR ("corrupted CBOR data (non-array perl object)"); 783 ERR ("corrupted CBOR data (non-array perl object)");
696 784
697 AV *av = (AV *)SvRV (sv); 785 AV *av = (AV *)SvRV (sv);
698 int len = av_len (av) + 1; 786 int len = av_len (av) + 1;
699 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0); 787 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
700 788
701 if (!stash) 789 if (!stash)
702 ERR ("cannot decode perl-object (package does not exist)"); 790 ERR ("cannot decode perl-object (package does not exist)");
703 791
704 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); 792 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
705 793
706 if (!method) 794 if (!method)
707 ERR ("cannot decode perl-object (package does not have a THAW method)"); 795 ERR ("cannot decode perl-object (package does not have a THAW method)");
708 796
709 dSP; 797 dSP;
710 798
711 ENTER; SAVETMPS; PUSHMARK (SP); 799 ENTER; SAVETMPS; PUSHMARK (SP);
712 EXTEND (SP, len + 1); 800 EXTEND (SP, len + 1);
713 // we re-bless the reference to get overload and other niceties right 801 // we re-bless the reference to get overload and other niceties right
714 PUSHs (*av_fetch (av, 0, 1)); 802 PUSHs (*av_fetch (av, 0, 1));
715 PUSHs (sv_cbor); 803 PUSHs (sv_cbor);
716 804
717 int i; 805 int i;
718 806
719 for (i = 1; i < len; ++i) 807 for (i = 1; i < len; ++i)
720 PUSHs (*av_fetch (av, i, 1)); 808 PUSHs (*av_fetch (av, i, 1));
721 809
722 PUTBACK; 810 PUTBACK;
723 call_sv ((SV *)GvCV (method), G_SCALAR); 811 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
724 SPAGAIN; 812 SPAGAIN;
725 813
814 if (SvTRUE (ERRSV))
815 {
816 FREETMPS; LEAVE;
817 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
818 }
819
726 SvREFCNT_dec (sv); 820 SvREFCNT_dec (sv);
727 sv = SvREFCNT_inc (POPs); 821 sv = SvREFCNT_inc (POPs);
728 822
729 PUTBACK; 823 PUTBACK;
730 824
731 FREETMPS; LEAVE; 825 FREETMPS; LEAVE;
732 826
733 return sv; 827 return sv;
734 } 828 }
735 else 829
736 { 830 default:
831 {
737 AV *av = newAV (); 832 AV *av = newAV ();
738 av_push (av, newSVuv (tag)); 833 av_push (av, newSVuv (tag));
739 av_push (av, sv); 834 av_push (av, sv);
740 835
741 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 836 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
742 ? cbor_tagged_stash 837 ? cbor_tagged_stash
743 : gv_stashpv ("CBOR::XS::Tagged" , 1); 838 : gv_stashpv ("CBOR::XS::Tagged" , 1);
744 839
745 return sv_bless (newRV_noinc ((SV *)av), tagged_stash); 840 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
841 }
746 } 842 }
747 843
748fail: 844fail:
749 SvREFCNT_dec (sv); 845 SvREFCNT_dec (sv);
750 return &PL_sv_undef; 846 return &PL_sv_undef;
842} 938}
843 939
844static SV * 940static SV *
845decode_cbor (SV *string, CBOR *cbor, char **offset_return) 941decode_cbor (SV *string, CBOR *cbor, char **offset_return)
846{ 942{
847 dec_t dec; 943 dec_t dec = { };
848 SV *sv; 944 SV *sv;
945 STRLEN len;
946 char *data = SvPVbyte (string, len);
849 947
850 /* work around bugs in 5.10 where manipulating magic values
851 * makes perl ignore the magic in subsequent accesses.
852 * also make a copy of non-PV values, to get them into a clean
853 * state (SvPV should do that, but it's buggy, see below).
854 */
855 /*SvGETMAGIC (string);*/
856 if (SvMAGICAL (string) || !SvPOK (string))
857 string = sv_2mortal (newSVsv (string));
858
859 SvUPGRADE (string, SVt_PV);
860
861 /* work around a bug in perl 5.10, which causes SvCUR to fail an
862 * assertion with -DDEBUGGING, although SvCUR is documented to
863 * return the xpv_cur field which certainly exists after upgrading.
864 * according to nicholas clark, calling SvPOK fixes this.
865 * But it doesn't fix it, so try another workaround, call SvPV_nolen
866 * and hope for the best.
867 * Damnit, SvPV_nolen still trips over yet another assertion. This
868 * assertion business is seriously broken, try yet another workaround
869 * for the broken -DDEBUGGING.
870 */
871 {
872#ifdef DEBUGGING
873 STRLEN offset = SvOK (string) ? sv_len (string) : 0;
874#else
875 STRLEN offset = SvCUR (string);
876#endif
877
878 if (offset > cbor->max_size && cbor->max_size) 948 if (len > cbor->max_size && cbor->max_size)
879 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu", 949 croak ("attempted decode of CBOR text of %lu bytes size, but max_size is set to %lu",
880 (unsigned long)SvCUR (string), (unsigned long)cbor->max_size); 950 (unsigned long)len, (unsigned long)cbor->max_size);
881 }
882
883 sv_utf8_downgrade (string, 0);
884 951
885 dec.cbor = *cbor; 952 dec.cbor = *cbor;
886 dec.cur = (U8 *)SvPVX (string); 953 dec.cur = (U8 *)data;
887 dec.end = (U8 *)SvEND (string); 954 dec.end = (U8 *)data + len;
888 dec.err = 0;
889 dec.depth = 0;
890 955
891 sv = decode_sv (&dec); 956 sv = decode_sv (&dec);
892 957
893 if (offset_return) 958 if (offset_return)
894 *offset_return = dec.cur; 959 *offset_return = dec.cur;
898 dec.err = "garbage after CBOR object"; 963 dec.err = "garbage after CBOR object";
899 964
900 if (dec.err) 965 if (dec.err)
901 { 966 {
902 SvREFCNT_dec (sv); 967 SvREFCNT_dec (sv);
903 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)SvPVX (string), (int)(uint8_t)*dec.cur); 968 croak ("%s, at offset %d (octet 0x%02x)", dec.err, dec.cur - (U8 *)data, (int)(uint8_t)*dec.cur);
904 } 969 }
905 970
906 sv = sv_2mortal (sv); 971 sv = sv_2mortal (sv);
907 972
908 return sv; 973 return sv;
952 1017
953void shrink (CBOR *self, int enable = 1) 1018void shrink (CBOR *self, int enable = 1)
954 ALIAS: 1019 ALIAS:
955 shrink = F_SHRINK 1020 shrink = F_SHRINK
956 allow_unknown = F_ALLOW_UNKNOWN 1021 allow_unknown = F_ALLOW_UNKNOWN
1022 allow_sharing = F_ALLOW_SHARING
1023 dedup_keys = F_DEDUP_KEYS
1024 dedup_strings = F_DEDUP_STRINGS
957 PPCODE: 1025 PPCODE:
958{ 1026{
959 if (enable) 1027 if (enable)
960 self->flags |= ix; 1028 self->flags |= ix;
961 else 1029 else
966 1034
967void get_shrink (CBOR *self) 1035void get_shrink (CBOR *self)
968 ALIAS: 1036 ALIAS:
969 get_shrink = F_SHRINK 1037 get_shrink = F_SHRINK
970 get_allow_unknown = F_ALLOW_UNKNOWN 1038 get_allow_unknown = F_ALLOW_UNKNOWN
1039 get_allow_sharing = F_ALLOW_SHARING
1040 get_dedup_keys = F_DEDUP_KEYS
1041 get_dedup_strings = F_DEDUP_STRINGS
971 PPCODE: 1042 PPCODE:
972 XPUSHs (boolSV (self->flags & ix)); 1043 XPUSHs (boolSV (self->flags & ix));
973 1044
974void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1045void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
975 PPCODE: 1046 PPCODE:

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines