ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.17 by root, Tue Oct 29 22:04:52 2013 UTC vs.
Revision 1.18 by root, Wed Nov 20 01:09:46 2013 UTC

24 24
25// known tags 25// known tags
26enum cbor_tag 26enum cbor_tag
27{ 27{
28 // unofficial extensions (pending iana registration) 28 // unofficial extensions (pending iana registration)
29 CBOR_TAG_PERL_OBJECT = 256, 29 CBOR_TAG_PERL_OBJECT = 24, // http://cbor.schmorp.de/perl-object
30 CBOR_TAG_GENERIC_OBJECT = 257, 30 CBOR_TAG_GENERIC_OBJECT = 25, // http://cbor.schmorp.de/generic-object
31 CBOR_TAG_VALUE_SHARABLE = 26, // http://cbor.schmorp.de/value-sharing
32 CBOR_TAG_VALUE_SHAREDREF = 27, // http://cbor.schmorp.de/value-sharing
33 CBOR_TAG_STRINGREF_NAMESPACE = 65537, // http://cbor.schmorp.de/stringref
34 CBOR_TAG_STRINGREF = 28, // http://cbor.schmorp.de/stringref
35 CBOR_TAG_INDIRECTION = 22098, // http://cbor.schmorp.de/indirection
31 36
32 // rfc7049 37 // rfc7049
33 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8 38 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
34 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any 39 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
35 CBOR_TAG_POS_BIGNUM = 2, // byte string 40 CBOR_TAG_POS_BIGNUM = 2, // byte string
49 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 54 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
50 55
51 CBOR_TAG_MAGIC = 55799 // self-describe cbor 56 CBOR_TAG_MAGIC = 55799 // self-describe cbor
52}; 57};
53 58
54#define F_SHRINK 0x00000200UL 59#define F_SHRINK 0x00000001UL
55#define F_ALLOW_UNKNOWN 0x00002000UL 60#define F_ALLOW_UNKNOWN 0x00000002UL
61#define F_ALLOW_SHARING 0x00000004UL //TODO
62#define F_DEDUP_STRINGS 0x00000008UL //TODO
63#define F_DEDUP_KEYS 0x00000010UL //TODO
56 64
57#define INIT_SIZE 32 // initial scalar size to be allocated 65#define INIT_SIZE 32 // initial scalar size to be allocated
58 66
59#define SB do { 67#define SB do {
60#define SE } while (0) 68#define SE } while (0)
117#endif 125#endif
118 } 126 }
119} 127}
120 128
121///////////////////////////////////////////////////////////////////////////// 129/////////////////////////////////////////////////////////////////////////////
122// fp hell
123
124//TODO
125
126/////////////////////////////////////////////////////////////////////////////
127// encoder 130// encoder
128 131
129// structure used for encoding CBOR 132// structure used for encoding CBOR
130typedef struct 133typedef struct
131{ 134{
132 char *cur; // SvPVX (sv) + current output position 135 char *cur; // SvPVX (sv) + current output position
133 char *end; // SvEND (sv) 136 char *end; // SvEND (sv)
134 SV *sv; // result scalar 137 SV *sv; // result scalar
135 CBOR cbor; 138 CBOR cbor;
136 U32 depth; // recursion level 139 U32 depth; // recursion level
140 HV *stringref; // string => index, or 0
141 HV *sharable; // ptr => index, or 0
142 HV *sharable_idx;
137} enc_t; 143} enc_t;
138 144
139ecb_inline void 145ecb_inline void
140need (enc_t *enc, STRLEN len) 146need (enc_t *enc, STRLEN len)
141{ 147{
202 need (enc, len); 208 need (enc, len);
203 memcpy (enc->cur, str, len); 209 memcpy (enc->cur, str, len);
204 enc->cur += len; 210 enc->cur += len;
205} 211}
206 212
213ecb_inline void
214encode_tag (enc_t *enc, UV tag)
215{
216 encode_uint (enc, 0xc0, tag);
217}
218
219static int
220encode_sharable2 (enc_t *enc, SV *sv)
221{
222 if (!enc->sharable)
223 enc->sharable = (HV *)sv_2mortal ((SV *)newHV ());
224
225 SV **svp = hv_fetch (enc->sharable, &sv, sizeof (sv), 1);
226
227 if (SvOK (*svp))
228 {
229 encode_tag (enc, CBOR_TAG_VALUE_SHAREDREF);
230 encode_uint (enc, 0x00, SvUV (*svp));
231
232 return 1;
233 }
234 else
235 {
236 sv_setuv (*svp, enc->sharable_idx++);
237 encode_tag (enc, CBOR_TAG_VALUE_SHARABLE);
238
239 return 0;
240 }
241}
242
243ecb_inline int
244encode_sharable (enc_t *enc, SV *sv)
245{
246 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_SHARING)
247 && ecb_expect_false (SvREFCNT (sv) > 1))
248 return encode_sharable2 (enc, sv);
249
250 return 0;
251}
252
207static void encode_sv (enc_t *enc, SV *sv); 253static void encode_sv (enc_t *enc, SV *sv);
208 254
209static void 255static void
210encode_av (enc_t *enc, AV *av) 256encode_av (enc_t *enc, AV *av)
211{ 257{
267{ 313{
268 svtype svt; 314 svtype svt;
269 315
270 SvGETMAGIC (sv); 316 SvGETMAGIC (sv);
271 svt = SvTYPE (sv); 317 svt = SvTYPE (sv);
318
319 if (encode_sharable (enc, sv))
320 return;
272 321
273 if (ecb_expect_false (SvOBJECT (sv))) 322 if (ecb_expect_false (SvOBJECT (sv)))
274 { 323 {
275 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash 324 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
276 ? types_boolean_stash 325 ? types_boolean_stash
336 385
337 // catch this surprisingly common error 386 // catch this surprisingly common error
338 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv) 387 if (count == 1 && SvROK (TOPs) && SvRV (TOPs) == sv)
339 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash)); 388 croak ("%s::FREEZE(CBOR) method returned same object as was passed instead of a new one", HvNAME (stash));
340 389
341 encode_uint (enc, 0xc0, CBOR_TAG_PERL_OBJECT); 390 encode_tag (enc, CBOR_TAG_PERL_OBJECT);
342 encode_uint (enc, 0x80, count + 1); 391 encode_uint (enc, 0x80, count + 1);
343 encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); 392 encode_str (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash));
344 393
345 while (count) 394 while (count)
346 encode_sv (enc, SP[1 - count--]); 395 encode_sv (enc, SP[1 - count--]);
355 } 404 }
356 else if (svt == SVt_PVHV) 405 else if (svt == SVt_PVHV)
357 encode_hv (enc, (HV *)sv); 406 encode_hv (enc, (HV *)sv);
358 else if (svt == SVt_PVAV) 407 else if (svt == SVt_PVAV)
359 encode_av (enc, (AV *)sv); 408 encode_av (enc, (AV *)sv);
360 else if (svt < SVt_PVAV)
361 {
362 STRLEN len = 0;
363 char *pv = svt ? SvPV (sv, len) : 0;
364
365 if (len == 1 && *pv == '1')
366 encode_ch (enc, 0xe0 | 21);
367 else if (len == 1 && *pv == '0')
368 encode_ch (enc, 0xe0 | 20);
369 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
370 encode_ch (enc, 0xe0 | 23);
371 else
372 croak ("cannot encode reference to scalar '%s' unless the scalar is 0 or 1",
373 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
374 }
375 else if (enc->cbor.flags & F_ALLOW_UNKNOWN)
376 encode_ch (enc, 0xe0 | 23);
377 else 409 else
378 croak ("encountered %s, but CBOR can only represent references to arrays or hashes", 410 {
379 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 411 encode_tag (enc, CBOR_TAG_INDIRECTION);
412 encode_sv (enc, sv);
413 }
380} 414}
381 415
382static void 416static void
383encode_nv (enc_t *enc, SV *sv) 417encode_nv (enc_t *enc, SV *sv)
384{ 418{
417 451
418static void 452static void
419encode_sv (enc_t *enc, SV *sv) 453encode_sv (enc_t *enc, SV *sv)
420{ 454{
421 SvGETMAGIC (sv); 455 SvGETMAGIC (sv);
456
457 if (encode_sharable (enc, sv))
458 return;
422 459
423 if (SvPOKp (sv)) 460 if (SvPOKp (sv))
424 { 461 {
425 STRLEN len; 462 STRLEN len;
426 char *str = SvPV (sv, len); 463 char *str = SvPV (sv, len);
449} 486}
450 487
451static SV * 488static SV *
452encode_cbor (SV *scalar, CBOR *cbor) 489encode_cbor (SV *scalar, CBOR *cbor)
453{ 490{
454 enc_t enc; 491 enc_t enc = { };
455 492
456 enc.cbor = *cbor; 493 enc.cbor = *cbor;
457 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 494 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
458 enc.cur = SvPVX (enc.sv); 495 enc.cur = SvPVX (enc.sv);
459 enc.end = SvEND (enc.sv); 496 enc.end = SvEND (enc.sv);
460 enc.depth = 0;
461 497
462 SvPOK_only (enc.sv); 498 SvPOK_only (enc.sv);
463 encode_sv (&enc, scalar); 499 encode_sv (&enc, scalar);
464 500
465 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 501 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
481 U8 *end; // end of input string 517 U8 *end; // end of input string
482 const char *err; // parse error, if != 0 518 const char *err; // parse error, if != 0
483 CBOR cbor; 519 CBOR cbor;
484 U32 depth; // recursion depth 520 U32 depth; // recursion depth
485 U32 maxdepth; // recursion depth limit 521 U32 maxdepth; // recursion depth limit
522 AV *sharable;
486} dec_t; 523} dec_t;
487 524
488#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 525#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
489 526
490#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 527#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
709decode_tagged (dec_t *dec) 746decode_tagged (dec_t *dec)
710{ 747{
711 UV tag = decode_uint (dec); 748 UV tag = decode_uint (dec);
712 SV *sv = decode_sv (dec); 749 SV *sv = decode_sv (dec);
713 750
714 if (tag == CBOR_TAG_MAGIC) 751 switch (tag)
752 {
753 case CBOR_TAG_MAGIC:
715 return sv; 754 return sv;
716 else if (tag == CBOR_TAG_PERL_OBJECT)
717 {
718 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
719 ERR ("corrupted CBOR data (non-array perl object)");
720 755
721 AV *av = (AV *)SvRV (sv); 756 case CBOR_TAG_INDIRECTION:
722 int len = av_len (av) + 1; 757 return newRV_noinc (sv);
723 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
724 758
725 if (!stash) 759 case CBOR_TAG_VALUE_SHARABLE:
726 ERR ("cannot decode perl-object (package does not exist)"); 760 if (ecb_expect_false (!dec->sharable))
761 dec->sharable = (AV *)sv_2mortal ((SV *)newAV ());
727 762
728 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0); 763 av_push (dec->sharable, SvREFCNT_inc_NN (sv));
729
730 if (!method)
731 ERR ("cannot decode perl-object (package does not have a THAW method)");
732
733 dSP;
734 764
735 ENTER; SAVETMPS; PUSHMARK (SP); 765 return sv;
736 EXTEND (SP, len + 1);
737 // we re-bless the reference to get overload and other niceties right
738 PUSHs (*av_fetch (av, 0, 1));
739 PUSHs (sv_cbor);
740 766
741 int i; 767 case CBOR_TAG_VALUE_SHAREDREF:
742
743 for (i = 1; i < len; ++i)
744 PUSHs (*av_fetch (av, i, 1));
745
746 PUTBACK;
747 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
748 SPAGAIN;
749
750 if (SvTRUE (ERRSV))
751 { 768 {
769 // TODO: should verify that the sv actually was a CBOR unsigned integer
770 UV idx = SvUV (sv);
771
772 if (!dec->sharable || idx > AvFILLp (dec->sharable))
773 ERR ("corrupted CBOR data (sharedref index out of bounds)");
774
775 SvREFCNT_dec (sv);
776
777 return SvREFCNT_inc_NN (AvARRAY (dec->sharable)[idx]);
778 }
779
780 case CBOR_TAG_PERL_OBJECT:
781 {
782 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
783 ERR ("corrupted CBOR data (non-array perl object)");
784
785 AV *av = (AV *)SvRV (sv);
786 int len = av_len (av) + 1;
787 HV *stash = gv_stashsv (*av_fetch (av, 0, 1), 0);
788
789 if (!stash)
790 ERR ("cannot decode perl-object (package does not exist)");
791
792 GV *method = gv_fetchmethod_autoload (stash, "THAW", 0);
793
794 if (!method)
795 ERR ("cannot decode perl-object (package does not have a THAW method)");
796
797 dSP;
798
799 ENTER; SAVETMPS; PUSHMARK (SP);
800 EXTEND (SP, len + 1);
801 // we re-bless the reference to get overload and other niceties right
802 PUSHs (*av_fetch (av, 0, 1));
803 PUSHs (sv_cbor);
804
805 int i;
806
807 for (i = 1; i < len; ++i)
808 PUSHs (*av_fetch (av, i, 1));
809
810 PUTBACK;
811 call_sv ((SV *)GvCV (method), G_SCALAR | G_EVAL);
812 SPAGAIN;
813
814 if (SvTRUE (ERRSV))
815 {
816 FREETMPS; LEAVE;
817 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
818 }
819
820 SvREFCNT_dec (sv);
821 sv = SvREFCNT_inc (POPs);
822
823 PUTBACK;
824
752 FREETMPS; LEAVE; 825 FREETMPS; LEAVE;
753 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV)))); 826
827 return sv;
754 } 828 }
755 829
756 SvREFCNT_dec (sv); 830 default:
757 sv = SvREFCNT_inc (POPs); 831 {
758
759 PUTBACK;
760
761 FREETMPS; LEAVE;
762
763 return sv;
764 }
765 else
766 {
767 AV *av = newAV (); 832 AV *av = newAV ();
768 av_push (av, newSVuv (tag)); 833 av_push (av, newSVuv (tag));
769 av_push (av, sv); 834 av_push (av, sv);
770 835
771 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 836 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
772 ? cbor_tagged_stash 837 ? cbor_tagged_stash
773 : gv_stashpv ("CBOR::XS::Tagged" , 1); 838 : gv_stashpv ("CBOR::XS::Tagged" , 1);
774 839
775 return sv_bless (newRV_noinc ((SV *)av), tagged_stash); 840 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
841 }
776 } 842 }
777 843
778fail: 844fail:
779 SvREFCNT_dec (sv); 845 SvREFCNT_dec (sv);
780 return &PL_sv_undef; 846 return &PL_sv_undef;
872} 938}
873 939
874static SV * 940static SV *
875decode_cbor (SV *string, CBOR *cbor, char **offset_return) 941decode_cbor (SV *string, CBOR *cbor, char **offset_return)
876{ 942{
877 dec_t dec; 943 dec_t dec = { };
878 SV *sv; 944 SV *sv;
879 STRLEN len; 945 STRLEN len;
880 char *data = SvPVbyte (string, len); 946 char *data = SvPVbyte (string, len);
881 947
882 if (len > cbor->max_size && cbor->max_size) 948 if (len > cbor->max_size && cbor->max_size)
884 (unsigned long)len, (unsigned long)cbor->max_size); 950 (unsigned long)len, (unsigned long)cbor->max_size);
885 951
886 dec.cbor = *cbor; 952 dec.cbor = *cbor;
887 dec.cur = (U8 *)data; 953 dec.cur = (U8 *)data;
888 dec.end = (U8 *)data + len; 954 dec.end = (U8 *)data + len;
889 dec.err = 0;
890 dec.depth = 0;
891 955
892 sv = decode_sv (&dec); 956 sv = decode_sv (&dec);
893 957
894 if (offset_return) 958 if (offset_return)
895 *offset_return = dec.cur; 959 *offset_return = dec.cur;
953 1017
954void shrink (CBOR *self, int enable = 1) 1018void shrink (CBOR *self, int enable = 1)
955 ALIAS: 1019 ALIAS:
956 shrink = F_SHRINK 1020 shrink = F_SHRINK
957 allow_unknown = F_ALLOW_UNKNOWN 1021 allow_unknown = F_ALLOW_UNKNOWN
1022 allow_sharing = F_ALLOW_SHARING
1023 dedup_keys = F_DEDUP_KEYS
1024 dedup_strings = F_DEDUP_STRINGS
958 PPCODE: 1025 PPCODE:
959{ 1026{
960 if (enable) 1027 if (enable)
961 self->flags |= ix; 1028 self->flags |= ix;
962 else 1029 else
967 1034
968void get_shrink (CBOR *self) 1035void get_shrink (CBOR *self)
969 ALIAS: 1036 ALIAS:
970 get_shrink = F_SHRINK 1037 get_shrink = F_SHRINK
971 get_allow_unknown = F_ALLOW_UNKNOWN 1038 get_allow_unknown = F_ALLOW_UNKNOWN
1039 get_allow_sharing = F_ALLOW_SHARING
1040 get_dedup_keys = F_DEDUP_KEYS
1041 get_dedup_strings = F_DEDUP_STRINGS
972 PPCODE: 1042 PPCODE:
973 XPUSHs (boolSV (self->flags & ix)); 1043 XPUSHs (boolSV (self->flags & ix));
974 1044
975void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1045void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
976 PPCODE: 1046 PPCODE:

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines