ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.5 by root, Sat Oct 26 21:14:20 2013 UTC vs.
Revision 1.8 by root, Sun Oct 27 10:12:01 2013 UTC

8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#include "ecb.h" 12#include "ecb.h"
13
14// known tags, rfc7049
15enum cbor_tag
16{
17 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
18 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
19 CBOR_TAG_POS_BIGNUM = 2, // byte string
20 CBOR_TAG_NEG_BIGNUM = 3, // byte string
21 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
22 CBOR_TAG_BIGFLOAT = 5, // array
23
24 CBOR_TAG_CONV_B64U = 21, // base64url, any
25 CBOR_TAG_CONV_B64 = 22, // base64, any
26 CBOR_TAG_CONV_HEX = 23, // base16, any
27 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
28
29 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
30 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
31 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
32 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
33 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
34
35 CBOR_TAG_MAGIC = 55799
36};
13 37
14#define F_SHRINK 0x00000200UL 38#define F_SHRINK 0x00000200UL
15#define F_ALLOW_UNKNOWN 0x00002000UL 39#define F_ALLOW_UNKNOWN 0x00002000UL
16 40
17#define INIT_SIZE 32 // initial scalar size to be allocated 41#define INIT_SIZE 32 // initial scalar size to be allocated
31#else 55#else
32# define CBOR_SLOW 0 56# define CBOR_SLOW 0
33# define CBOR_STASH cbor_stash 57# define CBOR_STASH cbor_stash
34#endif 58#endif
35 59
36static HV *cbor_stash, *cbor_boolean_stash; // CBOR::XS:: 60static HV *cbor_stash, *cbor_boolean_stash, *cbor_tagged_stash; // CBOR::XS::
37static SV *cbor_true, *cbor_false; 61static SV *cbor_true, *cbor_false;
38 62
39typedef struct { 63typedef struct {
40 U32 flags; 64 U32 flags;
41 U32 max_depth; 65 U32 max_depth;
42 STRLEN max_size; 66 STRLEN max_size;
43
44 SV *cb_object;
45 HV *cb_sk_object;
46} CBOR; 67} CBOR;
47 68
48ecb_inline void 69ecb_inline void
49cbor_init (CBOR *cbor) 70cbor_init (CBOR *cbor)
50{ 71{
233 SvGETMAGIC (sv); 254 SvGETMAGIC (sv);
234 svt = SvTYPE (sv); 255 svt = SvTYPE (sv);
235 256
236 if (ecb_expect_false (SvOBJECT (sv))) 257 if (ecb_expect_false (SvOBJECT (sv)))
237 { 258 {
238 HV *stash = !CBOR_SLOW || cbor_boolean_stash 259 HV *boolean_stash = !CBOR_SLOW || cbor_boolean_stash
239 ? cbor_boolean_stash 260 ? cbor_boolean_stash
240 : gv_stashpv ("CBOR::XS::Boolean", 1); 261 : gv_stashpv ("CBOR::XS::Boolean", 1);
262 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
263 ? cbor_tagged_stash
264 : gv_stashpv ("CBOR::XS::Tagged" , 1);
241 265
242 if (SvSTASH (sv) == stash) 266 if (SvSTASH (sv) == boolean_stash)
243 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); 267 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
268 else if (SvSTASH (sv) == tagged_stash)
269 {
270 if (svt != SVt_PVAV)
271 croak ("encountered CBOR::XS::Tagged object that isn't an array");
272
273 encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
274 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
275 }
244 else 276 else
245 { 277 {
246#if 0 //TODO 278 // we re-bless the reference to get overload and other niceties right
247 if (enc->cbor.flags & F_CONV_BLESSED) 279 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
280
281 if (to_cbor)
248 { 282 {
249 // we re-bless the reference to get overload and other niceties right
250 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
251
252 if (to_cbor)
253 {
254 dSP; 283 dSP;
255 284
256 ENTER; SAVETMPS; PUSHMARK (SP); 285 ENTER; SAVETMPS; PUSHMARK (SP);
257 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); 286 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv)));
258 287
259 // calling with G_SCALAR ensures that we always get a 1 return value 288 // calling with G_SCALAR ensures that we always get a 1 return value
260 PUTBACK; 289 PUTBACK;
261 call_sv ((SV *)GvCV (to_cbor), G_SCALAR); 290 call_sv ((SV *)GvCV (to_cbor), G_SCALAR);
262 SPAGAIN; 291 SPAGAIN;
263 292
264 // catch this surprisingly common error 293 // catch this surprisingly common error
265 if (SvROK (TOPs) && SvRV (TOPs) == sv) 294 if (SvROK (TOPs) && SvRV (TOPs) == sv)
266 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); 295 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv)));
267 296
268 sv = POPs; 297 sv = POPs;
269 PUTBACK; 298 PUTBACK;
270 299
271 encode_sv (enc, sv); 300 encode_sv (enc, sv);
272 301
273 FREETMPS; LEAVE; 302 FREETMPS; LEAVE;
274 }
275 else if (enc->cbor.flags & F_ALLOW_BLESSED)
276 encode_str (enc, "null", 4, 0);
277 else
278 croak ("encountered object '%s', but neither allow_blessed enabled nor TO_CBOR method available on it",
279 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
280 } 303 }
281 else if (enc->cbor.flags & F_ALLOW_BLESSED)
282 encode_str (enc, "null", 4, 0);
283 else 304 else
284 croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled", 305 croak ("encountered object '%s', but no TO_CBOR method available on it",
285 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 306 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
286#endif
287 } 307 }
288 } 308 }
289 else if (svt == SVt_PVHV) 309 else if (svt == SVt_PVHV)
290 encode_hv (enc, (HV *)sv); 310 encode_hv (enc, (HV *)sv);
291 else if (svt == SVt_PVAV) 311 else if (svt == SVt_PVAV)
560 } 580 }
561 581
562 DEC_DEC_DEPTH; 582 DEC_DEC_DEPTH;
563 return newRV_noinc ((SV *)hv); 583 return newRV_noinc ((SV *)hv);
564 584
565#if 0
566 SV *sv;
567 HV *hv = newHV ();
568
569 DEC_INC_DEPTH;
570 decode_ws (dec);
571
572 for (;;)
573 {
574 // heuristic: assume that
575 // a) decode_str + hv_store_ent are abysmally slow.
576 // b) most hash keys are short, simple ascii text.
577 // => try to "fast-match" such strings to avoid
578 // the overhead of decode_str + hv_store_ent.
579 {
580 SV *value;
581 char *p = dec->cur;
582 char *e = p + 24; // only try up to 24 bytes
583
584 for (;;)
585 {
586 // the >= 0x80 is false on most architectures
587 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')
588 {
589 // slow path, back up and use decode_str
590 SV *key = decode_str (dec);
591 if (!key)
592 goto fail;
593
594 decode_ws (dec); EXPECT_CH (':');
595
596 decode_ws (dec);
597 value = decode_sv (dec);
598 if (!value)
599 {
600 SvREFCNT_dec (key);
601 goto fail;
602 }
603
604 hv_store_ent (hv, key, value, 0);
605 SvREFCNT_dec (key);
606
607 break;
608 }
609 else if (*p == '"')
610 {
611 // fast path, got a simple key
612 char *key = dec->cur;
613 int len = p - key;
614 dec->cur = p + 1;
615
616 decode_ws (dec); EXPECT_CH (':');
617
618 decode_ws (dec);
619 value = decode_sv (dec);
620 if (!value)
621 goto fail;
622
623 hv_store (hv, key, len, value, 0);
624
625 break;
626 }
627
628 ++p;
629 }
630 }
631
632 decode_ws (dec);
633
634 if (*dec->cur == '}')
635 {
636 ++dec->cur;
637 break;
638 }
639
640 if (*dec->cur != ',')
641 ERR (", or } expected while parsing object/hash");
642
643 ++dec->cur;
644
645 decode_ws (dec);
646
647 if (*dec->cur == '}' && dec->cbor.flags & F_RELAXED)
648 {
649 ++dec->cur;
650 break;
651 }
652 }
653
654 DEC_DEC_DEPTH;
655 sv = newRV_noinc ((SV *)hv);
656
657 // check filter callbacks
658 if (dec->cbor.flags & F_HOOK)
659 {
660 if (dec->cbor.cb_sk_object && HvKEYS (hv) == 1)
661 {
662 HE *cb, *he;
663
664 hv_iterinit (hv);
665 he = hv_iternext (hv);
666 hv_iterinit (hv);
667
668 // the next line creates a mortal sv each time it's called.
669 // might want to optimise this for common cases.
670 cb = hv_fetch_ent (dec->cbor.cb_sk_object, hv_iterkeysv (he), 0, 0);
671
672 if (cb)
673 {
674 dSP;
675 int count;
676
677 ENTER; SAVETMPS; PUSHMARK (SP);
678 XPUSHs (HeVAL (he));
679 sv_2mortal (sv);
680
681 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
682
683 if (count == 1)
684 {
685 sv = newSVsv (POPs);
686 FREETMPS; LEAVE;
687 return sv;
688 }
689
690 SvREFCNT_inc (sv);
691 FREETMPS; LEAVE;
692 }
693 }
694
695 if (dec->cbor.cb_object)
696 {
697 dSP;
698 int count;
699
700 ENTER; SAVETMPS; PUSHMARK (SP);
701 XPUSHs (sv_2mortal (sv));
702
703 PUTBACK; count = call_sv (dec->cbor.cb_object, G_ARRAY); SPAGAIN;
704
705 if (count == 1)
706 {
707 sv = newSVsv (POPs);
708 FREETMPS; LEAVE;
709 return sv;
710 }
711
712 SvREFCNT_inc (sv);
713 FREETMPS; LEAVE;
714 }
715 }
716
717 return sv;
718#endif
719
720fail: 585fail:
721 SvREFCNT_dec (hv); 586 SvREFCNT_dec (hv);
722 DEC_DEC_DEPTH; 587 DEC_DEC_DEPTH;
723 return &PL_sv_undef; 588 return &PL_sv_undef;
724} 589}
725 590
726static SV * 591static SV *
727decode_str (dec_t *dec, int utf8) 592decode_str (dec_t *dec, int utf8)
728{ 593{
729 SV *sv; 594 SV *sv = 0;
730 595
731 if ((*dec->cur & 31) == 31) 596 if ((*dec->cur & 31) == 31)
732 { 597 {
733 ++dec->cur; 598 ++dec->cur;
734 599
743 { 608 {
744 ++dec->cur; 609 ++dec->cur;
745 break; 610 break;
746 } 611 }
747 612
748 SV *sv2 = decode_sv (dec);
749 sv_catsv (sv, sv2); 613 sv_catsv (sv, decode_sv (dec));
750 } 614 }
751 } 615 }
752 else 616 else
753 { 617 {
754 STRLEN len = decode_uint (dec); 618 STRLEN len = decode_uint (dec);
762 SvUTF8_on (sv); 626 SvUTF8_on (sv);
763 627
764 return sv; 628 return sv;
765 629
766fail: 630fail:
631 SvREFCNT_dec (sv);
767 return &PL_sv_undef; 632 return &PL_sv_undef;
768} 633}
769 634
770static SV * 635static SV *
771decode_tagged (dec_t *dec) 636decode_tagged (dec_t *dec)
772{ 637{
773 UV tag = decode_uint (dec); 638 UV tag = decode_uint (dec);
774 SV *sv = decode_sv (dec); 639 SV *sv = decode_sv (dec);
775 640
776 if (tag == 55799) // 2.4.5 Self-Describe CBOR 641 if (tag == CBOR_TAG_MAGIC) // 2.4.5 Self-Describe CBOR
777 return sv; 642 return sv;
778 643
779 AV *av = newAV (); 644 AV *av = newAV ();
780 av_push (av, newSVuv (tag)); 645 av_push (av, newSVuv (tag));
781 av_push (av, sv); 646 av_push (av, sv);
782 return newRV_noinc ((SV *)av); 647
648 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
649 ? cbor_tagged_stash
650 : gv_stashpv ("CBOR::XS::Tagged" , 1);
651
652 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
783} 653}
784 654
785static SV * 655static SV *
786decode_sv (dec_t *dec) 656decode_sv (dec_t *dec)
787{ 657{
788 WANT (1); 658 WANT (1);
789 659
790 switch (*dec->cur >> 5) 660 switch (*dec->cur >> 5)
791 { 661 {
792 case 0: // unsigned int 662 case 0: // unsigned int
793 //TODO: 64 bit values on 32 bit perls
794 return newSVuv (decode_uint (dec)); 663 return newSVuv (decode_uint (dec));
795 case 1: // negative int 664 case 1: // negative int
796 return newSViv (-1 - (IV)decode_uint (dec)); 665 return newSViv (-1 - (IV)decode_uint (dec));
797 case 2: // octet string 666 case 2: // octet string
798 return decode_str (dec, 0); 667 return decode_str (dec, 0);
862 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 731 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
863 } 732 }
864 733
865 break; 734 break;
866 } 735 }
867#if 0
868 switch (*dec->cur)
869 {
870 //case '"': ++dec->cur; return decode_str (dec);
871 case '[': ++dec->cur; return decode_av (dec);
872 case '{': ++dec->cur; return decode_hv (dec);
873
874 case '-':
875 case '0': case '1': case '2': case '3': case '4':
876 case '5': case '6': case '7': case '8': case '9':
877 //TODO return decode_num (dec);
878
879 case 't':
880 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
881 {
882 dec->cur += 4;
883#if CBOR_SLOW
884 cbor_true = get_bool ("CBOR::XS::true");
885#endif
886 return newSVsv (cbor_true);
887 }
888 else
889 ERR ("'true' expected");
890
891 break;
892
893 case 'f':
894 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
895 {
896 dec->cur += 5;
897#if CBOR_SLOW
898 cbor_false = get_bool ("CBOR::XS::false");
899#endif
900 return newSVsv (cbor_false);
901 }
902 else
903 ERR ("'false' expected");
904
905 break;
906
907 case 'n':
908 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
909 {
910 dec->cur += 4;
911 return newSVsv (&PL_sv_undef);
912 }
913 else
914 ERR ("'null' expected");
915
916 break;
917
918 default:
919 ERR ("malformed CBOR string, neither array, object, number, string or atom");
920 break;
921 }
922#endif
923 736
924fail: 737fail:
925 return &PL_sv_undef; 738 return &PL_sv_undef;
926} 739}
927 740
970 dec.cur = (U8 *)SvPVX (string); 783 dec.cur = (U8 *)SvPVX (string);
971 dec.end = (U8 *)SvEND (string); 784 dec.end = (U8 *)SvEND (string);
972 dec.err = 0; 785 dec.err = 0;
973 dec.depth = 0; 786 dec.depth = 0;
974 787
975 if (dec.cbor.cb_object || dec.cbor.cb_sk_object)
976 ;//TODO dec.cbor.flags |= F_HOOK;
977
978 sv = decode_sv (&dec); 788 sv = decode_sv (&dec);
979 789
980 if (offset_return) 790 if (offset_return)
981 *offset_return = dec.cur; 791 *offset_return = dec.cur;
982 792
1002 812
1003BOOT: 813BOOT:
1004{ 814{
1005 cbor_stash = gv_stashpv ("CBOR::XS" , 1); 815 cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1006 cbor_boolean_stash = gv_stashpv ("CBOR::XS::Boolean", 1); 816 cbor_boolean_stash = gv_stashpv ("CBOR::XS::Boolean", 1);
817 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1007 818
1008 cbor_true = get_bool ("CBOR::XS::true"); 819 cbor_true = get_bool ("CBOR::XS::true");
1009 cbor_false = get_bool ("CBOR::XS::false"); 820 cbor_false = get_bool ("CBOR::XS::false");
1010} 821}
1011 822
1013 824
1014void CLONE (...) 825void CLONE (...)
1015 CODE: 826 CODE:
1016 cbor_stash = 0; 827 cbor_stash = 0;
1017 cbor_boolean_stash = 0; 828 cbor_boolean_stash = 0;
829 cbor_tagged_stash = 0;
1018 830
1019void new (char *klass) 831void new (char *klass)
1020 PPCODE: 832 PPCODE:
1021{ 833{
1022 SV *pv = NEWSV (0, sizeof (CBOR)); 834 SV *pv = NEWSV (0, sizeof (CBOR));
1125 EXTEND (SP, 2); 937 EXTEND (SP, 2);
1126 PUSHs (sv); 938 PUSHs (sv);
1127 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 939 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1128} 940}
1129 941
942#if 0
943
1130void DESTROY (CBOR *self) 944void DESTROY (CBOR *self)
1131 CODE: 945 CODE:
1132 SvREFCNT_dec (self->cb_sk_object); 946 SvREFCNT_dec (self->cb_sk_object);
1133 SvREFCNT_dec (self->cb_object); 947 SvREFCNT_dec (self->cb_object);
1134 948
949#endif
950
1135PROTOTYPES: ENABLE 951PROTOTYPES: ENABLE
1136 952
1137void encode_cbor (SV *scalar) 953void encode_cbor (SV *scalar)
1138 PPCODE: 954 PPCODE:
1139{ 955{

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines