ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.5 by root, Sat Oct 26 21:14:20 2013 UTC vs.
Revision 1.10 by root, Sun Oct 27 20:40:25 2013 UTC

8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#include "ecb.h" 12#include "ecb.h"
13
14// known tags
15enum cbor_tag
16{
17 // unofficial extensions (pending IANA registration)
18 CBOR_TAG_PERL_OBJECT = 256,
19 CBOR_TAG_GENERIC_OBJECT = 257,
20
21 // rfc7049
22 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
23 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
24 CBOR_TAG_POS_BIGNUM = 2, // byte string
25 CBOR_TAG_NEG_BIGNUM = 3, // byte string
26 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
27 CBOR_TAG_BIGFLOAT = 5, // array
28
29 CBOR_TAG_CONV_B64U = 21, // base64url, any
30 CBOR_TAG_CONV_B64 = 22, // base64, any
31 CBOR_TAG_CONV_HEX = 23, // base16, any
32 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
33
34 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
35 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
36 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
37 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
38 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
39
40 CBOR_TAG_MAGIC = 55799 // self-describe cbor
41};
13 42
14#define F_SHRINK 0x00000200UL 43#define F_SHRINK 0x00000200UL
15#define F_ALLOW_UNKNOWN 0x00002000UL 44#define F_ALLOW_UNKNOWN 0x00002000UL
16 45
17#define INIT_SIZE 32 // initial scalar size to be allocated 46#define INIT_SIZE 32 // initial scalar size to be allocated
31#else 60#else
32# define CBOR_SLOW 0 61# define CBOR_SLOW 0
33# define CBOR_STASH cbor_stash 62# define CBOR_STASH cbor_stash
34#endif 63#endif
35 64
36static HV *cbor_stash, *cbor_boolean_stash; // CBOR::XS:: 65static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
37static SV *cbor_true, *cbor_false; 66static SV *types_true, *types_false, *types_error;
38 67
39typedef struct { 68typedef struct {
40 U32 flags; 69 U32 flags;
41 U32 max_depth; 70 U32 max_depth;
42 STRLEN max_size; 71 STRLEN max_size;
43
44 SV *cb_object;
45 HV *cb_sk_object;
46} CBOR; 72} CBOR;
47 73
48ecb_inline void 74ecb_inline void
49cbor_init (CBOR *cbor) 75cbor_init (CBOR *cbor)
50{ 76{
233 SvGETMAGIC (sv); 259 SvGETMAGIC (sv);
234 svt = SvTYPE (sv); 260 svt = SvTYPE (sv);
235 261
236 if (ecb_expect_false (SvOBJECT (sv))) 262 if (ecb_expect_false (SvOBJECT (sv)))
237 { 263 {
238 HV *stash = !CBOR_SLOW || cbor_boolean_stash 264 HV *boolean_stash = !CBOR_SLOW || types_boolean_stash
239 ? cbor_boolean_stash 265 ? types_boolean_stash
266 : gv_stashpv ("Types::Serialiser::Boolean", 1);
267 HV *error_stash = !CBOR_SLOW || types_error_stash
268 ? types_error_stash
269 : gv_stashpv ("Types::Serialiser::Error", 1);
270 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
271 ? cbor_tagged_stash
240 : gv_stashpv ("CBOR::XS::Boolean", 1); 272 : gv_stashpv ("CBOR::XS::Tagged" , 1);
241 273
242 if (SvSTASH (sv) == stash) 274 if (SvSTASH (sv) == boolean_stash)
243 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); 275 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
276 else if (SvSTASH (sv) == error_stash)
277 encode_ch (enc, 0xe0 | 23);
278 else if (SvSTASH (sv) == tagged_stash)
279 {
280 if (svt != SVt_PVAV)
281 croak ("encountered CBOR::XS::Tagged object that isn't an array");
282
283 encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
284 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
285 }
244 else 286 else
245 { 287 {
246#if 0 //TODO 288 // we re-bless the reference to get overload and other niceties right
247 if (enc->cbor.flags & F_CONV_BLESSED) 289 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
290
291 if (to_cbor)
248 { 292 {
249 // we re-bless the reference to get overload and other niceties right
250 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
251
252 if (to_cbor)
253 {
254 dSP; 293 dSP;
255 294
256 ENTER; SAVETMPS; PUSHMARK (SP); 295 ENTER; SAVETMPS; PUSHMARK (SP);
257 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); 296 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv)));
258 297
259 // calling with G_SCALAR ensures that we always get a 1 return value 298 // calling with G_SCALAR ensures that we always get a 1 return value
260 PUTBACK; 299 PUTBACK;
261 call_sv ((SV *)GvCV (to_cbor), G_SCALAR); 300 call_sv ((SV *)GvCV (to_cbor), G_SCALAR);
262 SPAGAIN; 301 SPAGAIN;
263 302
264 // catch this surprisingly common error 303 // catch this surprisingly common error
265 if (SvROK (TOPs) && SvRV (TOPs) == sv) 304 if (SvROK (TOPs) && SvRV (TOPs) == sv)
266 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); 305 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv)));
267 306
268 sv = POPs; 307 sv = POPs;
269 PUTBACK; 308 PUTBACK;
270 309
271 encode_sv (enc, sv); 310 encode_sv (enc, sv);
272 311
273 FREETMPS; LEAVE; 312 FREETMPS; LEAVE;
274 }
275 else if (enc->cbor.flags & F_ALLOW_BLESSED)
276 encode_str (enc, "null", 4, 0);
277 else
278 croak ("encountered object '%s', but neither allow_blessed enabled nor TO_CBOR method available on it",
279 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
280 } 313 }
281 else if (enc->cbor.flags & F_ALLOW_BLESSED)
282 encode_str (enc, "null", 4, 0);
283 else 314 else
284 croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled", 315 croak ("encountered object '%s', but no TO_CBOR method available on it",
285 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 316 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
286#endif
287 } 317 }
288 } 318 }
289 else if (svt == SVt_PVHV) 319 else if (svt == SVt_PVHV)
290 encode_hv (enc, (HV *)sv); 320 encode_hv (enc, (HV *)sv);
291 else if (svt == SVt_PVAV) 321 else if (svt == SVt_PVAV)
560 } 590 }
561 591
562 DEC_DEC_DEPTH; 592 DEC_DEC_DEPTH;
563 return newRV_noinc ((SV *)hv); 593 return newRV_noinc ((SV *)hv);
564 594
565#if 0
566 SV *sv;
567 HV *hv = newHV ();
568
569 DEC_INC_DEPTH;
570 decode_ws (dec);
571
572 for (;;)
573 {
574 // heuristic: assume that
575 // a) decode_str + hv_store_ent are abysmally slow.
576 // b) most hash keys are short, simple ascii text.
577 // => try to "fast-match" such strings to avoid
578 // the overhead of decode_str + hv_store_ent.
579 {
580 SV *value;
581 char *p = dec->cur;
582 char *e = p + 24; // only try up to 24 bytes
583
584 for (;;)
585 {
586 // the >= 0x80 is false on most architectures
587 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')
588 {
589 // slow path, back up and use decode_str
590 SV *key = decode_str (dec);
591 if (!key)
592 goto fail;
593
594 decode_ws (dec); EXPECT_CH (':');
595
596 decode_ws (dec);
597 value = decode_sv (dec);
598 if (!value)
599 {
600 SvREFCNT_dec (key);
601 goto fail;
602 }
603
604 hv_store_ent (hv, key, value, 0);
605 SvREFCNT_dec (key);
606
607 break;
608 }
609 else if (*p == '"')
610 {
611 // fast path, got a simple key
612 char *key = dec->cur;
613 int len = p - key;
614 dec->cur = p + 1;
615
616 decode_ws (dec); EXPECT_CH (':');
617
618 decode_ws (dec);
619 value = decode_sv (dec);
620 if (!value)
621 goto fail;
622
623 hv_store (hv, key, len, value, 0);
624
625 break;
626 }
627
628 ++p;
629 }
630 }
631
632 decode_ws (dec);
633
634 if (*dec->cur == '}')
635 {
636 ++dec->cur;
637 break;
638 }
639
640 if (*dec->cur != ',')
641 ERR (", or } expected while parsing object/hash");
642
643 ++dec->cur;
644
645 decode_ws (dec);
646
647 if (*dec->cur == '}' && dec->cbor.flags & F_RELAXED)
648 {
649 ++dec->cur;
650 break;
651 }
652 }
653
654 DEC_DEC_DEPTH;
655 sv = newRV_noinc ((SV *)hv);
656
657 // check filter callbacks
658 if (dec->cbor.flags & F_HOOK)
659 {
660 if (dec->cbor.cb_sk_object && HvKEYS (hv) == 1)
661 {
662 HE *cb, *he;
663
664 hv_iterinit (hv);
665 he = hv_iternext (hv);
666 hv_iterinit (hv);
667
668 // the next line creates a mortal sv each time it's called.
669 // might want to optimise this for common cases.
670 cb = hv_fetch_ent (dec->cbor.cb_sk_object, hv_iterkeysv (he), 0, 0);
671
672 if (cb)
673 {
674 dSP;
675 int count;
676
677 ENTER; SAVETMPS; PUSHMARK (SP);
678 XPUSHs (HeVAL (he));
679 sv_2mortal (sv);
680
681 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
682
683 if (count == 1)
684 {
685 sv = newSVsv (POPs);
686 FREETMPS; LEAVE;
687 return sv;
688 }
689
690 SvREFCNT_inc (sv);
691 FREETMPS; LEAVE;
692 }
693 }
694
695 if (dec->cbor.cb_object)
696 {
697 dSP;
698 int count;
699
700 ENTER; SAVETMPS; PUSHMARK (SP);
701 XPUSHs (sv_2mortal (sv));
702
703 PUTBACK; count = call_sv (dec->cbor.cb_object, G_ARRAY); SPAGAIN;
704
705 if (count == 1)
706 {
707 sv = newSVsv (POPs);
708 FREETMPS; LEAVE;
709 return sv;
710 }
711
712 SvREFCNT_inc (sv);
713 FREETMPS; LEAVE;
714 }
715 }
716
717 return sv;
718#endif
719
720fail: 595fail:
721 SvREFCNT_dec (hv); 596 SvREFCNT_dec (hv);
722 DEC_DEC_DEPTH; 597 DEC_DEC_DEPTH;
723 return &PL_sv_undef; 598 return &PL_sv_undef;
724} 599}
725 600
726static SV * 601static SV *
727decode_str (dec_t *dec, int utf8) 602decode_str (dec_t *dec, int utf8)
728{ 603{
729 SV *sv; 604 SV *sv = 0;
730 605
731 if ((*dec->cur & 31) == 31) 606 if ((*dec->cur & 31) == 31)
732 { 607 {
733 ++dec->cur; 608 ++dec->cur;
734 609
743 { 618 {
744 ++dec->cur; 619 ++dec->cur;
745 break; 620 break;
746 } 621 }
747 622
748 SV *sv2 = decode_sv (dec);
749 sv_catsv (sv, sv2); 623 sv_catsv (sv, decode_sv (dec));
750 } 624 }
751 } 625 }
752 else 626 else
753 { 627 {
754 STRLEN len = decode_uint (dec); 628 STRLEN len = decode_uint (dec);
762 SvUTF8_on (sv); 636 SvUTF8_on (sv);
763 637
764 return sv; 638 return sv;
765 639
766fail: 640fail:
641 SvREFCNT_dec (sv);
767 return &PL_sv_undef; 642 return &PL_sv_undef;
768} 643}
769 644
770static SV * 645static SV *
771decode_tagged (dec_t *dec) 646decode_tagged (dec_t *dec)
772{ 647{
773 UV tag = decode_uint (dec); 648 UV tag = decode_uint (dec);
774 SV *sv = decode_sv (dec); 649 SV *sv = decode_sv (dec);
775 650
776 if (tag == 55799) // 2.4.5 Self-Describe CBOR 651 if (tag == CBOR_TAG_MAGIC)
777 return sv; 652 return sv;
653
654 if (tag == CBOR_TAG_PERL_OBJECT)
655 {
656 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
657 ERR ("corrupted CBOR data (non-array perl object)");
658
659 // TODO
660 }
778 661
779 AV *av = newAV (); 662 AV *av = newAV ();
780 av_push (av, newSVuv (tag)); 663 av_push (av, newSVuv (tag));
781 av_push (av, sv); 664 av_push (av, sv);
782 return newRV_noinc ((SV *)av); 665
666 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
667 ? cbor_tagged_stash
668 : gv_stashpv ("CBOR::XS::Tagged" , 1);
669
670 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
671
672fail:
673 SvREFCNT_dec (sv);
674 return &PL_sv_undef;
783} 675}
784 676
785static SV * 677static SV *
786decode_sv (dec_t *dec) 678decode_sv (dec_t *dec)
787{ 679{
788 WANT (1); 680 WANT (1);
789 681
790 switch (*dec->cur >> 5) 682 switch (*dec->cur >> 5)
791 { 683 {
792 case 0: // unsigned int 684 case 0: // unsigned int
793 //TODO: 64 bit values on 32 bit perls
794 return newSVuv (decode_uint (dec)); 685 return newSVuv (decode_uint (dec));
795 case 1: // negative int 686 case 1: // negative int
796 return newSViv (-1 - (IV)decode_uint (dec)); 687 return newSViv (-1 - (IV)decode_uint (dec));
797 case 2: // octet string 688 case 2: // octet string
798 return decode_str (dec, 0); 689 return decode_str (dec, 0);
807 case 7: // misc 698 case 7: // misc
808 switch (*dec->cur++ & 31) 699 switch (*dec->cur++ & 31)
809 { 700 {
810 case 20: 701 case 20:
811#if CBOR_SLOW 702#if CBOR_SLOW
812 cbor_false = get_bool ("CBOR::XS::false"); 703 types_false = get_bool ("Types::Serialiser::false");
813#endif 704#endif
814 return newSVsv (cbor_false); 705 return newSVsv (types_false);
815 case 21: 706 case 21:
816#if CBOR_SLOW 707#if CBOR_SLOW
817 cbor_true = get_bool ("CBOR::XS::true"); 708 types_true = get_bool ("Types::Serialiser::true");
818#endif 709#endif
819 return newSVsv (cbor_true); 710 return newSVsv (types_true);
820 case 22: 711 case 22:
821 return newSVsv (&PL_sv_undef); 712 return newSVsv (&PL_sv_undef);
713 case 23:
714#if CBOR_SLOW
715 types_error = get_bool ("Types::Serialiser::error");
716#endif
717 return newSVsv (types_error);
822 718
823 case 25: 719 case 25:
824 { 720 {
825 WANT (2); 721 WANT (2);
826 722
862 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 758 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
863 } 759 }
864 760
865 break; 761 break;
866 } 762 }
867#if 0
868 switch (*dec->cur)
869 {
870 //case '"': ++dec->cur; return decode_str (dec);
871 case '[': ++dec->cur; return decode_av (dec);
872 case '{': ++dec->cur; return decode_hv (dec);
873
874 case '-':
875 case '0': case '1': case '2': case '3': case '4':
876 case '5': case '6': case '7': case '8': case '9':
877 //TODO return decode_num (dec);
878
879 case 't':
880 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
881 {
882 dec->cur += 4;
883#if CBOR_SLOW
884 cbor_true = get_bool ("CBOR::XS::true");
885#endif
886 return newSVsv (cbor_true);
887 }
888 else
889 ERR ("'true' expected");
890
891 break;
892
893 case 'f':
894 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
895 {
896 dec->cur += 5;
897#if CBOR_SLOW
898 cbor_false = get_bool ("CBOR::XS::false");
899#endif
900 return newSVsv (cbor_false);
901 }
902 else
903 ERR ("'false' expected");
904
905 break;
906
907 case 'n':
908 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
909 {
910 dec->cur += 4;
911 return newSVsv (&PL_sv_undef);
912 }
913 else
914 ERR ("'null' expected");
915
916 break;
917
918 default:
919 ERR ("malformed CBOR string, neither array, object, number, string or atom");
920 break;
921 }
922#endif
923 763
924fail: 764fail:
925 return &PL_sv_undef; 765 return &PL_sv_undef;
926} 766}
927 767
970 dec.cur = (U8 *)SvPVX (string); 810 dec.cur = (U8 *)SvPVX (string);
971 dec.end = (U8 *)SvEND (string); 811 dec.end = (U8 *)SvEND (string);
972 dec.err = 0; 812 dec.err = 0;
973 dec.depth = 0; 813 dec.depth = 0;
974 814
975 if (dec.cbor.cb_object || dec.cbor.cb_sk_object)
976 ;//TODO dec.cbor.flags |= F_HOOK;
977
978 sv = decode_sv (&dec); 815 sv = decode_sv (&dec);
979 816
980 if (offset_return) 817 if (offset_return)
981 *offset_return = dec.cur; 818 *offset_return = dec.cur;
982 819
1001MODULE = CBOR::XS PACKAGE = CBOR::XS 838MODULE = CBOR::XS PACKAGE = CBOR::XS
1002 839
1003BOOT: 840BOOT:
1004{ 841{
1005 cbor_stash = gv_stashpv ("CBOR::XS" , 1); 842 cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1006 cbor_boolean_stash = gv_stashpv ("CBOR::XS::Boolean", 1); 843 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1007 844
1008 cbor_true = get_bool ("CBOR::XS::true"); 845 types_boolean_stash = gv_stashpv ("Types::Serialiser::Boolean", 1);
1009 cbor_false = get_bool ("CBOR::XS::false"); 846 types_error_stash = gv_stashpv ("Types::Serialiser::Error" , 1);
847
848 types_true = get_bool ("Types::Serialiser::true" );
849 types_false = get_bool ("Types::Serialiser::false");
850 types_error = get_bool ("Types::Serialiser::error");
1010} 851}
1011 852
1012PROTOTYPES: DISABLE 853PROTOTYPES: DISABLE
1013 854
1014void CLONE (...) 855void CLONE (...)
1015 CODE: 856 CODE:
1016 cbor_stash = 0; 857 cbor_stash = 0;
858 cbor_tagged_stash = 0;
859 types_error_stash = 0;
1017 cbor_boolean_stash = 0; 860 types_boolean_stash = 0;
1018 861
1019void new (char *klass) 862void new (char *klass)
1020 PPCODE: 863 PPCODE:
1021{ 864{
1022 SV *pv = NEWSV (0, sizeof (CBOR)); 865 SV *pv = NEWSV (0, sizeof (CBOR));
1125 EXTEND (SP, 2); 968 EXTEND (SP, 2);
1126 PUSHs (sv); 969 PUSHs (sv);
1127 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 970 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1128} 971}
1129 972
973#if 0
974
1130void DESTROY (CBOR *self) 975void DESTROY (CBOR *self)
1131 CODE: 976 CODE:
1132 SvREFCNT_dec (self->cb_sk_object); 977 SvREFCNT_dec (self->cb_sk_object);
1133 SvREFCNT_dec (self->cb_object); 978 SvREFCNT_dec (self->cb_object);
1134 979
980#endif
981
1135PROTOTYPES: ENABLE 982PROTOTYPES: ENABLE
1136 983
1137void encode_cbor (SV *scalar) 984void encode_cbor (SV *scalar)
1138 PPCODE: 985 PPCODE:
1139{ 986{

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines