ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.5 by root, Sat Oct 26 21:14:20 2013 UTC vs.
Revision 1.9 by root, Sun Oct 27 10:17:12 2013 UTC

8#include <stdio.h> 8#include <stdio.h>
9#include <limits.h> 9#include <limits.h>
10#include <float.h> 10#include <float.h>
11 11
12#include "ecb.h" 12#include "ecb.h"
13
14// known tags
15enum cbor_tag
16{
17 // unofficial extensions (pending IANA registration)
18 CBOR_TAG_PERL_OBJECT = 256,
19 CBOR_TAG_GENERIC_OBJECT = 257,
20
21 // rfc7049
22 CBOR_TAG_DATETIME = 0, // rfc4287, utf-8
23 CBOR_TAG_TIMESTAMP = 1, // unix timestamp, any
24 CBOR_TAG_POS_BIGNUM = 2, // byte string
25 CBOR_TAG_NEG_BIGNUM = 3, // byte string
26 CBOR_TAG_DECIMAL = 4, // decimal fraction, array
27 CBOR_TAG_BIGFLOAT = 5, // array
28
29 CBOR_TAG_CONV_B64U = 21, // base64url, any
30 CBOR_TAG_CONV_B64 = 22, // base64, any
31 CBOR_TAG_CONV_HEX = 23, // base16, any
32 CBOR_TAG_CBOR = 24, // embedded cbor, byte string
33
34 CBOR_TAG_URI = 32, // URI rfc3986, utf-8
35 CBOR_TAG_B64U = 33, // base64url rfc4648, utf-8
36 CBOR_TAG_B64 = 34, // base64 rfc4648, utf-8
37 CBOR_TAG_REGEX = 35, // regex pcre/ecma262, utf-8
38 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
39
40 CBOR_TAG_MAGIC = 55799 // self-describe cbor
41};
13 42
14#define F_SHRINK 0x00000200UL 43#define F_SHRINK 0x00000200UL
15#define F_ALLOW_UNKNOWN 0x00002000UL 44#define F_ALLOW_UNKNOWN 0x00002000UL
16 45
17#define INIT_SIZE 32 // initial scalar size to be allocated 46#define INIT_SIZE 32 // initial scalar size to be allocated
31#else 60#else
32# define CBOR_SLOW 0 61# define CBOR_SLOW 0
33# define CBOR_STASH cbor_stash 62# define CBOR_STASH cbor_stash
34#endif 63#endif
35 64
36static HV *cbor_stash, *cbor_boolean_stash; // CBOR::XS:: 65static HV *cbor_stash, *cbor_boolean_stash, *cbor_tagged_stash; // CBOR::XS::
37static SV *cbor_true, *cbor_false; 66static SV *cbor_true, *cbor_false;
38 67
39typedef struct { 68typedef struct {
40 U32 flags; 69 U32 flags;
41 U32 max_depth; 70 U32 max_depth;
42 STRLEN max_size; 71 STRLEN max_size;
43
44 SV *cb_object;
45 HV *cb_sk_object;
46} CBOR; 72} CBOR;
47 73
48ecb_inline void 74ecb_inline void
49cbor_init (CBOR *cbor) 75cbor_init (CBOR *cbor)
50{ 76{
233 SvGETMAGIC (sv); 259 SvGETMAGIC (sv);
234 svt = SvTYPE (sv); 260 svt = SvTYPE (sv);
235 261
236 if (ecb_expect_false (SvOBJECT (sv))) 262 if (ecb_expect_false (SvOBJECT (sv)))
237 { 263 {
238 HV *stash = !CBOR_SLOW || cbor_boolean_stash 264 HV *boolean_stash = !CBOR_SLOW || cbor_boolean_stash
239 ? cbor_boolean_stash 265 ? cbor_boolean_stash
240 : gv_stashpv ("CBOR::XS::Boolean", 1); 266 : gv_stashpv ("CBOR::XS::Boolean", 1);
267 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
268 ? cbor_tagged_stash
269 : gv_stashpv ("CBOR::XS::Tagged" , 1);
241 270
242 if (SvSTASH (sv) == stash) 271 if (SvSTASH (sv) == boolean_stash)
243 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20); 272 encode_ch (enc, SvIV (sv) ? 0xe0 | 21 : 0xe0 | 20);
273 else if (SvSTASH (sv) == tagged_stash)
274 {
275 if (svt != SVt_PVAV)
276 croak ("encountered CBOR::XS::Tagged object that isn't an array");
277
278 encode_uint (enc, 0xc0, SvUV (*av_fetch ((AV *)sv, 0, 1)));
279 encode_sv (enc, *av_fetch ((AV *)sv, 1, 1));
280 }
244 else 281 else
245 { 282 {
246#if 0 //TODO 283 // we re-bless the reference to get overload and other niceties right
247 if (enc->cbor.flags & F_CONV_BLESSED) 284 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
285
286 if (to_cbor)
248 { 287 {
249 // we re-bless the reference to get overload and other niceties right
250 GV *to_cbor = gv_fetchmethod_autoload (SvSTASH (sv), "TO_CBOR", 0);
251
252 if (to_cbor)
253 {
254 dSP; 288 dSP;
255 289
256 ENTER; SAVETMPS; PUSHMARK (SP); 290 ENTER; SAVETMPS; PUSHMARK (SP);
257 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv))); 291 XPUSHs (sv_bless (sv_2mortal (newRV_inc (sv)), SvSTASH (sv)));
258 292
259 // calling with G_SCALAR ensures that we always get a 1 return value 293 // calling with G_SCALAR ensures that we always get a 1 return value
260 PUTBACK; 294 PUTBACK;
261 call_sv ((SV *)GvCV (to_cbor), G_SCALAR); 295 call_sv ((SV *)GvCV (to_cbor), G_SCALAR);
262 SPAGAIN; 296 SPAGAIN;
263 297
264 // catch this surprisingly common error 298 // catch this surprisingly common error
265 if (SvROK (TOPs) && SvRV (TOPs) == sv) 299 if (SvROK (TOPs) && SvRV (TOPs) == sv)
266 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv))); 300 croak ("%s::TO_CBOR method returned same object as was passed instead of a new one", HvNAME (SvSTASH (sv)));
267 301
268 sv = POPs; 302 sv = POPs;
269 PUTBACK; 303 PUTBACK;
270 304
271 encode_sv (enc, sv); 305 encode_sv (enc, sv);
272 306
273 FREETMPS; LEAVE; 307 FREETMPS; LEAVE;
274 }
275 else if (enc->cbor.flags & F_ALLOW_BLESSED)
276 encode_str (enc, "null", 4, 0);
277 else
278 croak ("encountered object '%s', but neither allow_blessed enabled nor TO_CBOR method available on it",
279 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
280 } 308 }
281 else if (enc->cbor.flags & F_ALLOW_BLESSED)
282 encode_str (enc, "null", 4, 0);
283 else 309 else
284 croak ("encountered object '%s', but neither allow_blessed nor convert_blessed settings are enabled", 310 croak ("encountered object '%s', but no TO_CBOR method available on it",
285 SvPV_nolen (sv_2mortal (newRV_inc (sv)))); 311 SvPV_nolen (sv_2mortal (newRV_inc (sv))));
286#endif
287 } 312 }
288 } 313 }
289 else if (svt == SVt_PVHV) 314 else if (svt == SVt_PVHV)
290 encode_hv (enc, (HV *)sv); 315 encode_hv (enc, (HV *)sv);
291 else if (svt == SVt_PVAV) 316 else if (svt == SVt_PVAV)
560 } 585 }
561 586
562 DEC_DEC_DEPTH; 587 DEC_DEC_DEPTH;
563 return newRV_noinc ((SV *)hv); 588 return newRV_noinc ((SV *)hv);
564 589
565#if 0
566 SV *sv;
567 HV *hv = newHV ();
568
569 DEC_INC_DEPTH;
570 decode_ws (dec);
571
572 for (;;)
573 {
574 // heuristic: assume that
575 // a) decode_str + hv_store_ent are abysmally slow.
576 // b) most hash keys are short, simple ascii text.
577 // => try to "fast-match" such strings to avoid
578 // the overhead of decode_str + hv_store_ent.
579 {
580 SV *value;
581 char *p = dec->cur;
582 char *e = p + 24; // only try up to 24 bytes
583
584 for (;;)
585 {
586 // the >= 0x80 is false on most architectures
587 if (p == e || *p < 0x20 || *p >= 0x80 || *p == '\\')
588 {
589 // slow path, back up and use decode_str
590 SV *key = decode_str (dec);
591 if (!key)
592 goto fail;
593
594 decode_ws (dec); EXPECT_CH (':');
595
596 decode_ws (dec);
597 value = decode_sv (dec);
598 if (!value)
599 {
600 SvREFCNT_dec (key);
601 goto fail;
602 }
603
604 hv_store_ent (hv, key, value, 0);
605 SvREFCNT_dec (key);
606
607 break;
608 }
609 else if (*p == '"')
610 {
611 // fast path, got a simple key
612 char *key = dec->cur;
613 int len = p - key;
614 dec->cur = p + 1;
615
616 decode_ws (dec); EXPECT_CH (':');
617
618 decode_ws (dec);
619 value = decode_sv (dec);
620 if (!value)
621 goto fail;
622
623 hv_store (hv, key, len, value, 0);
624
625 break;
626 }
627
628 ++p;
629 }
630 }
631
632 decode_ws (dec);
633
634 if (*dec->cur == '}')
635 {
636 ++dec->cur;
637 break;
638 }
639
640 if (*dec->cur != ',')
641 ERR (", or } expected while parsing object/hash");
642
643 ++dec->cur;
644
645 decode_ws (dec);
646
647 if (*dec->cur == '}' && dec->cbor.flags & F_RELAXED)
648 {
649 ++dec->cur;
650 break;
651 }
652 }
653
654 DEC_DEC_DEPTH;
655 sv = newRV_noinc ((SV *)hv);
656
657 // check filter callbacks
658 if (dec->cbor.flags & F_HOOK)
659 {
660 if (dec->cbor.cb_sk_object && HvKEYS (hv) == 1)
661 {
662 HE *cb, *he;
663
664 hv_iterinit (hv);
665 he = hv_iternext (hv);
666 hv_iterinit (hv);
667
668 // the next line creates a mortal sv each time its called.
669 // might want to optimise this for common cases.
670 cb = hv_fetch_ent (dec->cbor.cb_sk_object, hv_iterkeysv (he), 0, 0);
671
672 if (cb)
673 {
674 dSP;
675 int count;
676
677 ENTER; SAVETMPS; PUSHMARK (SP);
678 XPUSHs (HeVAL (he));
679 sv_2mortal (sv);
680
681 PUTBACK; count = call_sv (HeVAL (cb), G_ARRAY); SPAGAIN;
682
683 if (count == 1)
684 {
685 sv = newSVsv (POPs);
686 FREETMPS; LEAVE;
687 return sv;
688 }
689
690 SvREFCNT_inc (sv);
691 FREETMPS; LEAVE;
692 }
693 }
694
695 if (dec->cbor.cb_object)
696 {
697 dSP;
698 int count;
699
700 ENTER; SAVETMPS; PUSHMARK (SP);
701 XPUSHs (sv_2mortal (sv));
702
703 PUTBACK; count = call_sv (dec->cbor.cb_object, G_ARRAY); SPAGAIN;
704
705 if (count == 1)
706 {
707 sv = newSVsv (POPs);
708 FREETMPS; LEAVE;
709 return sv;
710 }
711
712 SvREFCNT_inc (sv);
713 FREETMPS; LEAVE;
714 }
715 }
716
717 return sv;
718#endif
719
720fail: 590fail:
721 SvREFCNT_dec (hv); 591 SvREFCNT_dec (hv);
722 DEC_DEC_DEPTH; 592 DEC_DEC_DEPTH;
723 return &PL_sv_undef; 593 return &PL_sv_undef;
724} 594}
725 595
726static SV * 596static SV *
727decode_str (dec_t *dec, int utf8) 597decode_str (dec_t *dec, int utf8)
728{ 598{
729 SV *sv; 599 SV *sv = 0;
730 600
731 if ((*dec->cur & 31) == 31) 601 if ((*dec->cur & 31) == 31)
732 { 602 {
733 ++dec->cur; 603 ++dec->cur;
734 604
743 { 613 {
744 ++dec->cur; 614 ++dec->cur;
745 break; 615 break;
746 } 616 }
747 617
748 SV *sv2 = decode_sv (dec);
749 sv_catsv (sv, sv2); 618 sv_catsv (sv, decode_sv (dec));
750 } 619 }
751 } 620 }
752 else 621 else
753 { 622 {
754 STRLEN len = decode_uint (dec); 623 STRLEN len = decode_uint (dec);
762 SvUTF8_on (sv); 631 SvUTF8_on (sv);
763 632
764 return sv; 633 return sv;
765 634
766fail: 635fail:
636 SvREFCNT_dec (sv);
767 return &PL_sv_undef; 637 return &PL_sv_undef;
768} 638}
769 639
770static SV * 640static SV *
771decode_tagged (dec_t *dec) 641decode_tagged (dec_t *dec)
772{ 642{
773 UV tag = decode_uint (dec); 643 UV tag = decode_uint (dec);
774 SV *sv = decode_sv (dec); 644 SV *sv = decode_sv (dec);
775 645
776 if (tag == 55799) // 2.4.5 Self-Describe CBOR 646 if (tag == CBOR_TAG_MAGIC)
777 return sv; 647 return sv;
648
649 if (tag == CBOR_TAG_PERL_OBJECT)
650 {
651 if (!SvROK (sv) || SvTYPE (SvRV (sv)) != SVt_PVAV)
652 ERR ("corrupted CBOR data (non-array perl object)");
653
654 // TODO
655 }
778 656
779 AV *av = newAV (); 657 AV *av = newAV ();
780 av_push (av, newSVuv (tag)); 658 av_push (av, newSVuv (tag));
781 av_push (av, sv); 659 av_push (av, sv);
782 return newRV_noinc ((SV *)av); 660
661 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
662 ? cbor_tagged_stash
663 : gv_stashpv ("CBOR::XS::Tagged" , 1);
664
665 return sv_bless (newRV_noinc ((SV *)av), tagged_stash);
666
667fail:
668 SvREFCNT_dec (sv);
669 return &PL_sv_undef;
783} 670}
784 671
785static SV * 672static SV *
786decode_sv (dec_t *dec) 673decode_sv (dec_t *dec)
787{ 674{
788 WANT (1); 675 WANT (1);
789 676
790 switch (*dec->cur >> 5) 677 switch (*dec->cur >> 5)
791 { 678 {
792 case 0: // unsigned int 679 case 0: // unsigned int
793 //TODO: 64 bit values on 32 bit perls
794 return newSVuv (decode_uint (dec)); 680 return newSVuv (decode_uint (dec));
795 case 1: // negative int 681 case 1: // negative int
796 return newSViv (-1 - (IV)decode_uint (dec)); 682 return newSViv (-1 - (IV)decode_uint (dec));
797 case 2: // octet string 683 case 2: // octet string
798 return decode_str (dec, 0); 684 return decode_str (dec, 0);
862 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)"); 748 ERR ("corrupted CBOR data (reserved/unassigned major 7 value)");
863 } 749 }
864 750
865 break; 751 break;
866 } 752 }
867#if 0
868 switch (*dec->cur)
869 {
870 //case '"': ++dec->cur; return decode_str (dec);
871 case '[': ++dec->cur; return decode_av (dec);
872 case '{': ++dec->cur; return decode_hv (dec);
873
874 case '-':
875 case '0': case '1': case '2': case '3': case '4':
876 case '5': case '6': case '7': case '8': case '9':
877 //TODO return decode_num (dec);
878
879 case 't':
880 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "true", 4))
881 {
882 dec->cur += 4;
883#if CBOR_SLOW
884 cbor_true = get_bool ("CBOR::XS::true");
885#endif
886 return newSVsv (cbor_true);
887 }
888 else
889 ERR ("'true' expected");
890
891 break;
892
893 case 'f':
894 if (dec->end - dec->cur >= 5 && !memcmp (dec->cur, "false", 5))
895 {
896 dec->cur += 5;
897#if CBOR_SLOW
898 cbor_false = get_bool ("CBOR::XS::false");
899#endif
900 return newSVsv (cbor_false);
901 }
902 else
903 ERR ("'false' expected");
904
905 break;
906
907 case 'n':
908 if (dec->end - dec->cur >= 4 && !memcmp (dec->cur, "null", 4))
909 {
910 dec->cur += 4;
911 return newSVsv (&PL_sv_undef);
912 }
913 else
914 ERR ("'null' expected");
915
916 break;
917
918 default:
919 ERR ("malformed CBOR string, neither array, object, number, string or atom");
920 break;
921 }
922#endif
923 753
924fail: 754fail:
925 return &PL_sv_undef; 755 return &PL_sv_undef;
926} 756}
927 757
970 dec.cur = (U8 *)SvPVX (string); 800 dec.cur = (U8 *)SvPVX (string);
971 dec.end = (U8 *)SvEND (string); 801 dec.end = (U8 *)SvEND (string);
972 dec.err = 0; 802 dec.err = 0;
973 dec.depth = 0; 803 dec.depth = 0;
974 804
975 if (dec.cbor.cb_object || dec.cbor.cb_sk_object)
976 ;//TODO dec.cbor.flags |= F_HOOK;
977
978 sv = decode_sv (&dec); 805 sv = decode_sv (&dec);
979 806
980 if (offset_return) 807 if (offset_return)
981 *offset_return = dec.cur; 808 *offset_return = dec.cur;
982 809
1002 829
1003BOOT: 830BOOT:
1004{ 831{
1005 cbor_stash = gv_stashpv ("CBOR::XS" , 1); 832 cbor_stash = gv_stashpv ("CBOR::XS" , 1);
1006 cbor_boolean_stash = gv_stashpv ("CBOR::XS::Boolean", 1); 833 cbor_boolean_stash = gv_stashpv ("CBOR::XS::Boolean", 1);
834 cbor_tagged_stash = gv_stashpv ("CBOR::XS::Tagged" , 1);
1007 835
1008 cbor_true = get_bool ("CBOR::XS::true"); 836 cbor_true = get_bool ("CBOR::XS::true");
1009 cbor_false = get_bool ("CBOR::XS::false"); 837 cbor_false = get_bool ("CBOR::XS::false");
1010} 838}
1011 839
1013 841
1014void CLONE (...) 842void CLONE (...)
1015 CODE: 843 CODE:
1016 cbor_stash = 0; 844 cbor_stash = 0;
1017 cbor_boolean_stash = 0; 845 cbor_boolean_stash = 0;
846 cbor_tagged_stash = 0;
1018 847
1019void new (char *klass) 848void new (char *klass)
1020 PPCODE: 849 PPCODE:
1021{ 850{
1022 SV *pv = NEWSV (0, sizeof (CBOR)); 851 SV *pv = NEWSV (0, sizeof (CBOR));
1125 EXTEND (SP, 2); 954 EXTEND (SP, 2);
1126 PUSHs (sv); 955 PUSHs (sv);
1127 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 956 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1128} 957}
1129 958
959#if 0
960
1130void DESTROY (CBOR *self) 961void DESTROY (CBOR *self)
1131 CODE: 962 CODE:
1132 SvREFCNT_dec (self->cb_sk_object); 963 SvREFCNT_dec (self->cb_sk_object);
1133 SvREFCNT_dec (self->cb_object); 964 SvREFCNT_dec (self->cb_object);
1134 965
966#endif
967
1135PROTOTYPES: ENABLE 968PROTOTYPES: ENABLE
1136 969
1137void encode_cbor (SV *scalar) 970void encode_cbor (SV *scalar)
1138 PPCODE: 971 PPCODE:
1139{ 972{

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines