ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.19 by root, Wed Nov 20 02:03:09 2013 UTC vs.
Revision 1.28 by root, Sat Nov 23 18:30:59 2013 UTC

19# define HvNAMELEN(hv) HvNAMELEN_get (hv) 19# define HvNAMELEN(hv) HvNAMELEN_get (hv)
20#endif 20#endif
21#ifndef HvNAMEUTF8 21#ifndef HvNAMEUTF8
22# define HvNAMEUTF8(hv) 0 22# define HvNAMEUTF8(hv) 0
23#endif 23#endif
24#ifndef SvREFCNT_dec_NN
25# define SvREFCNT_dec_NN(sv) SvREFCNT_dec (sv)
26#endif
24 27
25// known tags 28// known tags
26enum cbor_tag 29enum cbor_tag
27{ 30{
28 // inofficial extensions (pending iana registration) 31 // inofficial extensions (pending iana registration)
54 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 57 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 58
56 CBOR_TAG_MAGIC = 55799 // self-describe cbor 59 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57}; 60};
58 61
59#define F_SHRINK 0x00000001UL 62#define F_SHRINK 0x00000001UL
60#define F_ALLOW_UNKNOWN 0x00000002UL 63#define F_ALLOW_UNKNOWN 0x00000002UL
61#define F_ALLOW_SHARING 0x00000004UL //TODO 64#define F_ALLOW_SHARING 0x00000004UL //TODO
62#define F_DEDUP_STRINGS 0x00000008UL //TODO 65#define F_ALLOW_STRINGREF 0x00000008UL //TODO
63#define F_DEDUP_KEYS 0x00000010UL //TODO
64 66
65#define INIT_SIZE 32 // initial scalar size to be allocated 67#define INIT_SIZE 32 // initial scalar size to be allocated
66 68
67#define SB do { 69#define SB do {
68#define SE } while (0) 70#define SE } while (0)
80# define CBOR_SLOW 0 82# define CBOR_SLOW 0
81# define CBOR_STASH cbor_stash 83# define CBOR_STASH cbor_stash
82#endif 84#endif
83 85
84static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS:: 86static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
85static SV *types_true, *types_false, *types_error, *sv_cbor; 87static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
86 88
87typedef struct { 89typedef struct {
88 U32 flags; 90 U32 flags;
89 U32 max_depth; 91 U32 max_depth;
90 STRLEN max_size; 92 STRLEN max_size;
93 SV *filter;
91} CBOR; 94} CBOR;
92 95
93ecb_inline void 96ecb_inline void
94cbor_init (CBOR *cbor) 97cbor_init (CBOR *cbor)
95{ 98{
96 Zero (cbor, 1, CBOR); 99 Zero (cbor, 1, CBOR);
97 cbor->max_depth = 512; 100 cbor->max_depth = 512;
101}
102
103ecb_inline void
104cbor_free (CBOR *cbor)
105{
106 SvREFCNT_dec (cbor->filter);
98} 107}
99 108
100///////////////////////////////////////////////////////////////////////////// 109/////////////////////////////////////////////////////////////////////////////
101// utility functions 110// utility functions
102 111
124 SvPV_renew (sv, SvCUR (sv) + 1); 133 SvPV_renew (sv, SvCUR (sv) + 1);
125#endif 134#endif
126 } 135 }
127} 136}
128 137
138// minimum length of a string to be registered for stringref
139ecb_inline int
140minimum_string_length (UV idx)
141{
142 return idx > 23
143 ? idx > 0xffU
144 ? idx > 0xffffU
145 ? idx > 0xffffffffU
146 ? 7
147 : 6
148 : 5
149 : 4
150 : 3;
151}
152
129///////////////////////////////////////////////////////////////////////////// 153/////////////////////////////////////////////////////////////////////////////
130// encoder 154// encoder
131 155
132// structure used for encoding CBOR 156// structure used for encoding CBOR
133typedef struct 157typedef struct
135 char *cur; // SvPVX (sv) + current output position 159 char *cur; // SvPVX (sv) + current output position
136 char *end; // SvEND (sv) 160 char *end; // SvEND (sv)
137 SV *sv; // result scalar 161 SV *sv; // result scalar
138 CBOR cbor; 162 CBOR cbor;
139 U32 depth; // recursion level 163 U32 depth; // recursion level
140 HV *stringref; // string => index, or 0 164 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
165 UV stringref_idx;
141 HV *shareable; // ptr => index, or 0 166 HV *shareable; // ptr => index, or 0
142 UV shareable_idx; 167 UV shareable_idx;
143} enc_t; 168} enc_t;
144 169
145ecb_inline void 170ecb_inline void
199 *enc->cur++ = len >> 8; 224 *enc->cur++ = len >> 8;
200 *enc->cur++ = len; 225 *enc->cur++ = len;
201 } 226 }
202} 227}
203 228
229ecb_inline void
230encode_tag (enc_t *enc, UV tag)
231{
232 encode_uint (enc, 0xc0, tag);
233}
234
204static void 235static void
205encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 236encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
206{ 237{
238 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF))
239 {
240 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
241
242 if (SvOK (*svp))
243 {
244 // already registered, use stringref
245 encode_tag (enc, CBOR_TAG_STRINGREF);
246 encode_uint (enc, 0x00, SvUV (*svp));
247 return;
248 }
249 else if (len >= minimum_string_length (enc->stringref_idx))
250 {
251 // register only
252 sv_setuv (*svp, enc->stringref_idx);
253 ++enc->stringref_idx;
254 }
255 }
256
207 encode_uint (enc, utf8 ? 0x60 : 0x40, len); 257 encode_uint (enc, utf8 ? 0x60 : 0x40, len);
208 need (enc, len); 258 need (enc, len);
209 memcpy (enc->cur, str, len); 259 memcpy (enc->cur, str, len);
210 enc->cur += len; 260 enc->cur += len;
211}
212
213ecb_inline void
214encode_tag (enc_t *enc, UV tag)
215{
216 encode_uint (enc, 0xc0, tag);
217} 261}
218 262
219static void encode_sv (enc_t *enc, SV *sv); 263static void encode_sv (enc_t *enc, SV *sv);
220 264
221static void 265static void
475 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 519 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
476 enc.cur = SvPVX (enc.sv); 520 enc.cur = SvPVX (enc.sv);
477 enc.end = SvEND (enc.sv); 521 enc.end = SvEND (enc.sv);
478 522
479 SvPOK_only (enc.sv); 523 SvPOK_only (enc.sv);
524
525 if (cbor->flags & F_ALLOW_STRINGREF)
526 {
527 encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
528 enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
529 enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
530 }
531
480 encode_sv (&enc, scalar); 532 encode_sv (&enc, scalar);
481 533
482 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 534 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
483 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings 535 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
484 536
499 const char *err; // parse error, if != 0 551 const char *err; // parse error, if != 0
500 CBOR cbor; 552 CBOR cbor;
501 U32 depth; // recursion depth 553 U32 depth; // recursion depth
502 U32 maxdepth; // recursion depth limit 554 U32 maxdepth; // recursion depth limit
503 AV *shareable; 555 AV *shareable;
556 AV *stringref;
557 SV *decode_tagged;
504} dec_t; 558} dec_t;
505 559
506#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 560#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
507 561
508#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 562#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
606 660
607static void 661static void
608decode_he (dec_t *dec, HV *hv) 662decode_he (dec_t *dec, HV *hv)
609{ 663{
610 // for speed reasons, we specialcase single-string 664 // for speed reasons, we specialcase single-string
611 // byte or utf-8 strings as keys. 665 // byte or utf-8 strings as keys, but only when !stringref
612 666
667 if (ecb_expect_true (!dec->stringref))
613 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27) 668 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
614 { 669 {
615 I32 len = decode_uint (dec); 670 I32 len = decode_uint (dec);
616 char *key = (char *)dec->cur; 671 char *key = (char *)dec->cur;
617 672
618 dec->cur += len; 673 dec->cur += len;
619 674
675 if (ecb_expect_false (dec->stringref))
676 av_push (dec->stringref, newSVpvn (key, len));
677
620 hv_store (hv, key, len, decode_sv (dec), 0); 678 hv_store (hv, key, len, decode_sv (dec), 0);
679
680 return;
621 } 681 }
622 else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27) 682 else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
623 { 683 {
624 I32 len = decode_uint (dec); 684 I32 len = decode_uint (dec);
625 char *key = (char *)dec->cur; 685 char *key = (char *)dec->cur;
626 686
627 dec->cur += len; 687 dec->cur += len;
628 688
689 if (ecb_expect_false (dec->stringref))
690 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
691
629 hv_store (hv, key, -len, decode_sv (dec), 0); 692 hv_store (hv, key, -len, decode_sv (dec), 0);
693
694 return;
630 } 695 }
631 else 696
632 {
633 SV *k = decode_sv (dec); 697 SV *k = decode_sv (dec);
634 SV *v = decode_sv (dec); 698 SV *v = decode_sv (dec);
635 699
636 hv_store_ent (hv, k, v, 0); 700 hv_store_ent (hv, k, v, 0);
637 SvREFCNT_dec (k); 701 SvREFCNT_dec (k);
638 }
639} 702}
640 703
641static SV * 704static SV *
642decode_hv (dec_t *dec) 705decode_hv (dec_t *dec)
643{ 706{
709 STRLEN len = decode_uint (dec); 772 STRLEN len = decode_uint (dec);
710 773
711 WANT (len); 774 WANT (len);
712 sv = newSVpvn (dec->cur, len); 775 sv = newSVpvn (dec->cur, len);
713 dec->cur += len; 776 dec->cur += len;
777
778 if (ecb_expect_false (dec->stringref)
779 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
780 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
714 } 781 }
715 782
716 if (utf8) 783 if (utf8)
717 SvUTF8_on (sv); 784 SvUTF8_on (sv);
718 785
732 WANT (1); 799 WANT (1);
733 800
734 switch (tag) 801 switch (tag)
735 { 802 {
736 case CBOR_TAG_MAGIC: 803 case CBOR_TAG_MAGIC:
737 return decode_sv (dec); 804 sv = decode_sv (dec);
805 break;
738 806
739 case CBOR_TAG_INDIRECTION: 807 case CBOR_TAG_INDIRECTION:
740 return newRV_noinc (decode_sv (dec)); 808 sv = newRV_noinc (decode_sv (dec));
809 break;
810
811 case CBOR_TAG_STRINGREF_NAMESPACE:
812 {
813 ENTER; SAVETMPS;
814
815 SAVESPTR (dec->stringref);
816 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
817
818 sv = decode_sv (dec);
819
820 FREETMPS; LEAVE;
821 }
822 break;
823
824 case CBOR_TAG_STRINGREF:
825 {
826 if ((*dec->cur >> 5) != 0)
827 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
828
829 UV idx = decode_uint (dec);
830
831 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
832 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
833
834 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
835 }
836 break;
741 837
742 case CBOR_TAG_VALUE_SHAREABLE: 838 case CBOR_TAG_VALUE_SHAREABLE:
743 { 839 {
744 if (ecb_expect_false (!dec->shareable)) 840 if (ecb_expect_false (!dec->shareable))
745 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 841 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
749 845
750 SV *osv = decode_sv (dec); 846 SV *osv = decode_sv (dec);
751 sv_setsv (sv, osv); 847 sv_setsv (sv, osv);
752 SvREFCNT_dec_NN (osv); 848 SvREFCNT_dec_NN (osv);
753 } 849 }
754 850 break;
755 return sv;
756 851
757 case CBOR_TAG_VALUE_SHAREDREF: 852 case CBOR_TAG_VALUE_SHAREDREF:
758 { 853 {
759 if ((*dec->cur >> 5) != 0) 854 if ((*dec->cur >> 5) != 0)
760 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)"); 855 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
761 856
762 UV idx = decode_uint (dec); 857 UV idx = decode_uint (dec);
763 858
764 if (!dec->shareable || idx > AvFILLp (dec->shareable)) 859 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
765 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 860 ERR ("corrupted CBOR data (sharedref index out of bounds)");
766 861
767 return SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 862 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
768 } 863 }
864 break;
769 865
770 case CBOR_TAG_PERL_OBJECT: 866 case CBOR_TAG_PERL_OBJECT:
771 { 867 {
772 sv = decode_sv (dec); 868 sv = decode_sv (dec);
773 869
813 sv = SvREFCNT_inc (POPs); 909 sv = SvREFCNT_inc (POPs);
814 910
815 PUTBACK; 911 PUTBACK;
816 912
817 FREETMPS; LEAVE; 913 FREETMPS; LEAVE;
818
819 return sv;
820 } 914 }
915 break;
821 916
822 default: 917 default:
823 { 918 {
824 sv = decode_sv (dec); 919 sv = decode_sv (dec);
825 920
921 dSP;
922 ENTER; SAVETMPS; PUSHMARK (SP);
923 EXTEND (SP, 2);
924 PUSHs (newSVuv (tag));
925 PUSHs (sv);
926
927 PUTBACK;
928 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
929 SPAGAIN;
930
931 if (SvTRUE (ERRSV))
932 {
933 FREETMPS; LEAVE;
934 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
935 }
936
937 if (count)
938 {
939 SvREFCNT_dec (sv);
940 sv = SvREFCNT_inc (POPs);
941 }
942 else
943 {
826 AV *av = newAV (); 944 AV *av = newAV ();
827 av_push (av, newSVuv (tag)); 945 av_push (av, newSVuv (tag));
828 av_push (av, sv); 946 av_push (av, sv);
829 947
830 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 948 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
831 ? cbor_tagged_stash 949 ? cbor_tagged_stash
832 : gv_stashpv ("CBOR::XS::Tagged" , 1); 950 : gv_stashpv ("CBOR::XS::Tagged" , 1);
833
834 return sv_bless (newRV_noinc ((SV *)av), tagged_stash); 951 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
835 } 952 }
953
954 PUTBACK;
955
956 FREETMPS; LEAVE;
957 }
958 break;
836 } 959 }
960
961 return sv;
837 962
838fail: 963fail:
839 SvREFCNT_dec (sv); 964 SvREFCNT_dec (sv);
840 return &PL_sv_undef; 965 return &PL_sv_undef;
841} 966}
982 1107
983 types_true = get_bool ("Types::Serialiser::true" ); 1108 types_true = get_bool ("Types::Serialiser::true" );
984 types_false = get_bool ("Types::Serialiser::false"); 1109 types_false = get_bool ("Types::Serialiser::false");
985 types_error = get_bool ("Types::Serialiser::error"); 1110 types_error = get_bool ("Types::Serialiser::error");
986 1111
1112 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1113
987 sv_cbor = newSVpv ("CBOR", 0); 1114 sv_cbor = newSVpv ("CBOR", 0);
988 SvREADONLY_on (sv_cbor); 1115 SvREADONLY_on (sv_cbor);
989} 1116}
990 1117
991PROTOTYPES: DISABLE 1118PROTOTYPES: DISABLE
1012void shrink (CBOR *self, int enable = 1) 1139void shrink (CBOR *self, int enable = 1)
1013 ALIAS: 1140 ALIAS:
1014 shrink = F_SHRINK 1141 shrink = F_SHRINK
1015 allow_unknown = F_ALLOW_UNKNOWN 1142 allow_unknown = F_ALLOW_UNKNOWN
1016 allow_sharing = F_ALLOW_SHARING 1143 allow_sharing = F_ALLOW_SHARING
1017 dedup_keys = F_DEDUP_KEYS 1144 allow_stringref = F_ALLOW_STRINGREF
1018 dedup_strings = F_DEDUP_STRINGS
1019 PPCODE: 1145 PPCODE:
1020{ 1146{
1021 if (enable) 1147 if (enable)
1022 self->flags |= ix; 1148 self->flags |= ix;
1023 else 1149 else
1029void get_shrink (CBOR *self) 1155void get_shrink (CBOR *self)
1030 ALIAS: 1156 ALIAS:
1031 get_shrink = F_SHRINK 1157 get_shrink = F_SHRINK
1032 get_allow_unknown = F_ALLOW_UNKNOWN 1158 get_allow_unknown = F_ALLOW_UNKNOWN
1033 get_allow_sharing = F_ALLOW_SHARING 1159 get_allow_sharing = F_ALLOW_SHARING
1034 get_dedup_keys = F_DEDUP_KEYS 1160 get_allow_stringref = F_ALLOW_STRINGREF
1035 get_dedup_strings = F_DEDUP_STRINGS
1036 PPCODE: 1161 PPCODE:
1037 XPUSHs (boolSV (self->flags & ix)); 1162 XPUSHs (boolSV (self->flags & ix));
1038 1163
1039void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1164void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1040 PPCODE: 1165 PPCODE:
1053 XPUSHs (ST (0)); 1178 XPUSHs (ST (0));
1054 1179
1055int get_max_size (CBOR *self) 1180int get_max_size (CBOR *self)
1056 CODE: 1181 CODE:
1057 RETVAL = self->max_size; 1182 RETVAL = self->max_size;
1183 OUTPUT:
1184 RETVAL
1185
1186void filter (CBOR *self, SV *filter = 0)
1187 PPCODE:
1188 SvREFCNT_dec (self->filter);
1189 self->filter = filter ? newSVsv (filter) : filter;
1190 XPUSHs (ST (0));
1191
1192SV *get_filter (CBOR *self)
1193 CODE:
1194 RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1058 OUTPUT: 1195 OUTPUT:
1059 RETVAL 1196 RETVAL
1060 1197
1061void encode (CBOR *self, SV *scalar) 1198void encode (CBOR *self, SV *scalar)
1062 PPCODE: 1199 PPCODE:
1077 EXTEND (SP, 2); 1214 EXTEND (SP, 2);
1078 PUSHs (sv); 1215 PUSHs (sv);
1079 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1216 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1080} 1217}
1081 1218
1219void DESTROY (CBOR *self)
1220 PPCODE:
1221 cbor_free (self);
1222
1082PROTOTYPES: ENABLE 1223PROTOTYPES: ENABLE
1083 1224
1084void encode_cbor (SV *scalar) 1225void encode_cbor (SV *scalar)
1085 PPCODE: 1226 PPCODE:
1086{ 1227{

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines