ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/CBOR-XS/XS.xs
(Generate patch)

Comparing CBOR-XS/XS.xs (file contents):
Revision 1.19 by root, Wed Nov 20 02:03:09 2013 UTC vs.
Revision 1.27 by root, Fri Nov 22 15:28:38 2013 UTC

54 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8 54 CBOR_TAG_MIME = 36, // mime message rfc2045, utf-8
55 55
56 CBOR_TAG_MAGIC = 55799 // self-describe cbor 56 CBOR_TAG_MAGIC = 55799 // self-describe cbor
57}; 57};
58 58
59#define F_SHRINK 0x00000001UL 59#define F_SHRINK 0x00000001UL
60#define F_ALLOW_UNKNOWN 0x00000002UL 60#define F_ALLOW_UNKNOWN 0x00000002UL
61#define F_ALLOW_SHARING 0x00000004UL //TODO 61#define F_ALLOW_SHARING 0x00000004UL //TODO
62#define F_DEDUP_STRINGS 0x00000008UL //TODO 62#define F_ALLOW_STRINGREF 0x00000008UL //TODO
63#define F_DEDUP_KEYS 0x00000010UL //TODO
64 63
65#define INIT_SIZE 32 // initial scalar size to be allocated 64#define INIT_SIZE 32 // initial scalar size to be allocated
66 65
67#define SB do { 66#define SB do {
68#define SE } while (0) 67#define SE } while (0)
80# define CBOR_SLOW 0 79# define CBOR_SLOW 0
81# define CBOR_STASH cbor_stash 80# define CBOR_STASH cbor_stash
82#endif 81#endif
83 82
84static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS:: 83static HV *cbor_stash, *types_boolean_stash, *types_error_stash, *cbor_tagged_stash; // CBOR::XS::
85static SV *types_true, *types_false, *types_error, *sv_cbor; 84static SV *types_true, *types_false, *types_error, *sv_cbor, *default_filter;
86 85
87typedef struct { 86typedef struct {
88 U32 flags; 87 U32 flags;
89 U32 max_depth; 88 U32 max_depth;
90 STRLEN max_size; 89 STRLEN max_size;
90 SV *filter;
91} CBOR; 91} CBOR;
92 92
93ecb_inline void 93ecb_inline void
94cbor_init (CBOR *cbor) 94cbor_init (CBOR *cbor)
95{ 95{
96 Zero (cbor, 1, CBOR); 96 Zero (cbor, 1, CBOR);
97 cbor->max_depth = 512; 97 cbor->max_depth = 512;
98}
99
100ecb_inline void
101cbor_free (CBOR *cbor)
102{
103 SvREFCNT_dec (cbor->filter);
98} 104}
99 105
100///////////////////////////////////////////////////////////////////////////// 106/////////////////////////////////////////////////////////////////////////////
101// utility functions 107// utility functions
102 108
124 SvPV_renew (sv, SvCUR (sv) + 1); 130 SvPV_renew (sv, SvCUR (sv) + 1);
125#endif 131#endif
126 } 132 }
127} 133}
128 134
135// minimum length of a string to be registered for stringref
136ecb_inline int
137minimum_string_length (UV idx)
138{
139 return idx > 23
140 ? idx > 0xffU
141 ? idx > 0xffffU
142 ? idx > 0xffffffffU
143 ? 7
144 : 6
145 : 5
146 : 4
147 : 3;
148}
149
129///////////////////////////////////////////////////////////////////////////// 150/////////////////////////////////////////////////////////////////////////////
130// encoder 151// encoder
131 152
132// structure used for encoding CBOR 153// structure used for encoding CBOR
133typedef struct 154typedef struct
135 char *cur; // SvPVX (sv) + current output position 156 char *cur; // SvPVX (sv) + current output position
136 char *end; // SvEND (sv) 157 char *end; // SvEND (sv)
137 SV *sv; // result scalar 158 SV *sv; // result scalar
138 CBOR cbor; 159 CBOR cbor;
139 U32 depth; // recursion level 160 U32 depth; // recursion level
140 HV *stringref; // string => index, or 0 161 HV *stringref[2]; // string => index, or 0 ([0] = bytes, [1] = utf-8)
162 UV stringref_idx;
141 HV *shareable; // ptr => index, or 0 163 HV *shareable; // ptr => index, or 0
142 UV shareable_idx; 164 UV shareable_idx;
143} enc_t; 165} enc_t;
144 166
145ecb_inline void 167ecb_inline void
199 *enc->cur++ = len >> 8; 221 *enc->cur++ = len >> 8;
200 *enc->cur++ = len; 222 *enc->cur++ = len;
201 } 223 }
202} 224}
203 225
226ecb_inline void
227encode_tag (enc_t *enc, UV tag)
228{
229 encode_uint (enc, 0xc0, tag);
230}
231
204static void 232static void
205encode_str (enc_t *enc, int utf8, char *str, STRLEN len) 233encode_str (enc_t *enc, int utf8, char *str, STRLEN len)
206{ 234{
235 if (ecb_expect_false (enc->cbor.flags & F_ALLOW_STRINGREF))
236 {
237 SV **svp = hv_fetch (enc->stringref[!!utf8], str, len, 1);
238
239 if (SvOK (*svp))
240 {
241 // already registered, use stringref
242 encode_tag (enc, CBOR_TAG_STRINGREF);
243 encode_uint (enc, 0x00, SvUV (*svp));
244 return;
245 }
246 else if (len >= minimum_string_length (enc->stringref_idx))
247 {
248 // register only
249 sv_setuv (*svp, enc->stringref_idx);
250 ++enc->stringref_idx;
251 }
252 }
253
207 encode_uint (enc, utf8 ? 0x60 : 0x40, len); 254 encode_uint (enc, utf8 ? 0x60 : 0x40, len);
208 need (enc, len); 255 need (enc, len);
209 memcpy (enc->cur, str, len); 256 memcpy (enc->cur, str, len);
210 enc->cur += len; 257 enc->cur += len;
211}
212
213ecb_inline void
214encode_tag (enc_t *enc, UV tag)
215{
216 encode_uint (enc, 0xc0, tag);
217} 258}
218 259
219static void encode_sv (enc_t *enc, SV *sv); 260static void encode_sv (enc_t *enc, SV *sv);
220 261
221static void 262static void
475 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE)); 516 enc.sv = sv_2mortal (NEWSV (0, INIT_SIZE));
476 enc.cur = SvPVX (enc.sv); 517 enc.cur = SvPVX (enc.sv);
477 enc.end = SvEND (enc.sv); 518 enc.end = SvEND (enc.sv);
478 519
479 SvPOK_only (enc.sv); 520 SvPOK_only (enc.sv);
521
522 if (cbor->flags & F_ALLOW_STRINGREF)
523 {
524 encode_tag (&enc, CBOR_TAG_STRINGREF_NAMESPACE);
525 enc.stringref[0]= (HV *)sv_2mortal ((SV *)newHV ());
526 enc.stringref[1]= (HV *)sv_2mortal ((SV *)newHV ());
527 }
528
480 encode_sv (&enc, scalar); 529 encode_sv (&enc, scalar);
481 530
482 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv)); 531 SvCUR_set (enc.sv, enc.cur - SvPVX (enc.sv));
483 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings 532 *SvEND (enc.sv) = 0; // many xs functions expect a trailing 0 for text strings
484 533
499 const char *err; // parse error, if != 0 548 const char *err; // parse error, if != 0
500 CBOR cbor; 549 CBOR cbor;
501 U32 depth; // recursion depth 550 U32 depth; // recursion depth
502 U32 maxdepth; // recursion depth limit 551 U32 maxdepth; // recursion depth limit
503 AV *shareable; 552 AV *shareable;
553 AV *stringref;
554 SV *decode_tagged;
504} dec_t; 555} dec_t;
505 556
506#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE 557#define ERR(reason) SB if (!dec->err) dec->err = reason; goto fail; SE
507 558
508#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data") 559#define WANT(len) if (ecb_expect_false (dec->cur + len > dec->end)) ERR ("unexpected end of CBOR data")
606 657
607static void 658static void
608decode_he (dec_t *dec, HV *hv) 659decode_he (dec_t *dec, HV *hv)
609{ 660{
610 // for speed reasons, we specialcase single-string 661 // for speed reasons, we specialcase single-string
611 // byte or utf-8 strings as keys. 662 // byte or utf-8 strings as keys, but only when !stringref
612 663
664 if (ecb_expect_true (!dec->stringref))
613 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27) 665 if (*dec->cur >= 0x40 && *dec->cur <= 0x40 + 27)
614 { 666 {
615 I32 len = decode_uint (dec); 667 I32 len = decode_uint (dec);
616 char *key = (char *)dec->cur; 668 char *key = (char *)dec->cur;
617 669
618 dec->cur += len; 670 dec->cur += len;
619 671
672 if (ecb_expect_false (dec->stringref))
673 av_push (dec->stringref, newSVpvn (key, len));
674
620 hv_store (hv, key, len, decode_sv (dec), 0); 675 hv_store (hv, key, len, decode_sv (dec), 0);
676
677 return;
621 } 678 }
622 else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27) 679 else if (*dec->cur >= 0x60 && *dec->cur <= 0x60 + 27)
623 { 680 {
624 I32 len = decode_uint (dec); 681 I32 len = decode_uint (dec);
625 char *key = (char *)dec->cur; 682 char *key = (char *)dec->cur;
626 683
627 dec->cur += len; 684 dec->cur += len;
628 685
686 if (ecb_expect_false (dec->stringref))
687 av_push (dec->stringref, newSVpvn_utf8 (key, len, 1));
688
629 hv_store (hv, key, -len, decode_sv (dec), 0); 689 hv_store (hv, key, -len, decode_sv (dec), 0);
690
691 return;
630 } 692 }
631 else 693
632 {
633 SV *k = decode_sv (dec); 694 SV *k = decode_sv (dec);
634 SV *v = decode_sv (dec); 695 SV *v = decode_sv (dec);
635 696
636 hv_store_ent (hv, k, v, 0); 697 hv_store_ent (hv, k, v, 0);
637 SvREFCNT_dec (k); 698 SvREFCNT_dec (k);
638 }
639} 699}
640 700
641static SV * 701static SV *
642decode_hv (dec_t *dec) 702decode_hv (dec_t *dec)
643{ 703{
709 STRLEN len = decode_uint (dec); 769 STRLEN len = decode_uint (dec);
710 770
711 WANT (len); 771 WANT (len);
712 sv = newSVpvn (dec->cur, len); 772 sv = newSVpvn (dec->cur, len);
713 dec->cur += len; 773 dec->cur += len;
774
775 if (ecb_expect_false (dec->stringref)
776 && SvCUR (sv) >= minimum_string_length (AvFILLp (dec->stringref) + 1))
777 av_push (dec->stringref, SvREFCNT_inc_NN (sv));
714 } 778 }
715 779
716 if (utf8) 780 if (utf8)
717 SvUTF8_on (sv); 781 SvUTF8_on (sv);
718 782
732 WANT (1); 796 WANT (1);
733 797
734 switch (tag) 798 switch (tag)
735 { 799 {
736 case CBOR_TAG_MAGIC: 800 case CBOR_TAG_MAGIC:
737 return decode_sv (dec); 801 sv = decode_sv (dec);
802 break;
738 803
739 case CBOR_TAG_INDIRECTION: 804 case CBOR_TAG_INDIRECTION:
740 return newRV_noinc (decode_sv (dec)); 805 sv = newRV_noinc (decode_sv (dec));
806 break;
807
808 case CBOR_TAG_STRINGREF_NAMESPACE:
809 {
810 ENTER; SAVETMPS;
811
812 SAVESPTR (dec->stringref);
813 dec->stringref = (AV *)sv_2mortal ((SV *)newAV ());
814
815 sv = decode_sv (dec);
816
817 FREETMPS; LEAVE;
818 }
819 break;
820
821 case CBOR_TAG_STRINGREF:
822 {
823 if ((*dec->cur >> 5) != 0)
824 ERR ("corrupted CBOR data (stringref index not an unsigned integer)");
825
826 UV idx = decode_uint (dec);
827
828 if (!dec->stringref || (int)idx > AvFILLp (dec->stringref))
829 ERR ("corrupted CBOR data (stringref index out of bounds or outside namespace)");
830
831 sv = newSVsv (AvARRAY (dec->stringref)[idx]);
832 }
833 break;
741 834
742 case CBOR_TAG_VALUE_SHAREABLE: 835 case CBOR_TAG_VALUE_SHAREABLE:
743 { 836 {
744 if (ecb_expect_false (!dec->shareable)) 837 if (ecb_expect_false (!dec->shareable))
745 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ()); 838 dec->shareable = (AV *)sv_2mortal ((SV *)newAV ());
749 842
750 SV *osv = decode_sv (dec); 843 SV *osv = decode_sv (dec);
751 sv_setsv (sv, osv); 844 sv_setsv (sv, osv);
752 SvREFCNT_dec_NN (osv); 845 SvREFCNT_dec_NN (osv);
753 } 846 }
754 847 break;
755 return sv;
756 848
757 case CBOR_TAG_VALUE_SHAREDREF: 849 case CBOR_TAG_VALUE_SHAREDREF:
758 { 850 {
759 if ((*dec->cur >> 5) != 0) 851 if ((*dec->cur >> 5) != 0)
760 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)"); 852 ERR ("corrupted CBOR data (sharedref index not an unsigned integer)");
761 853
762 UV idx = decode_uint (dec); 854 UV idx = decode_uint (dec);
763 855
764 if (!dec->shareable || idx > AvFILLp (dec->shareable)) 856 if (!dec->shareable || (int)idx > AvFILLp (dec->shareable))
765 ERR ("corrupted CBOR data (sharedref index out of bounds)"); 857 ERR ("corrupted CBOR data (sharedref index out of bounds)");
766 858
767 return SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]); 859 sv = SvREFCNT_inc_NN (AvARRAY (dec->shareable)[idx]);
768 } 860 }
861 break;
769 862
770 case CBOR_TAG_PERL_OBJECT: 863 case CBOR_TAG_PERL_OBJECT:
771 { 864 {
772 sv = decode_sv (dec); 865 sv = decode_sv (dec);
773 866
813 sv = SvREFCNT_inc (POPs); 906 sv = SvREFCNT_inc (POPs);
814 907
815 PUTBACK; 908 PUTBACK;
816 909
817 FREETMPS; LEAVE; 910 FREETMPS; LEAVE;
818
819 return sv;
820 } 911 }
912 break;
821 913
822 default: 914 default:
823 { 915 {
824 sv = decode_sv (dec); 916 sv = decode_sv (dec);
825 917
918 dSP;
919 ENTER; SAVETMPS; PUSHMARK (SP);
920 EXTEND (SP, 2);
921 PUSHs (newSVuv (tag));
922 PUSHs (sv);
923
924 PUTBACK;
925 int count = call_sv (dec->cbor.filter ? dec->cbor.filter : default_filter, G_ARRAY | G_EVAL);
926 SPAGAIN;
927
928 if (SvTRUE (ERRSV))
929 {
930 FREETMPS; LEAVE;
931 ERR (SvPVutf8_nolen (sv_2mortal (SvREFCNT_inc (ERRSV))));
932 }
933
934 if (count)
935 {
936 SvREFCNT_dec (sv);
937 sv = SvREFCNT_inc (POPs);
938 }
939 else
940 {
826 AV *av = newAV (); 941 AV *av = newAV ();
827 av_push (av, newSVuv (tag)); 942 av_push (av, newSVuv (tag));
828 av_push (av, sv); 943 av_push (av, sv);
829 944
830 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash 945 HV *tagged_stash = !CBOR_SLOW || cbor_tagged_stash
831 ? cbor_tagged_stash 946 ? cbor_tagged_stash
832 : gv_stashpv ("CBOR::XS::Tagged" , 1); 947 : gv_stashpv ("CBOR::XS::Tagged" , 1);
833
834 return sv_bless (newRV_noinc ((SV *)av), tagged_stash); 948 sv = sv_bless (newRV_noinc ((SV *)av), tagged_stash);
835 } 949 }
950
951 PUTBACK;
952
953 FREETMPS; LEAVE;
954 }
955 break;
836 } 956 }
957
958 return sv;
837 959
838fail: 960fail:
839 SvREFCNT_dec (sv); 961 SvREFCNT_dec (sv);
840 return &PL_sv_undef; 962 return &PL_sv_undef;
841} 963}
982 1104
983 types_true = get_bool ("Types::Serialiser::true" ); 1105 types_true = get_bool ("Types::Serialiser::true" );
984 types_false = get_bool ("Types::Serialiser::false"); 1106 types_false = get_bool ("Types::Serialiser::false");
985 types_error = get_bool ("Types::Serialiser::error"); 1107 types_error = get_bool ("Types::Serialiser::error");
986 1108
1109 default_filter = newSVpv ("CBOR::XS::default_filter", 0);
1110
987 sv_cbor = newSVpv ("CBOR", 0); 1111 sv_cbor = newSVpv ("CBOR", 0);
988 SvREADONLY_on (sv_cbor); 1112 SvREADONLY_on (sv_cbor);
989} 1113}
990 1114
991PROTOTYPES: DISABLE 1115PROTOTYPES: DISABLE
1012void shrink (CBOR *self, int enable = 1) 1136void shrink (CBOR *self, int enable = 1)
1013 ALIAS: 1137 ALIAS:
1014 shrink = F_SHRINK 1138 shrink = F_SHRINK
1015 allow_unknown = F_ALLOW_UNKNOWN 1139 allow_unknown = F_ALLOW_UNKNOWN
1016 allow_sharing = F_ALLOW_SHARING 1140 allow_sharing = F_ALLOW_SHARING
1017 dedup_keys = F_DEDUP_KEYS 1141 allow_stringref = F_ALLOW_STRINGREF
1018 dedup_strings = F_DEDUP_STRINGS
1019 PPCODE: 1142 PPCODE:
1020{ 1143{
1021 if (enable) 1144 if (enable)
1022 self->flags |= ix; 1145 self->flags |= ix;
1023 else 1146 else
1029void get_shrink (CBOR *self) 1152void get_shrink (CBOR *self)
1030 ALIAS: 1153 ALIAS:
1031 get_shrink = F_SHRINK 1154 get_shrink = F_SHRINK
1032 get_allow_unknown = F_ALLOW_UNKNOWN 1155 get_allow_unknown = F_ALLOW_UNKNOWN
1033 get_allow_sharing = F_ALLOW_SHARING 1156 get_allow_sharing = F_ALLOW_SHARING
1034 get_dedup_keys = F_DEDUP_KEYS 1157 get_allow_stringref = F_ALLOW_STRINGREF
1035 get_dedup_strings = F_DEDUP_STRINGS
1036 PPCODE: 1158 PPCODE:
1037 XPUSHs (boolSV (self->flags & ix)); 1159 XPUSHs (boolSV (self->flags & ix));
1038 1160
1039void max_depth (CBOR *self, U32 max_depth = 0x80000000UL) 1161void max_depth (CBOR *self, U32 max_depth = 0x80000000UL)
1040 PPCODE: 1162 PPCODE:
1053 XPUSHs (ST (0)); 1175 XPUSHs (ST (0));
1054 1176
1055int get_max_size (CBOR *self) 1177int get_max_size (CBOR *self)
1056 CODE: 1178 CODE:
1057 RETVAL = self->max_size; 1179 RETVAL = self->max_size;
1180 OUTPUT:
1181 RETVAL
1182
1183void filter (CBOR *self, SV *filter = 0)
1184 PPCODE:
1185 SvREFCNT_dec (self->filter);
1186 self->filter = filter ? newSVsv (filter) : filter;
1187 XPUSHs (ST (0));
1188
1189SV *get_filter (CBOR *self)
1190 CODE:
1191 RETVAL = self->filter ? self->filter : NEWSV (0, 0);
1058 OUTPUT: 1192 OUTPUT:
1059 RETVAL 1193 RETVAL
1060 1194
1061void encode (CBOR *self, SV *scalar) 1195void encode (CBOR *self, SV *scalar)
1062 PPCODE: 1196 PPCODE:
1077 EXTEND (SP, 2); 1211 EXTEND (SP, 2);
1078 PUSHs (sv); 1212 PUSHs (sv);
1079 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr)))); 1213 PUSHs (sv_2mortal (newSVuv (offset - SvPVX (cborstr))));
1080} 1214}
1081 1215
1216void DESTROY (CBOR *self)
1217 PPCODE:
1218 cbor_free (self);
1219
1082PROTOTYPES: ENABLE 1220PROTOTYPES: ENABLE
1083 1221
1084void encode_cbor (SV *scalar) 1222void encode_cbor (SV *scalar)
1085 PPCODE: 1223 PPCODE:
1086{ 1224{

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines