ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent/util/gen_uts46data
Revision: 1.10
Committed: Wed Sep 30 07:48:47 2020 UTC (3 years, 7 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Changes since 1.9: +8 -2 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 # creates lib/AnyEvent/Util/uts46.pl - better do not run it!
4
5 use common::sense;
6 use utf8;
7 no warnings 'utf8';
8
9 binmode STDOUT, ":utf8";
10
11 open my $fh, "GET http://www.unicode.org/Public/idna/13.0.0/IdnaMappingTable.txt |"
12 or die;
13
14 my $valid;
15 my $imap; # index map \x00 char replacement
16
17 while (<$fh>) {
18 next unless /^[0-9A-F]/;
19
20 /^
21 ([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )?
22 \s*;\s*(\S+)
23 (?: \s*;\s*([0-9A-F ]+?) )?
24 (?: \s*;[^;]+ )?
25 \s*
26 (?: \#.* )?
27 $
28 /x or die "$_: unparsable";
29
30 my ($r1, $r2, $type, $map) = (hex $1, hex $2, $3, $4);
31
32 my $R1 = chr $r1;
33 my $R2 = chr $r2;
34
35 $map = join "", map chr hex, split ' ', $map;
36
37 $type = "valid" if $type eq "deviation"; # use non-transitional behaviour for deviation characters
38
39 given ($type) {
40 when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) {
41 # nop
42 }
43 when (/^(?:mapped|deviation|ignored)$/) {
44 $map = "\x01$map" if $type eq "deviation";
45
46 $imap .= "\x00" . chr . $map
47 for $r1 .. $r2 || $r1;
48 }
49 when (/^(?:valid)$/) {
50 (vec $valid, $_, 1) = 1
51 for $r1 .. $r2 || $r1;
52 }
53 default {
54 die "default: $R1,$R2,$type,$map;\n";
55 }
56 }
57 }
58
59 open my $fh, ">lib/AnyEvent/Util/uts46data.pl"
60 or die;
61 binmode $fh, ":perlio";
62 print $fh "# autogenerated by util/gen_uts46data\n";
63
64 utf8::encode $imap;
65 0 > index $imap, "\x02" # it's not supposed to be anywhere in there
66 or die "imap contains \\x02";
67 print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n";
68
69 # try to find a valid quoting character - there usually are many legal combos
70 for (33..112, 1..31) { # stay out of utf-8 range, prefer printable things
71 if (0 >= index $valid, chr) {
72 my $q = chr;
73
74 # primitive compression
75 $valid =~ s/(\x00{32,})/"$q.(\"\x00\"x" . (length $1) . ").$q"/ge;
76 $valid =~ s/(\xff{32,})/"$q.(\"\xff\"x" . (length $1) . ").$q"/ge;
77
78 print $fh "\$uts46_valid = q$q$valid$q;\n";
79 goto valid_ok;
80 }
81 }
82 die "unable to found valid quoting character";
83 valid_ok:;
84
85 print $fh "1;\n";
86 close $fh;
87