… | |
… | |
6 | use utf8; |
6 | use utf8; |
7 | no warnings 'utf8'; |
7 | no warnings 'utf8'; |
8 | |
8 | |
9 | binmode STDOUT, ":utf8"; |
9 | binmode STDOUT, ":utf8"; |
10 | |
10 | |
11 | open my $fh, "GET http://www.unicode.org/Public/idna/5.2.0/IdnaMappingTable.txt |" |
11 | open my $fh, "GET http://www.unicode.org/Public/idna/6.0.0/IdnaMappingTable.txt |" |
12 | or die; |
12 | or die; |
13 | |
13 | |
14 | my $valid; |
14 | my $valid; |
15 | my $imap; # index map \x00 char replacement |
15 | my $imap; # index map \x00 char replacement |
16 | |
16 | |
… | |
… | |
19 | |
19 | |
20 | /^ |
20 | /^ |
21 | ([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )? |
21 | ([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )? |
22 | \s*;\s*(\S+) |
22 | \s*;\s*(\S+) |
23 | (?: \s*;\s*([0-9A-F ]+?) )? |
23 | (?: \s*;\s*([0-9A-F ]+?) )? |
|
|
24 | (?: \s*;[^;]+ )? |
24 | \s* |
25 | \s* |
25 | (?: \#.* )? |
26 | (?: \#.* )? |
26 | $ |
27 | $ |
27 | /x or die "$_: unparsable"; |
28 | /x or die "$_: unparsable"; |
28 | |
29 | |
… | |
… | |
32 | my $R2 = chr $r2; |
33 | my $R2 = chr $r2; |
33 | |
34 | |
34 | $map = join "", map chr hex, split ' ', $map; |
35 | $map = join "", map chr hex, split ' ', $map; |
35 | |
36 | |
36 | given ($type) { |
37 | given ($type) { |
37 | when ("disallowed") { |
38 | when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) { |
38 | # nop |
39 | # nop |
39 | } |
40 | } |
40 | when (/mapped|deviation|ignored/) { |
41 | when (/^(?:mapped|deviation|ignored)$/) { |
41 | $map = "\x01$map" if $type eq "deviation"; |
42 | $map = "\x01$map" if $type eq "deviation"; |
42 | |
43 | |
43 | $imap .= "\x00" . chr . $map |
44 | $imap .= "\x00" . chr . $map |
44 | for $r1 .. $r2 || $r1; |
45 | for $r1 .. $r2 || $r1; |
45 | } |
46 | } |
… | |
… | |
54 | } |
55 | } |
55 | |
56 | |
56 | open my $fh, ">lib/AnyEvent/Util/uts46data.pl" |
57 | open my $fh, ">lib/AnyEvent/Util/uts46data.pl" |
57 | or die; |
58 | or die; |
58 | binmode $fh, ":perlio"; |
59 | binmode $fh, ":perlio"; |
59 | print $fh "# created by gen_uts46data\n"; |
60 | print $fh "# autogenerated by util/gen_uts46data\n"; |
60 | |
61 | |
61 | utf8::encode $imap; |
62 | utf8::encode $imap; |
62 | 0 > index $imap, "\x02" # it's not supposed to be anywhere in there |
63 | 0 > index $imap, "\x02" # it's not supposed to be anywhere in there |
63 | or die "imap contains \\x02"; |
64 | or die "imap contains \\x02"; |
64 | print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n"; |
65 | print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n"; |