1 |
root |
1.1 |
#!/opt/bin/perl |
2 |
|
|
|
3 |
|
|
# creates lib/AnyEvent/Util/uts46data.pl - better not run it! |
4 |
|
|
|
5 |
|
|
use common::sense; |
6 |
|
|
use utf8; |
7 |
|
|
no warnings 'utf8'; |
8 |
|
|
|
9 |
|
|
binmode STDOUT, ":utf8"; |
10 |
|
|
|
11 |
root |
1.4 |
# Stream the IDNA mapping table through the external GET command
# (presumably lwp-request from libwww-perl - confirm on the build host).
# The list form of the piped open names the mode explicitly and hands the
# URL to the command as a single argument; $! says why the launch failed.
open my $fh, "-|", "GET", "http://www.unicode.org/Public/idna/6.0.0/IdnaMappingTable.txt"
   or die "unable to run GET: $!";
13 |
|
|
|
14 |
|
|
my $valid; # bit vector indexed by code point, bit set = status "valid"
my $imap; # index map \x00 char replacement

# Read the mapping table from $fh line by line and fill the two tables:
#  - "valid" code points get their bit set in $valid
#  - "mapped", "deviation" and "ignored" code points get one
#    "\x00 char replacement" record each in $imap; for deviations the
#    replacement is prefixed with \x01
#  - the three "disallowed*" statuses are not recorded at all
# Any other status, or a data line that does not parse, aborts the run.
while (<$fh>) {
   # skip every line that does not start with a hex code point
   next unless /^[0-9A-F]/;

   # fields: first code point, optional end of range, status, optional
   # space-separated hex replacement; a further ;-field and a trailing
   # comment are accepted and ignored
   my ($first_hex, $last_hex, $type, $map_hex) = /^
      ([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )?
      \s*;\s*(\S+)
      (?: \s*;\s*([0-9A-F ]+?) )?
      (?: \s*;[^;]+ )?
      \s*
      (?: \#.* )?
      $
   /x or die "$_: unparsable";

   my $r1 = hex $first_hex;
   my $r2 = hex ($last_hex // 0); # 0 when the line names a single code point
   my $last = $r2 || $r1;

   # turn the hex replacement list into the actual replacement string
   my $map = join "", map chr (hex $_), split ' ', $map_hex // "";

   # plain if/elsif chain instead of the experimental given/when
   if ($type =~ /^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) {
      # nop
   } elsif ($type =~ /^(?:mapped|deviation|ignored)$/) {
      $map = "\x01$map" if $type eq "deviation";

      $imap .= "\x00" . chr ($_) . $map
         for $r1 .. $last;
   } elsif ($type eq "valid") {
      (vec $valid, $_, 1) = 1
         for $r1 .. $last;
   } else {
      my ($R1, $R2) = (chr $r1, chr $r2);
      die "default: $R1,$R2,$type,$map;\n";
   }
}

# closing a pipe reports the exit status of the command, so a failed
# download is caught here instead of silently producing empty tables
close $fh
   or die "GET failed: " . ($! || "exit status $?") . "\n";
56 |
|
|
|
57 |
|
|
# Write both tables as single-quoted (q) string literals into the data
# file. The handle gets its own name so it does not re-declare the input
# handle $fh in the same scope.
open my $out, ">", "lib/AnyEvent/Util/uts46data.pl"
   or die "lib/AnyEvent/Util/uts46data.pl: $!";
binmode $out, ":perlio";
print $out "# autogenerated by util/gen_uts46data\n";

utf8::encode $imap;
0 > index $imap, "\x02" # it's not supposed to be anywhere in there
   or die "imap contains \\x02";

# inside q-quotes a doubled backslash reads back as a single backslash,
# so doubling every backslash makes the literal round-trip exactly
(my $imap_lit = $imap) =~ s/\\/\\\\/g;
print $out "\$uts46_imap = q\x02$imap_lit\x00\x02;\n";

# try to find a valid quoting character - there usually are many legal combos
my $delim;
for my $code (1 .. 127) { # stay out of utf-8 range
   my $candidate = chr $code;

   # these cannot serve as a same-character q delimiter: whitespace is
   # skipped after q, word characters would fuse with the q, opening
   # brackets need their closing counterpart, backslash is the escape
   next if $candidate =~ /[\s\w\\(\[{<]/;

   # index returns -1 when absent; offset 0 is a hit, too
   next if 0 <= index $valid, $candidate;

   $delim = $candidate;
   last;
}
defined $delim
   or die "unable to found valid quoting character";

(my $valid_lit = $valid) =~ s/\\/\\\\/g;
print $out "\$uts46_valid = q", $delim, $valid_lit, $delim, ";\n";

print $out "1;\n";

# buffered write errors only surface at close time
close $out
   or die "lib/AnyEvent/Util/uts46data.pl: $!";
79 |
|
|
|