… | |
… | |
6 | use utf8; |
6 | use utf8; |
7 | no warnings 'utf8'; |
7 | no warnings 'utf8'; |
8 | |
8 | |
9 | binmode STDOUT, ":utf8"; |
9 | binmode STDOUT, ":utf8"; |
10 | |
10 | |
11 | open my $fh, "GET http://www.unicode.org/Public/idna/6.0.0/IdnaMappingTable.txt |" |
11 | open my $fh, "GET http://www.unicode.org/Public/idna/13.0.0/IdnaMappingTable.txt |" |
12 | or die; |
12 | or die; |
13 | |
13 | |
14 | my $valid; |
14 | my $valid; |
15 | my $imap; # index map \x00 char replacement |
15 | my $imap; # index map \x00 char replacement |
16 | |
16 | |
… | |
… | |
31 | |
31 | |
32 | my $R1 = chr $r1; |
32 | my $R1 = chr $r1; |
33 | my $R2 = chr $r2; |
33 | my $R2 = chr $r2; |
34 | |
34 | |
35 | $map = join "", map chr hex, split ' ', $map; |
35 | $map = join "", map chr hex, split ' ', $map; |
|
|
36 | |
|
|
37 | $type = "valid" if $type eq "deviation"; # use non-transitional behaviour for deviation characters |
36 | |
38 | |
37 | given ($type) { |
39 | given ($type) { |
38 | when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) { |
40 | when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) { |
39 | # nop |
41 | # nop |
40 | } |
42 | } |
… | |
… | |
63 | 0 > index $imap, "\x02" # it's not supposed to be anywhere in there |
65 | 0 > index $imap, "\x02" # it's not supposed to be anywhere in there |
64 | or die "imap contains \\x02"; |
66 | or die "imap contains \\x02"; |
65 | print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n"; |
67 | print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n"; |
66 | |
68 | |
67 | # try to find a valid quoting character - there usually are many legal combos |
69 | # try to find a valid quoting character - there usually are many legal combos |
68 | for (1..127) { # stay out of utf-8 range |
70 | for (33..112, 1..31) { # stay out of utf-8 range, prefer printable things |
69 | if (0 >= index $valid, chr) { |
71 | if (0 >= index $valid, chr) { |
|
|
72 | my $q = chr; |
|
|
73 | |
|
|
74 | # primitive compression |
|
|
75 | $valid =~ s/(\x00{32,})/"$q.(\"\x00\"x" . (length $1) . ").$q"/ge; |
|
|
76 | $valid =~ s/(\xff{32,})/"$q.(\"\xff\"x" . (length $1) . ").$q"/ge; |
|
|
77 | |
70 | print $fh "\$uts46_valid = q", chr, $valid, chr, ";\n"; |
78 | print $fh "\$uts46_valid = q$q$valid$q;\n"; |
71 | goto valid_ok; |
79 | goto valid_ok; |
72 | } |
80 | } |
73 | } |
81 | } |
74 | die "unable to found valid quoting character"; |
82 | die "unable to found valid quoting character"; |
75 | valid_ok:; |
83 | valid_ok:; |