… | |
… | |
6 | use utf8; |
6 | use utf8; |
7 | no warnings 'utf8'; |
7 | no warnings 'utf8'; |
8 | |
8 | |
9 | binmode STDOUT, ":utf8"; |
9 | binmode STDOUT, ":utf8"; |
10 | |
10 | |
11 | open my $fh, "GET http://www.unicode.org/Public/idna/9.0.0/IdnaMappingTable.txt |" |
11 | open my $fh, "GET http://www.unicode.org/Public/idna/13.0.0/IdnaMappingTable.txt |" |
12 | or die; |
12 | or die; |
13 | |
13 | |
14 | my $valid; |
14 | my $valid; |
15 | my $imap; # index map \x00 char replacement |
15 | my $imap; # index map \x00 char replacement |
16 | |
16 | |
… | |
… | |
65 | 0 > index $imap, "\x02" # it's not supposed to be anywhere in there |
65 | 0 > index $imap, "\x02" # it's not supposed to be anywhere in there |
66 | or die "imap contains \\x02"; |
66 | or die "imap contains \\x02"; |
67 | print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n"; |
67 | print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n"; |
68 | |
68 | |
69 | # try to find a valid quoting character - there usually are many legal combos |
69 | # try to find a valid quoting character - there usually are many legal combos |
70 | for (1..127) { # stay out of utf-8 range |
70 | for (33..112, 1..31) { # stay out of utf-8 range, prefer printable things |
71 | if (0 >= index $valid, chr) { |
71 | if (0 >= index $valid, chr) { |
|
|
72 | my $q = chr; |
|
|
73 | |
|
|
74 | # primitive compression |
|
|
75 | $valid =~ s/(\x00{32,})/"$q.(\"\x00\"x" . (length $1) . ").$q"/ge; |
|
|
76 | $valid =~ s/(\xff{32,})/"$q.(\"\xff\"x" . (length $1) . ").$q"/ge; |
|
|
77 | |
72 | print $fh "\$uts46_valid = q", chr, $valid, chr, ";\n"; |
78 | print $fh "\$uts46_valid = q$q$valid$q;\n"; |
73 | goto valid_ok; |
79 | goto valid_ok; |
74 | } |
80 | } |
75 | } |
81 | } |
76 | die "unable to found valid quoting character"; |
82 | die "unable to found valid quoting character"; |
77 | valid_ok:; |
83 | valid_ok:; |