ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent/util/gen_uts46data
Revision: 1.8
Committed: Sun Apr 24 22:16:28 2016 UTC (8 years, 1 month ago) by root
Branch: MAIN
CVS Tags: rel-7_16, rel-7_15, rel-7_14, rel-7_13
Changes since 1.7: +3 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 #!/opt/bin/perl
2    
3     # creates lib/AnyEvent/Util/uts46.pl - better do not run it!
4    
5     use common::sense;
6     use utf8;
7     no warnings 'utf8';
8    
9     binmode STDOUT, ":utf8";
10    
11 root 1.8 open my $fh, "GET http://www.unicode.org/Public/idna/9.0.0/IdnaMappingTable.txt |"
12 root 1.1 or die;
13    
14     my $valid;
15     my $imap; # index map \x00 char replacement
16    
17     while (<$fh>) {
18     next unless /^[0-9A-F]/;
19    
20     /^
21     ([0-9A-F]{4,}) (?: \.\.([0-9A-F]{4,}) )?
22     \s*;\s*(\S+)
23     (?: \s*;\s*([0-9A-F ]+?) )?
24 root 1.4 (?: \s*;[^;]+ )?
25 root 1.1 \s*
26     (?: \#.* )?
27     $
28     /x or die "$_: unparsable";
29    
30     my ($r1, $r2, $type, $map) = (hex $1, hex $2, $3, $4);
31    
32     my $R1 = chr $r1;
33     my $R2 = chr $r2;
34    
35     $map = join "", map chr hex, split ' ', $map;
36    
37 root 1.8 $type = "valid" if $type eq "deviation"; # use non-transitional behaviour for deviation characters
38    
39 root 1.1 given ($type) {
40 root 1.4 when (/^(?:disallowed|disallowed_STD3_valid|disallowed_STD3_mapped)$/) {
41 root 1.1 # nop
42     }
43 root 1.4 when (/^(?:mapped|deviation|ignored)$/) {
44 root 1.2 $map = "\x01$map" if $type eq "deviation";
45 root 1.1
46     $imap .= "\x00" . chr . $map
47     for $r1 .. $r2 || $r1;
48     }
49 root 1.5 when (/^(?:valid)$/) {
50 root 1.1 (vec $valid, $_, 1) = 1
51     for $r1 .. $r2 || $r1;
52     }
53     default {
54     die "default: $R1,$R2,$type,$map;\n";
55     }
56     }
57     }
58    
59     open my $fh, ">lib/AnyEvent/Util/uts46data.pl"
60     or die;
61     binmode $fh, ":perlio";
62 root 1.3 print $fh "# autogenerated by util/gen_uts46data\n";
63 root 1.1
64     utf8::encode $imap;
65     0 > index $imap, "\x02" # it's not supposed to be anywhere in there
66     or die "imap contains \\x02";
67     print $fh "\$uts46_imap = q\x02$imap\x00\x02;\n";
68    
69     # try to find a valid quoting character - there usually are many legal combos
70     for (1..127) { # stay out of utf-8 range
71     if (0 >= index $valid, chr) {
72     print $fh "\$uts46_valid = q", chr, $valid, chr, ";\n";
73     goto valid_ok;
74     }
75     }
76     die "unable to found valid quoting character";
77     valid_ok:;
78    
79     print $fh "1;\n";
80     close $fh;
81