ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.11
Committed: Sat Apr 11 18:27:25 2015 UTC (9 years, 1 month ago) by root
Branch: MAIN
CVS Tags: rxvt-unicode-rel-9_26, rxvt-unicode-rel-9_25, rxvt-unicode-rel-9_22
Changes since 1.10: +1 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 pcg 1.1 #!/opt/bin/perl
2    
3 pcg 1.5 # the generated tables mostly have NOT been checked so far!
4 pcg 1.1
5     use v5.8.2;
6     use utf8;
7     use Encode;
8 root 1.8 use Encode::JP;
9     use Encode::CN;
10     use Encode::KR;
11     use Encode::HanExtra;
12     use Encode::JIS2K;
13 pcg 1.1
14     my $gen;
15    
16 pcg 1.5 $TO_UNICODE = 0; # also generate to_unicode tables
17    
# Test whether a stretch of a mapping table is a pure offset mapping.
#
#   $map   - array ref indexed by source code
#   $first - first index to examine (inclusive)
#   $last  - last index to examine (inclusive)
#   $start - value expected at $map->[$first]; every following slot must be
#            exactly one larger than the slot before it
#
# Returns 1 when every slot in $first .. $last is defined and matches.
# Returns undef (empty list in list context) at the first undefined or
# mismatching slot.  An empty range ($first > $last) counts as linear.
sub linear {
    # Named lexicals instead of "my $a"/"my $b": those two names are the
    # special variables used by sort() and should not be shadowed.
    my ($map, $first, $last, $start) = @_;

    my $offset = $start - $first;

    for my $idx ($first .. $last) {
        return unless defined $map->[$idx] && $map->[$idx] == $idx + $offset;
    }

    return 1;
}
25    
# Break a long string into lines for the generated header.
#
# Starting at the beginning of the text, repeatedly takes at least 90
# characters, extends to the end of the current non-whitespace run, and
# replaces the whitespace that follows with a newline plus one space.
# Whatever remains after the last break is appended unchanged.  The result
# always starts with a single space.
#
#   $_[0] - the text to wrap (not modified)
#
# Returns the wrapped text.
sub wrap {
    # Work on a private copy.  The \G//gc scan below records its position in
    # pos() of the scalar it runs on; running it on $_[0] directly (an alias
    # of the caller's variable) left that position set after returning, so a
    # second call on the same variable resumed mid-string.
    my ($text) = @_;

    my $res = " ";
    $res .= "$1\n " while $text =~ /\G(.{90}\S*?)\s+/gc;
    $res .= $1 if $text =~ /\G(.*)$/;

    return $res;
}
32    
33     my $last_tab_full; # hack
34    
# Render the body of a C array initializer for one slice of a mapping table.
#
#   $enc - array ref holding the mapping
#   $l   - first index to emit (inclusive)
#   $h   - last index to emit (inclusive)
#   $f   - printf conversion for one cell, e.g. "%02x"; it is prefixed
#          with "0x"
#   $rep - value written for undefined slots; optional, defaults to 0
#
# Returns the string " = {\n<wrapped cells>\n};\n".
#
# Side effect: the file-level $last_tab_full is reset and then set to the
# number of undefined slots that had to be filled with $rep, so a true
# value afterwards means the emitted table contains filler cells.
sub gentab {
    my ($enc, $l, $h, $f, $rep) = @_;

    # Several callers leave $rep out; make the filler explicit instead of
    # doing arithmetic on undef.
    $rep = 0 unless defined $rep;

    $last_tab_full = 0;

    my @cells;
    for my $idx ($l .. $h) {
        my $value = $enc->[$idx];

        unless (defined $value) {
            $last_tab_full++;
            $value = $rep;
        }

        push @cells, sprintf "0x$f", $value;
    }

    return " = {\n" . wrap(join ", ", @cells) . "\n};\n";
}
46    
# Generate C lookup code for one direction of a character set mapping.
#
#   $enc  - array ref, index = source code, value = target code.  It is
#           MODIFIED: entries handled by the leading identity-range checks
#           are undefined, and codes 126/127 are deleted once emitted as
#           single comparisons.
#   $base - prefix used for the names of the generated C tables
#   $ch   - name of the C variable holding the source code; every "·"
#           placeholder in the generated code is replaced by it
#
# Returns the list ($tab, $fun): the C table definitions and the C
# statements of the lookup function body (ending in "return NOCHAR;").
#
# Only source codes 0 .. 65535 (pages 0 .. 255 of 256 codes each) are
# considered.  Relies on linear() and gentab(), and reads $last_tab_full
# after each gentab() call.
sub gen {
    my ($enc, $base, $ch) = @_;

    my ($fun, $tab);
    my (@t1, @t2);

    # Identity mapping from 0 up to one of a few fixed limits.
    for my $top (255, 159, 127, 126, 125) {
        next unless linear($enc, 0, $top, 0);

        undef $enc->[$_] for 0 .. $top;
        $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $top;
    }

    # Identity mapping for a range ending at 159.
    for my $bottom (126, 127, 128) {
        next unless linear($enc, $bottom, 159, $bottom);

        undef $enc->[$_] for $bottom .. 159;
        $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $bottom, 159;
    }

    # Codes 126 and 127 that are still mapped get one comparison each.
    for my $code (126, 127) {
        next unless defined $enc->[$code];
        $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $code, delete $enc->[$code];
    }

    # @map is a snapshot of the remaining entries, @map2 counts them per page.
    my @map;
    my @map2;

    for my $code (0 .. $#$enc) {
        next unless defined $enc->[$code];

        $map[$code] = $enc->[$code];
        $map2[$code >> 8]++;
    }

    for my $p (0 .. 255) {
        next unless $map2[$p];

        my $page_base = $p << 8;

        # Lowest and highest used offset within this page.
        my ($lo, $hi);
        for my $i (0 .. 255) { $lo = $i,       last if defined $map[$page_base + $i]; }
        for my $i (0 .. 255) { $hi = 255 - $i, last if defined $map[$page_base + 255 - $i]; }

        if ($map2[$p] <= 5) {
            # Very few entries: one comparison per entry.
            for my $i ($lo .. $hi) {
                next unless defined $enc->[$page_base + $i];
                $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n",
                                $page_base + $i, $enc->[$page_base + $i];
            }
        } elsif (linear($enc, $page_base + $lo, $page_base + $hi, $enc->[$page_base + $lo])) {
            # Constant offset over the whole used range: a single range check.
            my $dif = $enc->[$page_base + $lo] - ($page_base + $lo);
            $dif = $dif < 0 ? sprintf("- 0x%04x", -$dif) : sprintf("+ 0x%04x", $dif);
            $fun .= sprintf "if (· >= 0x%04x && 0x%04x >= ·) return · %s;\n",
                            $page_base + $lo, $page_base + $hi, $dif;
        } else {
            # Needs a table.  (A former "switch" variant for <= 5 entries
            # sat here but could never be reached, because the first branch
            # tests the same condition; it has been dropped.)
            my ($wide, $type, $fmt) = (0, "uint8_t", "%02x");
            for my $i ($lo .. $hi) {
                if ($enc->[$page_base + $i] > 255) {
                    ($wide, $type, $fmt) = (1, "uint16_t", "%04x");
                    last;
                }
            }

            # @t1 entries start with the page's base code,
            # @t2 entries start with the page number.
            if ($wide) {
                push @t2, [$p, $lo, $hi, $type, $fmt];
            } else {
                push @t1, [$page_base, $lo, $hi, $type, $fmt];
            }
        }
    }

    if (@t2 <= 3) {
        # Too few wide pages for a two-level table; emit them as plain
        # per-page tables.  @t1 expects a base code in slot 0 while @t2
        # stores the page number there, so convert while moving (previously
        # the page number was used as if it were the base code).
        push @t1, map +[$_->[0] << 8, @{$_}[1 .. 4]], @t2;
        @t2 = ();
    }

    for my $entry (@t1) {
        my ($origin, $lo, $hi, $type, $fmt) = @$entry;

        # Filler value for holes; 128 instead of 0 when the table starts at
        # code 0.
        my $rep = $origin + $lo == 0 ? 128 : 0;

        # $rep must reach gentab so the filler written into the table is the
        # same value the generated NOCHAR check below compares against.
        $tab .= "static const $type $base\_$origin\[]"
              . gentab($enc, $origin + $lo, $origin + $hi, $fmt, $rep);

        my $hole_check = $last_tab_full
            ? sprintf("$base\_$origin\[· - 0x%04x] == $rep ? NOCHAR : ", $origin + $lo)
            : "";

        $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
                      . " return %s$base\_$origin\[· - 0x%04x];\n",
                        $origin + $lo, $origin + $hi,
                        $hole_check,
                        $origin + $lo;
    }

    if (@t2) {
        # Bounding box over all wide pages: page range and offset range.
        my ($min, $max) = (255, 0);
        my ($l, $h) = (255, 0);
        for my $entry (@t2) {
            my ($p, $L, $H) = @$entry;
            $max = $p if $p > $max;
            $min = $p if $p < $min;
            $l = $L if $L < $l;
            $h = $H if $H > $h;
        }

        $fun .= "uint8_t l = ·;\n";
        $fun .= "uint16_t h = · >> 8;\n";

        if ($h - $l < 200) {
            # One dense table of ($max - $min + 1) rows with $d cells each.
            my $d = $h - $l + 1;

            my @dense;
            for my $p ($min .. $max) {
                for my $i ($l .. $h) {
                    $dense[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
                }
            }
            $tab .= "static const uint16_t $base\_m[]" . gentab(\@dense, 0, $#dense, "%04x");

            if ($last_tab_full) {
                $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                              . " && 0x%02x <= l && l <= 0x%02x)\n"
                              . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                              . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                              . " : NOCHAR;\n",
                                $min, $max, $l, $h,
                                ($d, $min * $d + $l) x 2;
            } else {
                $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                              . " && 0x%02x <= l && l <= 0x%02x)\n"
                              . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
                                $min, $max, $l, $h,
                                $d,
                                $min * $d + $l;
            }
        } else {
            # One table per page plus an index of pointers; pages without a
            # table keep a 0 entry in the index.
            my @tab = (0) x ($max - $min + 1);
            for my $entry (@t2) {
                my ($p, undef, undef, $type, $fmt) = @$entry;
                $tab .= "static const $type $base\_$p\[]"
                      . gentab($enc, $p * 256 + $l, $p * 256 + $h, $fmt, 0);
                $tab[$p - $min] = "$base\_$p";
            }

            $tab .= "const uint16_t *$base\_i[] = {\n"
                  . wrap(join ", ", @tab)
                  . "\n};\n\n";

            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . " && 0x%02x <= l && l <= 0x%02x\n"
                          . " && $base\_i[h - 0x%02x])\n"
                          . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                          . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                          . " : NOCHAR;\n",
                            $min, $max, $l, $h,
                            $min, ($min, $l) x 2;
        }
    }

    $fun .= "return NOCHAR;\n";

    # Substitute the real C variable name for the placeholder.
    $fun =~ s/·/$ch/g;

    return ($tab, $fun);
}
208    
# Main driver: every non-blank line of the DATA section names one character
# set as "<group> <base> <encoding>".  For each of them the mapping is probed
# through encode() and written to table/<base>.h.
while (<DATA>) {
    next unless /\S/;    # tolerate blank lines in the table

    # split ' ' also ignores leading whitespace and the trailing newline
    my ($group, $base, $cs) = split ' ';

    my @enc1;    # unicode code point -> encoded value
    my @enc2;    # encoded value -> first unicode code point producing it

    for my $cp (0 .. 65535) {
        my $enc = encode($cs, chr($cp), Encode::FB_QUIET);

        # code points for which encode() yields an empty string are skipped
        if (length $enc) {
            # the encoded bytes, read as one big-endian number
            my $code = hex unpack "H*", $enc;
            $enc1[$cp] = $code;
            $enc2[$code] = $cp unless defined $enc2[$code];
        }
    }

    my ($tab1, $fun1);
    my ($tab2, $fun2);

    ($tab1, $fun1) = gen(\@enc1, "$base\_f", "unicode");
    ($tab2, $fun2) = gen(\@enc2, "$base\_t", "enc") if $TO_UNICODE;

    # indent the generated function bodies
    $fun1 =~ s/^/ /gm;
    $fun2 =~ s/^/ /gm if defined $fun2;

    # the reverse direction is optional; keep the interpolation below clean
    $tab2 = "" unless defined $tab2;

    print "$base\n";

    my $path = "table/$base.h";
    open my $out, '>', $path
        or die "gentables: cannot open $path for writing: $!\n";

    print {$out} <<EOF;
//
// AUTOMATICALLLY GENERATED by gentables
//
#ifdef ENCODING_$group

$tab1$tab2
static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
$fun1}
EOF

    if ($TO_UNICODE) {
        print {$out} <<EOF;
#if ENCODING_TO_UNICODE
static unicode_t cs_$base\_to_unicode (uint32_t enc) {
$fun2}
#endif
EOF
    }

    print {$out} <<EOF;

#else

#define cs_$base\_from_unicode cs_unknown_from_unicode
#define cs_$base\_to_unicode cs_unknown_to_unicode

#endif
EOF

    # buffered write errors only show up at close time
    close $out
        or die "gentables: error writing $path: $!\n";
}
269    
270     __DATA__
271     VN viscii viscii
272     KR ksc5601_1987_0 ksc5601-raw
273 root 1.7 ZH gb2312_1980_0 gb2312-raw
274 root 1.9 ZH gbk_0 gbk
275 root 1.7 ZH_EXT cns11643_1992_1 cns11643-1
276     ZH_EXT cns11643_1992_2 cns11643-2
277     ZH_EXT cns11643_1992_3 cns11643-3
278     ZH_EXT cns11643_1992_4 cns11643-4
279     ZH_EXT cns11643_1992_5 cns11643-5
280     ZH_EXT cns11643_1992_6 cns11643-6
281     ZH_EXT cns11643_1992_7 cns11643-7
282     ZH_EXT cns11643_1992_f cns11643-f
283     ZH big5 big5
284     ZH_EXT big5_ext big5ext
285     ZH_EXT big5_plus big5plus
286 pcg 1.1 EU koi8_r koi8-r
287     EU koi8_u koi8-u
288     DEFAULT iso8859_1 iso-8859-1
289     EU iso8859_2 iso-8859-2
290     EU iso8859_3 iso-8859-3
291     EU iso8859_4 iso-8859-4
292     EU iso8859_5 iso-8859-5
293     EU iso8859_6 iso-8859-6
294     EU iso8859_7 iso-8859-7
295     EU iso8859_8 iso-8859-8
296     EU iso8859_9 iso-8859-9
297     EU iso8859_10 iso-8859-10
298     EU iso8859_11 iso-8859-11
299     EU iso8859_13 iso-8859-13
300 ayin 1.10 EU iso8859_14 iso-8859-14
301 pcg 1.1 DEFAULT iso8859_15 iso-8859-15
302     EU iso8859_16 iso-8859-16
303     JP jis0201_1976_0 jis0201-raw
304 pcg 1.5 JP jis0208_1990_0 jis0208-raw
305 pcg 1.1 JP jis0212_1990_0 jis0212-raw
306     JP_EXT jis0213_1 jis0213-1-raw
307     JP_EXT jis0213_2 jis0213-2-raw