ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.9
Committed: Fri Dec 10 19:55:55 2004 UTC (19 years, 5 months ago) by root
Branch: MAIN
CVS Tags: rel-6_2, rel-6_3, rel-6_0, rel-6_1, rel-4_8, rel-4_9, rel-4_4, rel-4_6, rel-4_7, rel-7_3a, rel-7_3, rel-7_2, rel-7_1, rel-7_0, rel-7_7, rel-7_6, rel-7_5, rel-7_4, rel-7_9, rel-7_8, rel-5_5, rel-5_4, rel-5_7, rel-5_1, rel-5_0, rel-5_3, rel-5_2, rel-5_9, rel-5_8, rel-8_0, rel-8_1, rel-8_2, rel-8_3, rel-8_4
Changes since 1.8: +1 -0 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 # the generated tables mostly have NOT been checked so far!
4
5 use v5.8.2;
6 use utf8;
7 use Encode;
8 use Encode::JP;
9 use Encode::CN;
10 use Encode::KR;
11 use Encode::HanExtra;
12 use Encode::JIS2K;
13
14 my $gen;
15
16 $TO_UNICODE = 0; # also generate to_unicode tables
17
18 sub linear {
19 my ($a, $l, $h, $b) = @_;
20 for ($l .. $h) {
21 return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l);
22 }
23 1;
24 }
25
26 sub wrap {
27 my $res = " ";
28 $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
29 $res .= "$1" if $_[0] =~ /\G(.*)$/;
30 $res;
31 }
32
33 my $last_tab_full; # hack
34
35 sub gentab {
36 my ($enc, $l, $h, $f, $rep) = @_;
37 $last_tab_full = 0;
38
39 " = {\n"
40 . (wrap join ", ",
41 map +(sprintf "0x$f",
42 defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
43 ), $l..$h)
44 . "\n};\n";
45 }
46
47 sub gen {
48 my ($enc, $base, $ch) = @_;
49
50 my ($fun, $tab);
51 my (@t1, @t2);
52
53 for (255, 159, 127, 126, 125) {
54 if (linear $enc, 0, $_, 0) {
55 undef $enc->[$_] for 0..$_;
56 $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
57 }
58 }
59
60 for (126, 127, 128) {
61 if (linear $enc, $_, 159, $_) {
62 undef $enc->[$_] for $_..159;
63 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
64 }
65 }
66
67 for (126, 127) {
68 next unless defined $enc->[$_];
69 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
70 }
71
72 my @map;
73 my @map2;
74
75 for (0 .. $#$enc) {
76 if (defined $enc->[$_]) {
77 $map[$_] = $enc->[$_];
78 $map2[$_ >> 8]++;
79 }
80 }
81
82 for my $p (0..255) {
83 if ($map2[$p]) {
84 my $b = $p << 8;
85
86 my ($l, $h);
87 for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
88 for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }
89
90 if ($map2[$p] <= 5) {
91 for ($l .. $h) {
92 next unless defined $enc->[$b + $_];
93 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
94 }
95 } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
96 my $dif = $enc->[$b + $l] - ($b + $l);
97 $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
98 $fun .= sprintf "if (· <= 0x%04x && 0x%04x <= ·) return · %s;\n", $b + $l, $b + $h, $dif;
99 } elsif ($map2[$p] <= 5) { # defunct
100 $fun .= "switch (·)\n {\n";
101 for ($l .. $h) {
102 next unless defined $enc->[$b + $_];
103 $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
104 }
105 $fun .= " }\n";
106 } else {
107 my ($i, $t, $f) = (0, "uint8_t", "%02x");
108 for ($l .. $h) {
109 if ($enc->[$b + $_] > 255) {
110 ($i, $t, $f) = (1, "uint16_t", "%04x");
111 last;
112 }
113 }
114
115 $i ? push @t2, [$p, $l, $h, $t, $f]
116 : push @t1, [$b, $l, $h, $t, $f];
117 }
118
119 }
120 }
121
122 if (@t2 <= 3) {
123 push @t1, @t2;
124 @t2 = ();
125 }
126
127 for (@t1) {
128 my ($b, $l, $h, $t, $f) = @$_;
129 my $rep = $b + $l == 0 ? 128 : 0;
130 $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
131 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
132 . " return %s$base\_$b\[· - 0x%04x];\n",
133 $b + $l, $b + $h,
134 ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
135 $b + $l;
136 }
137
138 if (@t2) {
139 my ($min, $max) = (255, 0);
140 my ($l, $h) = (255, 0);
141 for (@t2) {
142 my ($p, $L, $H, $t, $f) = @$_;
143 $max = $p if $p > $max;
144 $min = $p if $p < $min;
145 $l = $L if $L < $l;
146 $h = $H if $H > $h;
147 }
148
149 $fun .= "uint8_t l = ·;\n";
150 $fun .= "uint16_t h = · >> 8;\n";
151
152 if ($h - $l < 200) {
153 my $d = $h - $l + 1;
154
155 my @enc;
156 for $p ($min .. $max) {
157 for $i ($l .. $h) {
158 $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
159 }
160 }
161 $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";
162
163 if ($last_tab_full) {
164 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
165 . " && 0x%02x <= l && l <= 0x%02x)\n"
166 . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
167 . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
168 . " : NOCHAR;\n",
169 $min, $max, $l, $h,
170 ($d, $min * $d + $l) x 2;
171 } else {
172 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
173 . " && 0x%02x <= l && l <= 0x%02x)\n"
174 . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
175 $min, $max, $l, $h,
176 $d,
177 $min * $d + $l;
178 }
179 } else {
180 my @tab = (0) x ($max - $min);
181 for (@t2) {
182 my ($p, undef, undef, $t, $f) = @$_;
183 $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
184 $tab[$p - $min] = "$base\_$p";
185 }
186
187 $tab .= "const uint16_t *$base\_i[] = {\n"
188 . (wrap join ", ", @tab)
189 . "\n};\n\n";
190
191 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
192 . " && 0x%02x <= l && l <= 0x%02x\n"
193 . " && $base\_i[h - 0x%02x])\n"
194 . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
195 . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
196 . " : NOCHAR;\n",
197 $min, $max, $l, $h,
198 $min, ($min, $l) x 2;
199 }
200 }
201
202 $fun .= "return NOCHAR;\n";
203
204 $fun =~ s/·/$ch/g;
205
206 ($tab, $fun);
207 }
208
209 while (<DATA>) {
210 my ($group, $base, $cs, $type) = split /\s+/;
211
212 my @enc1;
213 my @enc2;
214
215 for (0 .. 65535) {
216 my $enc = encode $cs, (chr $_), Encode::FB_QUIET;
217
218 if (length $enc) {
219 my $code = hex unpack "H*", $enc;
220 $enc1[$_] = $code;
221 $enc2[$code] = $_ unless defined $enc2[$code];
222 }
223 }
224
225 my ($tab1, $fun1);
226 my ($tab2, $fun2);
227
228 ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
229 ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE;
230
231 $fun1 =~ s/^/ /gm;
232 $fun2 =~ s/^/ /gm;
233
234 print "$base\n";
235 open OUT, ">table/$base.h" or die;
236
237 print OUT <<EOF;
238 //
239 // AUTOMATICALLLY GENERATED by gentables
240 //
241 #ifdef ENCODING_$group
242
243 $tab1$tab2
244 static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
245 $fun1}
246 EOF
247
248 if ($TO_UNICODE) {
249 print OUT <<EOF
250 #if ENCODING_TO_UNICODE
251 static unicode_t cs_$base\_to_unicode (uint32_t enc) {
252 $fun2}
253 #endif
254 EOF
255 }
256
257 print OUT <<EOF;
258
259 #else
260
261 #define cs_$base\_from_unicode cs_unknown_from_unicode
262 #define cs_$base\_to_unicode cs_unknown_to_unicode
263
264 #endif
265 EOF
266
267 close OUT;
268 }
269
270 __DATA__
271 VN viscii viscii
272 KR ksc5601_1987_0 ksc5601-raw
273 ZH gb2312_1980_0 gb2312-raw
274 ZH gbk_0 gbk
275 ZH_EXT cns11643_1992_1 cns11643-1
276 ZH_EXT cns11643_1992_2 cns11643-2
277 ZH_EXT cns11643_1992_3 cns11643-3
278 ZH_EXT cns11643_1992_4 cns11643-4
279 ZH_EXT cns11643_1992_5 cns11643-5
280 ZH_EXT cns11643_1992_6 cns11643-6
281 ZH_EXT cns11643_1992_7 cns11643-7
282 ZH_EXT cns11643_1992_f cns11643-f
283 ZH big5 big5
284 ZH_EXT big5_ext big5ext
285 ZH_EXT big5_plus big5plus
286 EU koi8_r koi8-r
287 EU koi8_u koi8-u
288 DEFAULT iso8859_1 iso-8859-1
289 EU iso8859_2 iso-8859-2
290 EU iso8859_3 iso-8859-3
291 EU iso8859_4 iso-8859-4
292 EU iso8859_5 iso-8859-5
293 EU iso8859_6 iso-8859-6
294 EU iso8859_7 iso-8859-7
295 EU iso8859_8 iso-8859-8
296 EU iso8859_9 iso-8859-9
297 EU iso8859_10 iso-8859-10
298 EU iso8859_11 iso-8859-11
299 EU iso8859_13 iso-8859-13
300 EU iso8859_14 iso-8859-14
301 DEFAULT iso8859_15 iso-8859-15
302 EU iso8859_16 iso-8859-16
303 JP jis0201_1976_0 jis0201-raw
304 JP jis0208_1990_0 jis0208-raw
305 JP jis0212_1990_0 jis0212-raw
306 JP_EXT jis0213_1 jis0213-1-raw
307 JP_EXT jis0213_2 jis0213-2-raw