ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.12
Committed: Wed Jun 16 23:07:11 2021 UTC (2 years, 10 months ago) by root
Branch: MAIN
CVS Tags: rxvt-unicode-rel-9_29, rxvt-unicode-rel-9_30, HEAD
Changes since 1.11: +2 -0 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 # the generated tables mostly have NOT been checked so far!
4
5 # apt install libencode-hanextra-perl libencode-jis2k-perl
6
7 use v5.8.2;
8 use utf8;
9 use Encode;
10 use Encode::JP;
11 use Encode::CN;
12 use Encode::KR;
13 use Encode::HanExtra;
14 use Encode::JIS2K;
15
16 my $gen;
17
18 $TO_UNICODE = 0; # also generate to_unicode tables
19
20 sub linear {
21 my ($a, $l, $h, $b) = @_;
22 for ($l .. $h) {
23 return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l);
24 }
25 1;
26 }
27
28 sub wrap {
29 my $res = " ";
30 $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
31 $res .= "$1" if $_[0] =~ /\G(.*)$/;
32 $res;
33 }
34
35 my $last_tab_full; # hack
36
37 sub gentab {
38 my ($enc, $l, $h, $f, $rep) = @_;
39 $last_tab_full = 0;
40
41 " = {\n"
42 . (wrap join ", ",
43 map +(sprintf "0x$f",
44 defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
45 ), $l..$h)
46 . "\n};\n";
47 }
48
49 sub gen {
50 my ($enc, $base, $ch) = @_;
51
52 my ($fun, $tab);
53 my (@t1, @t2);
54
55 for (255, 159, 127, 126, 125) {
56 if (linear $enc, 0, $_, 0) {
57 undef $enc->[$_] for 0..$_;
58 $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
59 }
60 }
61
62 for (126, 127, 128) {
63 if (linear $enc, $_, 159, $_) {
64 undef $enc->[$_] for $_..159;
65 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
66 }
67 }
68
69 for (126, 127) {
70 next unless defined $enc->[$_];
71 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
72 }
73
74 my @map;
75 my @map2;
76
77 for (0 .. $#$enc) {
78 if (defined $enc->[$_]) {
79 $map[$_] = $enc->[$_];
80 $map2[$_ >> 8]++;
81 }
82 }
83
84 for my $p (0..255) {
85 if ($map2[$p]) {
86 my $b = $p << 8;
87
88 my ($l, $h);
89 for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
90 for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }
91
92 if ($map2[$p] <= 5) {
93 for ($l .. $h) {
94 next unless defined $enc->[$b + $_];
95 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
96 }
97 } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
98 my $dif = $enc->[$b + $l] - ($b + $l);
99 $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
100 $fun .= sprintf "if (· >= 0x%04x && 0x%04x >= ·) return · %s;\n", $b + $l, $b + $h, $dif;
101 } elsif ($map2[$p] <= 5) { # defunct
102 $fun .= "switch (·)\n {\n";
103 for ($l .. $h) {
104 next unless defined $enc->[$b + $_];
105 $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
106 }
107 $fun .= " }\n";
108 } else {
109 my ($i, $t, $f) = (0, "uint8_t", "%02x");
110 for ($l .. $h) {
111 if ($enc->[$b + $_] > 255) {
112 ($i, $t, $f) = (1, "uint16_t", "%04x");
113 last;
114 }
115 }
116
117 $i ? push @t2, [$p, $l, $h, $t, $f]
118 : push @t1, [$b, $l, $h, $t, $f];
119 }
120
121 }
122 }
123
124 if (@t2 <= 3) {
125 push @t1, @t2;
126 @t2 = ();
127 }
128
129 for (@t1) {
130 my ($b, $l, $h, $t, $f) = @$_;
131 my $rep = $b + $l == 0 ? 128 : 0;
132 $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
133 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
134 . " return %s$base\_$b\[· - 0x%04x];\n",
135 $b + $l, $b + $h,
136 ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
137 $b + $l;
138 }
139
140 if (@t2) {
141 my ($min, $max) = (255, 0);
142 my ($l, $h) = (255, 0);
143 for (@t2) {
144 my ($p, $L, $H, $t, $f) = @$_;
145 $max = $p if $p > $max;
146 $min = $p if $p < $min;
147 $l = $L if $L < $l;
148 $h = $H if $H > $h;
149 }
150
151 $fun .= "uint8_t l = ·;\n";
152 $fun .= "uint16_t h = · >> 8;\n";
153
154 if ($h - $l < 200) {
155 my $d = $h - $l + 1;
156
157 my @enc;
158 for $p ($min .. $max) {
159 for $i ($l .. $h) {
160 $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
161 }
162 }
163 $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";
164
165 if ($last_tab_full) {
166 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
167 . " && 0x%02x <= l && l <= 0x%02x)\n"
168 . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
169 . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
170 . " : NOCHAR;\n",
171 $min, $max, $l, $h,
172 ($d, $min * $d + $l) x 2;
173 } else {
174 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
175 . " && 0x%02x <= l && l <= 0x%02x)\n"
176 . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
177 $min, $max, $l, $h,
178 $d,
179 $min * $d + $l;
180 }
181 } else {
182 my @tab = (0) x ($max - $min);
183 for (@t2) {
184 my ($p, undef, undef, $t, $f) = @$_;
185 $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
186 $tab[$p - $min] = "$base\_$p";
187 }
188
189 $tab .= "const uint16_t *$base\_i[] = {\n"
190 . (wrap join ", ", @tab)
191 . "\n};\n\n";
192
193 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
194 . " && 0x%02x <= l && l <= 0x%02x\n"
195 . " && $base\_i[h - 0x%02x])\n"
196 . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
197 . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
198 . " : NOCHAR;\n",
199 $min, $max, $l, $h,
200 $min, ($min, $l) x 2;
201 }
202 }
203
204 $fun .= "return NOCHAR;\n";
205
206 $fun =~ s/·/$ch/g;
207
208 ($tab, $fun);
209 }
210
211 while (<DATA>) {
212 my ($group, $base, $cs, $type) = split /\s+/;
213
214 my @enc1;
215 my @enc2;
216
217 for (0 .. 65535) {
218 my $enc = encode $cs, (chr $_), Encode::FB_QUIET;
219
220 if (length $enc) {
221 my $code = hex unpack "H*", $enc;
222 $enc1[$_] = $code;
223 $enc2[$code] = $_ unless defined $enc2[$code];
224 }
225 }
226
227 my ($tab1, $fun1);
228 my ($tab2, $fun2);
229
230 ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
231 ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE;
232
233 $fun1 =~ s/^/ /gm;
234 $fun2 =~ s/^/ /gm;
235
236 print "$base\n";
237 open OUT, ">table/$base.h" or die;
238
239 print OUT <<EOF;
240 //
241 // AUTOMATICALLLY GENERATED by gentables
242 //
243 #ifdef ENCODING_$group
244
245 $tab1$tab2
246 static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
247 $fun1}
248 EOF
249
250 if ($TO_UNICODE) {
251 print OUT <<EOF
252 #if ENCODING_TO_UNICODE
253 static unicode_t cs_$base\_to_unicode (uint32_t enc) {
254 $fun2}
255 #endif
256 EOF
257 }
258
259 print OUT <<EOF;
260
261 #else
262
263 #define cs_$base\_from_unicode cs_unknown_from_unicode
264 #define cs_$base\_to_unicode cs_unknown_to_unicode
265
266 #endif
267 EOF
268
269 close OUT;
270 }
271
272 __DATA__
273 VN viscii viscii
274 KR ksc5601_1987_0 ksc5601-raw
275 ZH gb2312_1980_0 gb2312-raw
276 ZH gbk_0 gbk
277 ZH_EXT cns11643_1992_1 cns11643-1
278 ZH_EXT cns11643_1992_2 cns11643-2
279 ZH_EXT cns11643_1992_3 cns11643-3
280 ZH_EXT cns11643_1992_4 cns11643-4
281 ZH_EXT cns11643_1992_5 cns11643-5
282 ZH_EXT cns11643_1992_6 cns11643-6
283 ZH_EXT cns11643_1992_7 cns11643-7
284 ZH_EXT cns11643_1992_f cns11643-f
285 ZH big5 big5
286 ZH_EXT big5_ext big5ext
287 ZH_EXT big5_plus big5plus
288 EU koi8_r koi8-r
289 EU koi8_u koi8-u
290 DEFAULT iso8859_1 iso-8859-1
291 EU iso8859_2 iso-8859-2
292 EU iso8859_3 iso-8859-3
293 EU iso8859_4 iso-8859-4
294 EU iso8859_5 iso-8859-5
295 EU iso8859_6 iso-8859-6
296 EU iso8859_7 iso-8859-7
297 EU iso8859_8 iso-8859-8
298 EU iso8859_9 iso-8859-9
299 EU iso8859_10 iso-8859-10
300 EU iso8859_11 iso-8859-11
301 EU iso8859_13 iso-8859-13
302 EU iso8859_14 iso-8859-14
303 DEFAULT iso8859_15 iso-8859-15
304 EU iso8859_16 iso-8859-16
305 JP jis0201_1976_0 jis0201-raw
306 JP jis0208_1990_0 jis0208-raw
307 JP jis0212_1990_0 jis0212-raw
308 JP_EXT jis0213_1 jis0213-1-raw
309 JP_EXT jis0213_2 jis0213-2-raw