ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.7
Committed: Sun Sep 5 09:32:59 2004 UTC (19 years, 8 months ago) by root
Branch: MAIN
CVS Tags: rel-4_0, rel-4_1
Changes since 1.6: +12 -12 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 # the generated tables mostly have NOT been checked so far!
4
5 use v5.8.2;
6 use utf8;
7 use Encode;
8
9 my $gen;
10
11 $TO_UNICODE = 0; # also generate to_unicode tables
12
13 sub linear {
14 my ($a, $l, $h, $b) = @_;
15 for ($l .. $h) {
16 return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l);
17 }
18 1;
19 }
20
21 sub wrap {
22 my $res = " ";
23 $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
24 $res .= "$1" if $_[0] =~ /\G(.*)$/;
25 $res;
26 }
27
28 my $last_tab_full; # hack
29
30 sub gentab {
31 my ($enc, $l, $h, $f, $rep) = @_;
32 $last_tab_full = 0;
33
34 " = {\n"
35 . (wrap join ", ",
36 map +(sprintf "0x$f",
37 defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
38 ), $l..$h)
39 . "\n};\n";
40 }
41
42 sub gen {
43 my ($enc, $base, $ch) = @_;
44
45 my ($fun, $tab);
46 my (@t1, @t2);
47
48 for (255, 159, 127, 126, 125) {
49 if (linear $enc, 0, $_, 0) {
50 undef $enc->[$_] for 0..$_;
51 $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
52 }
53 }
54
55 for (126, 127, 128) {
56 if (linear $enc, $_, 159, $_) {
57 undef $enc->[$_] for $_..159;
58 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
59 }
60 }
61
62 for (126, 127) {
63 next unless defined $enc->[$_];
64 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
65 }
66
67 my @map;
68 my @map2;
69
70 for (0 .. $#$enc) {
71 if (defined $enc->[$_]) {
72 $map[$_] = $enc->[$_];
73 $map2[$_ >> 8]++;
74 }
75 }
76
77 for my $p (0..255) {
78 if ($map2[$p]) {
79 my $b = $p << 8;
80
81 my ($l, $h);
82 for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
83 for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }
84
85 if ($map2[$p] <= 5) {
86 for ($l .. $h) {
87 next unless defined $enc->[$b + $_];
88 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
89 }
90 } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
91 my $dif = $enc->[$b + $l] - ($b + $l);
92 $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
93 $fun .= sprintf "if (· <= 0x%04x && 0x%04x <= ·) return · %s;\n", $b + $l, $b + $h, $dif;
94 } elsif ($map2[$p] <= 5) { # defunct
95 $fun .= "switch (·)\n {\n";
96 for ($l .. $h) {
97 next unless defined $enc->[$b + $_];
98 $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
99 }
100 $fun .= " }\n";
101 } else {
102 my ($i, $t, $f) = (0, "uint8_t", "%02x");
103 for ($l .. $h) {
104 if ($enc->[$b + $_] > 255) {
105 ($i, $t, $f) = (1, "uint16_t", "%04x");
106 last;
107 }
108 }
109
110 $i ? push @t2, [$p, $l, $h, $t, $f]
111 : push @t1, [$b, $l, $h, $t, $f];
112 }
113
114 }
115 }
116
117 if (@t2 <= 3) {
118 push @t1, @t2;
119 @t2 = ();
120 }
121
122 for (@t1) {
123 my ($b, $l, $h, $t, $f) = @$_;
124 my $rep = $b + $l == 0 ? 128 : 0;
125 $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
126 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
127 . " return %s$base\_$b\[· - 0x%04x];\n",
128 $b + $l, $b + $h,
129 ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
130 $b + $l;
131 }
132
133 if (@t2) {
134 my ($min, $max) = (255, 0);
135 my ($l, $h) = (255, 0);
136 for (@t2) {
137 my ($p, $L, $H, $t, $f) = @$_;
138 $max = $p if $p > $max;
139 $min = $p if $p < $min;
140 $l = $L if $L < $l;
141 $h = $H if $H > $h;
142 }
143
144 $fun .= "uint8_t l = ·;\n";
145 $fun .= "uint16_t h = · >> 8;\n";
146
147 if ($h - $l < 200) {
148 my $d = $h - $l + 1;
149
150 my @enc;
151 for $p ($min .. $max) {
152 for $i ($l .. $h) {
153 $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
154 }
155 }
156 $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";
157
158 if ($last_tab_full) {
159 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
160 . " && 0x%02x <= l && l <= 0x%02x)\n"
161 . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
162 . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
163 . " : NOCHAR;\n",
164 $min, $max, $l, $h,
165 ($d, $min * $d + $l) x 2;
166 } else {
167 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
168 . " && 0x%02x <= l && l <= 0x%02x)\n"
169 . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
170 $min, $max, $l, $h,
171 $d,
172 $min * $d + $l;
173 }
174 } else {
175 my @tab = (0) x ($max - $min);
176 for (@t2) {
177 my ($p, undef, undef, $t, $f) = @$_;
178 $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
179 $tab[$p - $min] = "$base\_$p";
180 }
181
182 $tab .= "const uint16_t *$base\_i[] = {\n"
183 . (wrap join ", ", @tab)
184 . "\n};\n\n";
185
186 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
187 . " && 0x%02x <= l && l <= 0x%02x\n"
188 . " && $base\_i[h - 0x%02x])\n"
189 . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
190 . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
191 . " : NOCHAR;\n",
192 $min, $max, $l, $h,
193 $min, ($min, $l) x 2;
194 }
195 }
196
197 $fun .= "return NOCHAR;\n";
198
199 $fun =~ s/·/$ch/g;
200
201 ($tab, $fun);
202 }
203
204 while (<DATA>) {
205 my ($group, $base, $cs, $type) = split /\s+/;
206
207 my @enc1;
208 my @enc2;
209
210 for (0 .. 65535) {
211 my $enc = encode $cs, (chr $_), Encode::FB_QUIET;
212
213 if (length $enc) {
214 my $code = hex unpack "H*", $enc;
215 $enc1[$_] = $code;
216 $enc2[$code] = $_ unless defined $enc2[$code];
217 }
218 }
219
220 my ($tab1, $fun1);
221 my ($tab2, $fun2);
222
223 ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
224 ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE;
225
226 $fun1 =~ s/^/ /gm;
227 $fun2 =~ s/^/ /gm;
228
229 print "$base\n";
230 open OUT, ">table/$base.h" or die;
231
232 print OUT <<EOF;
233 //
234 // AUTOMATICALLLY GENERATED by gentables
235 //
236 #ifdef ENCODING_$group
237
238 $tab1$tab2
239 static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
240 $fun1}
241 EOF
242
243 if ($TO_UNICODE) {
244 print OUT <<EOF
245 #if ENCODING_TO_UNICODE
246 static unicode_t cs_$base\_to_unicode (uint32_t enc) {
247 $fun2}
248 #endif
249 EOF
250 }
251
252 print OUT <<EOF;
253
254 #else
255
256 #define cs_$base\_from_unicode cs_unknown_from_unicode
257 #define cs_$base\_to_unicode cs_unknown_to_unicode
258
259 #endif
260 EOF
261
262 close OUT;
263 }
264
265 __DATA__
266 VN viscii viscii
267 KR ksc5601_1987_0 ksc5601-raw
268 ZH gb2312_1980_0 gb2312-raw
269 ZH_EXT cns11643_1992_1 cns11643-1
270 ZH_EXT cns11643_1992_2 cns11643-2
271 ZH_EXT cns11643_1992_3 cns11643-3
272 ZH_EXT cns11643_1992_4 cns11643-4
273 ZH_EXT cns11643_1992_5 cns11643-5
274 ZH_EXT cns11643_1992_6 cns11643-6
275 ZH_EXT cns11643_1992_7 cns11643-7
276 ZH_EXT cns11643_1992_f cns11643-f
277 ZH big5 big5
278 ZH_EXT big5_ext big5ext
279 ZH_EXT big5_plus big5plus
280 EU koi8_r koi8-r
281 EU koi8_u koi8-u
282 DEFAULT iso8859_1 iso-8859-1
283 EU iso8859_2 iso-8859-2
284 EU iso8859_3 iso-8859-3
285 EU iso8859_4 iso-8859-4
286 EU iso8859_5 iso-8859-5
287 EU iso8859_6 iso-8859-6
288 EU iso8859_7 iso-8859-7
289 EU iso8859_8 iso-8859-8
290 EU iso8859_9 iso-8859-9
291 EU iso8859_10 iso-8859-10
292 EU iso8859_11 iso-8859-11
293 EU iso8859_13 iso-8859-13
294 EU iso8859_14 iso-8859-14
295 DEFAULT iso8859_15 iso-8859-15
296 EU iso8859_16 iso-8859-16
297 JP jis0201_1976_0 jis0201-raw
298 JP jis0208_1990_0 jis0208-raw
299 JP jis0212_1990_0 jis0212-raw
300 JP_EXT jis0213_1 jis0213-1-raw
301 JP_EXT jis0213_2 jis0213-2-raw