ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.2
Committed: Wed Mar 3 00:20:33 2004 UTC (20 years, 2 months ago) by pcg
Branch: MAIN
CVS Tags: rel-2_1_0, rel-2_2, rel-2_3
Changes since 1.1: +1 -0 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 #!/opt/bin/perl
2
3 # the generated tables mostly have NOT been chcked so far!
4
5 use v5.8.2;
6 use utf8;
7 use Encode;
8
9 my $gen;
10
11 sub linear {
12 my ($a, $l, $h, $b) = @_;
13 for ($l .. $h) {
14 return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l);
15 }
16 1;
17 }
18
19 sub wrap {
20 my $res = " ";
21 $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
22 $res .= "$1" if $_[0] =~ /\G(.*)$/;
23 $res;
24 }
25
26 my $last_tab_full; # hack
27
28 sub gentab {
29 my ($enc, $l, $h, $f, $rep) = @_;
30 $last_tab_full = 0;
31
32 " = {\n"
33 . (wrap join ", ",
34 map +(sprintf "0x$f",
35 defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
36 ), $l..$h)
37 . "\n};\n";
38 }
39
40 sub gen {
41 my ($enc, $base, $ch) = @_;
42
43 my ($fun, $tab);
44 my (@t1, @t2);
45
46 for (255, 159, 127, 126, 125) {
47 if (linear $enc, 0, $_, 0) {
48 undef $enc->[$_] for 0..$_;
49 $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
50 }
51 }
52
53 for (126, 127, 128) {
54 if (linear $enc, $_, 159, $_) {
55 undef $enc->[$_] for $_..159;
56 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
57 }
58 }
59
60 for (126, 127) {
61 next unless defined $enc->[$_];
62 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
63 }
64
65 my @map;
66 my @map2;
67
68 for (0 .. $#$enc) {
69 if (defined $enc->[$_]) {
70 $map[$_] = $enc->[$_];
71 $map2[$_ >> 8]++;
72 }
73 }
74
75 for my $p (0..255) {
76 if ($map2[$p]) {
77 my $b = $p << 8;
78
79 my ($l, $h);
80 for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
81 for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }
82
83 if ($map2[$p] <= 5) {
84 for ($l .. $h) {
85 next unless defined $enc->[$b + $_];
86 $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
87 }
88 } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
89 my $dif = $enc->[$b + $l] - ($b + $l);
90 $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
91 $fun .= sprintf "if (· <= 0x%04x && 0x%04x <= ·) return · %s;\n", $b + $l, $b + $h, $dif;
92 } elsif ($map2[$p] <= 5) { # defunct
93 $fun .= "switch (·)\n {\n";
94 for ($l .. $h) {
95 next unless defined $enc->[$b + $_];
96 $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
97 }
98 $fun .= " }\n";
99 } else {
100 my ($i, $t, $f) = (0, "uint8_t", "%02x");
101 for ($l .. $h) {
102 if ($enc->[$b + $_] > 255) {
103 ($i, $t, $f) = (1, "uint16_t", "%04x");
104 last;
105 }
106 }
107
108 $i ? push @t2, [$p, $l, $h, $t, $f]
109 : push @t1, [$b, $l, $h, $t, $f];
110 }
111
112 }
113 }
114
115 if (@t2 <= 3) {
116 push @t1, @t2;
117 @t2 = ();
118 }
119
120 for (@t1) {
121 my ($b, $l, $h, $t, $f) = @$_;
122 my $rep = $b + $l == 0 ? 128 : 0;
123 $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
124 $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
125 . " return %s$base\_$b\[· - 0x%04x];\n",
126 $b + $l, $b + $h,
127 ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
128 $b + $l;
129 }
130
131 if (@t2) {
132 my ($min, $max) = (255, 0);
133 my ($l, $h) = (255, 0);
134 for (@t2) {
135 my ($p, $L, $H, $t, $f) = @$_;
136 $max = $p if $p > $max;
137 $min = $p if $p < $min;
138 $l = $L if $L < $l;
139 $h = $H if $H > $h;
140 }
141
142 $fun .= "uint8_t l = ·;\n";
143 $fun .= "uint16_t h = · >> 8;\n";
144
145 if ($h - $l < 200) {
146 my $d = $h - $l + 1;
147
148 my @enc;
149 for $p ($min .. $max) {
150 for $i ($l .. $h) {
151 $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
152 }
153 }
154 $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";
155
156 if ($last_tab_full) {
157 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
158 . " && 0x%02x <= l && l <= 0x%02x)\n"
159 . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
160 . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
161 . " : NOCHAR;\n",
162 $min, $max, $l, $h,
163 ($d, $min * $d + $l) x 2;
164 } else {
165 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
166 . " && 0x%02x <= l && l <= 0x%02x)\n"
167 . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
168 $min, $max, $l, $h,
169 $d,
170 $min * $d + $l;
171 }
172 } else {
173 my @tab = (0) x ($max - $min);
174 for (@t2) {
175 my ($p, undef, undef, $t, $f) = @$_;
176 $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
177 $tab[$p - $min] = "$base\_$p";
178 }
179
180 $tab .= "const uint16_t *$base\_i[] = {\n"
181 . (wrap join ", ", @tab)
182 . "\n};\n\n";
183
184 $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
185 . " && 0x%02x <= l && l <= 0x%02x\n"
186 . " && $base\_i[h - 0x%02x])\n"
187 . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
188 . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
189 . " : NOCHAR;\n",
190 $min, $max, $l, $h,
191 $min, ($min, $l) x 2;
192 }
193 }
194
195 $fun .= "return NOCHAR;\n";
196
197 $fun =~ s/·/$ch/g;
198
199 ($tab, $fun);
200 }
201
202 while (<DATA>) {
203 my ($group, $base, $cs, $type) = split /\s+/;
204
205 my @enc1;
206 my @enc2;
207
208 for (0 .. 65535) {
209 my $enc = encode $cs, (chr $_), Encode::FB_QUIET;
210
211 if (length $enc) {
212 my $code = hex unpack "H*", $enc;
213 $enc1[$_] = $code;
214 $enc2[$code] = $_ unless defined $enc2[$code];
215 }
216 }
217
218 my ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
219 my ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc";
220
221 #$tab1 =~ s/^/ /gm;
222 #$tab2 =~ s/^/ /gm;
223 $fun1 =~ s/^/ /gm;
224 $fun2 =~ s/^/ /gm;
225
226 print "$base\n";
227 open OUT, ">table/$base.h" or die;
228
229 print OUT <<EOF;
230 //
231 // AUTOMATICALLLY GENERATED by gentables
232 //
233 #ifdef ENCODING_$group
234
235 $tab1$tab2
236 struct rxvt_codeset_conv_$base : rxvt_codeset_conv {
237 uint32_t from_unicode (uint32_t unicode) const {
238 $fun1 }
239 uint32_t to_unicode (uint32_t enc) const {
240 $fun2 }
241 } rxvt_codeset_conv_$base;
242
243 #else
244
245 #define rxvt_codeset_conv_$base rxvt_codeset_conv_unknown
246
247 #endif
248 EOF
249
250 close OUT;
251 }
252
253 __DATA__
254 VN viscii viscii
255 KR ksc5601_1987_0 ksc5601-raw
256 CN gb2312_1980_0 gb2312-raw
257 CN_EXT cns11643_1992_1 cns11643-1
258 CN_EXT cns11643_1992_2 cns11643-2
259 CN_EXT cns11643_1992_3 cns11643-3
260 CN_EXT cns11643_1992_4 cns11643-4
261 CN_EXT cns11643_1992_5 cns11643-5
262 CN_EXT cns11643_1992_6 cns11643-6
263 CN_EXT cns11643_1992_7 cns11643-7
264 CN_EXT cns11643_1992_f cns11643-f
265 CN big5 big5
266 CN_EXT big5_ext big5ext
267 CN_EXT big5_plus big5plus
268 EU koi8_r koi8-r
269 EU koi8_u koi8-u
270 DEFAULT iso8859_1 iso-8859-1
271 EU iso8859_2 iso-8859-2
272 EU iso8859_3 iso-8859-3
273 EU iso8859_4 iso-8859-4
274 EU iso8859_5 iso-8859-5
275 EU iso8859_6 iso-8859-6
276 EU iso8859_7 iso-8859-7
277 EU iso8859_8 iso-8859-8
278 EU iso8859_9 iso-8859-9
279 EU iso8859_10 iso-8859-10
280 EU iso8859_11 iso-8859-11
281 EU iso8859_13 iso-8859-13
282 EU iso8859_14 iso-8859-14
283 DEFAULT iso8859_15 iso-8859-15
284 EU iso8859_16 iso-8859-16
285 JP jis0201_1976_0 jis0201-raw
286 JP jis0208_1983_0 jis0208-raw
287 JP jis0212_1990_0 jis0212-raw
288 JP_EXT jis0213_1 jis0213-1-raw
289 JP_EXT jis0213_2 jis0213-2-raw