1 |
#!/opt/bin/perl |
2 |
|
3 |
# the generated tables mostly have NOT been checked so far!
4 |
|
5 |
use v5.8.2; |
6 |
use utf8; |
7 |
use Encode; |
8 |
|
9 |
my $gen; |
10 |
|
11 |
# linear MAP, LO, HI, BASE
#
# Returns 1 when every slot MAP->[LO .. HI] is defined and forms a run with
# a constant offset, i.e. MAP->[i] == BASE + (i - LO) for each index i.
# Returns nothing (undef in scalar context) as soon as one slot is undefined
# or deviates.  An empty range (LO > HI) counts as linear.
sub linear {
    my ($map, $lo, $hi, $base) = @_;

    # Difference between a mapped value and its index, constant over the run.
    my $offset = $base - $lo;

    for my $idx ($lo .. $hi) {
        my $value = $map->[$idx];
        return unless defined $value;
        return unless $value == $idx + $offset;
    }

    return 1;
}
18 |
|
19 |
# wrap TEXT
#
# Break TEXT into lines for the generated source.  Each output line starts
# with a one-space indent; a break is inserted at the first run of
# whitespace found once at least 90 characters have been collected.
# Returns the wrapped string (without a trailing newline).
#
# The match position is tracked on the argument itself ($_[0]) through
# \G and pos(), so the argument is deliberately not copied.
sub wrap {
    my $out = " ";

    # Consume full-width chunks; /c keeps pos() where it is when no further
    # chunk matches, so the remainder can be picked up below.
    while ($_[0] =~ /\G(.{90}\S*?)\s+/gc) {
        $out .= "$1\n ";
    }

    # Whatever is left after the last break goes on the final line.
    if ($_[0] =~ /\G(.*)$/) {
        $out .= "$1";
    }

    return $out;
}
25 |
|
26 |
# Side channel between gentab and gen: gentab resets this to 0 and then
# increments it once for every undefined slot it pads with the replacement
# value, so it is non-zero exactly when the most recently generated table
# contains holes.  gen reads it right after each gentab call to decide
# whether the generated lookup needs a NOCHAR check.
my $last_tab_full; # hack
27 |
|
28 |
# gentab ENC, L, H, F, REP
#
# Render the initializer of a C array holding ENC->[L .. H].
#
#   $enc - array ref with the values to emit
#   $l   - first index to emit
#   $h   - last index to emit (inclusive)
#   $f   - printf conversion for one element (e.g. "%02x"); "0x" is prepended
#   $rep - numeric filler emitted for undefined slots (0 when omitted)
#
# Returns the string " = {\n<wrapped values>\n};\n".
# Side effect: $last_tab_full is reset and then counts the undefined slots
# that were filled with $rep.
sub gentab {
    my ($enc, $l, $h, $f, $rep) = @_;

    $last_tab_full = 0;

    my @cells;
    for my $idx ($l .. $h) {
        my $value = $enc->[$idx];

        unless (defined $value) {
            # Hole in the mapping: remember it and emit the filler instead.
            $last_tab_full++;
            $value = 0 + $rep;
        }

        push @cells, sprintf "0x$f", $value;
    }

    return " = {\n" . wrap(join ", ", @cells) . "\n};\n";
}
39 |
|
40 |
# gen ENC, BASE, CH
#
# Build the C source for one direction of a codeset conversion.
#
#   $enc  - array ref mapping a source code (index) to its target code
#           (value); undefined slots are unmapped.  NOTE: the array is
#           modified - entries that end up handled by inline range or
#           equality checks in the first three passes are cleared.
#   $base - identifier prefix used for the generated lookup tables.
#   $ch   - name of the C variable holding the input code; every "·"
#           placeholder in the generated statements is replaced by it.
#
# Returns ($tab, $fun): the table definitions and the statements of the
# conversion function body (always ending in "return NOCHAR;").
#
# Strategy, per 256-entry page of the mapping:
#   * at most 5 entries        -> one "if (x == ...)" per entry
#   * constant offset          -> a single range check plus addition
#   * all values fit in 8 bits -> a uint8_t table for that page (@t1)
#   * otherwise                -> a uint16_t page (@t2); these are either
#                                 merged into one table or reached through
#                                 an index of per-page tables.
sub gen {
    my ($enc, $base, $ch) = @_;

    my ($fun, $tab);
    my @t1;    # directly addressed tables: [base code, lo, hi, C type, format]
    my @t2;    # 16 bit pages:              [page,      lo, hi, C type, format]

    # Identity mapping starting at 0.  Once one candidate matched, its range
    # is cleared, so the smaller candidates that follow cannot match again.
    for my $top (255, 159, 127, 126, 125) {
        if (linear($enc, 0, $top, 0)) {
            undef $enc->[$_] for 0 .. $top;
            $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $top;
        }
    }

    # Identity mapping for the range ending at 159.
    for my $from (126, 127, 128) {
        if (linear($enc, $from, 159, $from)) {
            undef $enc->[$_] for $from .. 159;
            $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $from, 159;
        }
    }

    # Codes 126 and 127 get an explicit test when they are still mapped.
    for my $code (126, 127) {
        next unless defined $enc->[$code];
        $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $code, delete $enc->[$code];
    }

    # Number of mapped entries per 256-entry page.
    my @per_page;
    for (0 .. $#$enc) {
        $per_page[$_ >> 8]++ if defined $enc->[$_];
    }

    for my $p (0 .. 255) {
        my $count = $per_page[$p] or next;
        my $b = $p << 8;

        # First and last mapped offset within this page.
        my ($l, $h);
        for my $i (0 .. 255) {
            if (defined $enc->[$b + $i]) { $l = $i; last }
        }
        for my $i (reverse 0 .. 255) {
            if (defined $enc->[$b + $i]) { $h = $i; last }
        }

        if ($count <= 5) {
            # Sparse page: individual comparisons.  (The original also had a
            # "switch" variant guarded by the same condition; it could never
            # be reached and has been dropped.)
            for ($l .. $h) {
                next unless defined $enc->[$b + $_];
                $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
            }
        } elsif (linear($enc, $b + $l, $b + $h, $enc->[$b + $l])) {
            # Constant offset over the whole mapped range.
            my $dif = $enc->[$b + $l] - ($b + $l);
            $dif = $dif < 0 ? sprintf("- 0x%04x", -$dif) : sprintf("+ 0x%04x", $dif);
            # Fixed: the bounds were emitted as "x <= low && high <= x",
            # which is never true for a range longer than one element.
            $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return · %s;\n", $b + $l, $b + $h, $dif;
        } else {
            # Table lookup; use 16 bit elements only when a value needs them.
            my ($wide, $t, $f) = (0, "uint8_t", "%02x");
            for ($l .. $h) {
                if ($enc->[$b + $_] > 255) {
                    ($wide, $t, $f) = (1, "uint16_t", "%04x");
                    last;
                }
            }

            if ($wide) {
                push @t2, [$p, $l, $h, $t, $f];
            } else {
                push @t1, [$b, $l, $h, $t, $f];
            }
        }
    }

    # A handful of 16 bit pages is not worth the two-level scheme; treat
    # them like the directly addressed tables.  Fixed: @t2 stores the page
    # number while @t1 expects the base code, so convert while moving.
    if (@t2 <= 3) {
        push @t1, map { my ($page, @rest) = @$_; [$page << 8, @rest] } @t2;
        @t2 = ();
    }

    for (@t1) {
        my ($b, $l, $h, $t, $f) = @$_;

        # Filler for holes in the table; 128 is used when the table starts
        # at code 0 (presumably because 0 is then a legitimate value there).
        my $rep = $b + $l == 0 ? 128 : 0;

        # Fixed: $rep was not handed to gentab, so holes were emitted as 0
        # while the generated code below compares against $rep.
        $tab .= "static const $t $base\_$b\[]" . gentab($enc, $b + $l, $b + $h, $f, $rep);
        $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
                      . " return %s$base\_$b\[· - 0x%04x];\n",
                        $b + $l, $b + $h,
                        ($last_tab_full ? sprintf("$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l) : ""),
                        $b + $l;
    }

    if (@t2) {
        # Bounding box over all 16 bit pages: page range and offset range.
        my ($min, $max) = (255, 0);
        my ($l, $h) = (255, 0);
        for (@t2) {
            my ($p, $L, $H) = @$_;
            $max = $p if $p > $max;
            $min = $p if $p < $min;
            $l = $L if $L < $l;
            $h = $H if $H > $h;
        }

        $fun .= "uint8_t l = ·;\n";
        $fun .= "uint16_t h = · >> 8;\n";

        if ($h - $l < 200) {
            # Narrow rows: pack all pages into one table with row length $d.
            my $d = $h - $l + 1;

            my @merged;
            for my $p ($min .. $max) {
                for my $i ($l .. $h) {
                    $merged[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
                }
            }
            $tab .= "static const uint16_t $base\_m[]" . gentab(\@merged, 0, $#merged, "%04x", 0);

            if ($last_tab_full) {
                # The merged table has holes (stored as 0): map them to NOCHAR.
                $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                              . " && 0x%02x <= l && l <= 0x%02x)\n"
                              . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                              . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                              . " : NOCHAR;\n",
                                $min, $max, $l, $h,
                                ($d, $min * $d + $l) x 2;
            } else {
                $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                              . " && 0x%02x <= l && l <= 0x%02x)\n"
                              . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
                                $min, $max, $l, $h,
                                $d,
                                $min * $d + $l;
            }
        } else {
            # Wide rows: one table per page plus an index of page pointers;
            # pages without a table keep the 0 (null pointer) entry.
            my @tab = (0) x ($max - $min);
            for (@t2) {
                my ($p, undef, undef, $t, $f) = @$_;
                $tab .= "static const $t $base\_$p\[]" . gentab($enc, $p * 256 + $l, $p * 256 + $h, $f, 0);
                $tab[$p - $min] = "$base\_$p";
            }

            $tab .= "const uint16_t *$base\_i[] = {\n"
                  . wrap(join ", ", @tab)
                  . "\n};\n\n";

            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . " && 0x%02x <= l && l <= 0x%02x\n"
                          . " && $base\_i[h - 0x%02x])\n"
                          . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                          . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                          . " : NOCHAR;\n",
                            $min, $max, $l, $h,
                            $min, ($min, $l) x 2;
        }
    }

    $fun .= "return NOCHAR;\n";

    # Substitute the real C variable name for the placeholder.
    $fun =~ s/·/$ch/g;

    return ($tab, $fun);
}
201 |
|
202 |
# Main driver: for every "GROUP BASE CHARSET" row in the DATA section, probe
# the Encode charset for all code points 0 .. 65535, build the conversion
# code for both directions with gen, and write it to table/BASE.h.
while (my $line = <DATA>) {
    my ($group, $base, $cs) = split ' ', $line;

    # Skip blank or incomplete rows instead of calling encode without a charset.
    next unless defined $cs;

    my @enc1;    # unicode code point -> charset code
    my @enc2;    # charset code       -> unicode code point (first one wins)

    for (0 .. 65535) {
        my $enc = encode $cs, (chr $_), Encode::FB_QUIET;

        if (length $enc) {
            # Interpret the encoded byte sequence as one big-endian number.
            my $code = hex unpack "H*", $enc;
            $enc1[$_] = $code;
            $enc2[$code] = $_ unless defined $enc2[$code];
        }
    }

    my ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
    my ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc";

    # Indent the generated statements inside the method bodies.
    $fun1 =~ s/^/ /gm;
    $fun2 =~ s/^/ /gm;

    # Progress output.
    print "$base\n";

    my $file = "table/$base.h";
    open my $out, '>', $file
        or die "cannot create $file: $!\n";

    print {$out} <<EOF;
//
// AUTOMATICALLLY GENERATED by gentables
//
#ifdef ENCODING_$group

$tab1$tab2
struct rxvt_codeset_conv_$base : rxvt_codeset_conv {
uint32_t from_unicode (uint32_t unicode) const {
$fun1 }
uint32_t to_unicode (uint32_t enc) const {
$fun2 }
} rxvt_codeset_conv_$base;

#else

#define rxvt_codeset_conv_$base rxvt_codeset_conv_unknown

#endif
EOF

    # Buffered write errors only surface here.
    close $out
        or die "error writing $file: $!\n";
}
252 |
|
253 |
__DATA__ |
254 |
VN viscii viscii |
255 |
KR ksc5601_1987_0 ksc5601-raw |
256 |
CN gb2312_1980_0 gb2312-raw |
257 |
CN_EXT cns11643_1992_1 cns11643-1 |
258 |
CN_EXT cns11643_1992_2 cns11643-2 |
259 |
CN_EXT cns11643_1992_3 cns11643-3 |
260 |
CN_EXT cns11643_1992_4 cns11643-4 |
261 |
CN_EXT cns11643_1992_5 cns11643-5 |
262 |
CN_EXT cns11643_1992_6 cns11643-6 |
263 |
CN_EXT cns11643_1992_7 cns11643-7 |
264 |
CN_EXT cns11643_1992_f cns11643-f |
265 |
CN_EXT big5_ext big5ext |
266 |
CN_EXT big5_plus big5plus |
267 |
EU koi8_r koi8-r |
268 |
EU koi8_u koi8-u |
269 |
DEFAULT iso8859_1 iso-8859-1 |
270 |
EU iso8859_2 iso-8859-2 |
271 |
EU iso8859_3 iso-8859-3 |
272 |
EU iso8859_4 iso-8859-4 |
273 |
EU iso8859_5 iso-8859-5 |
274 |
EU iso8859_6 iso-8859-6 |
275 |
EU iso8859_7 iso-8859-7 |
276 |
EU iso8859_8 iso-8859-8 |
277 |
EU iso8859_9 iso-8859-9 |
278 |
EU iso8859_10 iso-8859-10 |
279 |
EU iso8859_11 iso-8859-11 |
280 |
EU iso8859_13 iso-8859-13 |
281 |
EU iso8859_14 iso-8859-14 |
282 |
DEFAULT iso8859_15 iso-8859-15 |
283 |
EU iso8859_16 iso-8859-16 |
284 |
JP jis0201_1976_0 jis0201-raw |
285 |
JP jis0208_1983_0 jis0208-raw |
286 |
JP jis0212_1990_0 jis0212-raw |
287 |
JP_EXT jis0213_1 jis0213-1-raw |
288 |
JP_EXT jis0213_2 jis0213-2-raw |