1 |
pcg |
1.1 |
#!/opt/bin/perl |
2 |
|
|
|
3 |
|
|
# the generated tables mostly have NOT been checked so far!
4 |
|
|
|
5 |
|
|
use v5.8.2; |
6 |
|
|
use utf8; |
7 |
|
|
use Encode; |
8 |
|
|
|
9 |
|
|
my $gen; |
10 |
|
|
|
11 |
|
|
# linear MAP, FIRST, LAST, BASE
#
# Tests whether the array behind MAP maps the index range FIRST .. LAST onto
# the consecutive values BASE, BASE + 1, ...  Returns 1 if every index in the
# range is defined and has the expected value (an empty range trivially
# qualifies); otherwise returns nothing (undef / empty list).
sub linear {
   my ($map, $first, $last, $base) = @_;

   my $shift = $base - $first;

   foreach my $idx ($first .. $last) {
      my $val = $map->[$idx];

      return if !defined $val;
      return if $val != $idx + $shift;
   }

   return 1;
}
18 |
|
|
|
19 |
|
|
# wrap TEXT
#
# Breaks TEXT into lines: each line takes at least 90 characters, is extended
# to the end of the current non-blank run and is then cut at the following
# whitespace.  The result starts with a single blank, and every continuation
# line is introduced by a newline followed by a single blank.
#
# The scan uses \G with /gc, i.e. it depends on the per-scalar match position.
# It therefore runs on a private copy, so that neither a match position left
# on the caller's variable influences the result nor the caller's variable is
# left with a moved position (which would make a second call on the same
# variable silently drop the already-consumed prefix).
sub wrap {
   my $text = $_[0];

   my $res = " ";
   $res .= "$1\n " while $text =~ /\G(.{90}\S*?)\s+/gc;

   # whatever is left after the last break (no /g: \G still anchors at pos)
   $res .= "$1" if $text =~ /\G(.*)$/;

   $res;
}
25 |
|
|
|
26 |
|
|
# Shared state between gentab and gen: gentab resets it to 0 and increments it
# once for every undefined slot it had to fill with the replacement value, so
# after a gentab call it is non-zero exactly when the table just generated
# contains holes.  gen reads it right after each gentab call.
my $last_tab_full; # hack
27 |
|
|
|
28 |
|
|
# gentab ENC, L, H, F, REP
#
# Returns the initializer part of a C array definition (" = {" ... "};")
# holding the entries L .. H of the array behind ENC.  Each entry is formatted
# with the sprintf conversion F, prefixed by "0x".  Undefined entries are
# written as REP (0 when REP is omitted) and counted in $last_tab_full, which
# is reset to 0 at the start of every call.
sub gentab {
   my ($enc, $l, $h, $f, $rep) = @_;

   $last_tab_full = 0;

   my @cells;
   for my $idx ($l .. $h) {
      my $val = $enc->[$idx];

      unless (defined $val) {
         # hole: remember that this table is incomplete, emit the filler
         $last_tab_full++;
         $val = 0 + $rep;
      }

      push @cells, sprintf "0x$f", $val;
   }

   return " = {\n" . wrap(join ", ", @cells) . "\n};\n";
}
39 |
|
|
|
40 |
|
|
# gen ENC, BASE, CH
#
# Generates the C source for one conversion direction.
#
#   ENC   array ref mapping an input code (the index) to its output code;
#         undefined entries are unmapped.  The array is modified: entries
#         covered by an emitted identity range or by the single-value tests
#         for 126/127 are undefined afterwards.
#   BASE  identifier prefix of the emitted lookup tables
#   CH    name of the C variable holding the input code; it replaces every
#         "·" placeholder in the generated statements
#
# Returns the list (TABLES, BODY): the table definitions and the statements
# of the function body, which always ends in "return NOCHAR;".
sub gen {
   my ($enc, $base, $ch) = @_;

   my ($fun, $tab) = ("", "");

   # pages that need a lookup table:
   #   @t1 entries are [page base offset, lo, hi, C type, format]
   #   @t2 entries are [page number,      lo, hi, C type, format]
   my (@t1, @t2);

   # identity mapping for 0 .. N
   for my $top (255, 159, 127, 126, 125) {
      next unless linear($enc, 0, $top, 0);
      undef $enc->[$_] for 0 .. $top;
      $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $top;
   }

   # identity mapping for N .. 159
   for my $bottom (126, 127, 128) {
      next unless linear($enc, $bottom, 159, $bottom);
      undef $enc->[$_] for $bottom .. 159;
      $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $bottom, 159;
   }

   # 126 and 127 get an explicit test each
   for my $code (126, 127) {
      next unless defined $enc->[$code];
      $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $code, $enc->[$code];
      undef $enc->[$code];
   }

   # number of mapped codes in every 256-entry page
   my @count;
   for my $code (0 .. $#$enc) {
      $count[$code >> 8]++ if defined $enc->[$code];
   }

   for my $p (0 .. 255) {
      next unless $count[$p];

      my $pbase = $p << 8;

      # lowest and highest mapped offset within this page
      my ($l, $h);
      for my $i (0 .. 255) {
         if (defined $enc->[$pbase + $i]) { $l = $i; last }
      }
      for my $i (reverse 0 .. 255) {
         if (defined $enc->[$pbase + $i]) { $h = $i; last }
      }

      if ($count[$p] <= 5) {
         # a handful of codes: one comparison per code
         for my $i ($l .. $h) {
            next unless defined $enc->[$pbase + $i];
            $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $pbase + $i, $enc->[$pbase + $i];
         }
      } elsif (linear($enc, $pbase + $l, $pbase + $h, $enc->[$pbase + $l])) {
         # contiguous run with a constant offset: a range test is enough.
         # (The bounds used to be emitted the wrong way round, which made
         # the generated condition unsatisfiable.)
         my $dif = $enc->[$pbase + $l] - ($pbase + $l);
         $dif = $dif < 0 ? sprintf("- 0x%04x", -$dif) : sprintf("+ 0x%04x", $dif);
         $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return · %s;\n", $pbase + $l, $pbase + $h, $dif;
      } else {
         # needs a table; 16-bit entries as soon as one value exceeds 255
         my $wide = grep { defined $enc->[$pbase + $_] && $enc->[$pbase + $_] > 255 } $l .. $h;

         if ($wide) {
            push @t2, [$p, $l, $h, "uint16_t", "%04x"];
         } else {
            push @t1, [$pbase, $l, $h, "uint8_t", "%02x"];
         }
      }
   }

   # Too few 16-bit pages for a two-level table: treat them like the simple
   # per-page tables.  @t2 stores page numbers while @t1 stores page base
   # offsets, so convert while moving the entries over.
   if (@t2 <= 3) {
      push @t1, map { [$_->[0] << 8, @$_[1 .. 4]] } @t2;
      @t2 = ();
   }

   for my $entry (@t1) {
      my ($tbase, $l, $h, $t, $f) = @$entry;

      # filler for holes; 0 is a mapped value when the table starts at code 0
      my $rep = $tbase + $l == 0 ? 128 : 0;

      # pass the filler on, so that it matches the NOCHAR test emitted below
      $tab .= "static const $t $base\_$tbase\[]" . gentab($enc, $tbase + $l, $tbase + $h, $f, $rep);

      # $last_tab_full now tells whether the table just generated has holes
      my $guard = $last_tab_full
         ? sprintf("$base\_$tbase\[· - 0x%04x] == $rep ? NOCHAR : ", $tbase + $l)
         : "";

      $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
                    . " return %s$base\_$tbase\[· - 0x%04x];\n",
                      $tbase + $l, $tbase + $h, $guard, $tbase + $l;
   }

   if (@t2) {
      # bounding box over all remaining pages: page range and offset range
      my ($min, $max) = (255, 0);
      my ($l, $h) = (255, 0);
      for my $entry (@t2) {
         my ($p, $pl, $ph) = @$entry;
         $max = $p if $p > $max;
         $min = $p if $p < $min;
         $l = $pl if $pl < $l;
         $h = $ph if $ph > $h;
      }

      $fun .= "uint8_t l = ·;\n";
      $fun .= "uint16_t h = · >> 8;\n";

      if ($h - $l < 200) {
         # narrow offset range: one dense table, $d entries per page
         my $d = $h - $l + 1;

         my @dense;
         for my $p ($min .. $max) {
            for my $i ($l .. $h) {
               $dense[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
            }
         }
         $tab .= "static const uint16_t $base\_m[]" . gentab(\@dense, 0, $#dense, "%04x");

         if ($last_tab_full) {
            # holes were filled with 0, which has to be reported as NOCHAR
            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . " && 0x%02x <= l && l <= 0x%02x)\n"
                          . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                          . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                          . " : NOCHAR;\n",
                            $min, $max, $l, $h,
                            ($d, $min * $d + $l) x 2;
         } else {
            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . " && 0x%02x <= l && l <= 0x%02x)\n"
                          . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
                            $min, $max, $l, $h,
                            $d,
                            $min * $d + $l;
         }
      } else {
         # wide offset range: one table per page plus an index of pointers,
         # with 0 for the pages that have no table
         my @pages = (0) x ($max - $min + 1);
         for my $entry (@t2) {
            my ($p, undef, undef, $t, $f) = @$entry;
            $tab .= "static const $t $base\_$p\[]" . gentab($enc, $p * 256 + $l, $p * 256 + $h, $f, 0);
            $pages[$p - $min] = "$base\_$p";
         }

         $tab .= "const uint16_t *$base\_i[] = {\n"
               . wrap(join ", ", @pages)
               . "\n};\n\n";

         $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                       . " && 0x%02x <= l && l <= 0x%02x\n"
                       . " && $base\_i[h - 0x%02x])\n"
                       . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                       . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                       . " : NOCHAR;\n",
                         $min, $max, $l, $h,
                         $min, ($min, $l) x 2;
      }
   }

   $fun .= "return NOCHAR;\n";

   # insert the real name of the input variable
   $fun =~ s/·/$ch/g;

   ($tab, $fun);
}
201 |
|
|
|
202 |
|
|
# Main driver.  Every non-blank DATA line has the form "GROUP BASE CHARSET":
# CHARSET is the encoding name handed to Encode, BASE names the output file
# table/BASE.h and the generated C identifiers, GROUP selects the
# ENCODING_GROUP preprocessor symbol that guards the generated code.
while (<DATA>) {
   next unless /\S/;   # ignore blank lines

   my ($group, $base, $cs) = split ' ';

   my @enc1;   # unicode code point -> charset code
   my @enc2;   # charset code -> unicode code point (the first one wins)

   for my $ucs (0 .. 65535) {
      my $bytes = encode $cs, (chr $ucs), Encode::FB_QUIET;
      next unless length $bytes;   # not representable in this charset

      # the encoded bytes, read as a single big-endian number
      my $code = hex unpack "H*", $bytes;

      $enc1[$ucs] = $code;
      $enc2[$code] = $ucs unless defined $enc2[$code];
   }

   my ($tab1, $fun1) = gen(\@enc1, "$base\_f", "unicode");
   my ($tab2, $fun2) = gen(\@enc2, "$base\_t", "enc");

   # indent the function bodies
   $fun1 =~ s/^/ /gm;
   $fun2 =~ s/^/ /gm;

   print "$base\n";

   my $text = <<EOF;
//
// AUTOMATICALLLY GENERATED by gentables
//
#ifdef ENCODING_$group

$tab1$tab2
struct rxvt_codeset_conv_$base : rxvt_codeset_conv {
uint32_t from_unicode (uint32_t unicode) const {
$fun1 }
uint32_t to_unicode (uint32_t enc) const {
$fun2 }
} rxvt_codeset_conv_$base;

#else

#define rxvt_codeset_conv_$base rxvt_codeset_conv_unknown

#endif
EOF

   # three-argument open on a lexical handle; report which file failed and why
   my $file = "table/$base.h";
   open my $out, ">", $file
      or die "cannot create $file: $!\n";
   print {$out} $text
      or die "cannot write $file: $!\n";
   # buffered write errors only show up at close time
   close $out
      or die "cannot close $file: $!\n";
}
252 |
|
|
|
253 |
|
|
__DATA__ |
254 |
|
|
VN viscii viscii |
255 |
|
|
KR ksc5601_1987_0 ksc5601-raw |
256 |
|
|
CN gb2312_1980_0 gb2312-raw |
257 |
|
|
CN_EXT cns11643_1992_1 cns11643-1 |
258 |
|
|
CN_EXT cns11643_1992_2 cns11643-2 |
259 |
|
|
CN_EXT cns11643_1992_3 cns11643-3 |
260 |
|
|
CN_EXT cns11643_1992_4 cns11643-4 |
261 |
|
|
CN_EXT cns11643_1992_5 cns11643-5 |
262 |
|
|
CN_EXT cns11643_1992_6 cns11643-6 |
263 |
|
|
CN_EXT cns11643_1992_7 cns11643-7 |
264 |
|
|
CN_EXT cns11643_1992_f cns11643-f |
265 |
|
|
CN_EXT big5_ext big5ext |
266 |
|
|
CN_EXT big5_plus big5plus |
267 |
|
|
EU koi8_r koi8-r |
268 |
|
|
EU koi8_u koi8-u |
269 |
|
|
DEFAULT iso8859_1 iso-8859-1 |
270 |
|
|
EU iso8859_2 iso-8859-2 |
271 |
|
|
EU iso8859_3 iso-8859-3 |
272 |
|
|
EU iso8859_4 iso-8859-4 |
273 |
|
|
EU iso8859_5 iso-8859-5 |
274 |
|
|
EU iso8859_6 iso-8859-6 |
275 |
|
|
EU iso8859_7 iso-8859-7 |
276 |
|
|
EU iso8859_8 iso-8859-8 |
277 |
|
|
EU iso8859_9 iso-8859-9 |
278 |
|
|
EU iso8859_10 iso-8859-10 |
279 |
|
|
EU iso8859_11 iso-8859-11 |
280 |
|
|
EU iso8859_13 iso-8859-13 |
281 |
|
|
EU iso8859_14 iso-8859-14 |
282 |
|
|
DEFAULT iso8859_15 iso-8859-15 |
283 |
|
|
EU iso8859_16 iso-8859-16 |
284 |
|
|
JP jis0201_1976_0 jis0201-raw |
285 |
|
|
JP jis0208_1983_0 jis0208-raw |
286 |
|
|
JP jis0212_1990_0 jis0212-raw |
287 |
|
|
JP_EXT jis0213_1 jis0213-1-raw |
288 |
|
|
JP_EXT jis0213_2 jis0213-2-raw |