ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.12
Committed: Wed Jun 16 23:07:11 2021 UTC (2 years, 11 months ago) by root
Branch: MAIN
CVS Tags: rxvt-unicode-rel-9_29, rxvt-unicode-rel-9_30, HEAD
Changes since 1.11: +2 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 pcg 1.1 #!/opt/bin/perl
2    
3 pcg 1.5 # the generated tables mostly have NOT been checked so far!
4 pcg 1.1
5 root 1.12 # apt install libencode-hanextra-perl libencode-jis2k-perl
6    
7 pcg 1.1 use v5.8.2;
8     use utf8;
9     use Encode;
10 root 1.8 use Encode::JP;
11     use Encode::CN;
12     use Encode::KR;
13     use Encode::HanExtra;
14     use Encode::JIS2K;
15 pcg 1.1
16     my $gen;
17    
18 pcg 1.5 $TO_UNICODE = 0; # also generate to_unicode tables
19    
20 pcg 1.1 sub linear {
21     my ($a, $l, $h, $b) = @_;
22     for ($l .. $h) {
23     return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l);
24     }
25     1;
26     }
27    
28     sub wrap {
29     my $res = " ";
30     $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
31     $res .= "$1" if $_[0] =~ /\G(.*)$/;
32     $res;
33     }
34    
35     my $last_tab_full; # hack
36    
37     sub gentab {
38     my ($enc, $l, $h, $f, $rep) = @_;
39     $last_tab_full = 0;
40    
41     " = {\n"
42     . (wrap join ", ",
43     map +(sprintf "0x$f",
44     defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
45     ), $l..$h)
46     . "\n};\n";
47     }
48    
49     sub gen {
50     my ($enc, $base, $ch) = @_;
51    
52     my ($fun, $tab);
53     my (@t1, @t2);
54    
55     for (255, 159, 127, 126, 125) {
56     if (linear $enc, 0, $_, 0) {
57     undef $enc->[$_] for 0..$_;
58     $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
59     }
60     }
61    
62     for (126, 127, 128) {
63     if (linear $enc, $_, 159, $_) {
64     undef $enc->[$_] for $_..159;
65     $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
66     }
67     }
68    
69     for (126, 127) {
70     next unless defined $enc->[$_];
71     $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
72     }
73    
74     my @map;
75     my @map2;
76    
77     for (0 .. $#$enc) {
78     if (defined $enc->[$_]) {
79     $map[$_] = $enc->[$_];
80     $map2[$_ >> 8]++;
81     }
82     }
83    
84     for my $p (0..255) {
85     if ($map2[$p]) {
86     my $b = $p << 8;
87    
88     my ($l, $h);
89     for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
90     for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }
91    
92     if ($map2[$p] <= 5) {
93     for ($l .. $h) {
94     next unless defined $enc->[$b + $_];
95     $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
96     }
97     } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
98     my $dif = $enc->[$b + $l] - ($b + $l);
99     $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
100 root 1.11 $fun .= sprintf "if (· >= 0x%04x && 0x%04x >= ·) return · %s;\n", $b + $l, $b + $h, $dif;
101 pcg 1.1 } elsif ($map2[$p] <= 5) { # defunct
102     $fun .= "switch (·)\n {\n";
103     for ($l .. $h) {
104     next unless defined $enc->[$b + $_];
105     $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
106     }
107     $fun .= " }\n";
108     } else {
109     my ($i, $t, $f) = (0, "uint8_t", "%02x");
110     for ($l .. $h) {
111     if ($enc->[$b + $_] > 255) {
112     ($i, $t, $f) = (1, "uint16_t", "%04x");
113     last;
114     }
115     }
116    
117     $i ? push @t2, [$p, $l, $h, $t, $f]
118     : push @t1, [$b, $l, $h, $t, $f];
119     }
120    
121     }
122     }
123    
124     if (@t2 <= 3) {
125     push @t1, @t2;
126     @t2 = ();
127     }
128    
129     for (@t1) {
130     my ($b, $l, $h, $t, $f) = @$_;
131     my $rep = $b + $l == 0 ? 128 : 0;
132     $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
133     $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
134     . " return %s$base\_$b\[· - 0x%04x];\n",
135     $b + $l, $b + $h,
136     ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
137     $b + $l;
138     }
139    
140     if (@t2) {
141     my ($min, $max) = (255, 0);
142     my ($l, $h) = (255, 0);
143     for (@t2) {
144     my ($p, $L, $H, $t, $f) = @$_;
145     $max = $p if $p > $max;
146     $min = $p if $p < $min;
147     $l = $L if $L < $l;
148     $h = $H if $H > $h;
149     }
150    
151     $fun .= "uint8_t l = ·;\n";
152     $fun .= "uint16_t h = · >> 8;\n";
153    
154     if ($h - $l < 200) {
155     my $d = $h - $l + 1;
156    
157     my @enc;
158     for $p ($min .. $max) {
159     for $i ($l .. $h) {
160     $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
161     }
162     }
163     $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";
164    
165     if ($last_tab_full) {
166     $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
167     . " && 0x%02x <= l && l <= 0x%02x)\n"
168     . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
169     . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
170     . " : NOCHAR;\n",
171     $min, $max, $l, $h,
172     ($d, $min * $d + $l) x 2;
173     } else {
174     $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
175     . " && 0x%02x <= l && l <= 0x%02x)\n"
176     . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
177     $min, $max, $l, $h,
178     $d,
179     $min * $d + $l;
180     }
181     } else {
182     my @tab = (0) x ($max - $min);
183     for (@t2) {
184     my ($p, undef, undef, $t, $f) = @$_;
185     $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
186     $tab[$p - $min] = "$base\_$p";
187     }
188    
189     $tab .= "const uint16_t *$base\_i[] = {\n"
190     . (wrap join ", ", @tab)
191     . "\n};\n\n";
192    
193     $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
194     . " && 0x%02x <= l && l <= 0x%02x\n"
195     . " && $base\_i[h - 0x%02x])\n"
196     . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
197     . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
198     . " : NOCHAR;\n",
199     $min, $max, $l, $h,
200     $min, ($min, $l) x 2;
201     }
202     }
203    
204     $fun .= "return NOCHAR;\n";
205    
206     $fun =~ s/·/$ch/g;
207    
208     ($tab, $fun);
209     }
210    
211     while (<DATA>) {
212     my ($group, $base, $cs, $type) = split /\s+/;
213    
214     my @enc1;
215     my @enc2;
216    
217     for (0 .. 65535) {
218     my $enc = encode $cs, (chr $_), Encode::FB_QUIET;
219    
220     if (length $enc) {
221     my $code = hex unpack "H*", $enc;
222     $enc1[$_] = $code;
223     $enc2[$code] = $_ unless defined $enc2[$code];
224     }
225     }
226    
227 pcg 1.5 my ($tab1, $fun1);
228     my ($tab2, $fun2);
229    
230     ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
231     ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE;
232 pcg 1.1
233     $fun1 =~ s/^/ /gm;
234     $fun2 =~ s/^/ /gm;
235    
236     print "$base\n";
237     open OUT, ">table/$base.h" or die;
238    
239     print OUT <<EOF;
240     //
241     // AUTOMATICALLLY GENERATED by gentables
242     //
243     #ifdef ENCODING_$group
244    
245     $tab1$tab2
246 root 1.6 static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
247     $fun1}
248 pcg 1.5 EOF
249    
250     if ($TO_UNICODE) {
251     print OUT <<EOF
252     #if ENCODING_TO_UNICODE
253 root 1.6 static unicode_t cs_$base\_to_unicode (uint32_t enc) {
254     $fun2}
255 pcg 1.5 #endif
256     EOF
257     }
258    
259     print OUT <<EOF;
260 pcg 1.1
261     #else
262    
263 root 1.6 #define cs_$base\_from_unicode cs_unknown_from_unicode
264     #define cs_$base\_to_unicode cs_unknown_to_unicode
265 pcg 1.1
266     #endif
267     EOF
268    
269     close OUT;
270     }
271    
272     __DATA__
273     VN viscii viscii
274     KR ksc5601_1987_0 ksc5601-raw
275 root 1.7 ZH gb2312_1980_0 gb2312-raw
276 root 1.9 ZH gbk_0 gbk
277 root 1.7 ZH_EXT cns11643_1992_1 cns11643-1
278     ZH_EXT cns11643_1992_2 cns11643-2
279     ZH_EXT cns11643_1992_3 cns11643-3
280     ZH_EXT cns11643_1992_4 cns11643-4
281     ZH_EXT cns11643_1992_5 cns11643-5
282     ZH_EXT cns11643_1992_6 cns11643-6
283     ZH_EXT cns11643_1992_7 cns11643-7
284     ZH_EXT cns11643_1992_f cns11643-f
285     ZH big5 big5
286     ZH_EXT big5_ext big5ext
287     ZH_EXT big5_plus big5plus
288 pcg 1.1 EU koi8_r koi8-r
289     EU koi8_u koi8-u
290     DEFAULT iso8859_1 iso-8859-1
291     EU iso8859_2 iso-8859-2
292     EU iso8859_3 iso-8859-3
293     EU iso8859_4 iso-8859-4
294     EU iso8859_5 iso-8859-5
295     EU iso8859_6 iso-8859-6
296     EU iso8859_7 iso-8859-7
297     EU iso8859_8 iso-8859-8
298     EU iso8859_9 iso-8859-9
299     EU iso8859_10 iso-8859-10
300     EU iso8859_11 iso-8859-11
301     EU iso8859_13 iso-8859-13
302 ayin 1.10 EU iso8859_14 iso-8859-14
303 pcg 1.1 DEFAULT iso8859_15 iso-8859-15
304     EU iso8859_16 iso-8859-16
305     JP jis0201_1976_0 jis0201-raw
306 pcg 1.5 JP jis0208_1990_0 jis0208-raw
307 pcg 1.1 JP jis0212_1990_0 jis0212-raw
308     JP_EXT jis0213_1 jis0213-1-raw
309     JP_EXT jis0213_2 jis0213-2-raw