ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.6
Committed: Wed Aug 4 03:29:28 2004 UTC (19 years, 9 months ago) by root
Branch: MAIN
CVS Tags: rel-3_7, rel-3_6, rel-3_5, rel-3_4, rel-3_8
Changes since 1.5: +6 -7 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 pcg 1.1 #!/opt/bin/perl
2    
3 pcg 1.5 # the generated tables mostly have NOT been checked so far!
4 pcg 1.1
5     use v5.8.2;
6     use utf8;
7     use Encode;
8    
9     my $gen;
10    
11 pcg 1.5 $TO_UNICODE = 0; # also generate to_unicode tables
12    
13 pcg 1.1 sub linear {
14     my ($a, $l, $h, $b) = @_;
15     for ($l .. $h) {
16     return unless defined $a->[$_] && $a->[$_] == $_ + ($b - $l);
17     }
18     1;
19     }
20    
21     sub wrap {
22     my $res = " ";
23     $res .= "$1\n " while $_[0] =~ /\G(.{90}\S*?)\s+/gc;
24     $res .= "$1" if $_[0] =~ /\G(.*)$/;
25     $res;
26     }
27    
28     my $last_tab_full; # hack
29    
30     sub gentab {
31     my ($enc, $l, $h, $f, $rep) = @_;
32     $last_tab_full = 0;
33    
34     " = {\n"
35     . (wrap join ", ",
36     map +(sprintf "0x$f",
37     defined $enc->[$_] ? $enc->[$_] : $last_tab_full++ * 0 + $rep
38     ), $l..$h)
39     . "\n};\n";
40     }
41    
42     sub gen {
43     my ($enc, $base, $ch) = @_;
44    
45     my ($fun, $tab);
46     my (@t1, @t2);
47    
48     for (255, 159, 127, 126, 125) {
49     if (linear $enc, 0, $_, 0) {
50     undef $enc->[$_] for 0..$_;
51     $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $_;
52     }
53     }
54    
55     for (126, 127, 128) {
56     if (linear $enc, $_, 159, $_) {
57     undef $enc->[$_] for $_..159;
58     $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $_, 159;
59     }
60     }
61    
62     for (126, 127) {
63     next unless defined $enc->[$_];
64     $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $_, delete $enc->[$_];
65     }
66    
67     my @map;
68     my @map2;
69    
70     for (0 .. $#$enc) {
71     if (defined $enc->[$_]) {
72     $map[$_] = $enc->[$_];
73     $map2[$_ >> 8]++;
74     }
75     }
76    
77     for my $p (0..255) {
78     if ($map2[$p]) {
79     my $b = $p << 8;
80    
81     my ($l, $h);
82     for my $i (0..255) { $l = $i, last if defined $map[$b + $i]; }
83     for my $i (0..255) { $h = 255 - $i, last if defined $map[$b + 255 - $i]; }
84    
85     if ($map2[$p] <= 5) {
86     for ($l .. $h) {
87     next unless defined $enc->[$b + $_];
88     $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $_, $enc->[$b + $_];
89     }
90     } elsif (linear $enc, $b + $l, $b + $h, $enc->[$b + $l]) {
91     my $dif = $enc->[$b + $l] - ($b + $l);
92     $dif = $dif < 0 ? sprintf "- 0x%04x", -$dif : sprintf "+ 0x%04x", $dif;
93     $fun .= sprintf "if (· <= 0x%04x && 0x%04x <= ·) return · %s;\n", $b + $l, $b + $h, $dif;
94     } elsif ($map2[$p] <= 5) { # defunct
95     $fun .= "switch (·)\n {\n";
96     for ($l .. $h) {
97     next unless defined $enc->[$b + $_];
98     $fun .= sprintf " case 0x%04x: return 0x%04x;\n", $b + $_, $enc->[$b + $_];
99     }
100     $fun .= " }\n";
101     } else {
102     my ($i, $t, $f) = (0, "uint8_t", "%02x");
103     for ($l .. $h) {
104     if ($enc->[$b + $_] > 255) {
105     ($i, $t, $f) = (1, "uint16_t", "%04x");
106     last;
107     }
108     }
109    
110     $i ? push @t2, [$p, $l, $h, $t, $f]
111     : push @t1, [$b, $l, $h, $t, $f];
112     }
113    
114     }
115     }
116    
117     if (@t2 <= 3) {
118     push @t1, @t2;
119     @t2 = ();
120     }
121    
122     for (@t1) {
123     my ($b, $l, $h, $t, $f) = @$_;
124     my $rep = $b + $l == 0 ? 128 : 0;
125     $tab .= "static const $t $base\_$b\[]" . gentab $enc, $b + $l, $b + $h, $f;
126     $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
127     . " return %s$base\_$b\[· - 0x%04x];\n",
128     $b + $l, $b + $h,
129     ($last_tab_full ? sprintf "$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l : ""),
130     $b + $l;
131     }
132    
133     if (@t2) {
134     my ($min, $max) = (255, 0);
135     my ($l, $h) = (255, 0);
136     for (@t2) {
137     my ($p, $L, $H, $t, $f) = @$_;
138     $max = $p if $p > $max;
139     $min = $p if $p < $min;
140     $l = $L if $L < $l;
141     $h = $H if $H > $h;
142     }
143    
144     $fun .= "uint8_t l = ·;\n";
145     $fun .= "uint16_t h = · >> 8;\n";
146    
147     if ($h - $l < 200) {
148     my $d = $h - $l + 1;
149    
150     my @enc;
151     for $p ($min .. $max) {
152     for $i ($l .. $h) {
153     $enc[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
154     }
155     }
156     $tab .= "static const uint16_t $base\_m[]" . gentab \@enc, 0, $#enc, "%04x";
157    
158     if ($last_tab_full) {
159     $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
160     . " && 0x%02x <= l && l <= 0x%02x)\n"
161     . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
162     . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
163     . " : NOCHAR;\n",
164     $min, $max, $l, $h,
165     ($d, $min * $d + $l) x 2;
166     } else {
167     $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
168     . " && 0x%02x <= l && l <= 0x%02x)\n"
169     . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
170     $min, $max, $l, $h,
171     $d,
172     $min * $d + $l;
173     }
174     } else {
175     my @tab = (0) x ($max - $min);
176     for (@t2) {
177     my ($p, undef, undef, $t, $f) = @$_;
178     $tab .= "static const $t $base\_$p\[]" . gentab $enc, $p * 256 + $l, $p * 256 + $h, $f, 0;
179     $tab[$p - $min] = "$base\_$p";
180     }
181    
182     $tab .= "const uint16_t *$base\_i[] = {\n"
183     . (wrap join ", ", @tab)
184     . "\n};\n\n";
185    
186     $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
187     . " && 0x%02x <= l && l <= 0x%02x\n"
188     . " && $base\_i[h - 0x%02x])\n"
189     . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
190     . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
191     . " : NOCHAR;\n",
192     $min, $max, $l, $h,
193     $min, ($min, $l) x 2;
194     }
195     }
196    
197     $fun .= "return NOCHAR;\n";
198    
199     $fun =~ s/·/$ch/g;
200    
201     ($tab, $fun);
202     }
203    
204     while (<DATA>) {
205     my ($group, $base, $cs, $type) = split /\s+/;
206    
207     my @enc1;
208     my @enc2;
209    
210     for (0 .. 65535) {
211     my $enc = encode $cs, (chr $_), Encode::FB_QUIET;
212    
213     if (length $enc) {
214     my $code = hex unpack "H*", $enc;
215     $enc1[$_] = $code;
216     $enc2[$code] = $_ unless defined $enc2[$code];
217     }
218     }
219    
220 pcg 1.5 my ($tab1, $fun1);
221     my ($tab2, $fun2);
222    
223     ($tab1, $fun1) = gen \@enc1, "$base\_f", "unicode";
224     ($tab2, $fun2) = gen \@enc2, "$base\_t", "enc" if $TO_UNICODE;
225 pcg 1.1
226     $fun1 =~ s/^/ /gm;
227     $fun2 =~ s/^/ /gm;
228    
229     print "$base\n";
230     open OUT, ">table/$base.h" or die;
231    
232     print OUT <<EOF;
233     //
234     // AUTOMATICALLLY GENERATED by gentables
235     //
236     #ifdef ENCODING_$group
237    
238     $tab1$tab2
239 root 1.6 static uint32_t cs_$base\_from_unicode (unicode_t unicode) {
240     $fun1}
241 pcg 1.5 EOF
242    
243     if ($TO_UNICODE) {
244     print OUT <<EOF
245     #if ENCODING_TO_UNICODE
246 root 1.6 static unicode_t cs_$base\_to_unicode (uint32_t enc) {
247     $fun2}
248 pcg 1.5 #endif
249     EOF
250     }
251    
252     print OUT <<EOF;
253 pcg 1.1
254     #else
255    
256 root 1.6 #define cs_$base\_from_unicode cs_unknown_from_unicode
257     #define cs_$base\_to_unicode cs_unknown_to_unicode
258 pcg 1.1
259     #endif
260     EOF
261    
262     close OUT;
263     }
264    
265     __DATA__
266     VN viscii viscii
267     KR ksc5601_1987_0 ksc5601-raw
268     CN gb2312_1980_0 gb2312-raw
269     CN_EXT cns11643_1992_1 cns11643-1
270     CN_EXT cns11643_1992_2 cns11643-2
271     CN_EXT cns11643_1992_3 cns11643-3
272     CN_EXT cns11643_1992_4 cns11643-4
273     CN_EXT cns11643_1992_5 cns11643-5
274     CN_EXT cns11643_1992_6 cns11643-6
275     CN_EXT cns11643_1992_7 cns11643-7
276     CN_EXT cns11643_1992_f cns11643-f
277 pcg 1.2 CN big5 big5
278 pcg 1.1 CN_EXT big5_ext big5ext
279     CN_EXT big5_plus big5plus
280     EU koi8_r koi8-r
281     EU koi8_u koi8-u
282     DEFAULT iso8859_1 iso-8859-1
283     EU iso8859_2 iso-8859-2
284     EU iso8859_3 iso-8859-3
285     EU iso8859_4 iso-8859-4
286     EU iso8859_5 iso-8859-5
287     EU iso8859_6 iso-8859-6
288     EU iso8859_7 iso-8859-7
289     EU iso8859_8 iso-8859-8
290     EU iso8859_9 iso-8859-9
291     EU iso8859_10 iso-8859-10
292     EU iso8859_11 iso-8859-11
293     EU iso8859_13 iso-8859-13
294     EU iso8859_14 iso-8859-14
295     DEFAULT iso8859_15 iso-8859-15
296     EU iso8859_16 iso-8859-16
297     JP jis0201_1976_0 jis0201-raw
298 pcg 1.5 JP jis0208_1990_0 jis0208-raw
299 pcg 1.1 JP jis0212_1990_0 jis0212-raw
300     JP_EXT jis0213_1 jis0213-1-raw
301     JP_EXT jis0213_2 jis0213-2-raw