ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/rxvt-unicode/src/gentables
Revision: 1.2
Committed: Wed Mar 3 00:20:33 2004 UTC (20 years, 3 months ago) by pcg
Branch: MAIN
CVS Tags: rel-2_1_0, rel-2_2, rel-2_3
Changes since 1.1: +1 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 pcg 1.1 #!/opt/bin/perl
2    
3     # the generated tables mostly have NOT been checked so far!
4    
5     use v5.8.2;
6     use utf8;
7     use Encode;
8    
9     my $gen;
10    
# Test whether a slice of a mapping is one unbroken arithmetic run.
#
#   $a - array ref holding the mapping
#   $l - first index to examine
#   $h - last index to examine (inclusive)
#   $b - value expected at index $l; each following index must hold
#        the previous value plus one
#
# Returns 1 when every element in $l..$h is defined and matches, and an
# empty return (false) at the first undefined or mismatching element.
sub linear {
    my ($map, $first, $last, $start) = @_;

    # constant distance between an index and the value it must hold
    my $offset = $start - $first;

    for my $idx ($first .. $last) {
        my $val = $map->[$idx];
        return if !defined $val;
        return if $val != $idx + $offset;
    }

    return 1;
}
18    
# Break a long, whitespace-separated string into lines for the generated
# header.
#
# A line is cut at the first run of whitespace found at or after its 90th
# character; that whitespace is dropped and the next line starts after a
# newline plus one space.  The result itself starts with one space and the
# remainder of the text is appended unchanged.
#
# The text is copied before matching: the \G//gc matches below keep their
# position in pos() of the scalar they run on, and $_[0] aliases the
# caller's variable.  Matching on the alias would make the result depend
# on (and modify) the caller's pos(), so wrapping the same variable twice
# would give different results.
sub wrap {
    my $text = $_[0];

    my $res = " ";
    $res .= "$1\n " while $text =~ /\G(.{90}\S*?)\s+/gc;
    $res .= $1 if $text =~ /\G(.*)$/;

    return $res;
}
25    
# Set by gentab: the number of undefined slots the most recent call had to
# fill with the replacement value.  gen only tests it for truth, i.e.
# "did the last table contain holes".
my $last_tab_full; # hack

# Render the initializer of a C array from a slice of a mapping.
#
#   $enc - array ref holding the mapping
#   $l   - first index to emit
#   $h   - last index to emit (inclusive)
#   $f   - sprintf conversion for one element, e.g. "%02x" or "%04x"
#   $rep - value emitted for undefined slots (an omitted $rep counts as 0)
#
# Returns the text " = {\n<wrapped element list>\n};\n" and, as a side
# effect, resets and updates $last_tab_full.
sub gentab {
    my ($enc, $l, $h, $f, $rep) = @_;
    $last_tab_full = 0;

    my @cells;
    for my $idx ($l .. $h) {
        my $val = $enc->[$idx];

        unless (defined $val) {
            # hole: remember that there was one and emit the replacement
            $last_tab_full++;
            $val = 0 + $rep;
        }

        push @cells, sprintf "0x$f", $val;
    }

    return " = {\n" . wrap(join ", ", @cells) . "\n};\n";
}
39    
# Generate the C code for one direction of a character set conversion.
#
# Arguments:
#   $enc  - array ref indexed by source code, holding the target code or
#           undef where no mapping exists.  Entries are cleared in place
#           once a generated range check covers them.
#   $base - prefix used for the names of the generated C tables.
#   $ch   - name of the C variable holding the input code; it replaces the
#           "·" placeholder used in the format strings below.
#
# Returns the list ($tab, $fun): the table definitions and the statements
# that form the body of the conversion function.
sub gen {
    my ($enc, $base, $ch) = @_;

    my ($fun, $tab) = ("", "");

    # @t1: tables indexed directly, stored as [base address, lo, hi, type, fmt]
    # @t2: 16-bit pages, stored as [page number, lo, hi, type, fmt]
    my (@t1, @t2);

    # Identity mapping starting at 0: the first limit that qualifies is
    # emitted and cleared, after which the smaller limits no longer match.
    for my $top (255, 159, 127, 126, 125) {
        next unless linear($enc, 0, $top, 0);
        undef $enc->[$_] for 0 .. $top;
        $fun .= sprintf "if (· <= 0x%04x) return ·;\n", $top;
    }

    # Identity mapping for a range ending at 159.
    for my $low (126, 127, 128) {
        next unless linear($enc, $low, 159, $low);
        undef $enc->[$_] for $low .. 159;
        $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return ·;\n", $low, 159;
    }

    # Codes 126 and 127 get an explicit test when they are still mapped.
    for my $code (126, 127) {
        next unless defined $enc->[$code];
        $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $code, $enc->[$code];
        undef $enc->[$code];
    }

    # @map: snapshot of the remaining mappings
    # @map2: number of remaining mappings per 256-entry page
    my @map;
    my @map2;

    for (0 .. $#$enc) {
        if (defined $enc->[$_]) {
            $map[$_] = $enc->[$_];
            $map2[$_ >> 8]++;
        }
    }

    for my $p (0 .. 255) {
        next unless $map2[$p];

        my $b = $p << 8;

        # lowest and highest mapped offset within this page
        my ($l, $h);
        for my $i (0 .. 255) {
            if (defined $map[$b + $i]) { $l = $i; last }
        }
        for my $i (reverse 0 .. 255) {
            if (defined $map[$b + $i]) { $h = $i; last }
        }

        if ($map2[$p] <= 5) {
            # a handful of entries: one comparison each
            for my $i ($l .. $h) {
                next unless defined $enc->[$b + $i];
                $fun .= sprintf "if (· == 0x%04x) return 0x%04x;\n", $b + $i, $enc->[$b + $i];
            }
        } elsif (linear($enc, $b + $l, $b + $h, $enc->[$b + $l])) {
            # one unbroken run: a range test plus a constant offset.
            # The lower bound goes first; with the operands swapped the
            # test could only match when lo == hi.
            my $dif = $enc->[$b + $l] - ($b + $l);
            $dif = $dif < 0 ? sprintf("- 0x%04x", -$dif) : sprintf("+ 0x%04x", $dif);
            $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x) return · %s;\n", $b + $l, $b + $h, $dif;
        } else {
            # needs a table; use 16-bit elements as soon as one value
            # does not fit into 8 bits
            my ($wide, $t, $f) = (0, "uint8_t", "%02x");
            for my $i ($l .. $h) {
                if ($enc->[$b + $i] > 255) {
                    ($wide, $t, $f) = (1, "uint16_t", "%04x");
                    last;
                }
            }

            if ($wide) {
                push @t2, [$p, $l, $h, $t, $f];
            } else {
                push @t1, [$b, $l, $h, $t, $f];
            }
        }
    }

    # Only a few 16-bit pages: give each its own direct table.  @t2 holds
    # page numbers while @t1 expects base addresses, so convert on the way.
    if (@t2 <= 3) {
        push @t1, map { [ $_->[0] << 8, @{$_}[1 .. 4] ] } @t2;
        @t2 = ();
    }

    for my $entry (@t1) {
        my ($b, $l, $h, $t, $f) = @$entry;

        # Value stored in unmapped slots.  It is handed to gentab so that
        # the table content agrees with the "== $rep" test emitted below;
        # a real mapping to $rep cannot be told apart from a hole.
        my $rep = $b + $l == 0 ? 128 : 0;

        $tab .= "static const $t $base\_$b\[]" . gentab($enc, $b + $l, $b + $h, $f, $rep);

        # gentab has just set $last_tab_full: only tables with holes need
        # the NOCHAR test
        my $guard = $last_tab_full
            ? sprintf("$base\_$b\[· - 0x%04x] == $rep ? NOCHAR : ", $b + $l)
            : "";

        $fun .= sprintf "if (0x%04x <= · && · <= 0x%04x)\n"
                      . " return %s$base\_$b\[· - 0x%04x];\n",
                        $b + $l, $b + $h,
                        $guard,
                        $b + $l;
    }

    if (@t2) {
        # page range and offset range covered by all 16-bit pages together
        my ($min, $max) = (255, 0);
        my ($l, $h) = (255, 0);
        for my $entry (@t2) {
            my ($p, $lo, $hi) = @$entry;
            $max = $p if $p > $max;
            $min = $p if $p < $min;
            $l = $lo if $lo < $l;
            $h = $hi if $hi > $h;
        }

        $fun .= "uint8_t l = ·;\n";
        $fun .= "uint16_t h = · >> 8;\n";

        if ($h - $l < 200) {
            # one rectangular table: ($max - $min + 1) rows of $d columns
            my $d = $h - $l + 1;

            my @merged;
            for my $p ($min .. $max) {
                for my $i ($l .. $h) {
                    $merged[($p - $min) * $d + $i - $l] = $enc->[$p * 256 + $i];
                }
            }
            $tab .= "static const uint16_t $base\_m[]" . gentab(\@merged, 0, $#merged, "%04x");

            if ($last_tab_full) {
                # holes are stored as 0 and reported as NOCHAR
                $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                              . " && 0x%02x <= l && l <= 0x%02x)\n"
                              . " return $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                              . " ? $base\_m\[h * 0x%02x + l - 0x%04x]\n"
                              . " : NOCHAR;\n",
                                $min, $max, $l, $h,
                                ($d, $min * $d + $l) x 2;
            } else {
                $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                              . " && 0x%02x <= l && l <= 0x%02x)\n"
                              . " return $base\_m\[h * 0x%02x + l - 0x%04x];\n",
                                $min, $max, $l, $h,
                                $d,
                                $min * $d + $l;
            }
        } else {
            # one table per page plus an index of pointers; pages without
            # a table keep a 0 entry in the index
            my @tab = (0) x ($max - $min + 1);
            for my $entry (@t2) {
                my ($p, undef, undef, $t, $f) = @$entry;
                $tab .= "static const $t $base\_$p\[]" . gentab($enc, $p * 256 + $l, $p * 256 + $h, $f, 0);
                $tab[$p - $min] = "$base\_$p";
            }

            $tab .= "const uint16_t *$base\_i[] = {\n"
                  . wrap(join ", ", @tab)
                  . "\n};\n\n";

            $fun .= sprintf "if (0x%02x <= h && h <= 0x%02x\n"
                          . " && 0x%02x <= l && l <= 0x%02x\n"
                          . " && $base\_i[h - 0x%02x])\n"
                          . " return $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                          . " ? $base\_i\[h - 0x%02x][l - 0x%02x]\n"
                          . " : NOCHAR;\n",
                            $min, $max, $l, $h,
                            $min, ($min, $l) x 2;
        }
    }

    $fun .= "return NOCHAR;\n";

    # replace the placeholder with the real C variable name
    $fun =~ s/·/$ch/g;

    return ($tab, $fun);
}
201    
# Main loop: each non-blank DATA line names a group, the base name of the
# generated table and the Encode name of the character set.  For every
# line a header table/<base>.h with both conversion directions is written.
while (my $line = <DATA>) {
    next unless $line =~ /\S/;    # ignore blank lines

    my ($group, $base, $cs) = split ' ', $line;

    my @enc1;    # unicode code point => encoded value
    my @enc2;    # encoded value      => unicode code point (first one wins)

    for my $ucs (0 .. 65535) {
        my $bytes = encode $cs, (chr $ucs), Encode::FB_QUIET;

        # FB_QUIET: nothing is returned for characters the set cannot encode
        next unless length $bytes;

        # read the encoded byte string as one big-endian number
        my $code = hex unpack "H*", $bytes;
        $enc1[$ucs] = $code;
        $enc2[$code] = $ucs unless defined $enc2[$code];
    }

    my ($tab1, $fun1) = gen(\@enc1, "$base\_f", "unicode");
    my ($tab2, $fun2) = gen(\@enc2, "$base\_t", "enc");

    #$tab1 =~ s/^/ /gm;
    #$tab2 =~ s/^/ /gm;
    $fun1 =~ s/^/ /gm;
    $fun2 =~ s/^/ /gm;

    print "$base\n";    # progress output

    my $file = "table/$base.h";
    open my $out, '>', $file
        or die "cannot open $file for writing: $!";

    print $out <<EOF;
//
// AUTOMATICALLLY GENERATED by gentables
//
#ifdef ENCODING_$group

$tab1$tab2
struct rxvt_codeset_conv_$base : rxvt_codeset_conv {
uint32_t from_unicode (uint32_t unicode) const {
$fun1 }
uint32_t to_unicode (uint32_t enc) const {
$fun2 }
} rxvt_codeset_conv_$base;

#else

#define rxvt_codeset_conv_$base rxvt_codeset_conv_unknown

#endif
EOF

    # buffered write errors only show up when the handle is closed
    close $out
        or die "cannot close $file: $!";
}
252    
253     __DATA__
254     VN viscii viscii
255     KR ksc5601_1987_0 ksc5601-raw
256     CN gb2312_1980_0 gb2312-raw
257     CN_EXT cns11643_1992_1 cns11643-1
258     CN_EXT cns11643_1992_2 cns11643-2
259     CN_EXT cns11643_1992_3 cns11643-3
260     CN_EXT cns11643_1992_4 cns11643-4
261     CN_EXT cns11643_1992_5 cns11643-5
262     CN_EXT cns11643_1992_6 cns11643-6
263     CN_EXT cns11643_1992_7 cns11643-7
264     CN_EXT cns11643_1992_f cns11643-f
265 pcg 1.2 CN big5 big5
266 pcg 1.1 CN_EXT big5_ext big5ext
267     CN_EXT big5_plus big5plus
268     EU koi8_r koi8-r
269     EU koi8_u koi8-u
270     DEFAULT iso8859_1 iso-8859-1
271     EU iso8859_2 iso-8859-2
272     EU iso8859_3 iso-8859-3
273     EU iso8859_4 iso-8859-4
274     EU iso8859_5 iso-8859-5
275     EU iso8859_6 iso-8859-6
276     EU iso8859_7 iso-8859-7
277     EU iso8859_8 iso-8859-8
278     EU iso8859_9 iso-8859-9
279     EU iso8859_10 iso-8859-10
280     EU iso8859_11 iso-8859-11
281     EU iso8859_13 iso-8859-13
282     EU iso8859_14 iso-8859-14
283     DEFAULT iso8859_15 iso-8859-15
284     EU iso8859_16 iso-8859-16
285     JP jis0201_1976_0 jis0201-raw
286     JP jis0208_1983_0 jis0208-raw
287     JP jis0212_1990_0 jis0212-raw
288     JP_EXT jis0213_1 jis0213-1-raw
289     JP_EXT jis0213_2 jis0213-2-raw