ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/Net-Knuddels/util/dic2bin
Revision: 1.8
Committed: Mon Jul 21 04:16:11 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
CVS Tags: HEAD
Changes since 1.7: +6 -6 lines
Log Message:
*** empty log message ***

File Contents

# Content
#!/opt/bin/perl

# utility to convert the extracted dictionary to a perl hash for further use

# use jcf-dump -v GroupChat.class and extract the longest constant string sans '"'
# (or jad GroupChat.java)
#
# Input:  the escaped dictionary string, read from the files named on the
#         command line, or from STDIN when none are given.
# Output: Perl source on STDOUT that assigns a { bit-code => string } hash
#         reference to $Net::Knuddels::Dictionary.

use strict;
use warnings;

use Encode;
use Data::Dumper;

# Undo the backslash escapes found in the dumped string constant and return
# the decoded text.
#
# Recognised escapes:
#   \\       a literal backslash
#   \0XX     character with hex code XX (jcf-dump style; for jad output the
#            original author noted an octal variant, [0-7]{3} with chr oct)
#   \uXXXX   character with hex code XXXX
#   \r \n \t \' \"
#
# Any other escaped character c is fatal: dies with "<<<c>>>\n".
sub decode_escapes {
    my ($text) = @_;

    my %plain = (
        "r" => "\015",
        "n" => "\012",
        "t" => "\011",
        "'" => "'",
        '"' => '"',
    );

    # Branches are selected with defined(), not truthiness: an unknown
    # escape whose character is "0" captures the false value "0" in $4 and
    # must still be reported rather than silently turned into a backslash.
    $text =~ s{
        \\
        (?: \\
          | 0([0-9a-f]{2})   # jcf
         #| [0-7]{3}         # jad
          | u([0-9a-f]{4})
          | ([rnt\x27\x22])
          | (.)
        )
    }{
          defined($1) ? chr(hex($1))
        : defined($2) ? chr(hex($2))
        : defined($3) ? $plain{$3}
        : defined($4) ? die("<<<$4>>>\n")
        :               "\\"
    }sgex;

    return $text;
}

# Split the decoded dictionary into its entries and assign each one a bit
# code.  Returns a reference to a hash mapping the code (a string of "0" and
# "1" characters) to the entry's string.
#
# Entry layout, as consumed below:
#   - one prefix character; when its ordinal is 255, the next two characters
#     are taken as a big-endian 16 bit prefix value instead
#   - int(prefix / 21) + 1 characters of string data
#   - prefix % 21 is the width of the entry's code in bits
#
# Dies when a 255 marker is not followed by two more characters.
sub build_code_table {
    my ($dictionary) = @_;

    my %code;

    # $idx holds the current code value, $bit its current width.  Starting
    # $bit at -32 makes the first "widen" pass shift the initial 1 bit out
    # of the 32 bit window, so the first entry gets an all-zero code.
    my ($idx, $bit) = (0, -32);

    while (length $dictionary) {
        my $prefix = ord substr $dictionary, 0, 1, "";

        if ($prefix == 255) {
            my $wide = Encode::encode("iso-8859-1", substr $dictionary, 0, 2, "");
            length($wide) == 2
                or die "truncated extended prefix at end of dictionary\n";
            $prefix = unpack "n", $wide;
        }

        my ($len, $width) = (int($prefix / 21), $prefix % 21);
        my $string = substr $dictionary, 0, $len + 1, "";

        # carry: drop trailing 1 bits, then set the lowest bit; together
        # this steps to the next code value
        while ($idx & 1) {
            $idx >>= 1;
            $bit--;
        }
        $idx |= 1;

        # widen: append 0 bits until the code is $width bits long, keeping
        # the value inside 32 bits
        while ($bit < $width) {
            $idx &= 0x7fffffff;
            $idx <<= 1;
            $bit++;
        }

        # "b" unpacks least significant bit first; reverse the string so the
        # code reads most significant bit first
        my $code = scalar reverse unpack "b$width", pack "V", $idx;
        #$string =~ s/([^\x20-\x7e])/sprintf "\\x{%x}", ord $1/ge;

        $code{$code} = $string;
    }

    return \%code;
}

# Read the dictionary, convert it and print the resulting Perl package.
sub main {
    binmode STDIN;
    binmode STDOUT, ":utf8";

    # slurp all input at once, without changing $/ for the rest of the file
    my $dictionary = do { local $/; <> };
    $dictionary = "" unless defined $dictionary;

    $dictionary = decode_escapes($dictionary);

    #$dictionary = Encode::decode "utf-8", $dictionary; # jad

    my $code = build_code_table($dictionary);

    # The ten-zero code must map to the empty string (or be absent) and is
    # removed from the table.  NOTE(review): presumably this entry is the
    # artifact of a trailing newline in the input (ord 10 => width 10, no
    # string data left) -- confirm against real input.
    my $sentinel = delete $code->{"0000000000"};
    !defined($sentinel) || $sentinel eq ""
        or die "FATAL";

    print "package Net::Knuddels::Dictionary;\n",
          "\n",
          "\$Net::Knuddels::Dictionary = ";

    print Data::Dumper->new([$code])->Terse(1)->Useqq(0)->Quotekeys(1)->Sortkeys(0)->Dump;

    print ";\n1;\n";
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without touching STDIN/STDOUT.
main() unless caller;