#!/opt/bin/perl

# Utility to convert the extracted dictionary to a Perl hash for further use.
#
# To obtain the input, run "jcf-dump -v GroupChat.class" and extract the
# longest constant string, without the surrounding '"' characters
# (or use jad / GroupChat.java; the alternatives needed for that input are
# kept as commented-out lines tagged "jad" below).
#
# The escaped dictionary string is read from the files named on the command
# line (or STDIN), and the source of Net::Knuddels::Dictionary is written
# to STDOUT.

use strict;
use warnings;

use Encode;
use Data::Dumper;

# Single-character backslash escapes and the characters they stand for.
my %SIMPLE_ESCAPE = (
    "r" => "\015",
    "n" => "\012",
    "t" => "\011",
    "'" => "'",
    '"' => '"',
);

# Return the replacement text for one backslash escape.
#
# The four arguments are the capture groups of the pattern used in
# unescape_dictionary(); at most one of them is defined:
#   $hex2   - two hex digits following "\0"   (jcf-dump style)
#   $hex4   - four hex digits following "\u"
#   $simple - one of r, n, t, ' or "
#   $other  - any other escaped character (unsupported, fatal)
# When none is defined the escape was a doubled backslash.
#
# The tests use defined() rather than truth, so that an escaped "0" which
# is not followed by two hex digits is reported as an unknown escape
# instead of being silently turned into a backslash.
sub _expand_escape {
    my ($hex2, $hex4, $simple, $other) = @_;

    return chr hex $hex2 if defined $hex2;    # jcf
    #return chr oct $hex2 if defined $hex2;   # jad
    return chr hex $hex4 if defined $hex4;
    return $SIMPLE_ESCAPE{$simple} if defined $simple;
    die "<<<$other>>>\n" if defined $other;

    return "\\";
}

# Expand all backslash escapes in $text and return the resulting string.
# Dies with "<<<c>>>\n" when it meets an escape character c it does not know.
sub unescape_dictionary {
    my ($text) = @_;

    $text =~ s{
        \\
        (?: \\
          | 0([0-9a-f]{2})    # jcf
         #| [0-7]{3}          # jad
          | u([0-9a-f]{4})
          | ([rnt\x27\x22])
          | (.)
        )
    }{
        _expand_escape($1, $2, $3, $4)
    }sgex;

    return $text;
}

# Split the unescaped dictionary into its entries and assign each entry a
# bit-string code.  Returns a reference to a hash mapping code => string.
#
# Each entry starts with a one-character prefix; the value 255 means that
# the real prefix is the big-endian 16-bit number formed by the next two
# characters.  The prefix encodes the string length minus one
# (int(prefix / 21)) and the width of the code in bits (prefix % 21).
sub build_code_table {
    my ($dictionary) = @_;

    my %string_for;

    # $idx holds the current code value, $bit its current width.  Starting
    # at -32 makes the first "widen" pass shift the initial 1 bit out of
    # the 32-bit value, so the first code consists of zero bits only.
    my ($idx, $bit) = (0, -32);

    while (length $dictionary) {
        my $prefix = ord substr $dictionary, 0, 1, "";

        if ($prefix == 255) {
            $prefix = unpack "n",
                Encode::encode("iso-8859-1", substr($dictionary, 0, 2, ""));
            defined $prefix
                or die "truncated dictionary: incomplete 16-bit prefix\n";
        }

        my $len    = int($prefix / 21);
        my $width  = $prefix % 21;
        my $string = substr $dictionary, 0, $len + 1, "";

        # carry: drop trailing 1 bits, shortening the code accordingly
        while ($idx & 1) {
            $idx >>= 1;
            $bit--;
        }

        $idx |= 1;

        # widen: append 0 bits until the code is $width bits long, keeping
        # the value within 32 bits
        while ($bit < $width) {
            $idx &= 0x7fffffff;
            $idx <<= 1;
            $bit++;
        }

        # "b" unpacks bits in ascending order; reversing the string puts
        # the most significant bit first.
        my $code = reverse unpack("b$width", pack("V", $idx));
        #$string =~ s/([^\x20-\x7e])/sprintf "\\x{%x}", ord $1/ge;

        $string_for{$code} = $string;
    }

    return \%string_for;
}

# Read the escaped dictionary, convert it and print the Perl module source.
sub main {
    local $/;    # slurp the whole input at once

    binmode STDIN;
    binmode STDOUT, ":utf8";

    my $raw = <>;
    $raw = "" unless defined $raw;

    my $dictionary = unescape_dictionary($raw);

    #$dictionary = Encode::decode "utf-8", $dictionary; # jad

    my $code = build_code_table($dictionary);

    # Sanity check: the all-zero ten-bit code must exist and map to the
    # empty string.  It is removed from the generated table.  A missing
    # entry is treated as an error as well, instead of comparing undef
    # with "" and passing by accident.
    my $sentinel = delete $code->{"0000000000"};
    defined($sentinel) && $sentinel eq ""
        or die "FATAL";

    print "package Net::Knuddels::Dictionary;\n",
          "\n",
          "\$Net::Knuddels::Dictionary = ";

    print Data::Dumper->new([$code])->Terse(1)->Useqq(0)->Quotekeys(1)->Sortkeys(0)->Dump;

    print ";\n1;\n";
}

# Run only when executed as a script, so the helper subs can be loaded
# (e.g. by a test) without reading STDIN.
main() unless caller;