1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
package markov; |
4 |
|
5 |
sub new { |
6 |
my ($class, @arg) = @_; |
7 |
|
8 |
bless { |
9 |
longest => 10, |
10 |
s_beg => [], # arbitarry "unique" start symbol |
11 |
s_end => [], # arbitrary "unique" end symbol |
12 |
tree => {}, |
13 |
@arg, |
14 |
}, $class; |
15 |
} |
16 |
|
17 |
sub simplify { |
18 |
local $_ = lc shift; |
19 |
y/üöä//d; |
20 |
s/ß/ss/g; |
21 |
y/a-z\000//cd; |
22 |
$_; |
23 |
} |
24 |
|
25 |
sub seed { |
26 |
my ($self, $symbols) = @_; |
27 |
|
28 |
my @sym = (@$symbols, $self->{s_end}); |
29 |
my @seq = $self->{s_beg}; |
30 |
my $tree = $self->{tree}; |
31 |
|
32 |
while () { |
33 |
my $next = shift @sym |
34 |
or last; |
35 |
|
36 |
shift @seq while @seq > $self->{longest}; |
37 |
|
38 |
for (1 .. @seq) { |
39 |
my $node = $tree->{simplify join "\0", @seq[-$_ .. -1]} ||= {}; |
40 |
$node->{$next}++; |
41 |
$node->{""}++; |
42 |
} |
43 |
|
44 |
push @seq, $next; |
45 |
} |
46 |
} |
47 |
|
48 |
sub complete { |
49 |
my ($self, $symbols, $prob) = @_; |
50 |
|
51 |
my $tree = $self->{tree}; |
52 |
my @sym = @$symbols; |
53 |
my @res = @sym; |
54 |
|
55 |
@sym = $self->{s_beg} unless @sym; |
56 |
|
57 |
shift @sym while @sym && !$tree->{simplify join "\0", @sym}; |
58 |
|
59 |
return unless @sym; |
60 |
|
61 |
#use Dumpvalue; Dumpvalue->new (compactDump => 1, veryCompact => 1, quoteHighBit => 1, tick => '"')->dumpValue ($self); |
62 |
|
63 |
outer: |
64 |
while () { |
65 |
my $node = $tree->{simplify join "\0", @sym}; |
66 |
|
67 |
if ($node) { |
68 |
my $sel = rand $node->{""}; |
69 |
keys %$node; |
70 |
|
71 |
while (my ($k, $v) = each %$node) { |
72 |
if (length $k and ($sel -= $v) < 0) { |
73 |
last outer if $k eq $self->{s_end}; |
74 |
|
75 |
push @sym, $k; |
76 |
push @res, $k; |
77 |
|
78 |
$prob->{$k} = $v / $node->{""}; |
79 |
|
80 |
next outer; |
81 |
} |
82 |
} |
83 |
|
84 |
die "FATAL: internal error"; |
85 |
} else { |
86 |
shift @sym; |
87 |
@sym or last; # die "FATAL: empty prefix (ENOTUNDERSTOOD)"; |
88 |
} |
89 |
} |
90 |
|
91 |
@res |
92 |
} |
93 |
|
94 |
|
95 |
package main; |
96 |
|
97 |
use IO::Select; |
98 |
use IO::Socket::INET; |
99 |
use Net::IRC::Server; |
100 |
use Data::Dumper; |
101 |
|
102 |
my %words; |
103 |
my $wcnt; |
104 |
|
105 |
my $markov = new markov longest => 4; |
106 |
my $rmarkov = new markov longest => 4; |
107 |
|
108 |
open FO, "<", $ARGV[0] or die "couldn't open $!"; |
109 |
#:raw:perlio:encoding(iso-8859-1) |
110 |
my $i = 0; |
111 |
my $data; |
112 |
while (<FO>) { |
113 |
chomp; |
114 |
$data .= $_; |
115 |
} |
116 |
|
117 |
for (split /[.!?] /, $data) { |
118 |
my @syms = split /\s+/, $_; |
119 |
$words{markov::simplify $_}++ for @syms; |
120 |
$wcnt += @syms; |
121 |
$markov->seed (\@syms); |
122 |
$rmarkov->seed ([ reverse @syms ]); |
123 |
print "$i\n" if ++$i % 100 == 0; |
124 |
last if defined $ARGV[1] and $ARGV[1] < $i; |
125 |
} |
126 |
undef $data; |
127 |
print "markov finished!\n"; |
128 |
|
129 |
for (keys %words) { |
130 |
$words{$_} = $words{$_} / $wcnt; |
131 |
} |
132 |
|
133 |
#map { print "$_ : $words{$_}\n" } grep { $words{$_} < 0.00001 } keys %words; |
134 |
#die ":$wcnt :\n"; |
135 |
|
136 |
my $is = new Net::IRC::Server; |
137 |
|
138 |
my $sock = IO::Socket::INET->new( |
139 |
PeerAddr => 'irc.schmorp.de', |
140 |
PeerPort => 24, |
141 |
Proto => 'tcp'); |
142 |
|
143 |
if (!$sock) { die "Couldn't connect: $!\n" } |
144 |
|
145 |
sub ircsend { |
146 |
my ($cmd, $trail, @params) = @_; |
147 |
my $s = $is->mk_msg (undef, $cmd, $trail, @params); |
148 |
print ">$s\n"; |
149 |
$sock->syswrite ($s); |
150 |
} |
151 |
|
152 |
$is->set_cmd_cb ('*', sub { |
153 |
my ($cl, $msg) = @_; |
154 |
print "CM: ".join('=>', %$msg)."\n"; |
155 |
return 1; |
156 |
}); |
157 |
$is->set_cmd_cb ('376', sub { |
158 |
ircsend 'JOIN', undef, '#schmorp'; |
159 |
}); |
160 |
|
161 |
my $nick = "qubi"; |
162 |
|
163 |
my %topics; |
164 |
use List::Util; |
165 |
|
166 |
sub uniq { my %uniq; grep !$uniq{$_}++, @_ } |
167 |
|
168 |
$is->set_cmd_cb ('PRIVMSG', sub { |
169 |
my ($c, $m) = @_; |
170 |
if ($m->{params}[0] eq '#schmorp') { |
171 |
my $s = $m->{params}[1]; |
172 |
if ($s =~ m/^${nick}:\s*(.+?)\s*$/) { |
173 |
my $ms = $1; |
174 |
$ms =~ s/^\s+//; |
175 |
my $res; |
176 |
my $repval = -1e30; |
177 |
|
178 |
my @words = split /\b|\s+/, $ms; |
179 |
my @a = @words; |
180 |
|
181 |
$topics{$_}++ for uniq grep $words{$_} < ($wcnt/1000), @words; |
182 |
|
183 |
my @topics = keys %topics; |
184 |
@topics = 'lukas' unless @topics; |
185 |
for (1..200) { |
186 |
print "$_\n" if $_ % 100 == 0; |
187 |
my @r = reverse $rmarkov->complete ([ reverse $markov->complete ( [ $topics[$_ % @topics] ] ) ]); |
188 |
my $v = List::Util::sum map $topics{$_}, uniq @r; |
189 |
$v -= 0.2 * abs @r - 10; |
190 |
|
191 |
if ($repval < $v) { |
192 |
$repval = $v; |
193 |
$res = \@r; |
194 |
} |
195 |
|
196 |
} |
197 |
|
198 |
$_ *= 0.5 for values %topics; |
199 |
delete @topics{grep $topics{$_} < 0.2, keys %topics}; |
200 |
|
201 |
my $str = join " ", @$res; |
202 |
print join ('=>', %topics)."\n"; |
203 |
#$str =~ s/^\s+//; |
204 |
ircsend 'PRIVMSG', "$repval\: $str", '#schmorp'; |
205 |
} |
206 |
} |
207 |
}); |
208 |
|
209 |
my $s = IO::Select->new (); |
210 |
$s->add ($sock); |
211 |
|
212 |
print "??\n"; |
213 |
ircsend ('NICK', undef, $nick); |
214 |
ircsend ('USER', $nick, $nick, '*' ,'0'); |
215 |
|
216 |
while ((my @ready = $s->can_read (1000))) { |
217 |
for (@ready) { |
218 |
my $data; |
219 |
my $c = $_->sysread ($data, 2048); |
220 |
|
221 |
if ($c == 0) { |
222 |
$s->remove ($_); |
223 |
$_->close (); |
224 |
} else { |
225 |
print "recv $data\n"; |
226 |
$is->feed_irc_data ({}, $data); |
227 |
} |
228 |
} |
229 |
} |