1 |
#!/opt/bin/perl |
2 |
|
3 |
use strict; |
4 |
|
5 |
package markov; |
6 |
|
7 |
sub new { |
8 |
my ($class, @arg) = @_; |
9 |
|
10 |
bless { |
11 |
longest => 10, |
12 |
s_beg => [], # arbitarry "unique" start symbol |
13 |
s_end => [], # arbitrary "unique" end symbol |
14 |
tree => {}, |
15 |
@arg, |
16 |
}, $class; |
17 |
} |
18 |
|
19 |
sub simplify { |
20 |
local $_ = lc shift; |
21 |
y/aeiouüöä//d; |
22 |
s/ß/ss/g; |
23 |
y/a-z\000//cd; |
24 |
$_; |
25 |
} |
26 |
|
27 |
sub seed { |
28 |
my ($self, $symbols) = @_; |
29 |
|
30 |
my @sym = (@$symbols, $self->{s_end}); |
31 |
my @seq = $self->{s_beg}; |
32 |
my $tree = $self->{tree}; |
33 |
|
34 |
while () { |
35 |
my $next = shift @sym |
36 |
or last; |
37 |
|
38 |
shift @seq while @seq > $self->{longest}; |
39 |
|
40 |
for (1 .. @seq) { |
41 |
my $node = $tree->{simplify join "\0", @seq[-$_ .. -1]} ||= {}; |
42 |
$node->{$next}++; |
43 |
$node->{""}++; |
44 |
} |
45 |
|
46 |
push @seq, $next; |
47 |
} |
48 |
} |
49 |
|
50 |
sub complete { |
51 |
my ($self, $symbols, $prob) = @_; |
52 |
|
53 |
my $tree = $self->{tree}; |
54 |
my @sym = @$symbols; |
55 |
my @res = @sym; |
56 |
|
57 |
@sym = $self->{s_beg} unless @sym; |
58 |
|
59 |
shift @sym while @sym && !$tree->{simplify join "\0", @sym}; |
60 |
|
61 |
return unless @sym; |
62 |
|
63 |
#use Dumpvalue; Dumpvalue->new (compactDump => 1, veryCompact => 1, quoteHighBit => 1, tick => '"')->dumpValue ($self); |
64 |
|
65 |
outer: |
66 |
while () { |
67 |
my $node = $tree->{simplify join "\0", @sym}; |
68 |
|
69 |
if ($node) { |
70 |
my $sel = rand $node->{""}; |
71 |
keys %$node; |
72 |
|
73 |
while (my ($k, $v) = each %$node) { |
74 |
if (length $k and ($sel -= $v) < 0) { |
75 |
last outer if $k eq $self->{s_end}; |
76 |
|
77 |
push @sym, $k; |
78 |
push @res, $k; |
79 |
|
80 |
$prob->{$k} = $v / $node->{""}; |
81 |
|
82 |
next outer; |
83 |
} |
84 |
} |
85 |
|
86 |
die "FATAL: internal error"; |
87 |
} else { |
88 |
shift @sym; |
89 |
@sym |
90 |
or die "FATAL: empty prefix (ENOTUNDERSTOOD)"; |
91 |
} |
92 |
} |
93 |
|
94 |
@res |
95 |
} |
96 |
|
97 |
package main; |
98 |
|
99 |
use Socket; |
100 |
use IO::Socket::INET; |
101 |
|
102 |
use YAML; |
103 |
use Encode; |
104 |
use Event; |
105 |
use Net::Knuddels; |
106 |
use List::Util; |
107 |
use Time::HiRes; |
108 |
|
109 |
my @CHANNELS = ( |
110 |
'Flirt', |
111 |
'Flirt Private', |
112 |
'Singles 11-14', |
113 |
'Singles 15-17', |
114 |
|
115 |
'Singles 11-14 2', |
116 |
'Singles 15-17 2', |
117 |
'Flirt 2', |
118 |
'Flirt Private 2', |
119 |
'Singles 11-14 3', |
120 |
'Singles 15-17 3', |
121 |
'Flirt 3', |
122 |
'Flirt Private 3', |
123 |
'Singles 11-14 4', |
124 |
'Singles 15-17 4', |
125 |
'Flirt 4', |
126 |
'Flirt Private 4', |
127 |
'Singles 11-14 5', |
128 |
'Singles 15-17 5', |
129 |
'Flirt 5', |
130 |
'Flirt Private 5', |
131 |
'Singles 11-14 6', |
132 |
'Singles 15-17 6', |
133 |
'Flirt 6', |
134 |
'Flirt Private 6', |
135 |
'Singles 11-14 7', |
136 |
'Singles 15-17 7', |
137 |
'Flirt 7', |
138 |
'Flirt Private 7', |
139 |
'Singles 11-14 8', |
140 |
'Singles 15-17 8', |
141 |
'Flirt 8', |
142 |
'Flirt Private 8', |
143 |
); |
144 |
|
145 |
my $logdir = "logs"; |
146 |
|
147 |
my $Knick = $ARGV[0]; |
148 |
my $Kpass = $ARGV[1]; |
149 |
|
150 |
my $client; |
151 |
|
152 |
my $seed = [split /\n/, do { open my $fh, "<:utf8", "markovbot.txt"; local $/; <$fh> } ]; |
153 |
|
154 |
my $fwd = new markov longest => 2; |
155 |
my $rev = new markov longest => 2; |
156 |
|
157 |
my %freq; |
158 |
my $word_cnt; |
159 |
|
160 |
sub word { |
161 |
$_[0] =~ /(\w+)/ ? lc $1 : (); |
162 |
} |
163 |
|
164 |
sub seed_msg { |
165 |
my $msg = $_[0]; |
166 |
|
167 |
return if $msg =~ /leck|fick|möse|mose|moese|uschi|usci|ushi|fikk|scheide|vagina |
168 |
|bums|wichs|wix|popp|popen|fuck|dreck|laber|saugen|pussy|\bsuck\b|lutsch |
169 |
|knallen|schlecken |
170 |
|\bblas|piss|schluck|fingern|spritz|\bloch|\bsteck|vögel|voegel|vogel |
171 |
|hure|strich|sklave|handschell|slave|perver|befehl|stöhn|dildo |
172 |
|stengel|penis|schwanz|pimmel|steifen|ejak|sperma|\btitt|\bass|\bsack|\bsaft\b|\beier\b|willy |
173 |
|\bcs\b|\bts\b|\brs\b|\bicq\b |
174 |
|cam\b|\bmsn\b|\bbot\b|\bchat.*bot\b|tanga|schlafen|sex|\bsau\b |
175 |
|intim|arsch|fotze|dumm|schnauze|klappe|rasiert|fresse|maul\b|\bmaul|rosett?e |
176 |
|zieh.*aus|nackt|geile|feucht|willig |
177 |
|sätz|saetz|setze|\bsatz |
178 |
|sc?h?wul|schwuchtel|transe|transv|schlampe|tussi|nutte|mädel |
179 |
|admin|nachgeburt |
180 |
|(?-i:[A-Z]{4,}) |
181 |
|\S{20,} |
182 |
/xi; |
183 |
|
184 |
my @msg = $msg =~ m/(\S+)/g; |
185 |
#my @msg = split /\b/, $msg; |
186 |
|
187 |
$freq{word $_}++ for @msg; |
188 |
$word_cnt += @msg; |
189 |
|
190 |
$fwd->seed (\@msg); |
191 |
$rev->seed ([reverse @msg]); |
192 |
} |
193 |
|
194 |
for (@$seed) { |
195 |
last if /^$/; |
196 |
seed_msg $_; |
197 |
} |
198 |
|
199 |
my %grammar_reply = qw( |
200 |
ich du |
201 |
du ich |
202 |
mir dir |
203 |
dir mir |
204 |
mein dein |
205 |
dein mein |
206 |
deine meine |
207 |
meine deine |
208 |
deiner meiner |
209 |
meiner deiner |
210 |
frau mann |
211 |
mädel junge |
212 |
mädchen junge |
213 |
girls boys |
214 |
boys girls |
215 |
huhu hi |
216 |
hi hi |
217 |
hallo hi |
218 |
typen mädels |
219 |
bye bye |
220 |
ciao bye |
221 |
); |
222 |
|
223 |
sub gen_reply { |
224 |
my ($msg) = @_; |
225 |
|
226 |
my @msg = $msg =~ /(\S+)/g; |
227 |
|
228 |
my @key = map { |
229 |
my $word = word $_; |
230 |
|
231 |
$freq{$word} < $word_cnt * 0.003 |
232 |
&& $freq{$word} |
233 |
&& 2 <= length $word |
234 |
? $word |
235 |
: () |
236 |
} @msg; |
237 |
|
238 |
my @srch = (("") x 5, @key, map { |
239 |
my $word = word $_; |
240 |
|
241 |
$grammar_reply{$word} || () |
242 |
} @msg); |
243 |
|
244 |
my $reply; |
245 |
my $best = -1; |
246 |
my $idx; |
247 |
|
248 |
#warn "KEY<@key> SRCH<@srch>\n";#d# |
249 |
|
250 |
for (1..200) { |
251 |
my $prob = {}; |
252 |
|
253 |
my @r = $rev->complete ( |
254 |
[reverse $fwd->complete ( |
255 |
[ |
256 |
$srch[++$idx % @srch] |
257 |
], |
258 |
$prob |
259 |
) ], $prob |
260 |
); |
261 |
|
262 |
# my $b = (rand 0.02 / (@r + 1)) |
263 |
# + (@key ? (List::Util::sum map $prob->{$_} || 1, @key) / @key : 0); |
264 |
# |
265 |
# $b += 0.2 if @r < 3; |
266 |
|
267 |
$b = @r ** 0.2 * (rand) |
268 |
+ (@key ? (List::Util::sum map $prob->{$_} || 1, @key) / @key : 0); |
269 |
|
270 |
#my $b = (List::Util::sum map $freq{word $_}, @r) / (@r ** 3 * $word_cnt); |
271 |
|
272 |
($reply, $best) = ((join " ", reverse @r), $b) if $b > $best; |
273 |
} |
274 |
|
275 |
$reply; |
276 |
} |
277 |
|
278 |
while ($ENV{DEBUG}) { |
279 |
my $r = gen_reply scalar <>; |
280 |
print "$r\n\n"; |
281 |
} |
282 |
|
283 |
Event->signal (signal => "INT", cb => sub { Event::unloop(-1) }); |
284 |
|
285 |
Event->timer (after => 60, interval => 60, cb => sub { |
286 |
open my $fh, ">:utf8", "markovbot.txt~" |
287 |
or return; |
288 |
print $fh join "\n", @$seed; |
289 |
close $fh; |
290 |
rename "markovbot.txt~", "markovbot.txt"; |
291 |
}); |
292 |
|
293 |
sub logit { |
294 |
my ($msg, $file, $src, $dst, $room) = @_; |
295 |
|
296 |
mkdir $logdir; |
297 |
my $fh; |
298 |
|
299 |
unless (open $fh, ">>:utf8", Encode::encode_utf8 "$logdir/$file") { |
300 |
warn "Couldn't open for appending $logdir/$src: $!\n"; |
301 |
return; |
302 |
} |
303 |
|
304 |
print $fh "$room\t$src\t$dst\t$msg\n"; |
305 |
} |
306 |
|
307 |
#################################################################################### |
308 |
########################## MAIN START ############################################## |
309 |
#################################################################################### |
310 |
|
311 |
$client = new Net::Knuddels::Client |
312 |
PeerAddr => "213.61.5.150:2710", |
313 |
command_wait => sub { |
314 |
my ($client, $wait) = @_; |
315 |
Event->timer (after => $wait, cb => sub { $client->command_cb }); |
316 |
}; |
317 |
|
318 |
Event->io ( |
319 |
fd => $client->fh, |
320 |
poll => 'r', |
321 |
cb => sub { |
322 |
$client->ready |
323 |
or $_[0]->w->cancel; |
324 |
}); |
325 |
|
326 |
$client->login; |
327 |
|
328 |
$client->register (dialog => sub { |
329 |
use Dumpvalue; |
330 |
print "---\n"; |
331 |
Dumpvalue->new (compactDump => 1, veryCompact => 1, quoteHighBit => 1, tick => '"')->dumpValue ([@_]); |
332 |
Event::unloop(-1) if grep /Falsches.*Passwort/i, @_; |
333 |
}); |
334 |
|
335 |
$client->register (login => sub { |
336 |
Event->timer (after => 0, interval => 3, cb => sub { |
337 |
$client->enter_room (shift @CHANNELS, $Knick, $Kpass); |
338 |
}); |
339 |
}); |
340 |
|
341 |
$client->register (msg_room => sub { |
342 |
my ($room, $user, $msg) = @_; |
343 |
}); |
344 |
|
345 |
my @queue; |
346 |
|
347 |
Event->timer (interval => 1, cb => sub { |
348 |
my $msg = shift @queue |
349 |
or return; |
350 |
|
351 |
logit ($msg->[2], $msg->[0], $Knick, $msg->[0], $msg->[1]); |
352 |
$client->send_priv_msg (@$msg); |
353 |
}); |
354 |
|
355 |
my $some_room; |
356 |
|
357 |
$client->register (room_info => sub { |
358 |
print "JOIN ROOM: $_[0]\n"; |
359 |
$some_room = $_[0]; |
360 |
}); |
361 |
|
362 |
Event->timer (after => 60, interval => 60, cb => sub { |
363 |
$client->send_priv_msg ("James", $some_room, "/knuschel"); |
364 |
}); |
365 |
|
366 |
my %next_time; |
367 |
|
368 |
$client->register (msg_priv_nondup => sub { |
369 |
my ($room, $src, $dst, $msg) = @_; |
370 |
|
371 |
my $NOW = Time::HiRes::time; |
372 |
|
373 |
$msg =~ s/\260[^\260]*\260//g; |
374 |
|
375 |
print "($room) $src >> $msg\n"; |
376 |
logit ($msg, $src, $src, $dst, $room); |
377 |
|
378 |
return if $next_time{$src} > time; # do not talk unnaturally often |
379 |
|
380 |
my $reply = gen_reply $msg; |
381 |
|
382 |
push @$seed, $msg; |
383 |
#seed_msg $msg;#d# |
384 |
|
385 |
my $delay = 2 + 30 * (rand) ** 5 + 0.2 * length $reply; |
386 |
$next_time{$src} = time + $delay; |
387 |
|
388 |
print "($room) $src << $reply ($delay)\n"; |
389 |
|
390 |
Event->timer (at => $NOW + $delay, cb => sub { |
391 |
push @queue, [$src, $room, $reply]; |
392 |
}); |
393 |
}); |
394 |
|
395 |
Event::loop; |
396 |
|