#!/usr/bin/perl use strict; package markov; sub new { my ($class, @arg) = @_; bless { longest => 10, s_beg => [], # arbitarry "unique" start symbol s_end => [], # arbitrary "unique" end symbol tree => {}, @arg, }, $class; } sub simplify { local $_ = lc shift; y/üöä//d; s/ß/ss/g; y/a-z\000//cd; $_; } sub seed { my ($self, $symbols) = @_; my @sym = (@$symbols, $self->{s_end}); my @seq = $self->{s_beg}; my $tree = $self->{tree}; while () { my $next = shift @sym or last; shift @seq while @seq > $self->{longest}; for (1 .. @seq) { my $node = $tree->{simplify join "\0", @seq[-$_ .. -1]} ||= {}; $node->{$next}++; $node->{""}++; } push @seq, $next; } } sub complete { my ($self, $symbols, $prob) = @_; my $tree = $self->{tree}; my @sym = @$symbols; my @res = @sym; @sym = $self->{s_beg} unless @sym; shift @sym while @sym && !$tree->{simplify join "\0", @sym}; return unless @sym; #use Dumpvalue; Dumpvalue->new (compactDump => 1, veryCompact => 1, quoteHighBit => 1, tick => '"')->dumpValue ($self); outer: while () { my $node = $tree->{simplify join "\0", @sym}; if ($node) { my $sel = rand $node->{""}; keys %$node; while (my ($k, $v) = each %$node) { if (length $k and ($sel -= $v) < 0) { last outer if $k eq $self->{s_end}; push @sym, $k; push @res, $k; $prob->{$k} = $v / $node->{""}; next outer; } } die "FATAL: internal error"; } else { shift @sym; @sym or last; # die "FATAL: empty prefix (ENOTUNDERSTOOD)"; } } @res } package main; use IO::Select; use IO::Socket::INET; use Net::IRC::Server; use Data::Dumper; my %words; my $wcnt; my $markov = new markov longest => 4; my $rmarkov = new markov longest => 4; open FO, "<", $ARGV[0] or die "couldn't open $!"; #:raw:perlio:encoding(iso-8859-1) my $i = 0; my $data; while () { chomp; $data .= $_; } for (split /[.!?] /, $data) { my @syms = split /\s+/, $_; $words{markov::simplify $_}++ for @syms; $wcnt += @syms; $markov->seed (\@syms); $rmarkov->seed ([ reverse @syms ]); print "$i\n" if ++$i % 100 == 0; last if defined $ARGV[1] and $ARGV[1] < $i; } undef $data; print "markov finished!\n"; for (keys %words) { $words{$_} = $words{$_} / $wcnt; } #map { print "$_ : $words{$_}\n" } grep { $words{$_} < 0.00001 } keys %words; #die ":$wcnt :\n"; my $is = new Net::IRC::Server; my $sock = IO::Socket::INET->new( PeerAddr => 'irc.schmorp.de', PeerPort => 24, Proto => 'tcp'); if (!$sock) { die "Couldn't connect: $!\n" } sub ircsend { my ($cmd, $trail, @params) = @_; my $s = $is->mk_msg (undef, $cmd, $trail, @params); print ">$s\n"; $sock->syswrite ($s); } $is->set_cmd_cb ('*', sub { my ($cl, $msg) = @_; print "CM: ".join('=>', %$msg)."\n"; return 1; }); $is->set_cmd_cb ('376', sub { ircsend 'JOIN', undef, '#schmorp'; }); my $nick = "qubi"; my %topics; use List::Util; sub uniq { my %uniq; grep !$uniq{$_}++, @_ } $is->set_cmd_cb ('PRIVMSG', sub { my ($c, $m) = @_; if ($m->{params}[0] eq '#schmorp') { my $s = $m->{params}[1]; if ($s =~ m/^${nick}:\s*(.+?)\s*$/) { my $ms = $1; $ms =~ s/^\s+//; my $res; my $repval = -1e30; my @words = split /\b|\s+/, $ms; my @a = @words; $topics{$_}++ for uniq grep $words{$_} < ($wcnt/1000), @words; my @topics = keys %topics; @topics = 'lukas' unless @topics; for (1..200) { print "$_\n" if $_ % 100 == 0; my @r = reverse $rmarkov->complete ([ reverse $markov->complete ( [ $topics[$_ % @topics] ] ) ]); my $v = List::Util::sum map $topics{$_}, uniq @r; $v -= 0.2 * abs @r - 10; if ($repval < $v) { $repval = $v; $res = \@r; } } $_ *= 0.5 for values %topics; delete @topics{grep $topics{$_} < 0.2, keys %topics}; my $str = join " ", @$res; print join ('=>', %topics)."\n"; #$str =~ s/^\s+//; ircsend 'PRIVMSG', "$repval\: $str", '#schmorp'; } } }); my $s = IO::Select->new (); $s->add ($sock); print "??\n"; ircsend ('NICK', undef, $nick); ircsend ('USER', $nick, $nick, '*' ,'0'); while ((my @ready = $s->can_read (1000))) { for (@ready) { my $data; my $c = $_->sysread ($data, 2048); if ($c == 0) { $s->remove ($_); $_->close (); } else { print "recv $data\n"; $is->feed_irc_data ({}, $data); } } }