Markov: a module to implement a chatterbot via markov chains.
yeah, we have MegaHAL, but i can't find a good implementation in python that actually works and is stable, so we'll implement a simple thing ourselves. works pretty much like MegaHAL does, but without the string corruption. original code provided by ape, care of mike bloy
This commit is contained in:
parent
8f86b7484a
commit
8dd223f778
|
@ -0,0 +1,229 @@
|
|||
"""
|
||||
Markov - Chatterbot via Markov chains for IRC
|
||||
Copyright (C) 2010 Brian S. Stephan
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
import cPickle
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
|
||||
from extlib import irclib
|
||||
|
||||
from Module import Module
|
||||
|
||||
class Markov(Module):
|
||||
|
||||
"""
|
||||
Create a chatterbot very similar to a MegaHAL, but simpler and
|
||||
implemented in pure Python. Proof of concept code from Ape.
|
||||
|
||||
Ape wrote: based on this:
|
||||
http://uswaretech.com/blog/2009/06/pseudo-random-text-markov-chains-python/
|
||||
and this:
|
||||
http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/
|
||||
"""
|
||||
|
||||
def __init__(self, irc, config, server):
    """Create the Markov chainer, and load a saved brain if one is available.

    irc, config and server are handed straight to Module.__init__. After
    that the chain keywords and the command regexes are set up, and the
    brain is unpickled from self.brain_filename. If that file cannot be
    opened, the brain starts out empty. Either way the brain is guaranteed
    to contain the head-of-chain entry that a reply walk starts from.
    """

    Module.__init__(self, irc, config, server)

    self.brain_filename = 'dr.botzo.markov'

    # set up some keywords for use in the chains --- don't change these
    # once you've created a brain
    self.start1 = '__start1'
    self.start2 = '__start2'
    self.stop = '__stop'

    # set up regexes, for replying to specific stuff. raw strings, so the
    # \s sequences reach the regex engine instead of being string escapes
    trainpattern = r'!markov\s+train\s+(.*)$'
    learnpattern = r'!markov\s+learn\s+(.*)$'
    replypattern = r'!markov\s+reply(\s+(.*)$|$)'

    self.trainre = re.compile(trainpattern)
    self.learnre = re.compile(learnpattern)
    self.replyre = re.compile(replypattern)

    try:
        # NOTE(review): unpickling can run arbitrary code, so the brain file
        # must only ever come from a trusted location.
        # NOTE(review): pickle files are normally opened in binary mode; text
        # mode is kept so brains written in text mode keep loading unchanged.
        # the with block closes the file even when the load itself fails
        with open(self.brain_filename, 'r') as brainfile:
            self.brain = cPickle.load(brainfile)
    except IOError:
        # no readable brain file, so start from scratch
        self.brain = {}

    # a walk always begins at (start1, start2), so that entry has to exist;
    # seed it with a bare stop only when it is missing, so that restarts do
    # not keep stacking extra stops onto the head of the chain
    if (self.start1, self.start2) not in self.brain:
        self.brain[(self.start1, self.start2)] = [self.stop]
|
||||
|
||||
def register_handlers(self, server):
    """Hook the learner up to public and private messages.

    The handlers are added on self.server; the server argument itself is
    not used here.
    """

    for event_type in ('pubmsg', 'privmsg'):
        self.server.add_global_handler(event_type, self.learn_from_irc_event)
|
||||
|
||||
def unregister_handlers(self):
    """Detach the learner from public and private messages on self.server."""

    for event_type in ('pubmsg', 'privmsg'):
        self.server.remove_global_handler(event_type, self.learn_from_irc_event)
|
||||
|
||||
def save(self):
    """Pickle the brain to self.brain_filename, replacing any earlier file."""

    # binary mode is what pickle expects of its output file, and the with
    # block makes sure the file is closed even if the dump raises
    with open(self.brain_filename, 'wb') as brainfile:
        cPickle.dump(self.brain, brainfile)
|
||||
|
||||
def learn_from_irc_event(self, connection, event):
    """Feed the text of an incoming IRC event into the brain.

    Text that matches one of the !markov command patterns is ignored.
    """

    text = ''.join(event.arguments()[0])

    # commands are instructions for the bot, not conversation, so skip them
    command_patterns = (self.trainre, self.learnre, self.replyre)
    if any(pattern.search(text) for pattern in command_patterns):
        return

    self._learn_line(text)
|
||||
|
||||
def do(self, connection, event, nick, userhost, what, admin_unlocked):
    """Handle commands and inputs.

    The !markov train/learn/reply commands are dispatched to their handlers
    and whatever those return is returned. Otherwise, if the bot's own
    nickname (as reported by connection.get_nickname()) appears in what, a
    generated reply is returned: prefixed with the speaker's nick when the
    line starts with "<botnick>:" or "<botnick>,", and bare when the bot is
    only mentioned. Returns None when the line is neither a command nor a
    mention.
    """

    if self.trainre.search(what):
        return self.markov_train(connection, event, nick, userhost, what, admin_unlocked)
    elif self.learnre.search(what):
        return self.markov_learn(connection, event, nick, userhost, what, admin_unlocked)
    elif self.replyre.search(what):
        return self.markov_reply(connection, event, nick, userhost, what, admin_unlocked)

    # not a command, so see if i'm being mentioned. the nick is escaped
    # because IRC nicks may contain regex metacharacters like [ ] \ ^ { } |
    mynick = re.escape(connection.get_nickname())
    if re.search(mynick, what, re.IGNORECASE) is None:
        return None

    # NOTE(review): the mention test above ignores case but this one does
    # not, so "BOTNICK: hi" is treated as a mention rather than an address
    addressed = re.match('^' + mynick + r'[:,]\s+(.*)', what)
    if addressed:
        # i was addressed directly, so respond, addressing the speaker
        line = addressed.group(1)
        if line:
            reply = self._reply_to_line(line)
        else:
            # nothing came after my nick, so there is no text to seed the
            # chain with; fall back to a random reply, as '!markov reply'
            # does when given no text
            reply = self._reply()
        return '{0:s}: {1:s}'.format(nick, reply)

    # i wasn't addressed directly, so just respond
    return '{0:s}'.format(self._reply_to_line(what))
|
||||
|
||||
def markov_train(self, connection, event, nick, userhost, what, admin_unlocked):
    """Learn lines from a file. Good for initializing a brain.

    Only acts when what matches the train command and admin_unlocked is
    true; the file named in the command is then read line by line and each
    line is learned. Returns a status message, or None when the command
    did not match or admin was not unlocked.
    """

    match = self.trainre.search(what)
    if not (match and admin_unlocked):
        return None

    filename = match.group(1)

    try:
        # the with block closes the file when training finishes or fails
        with open(filename, 'r') as trainfile:
            for line in trainfile:
                self._learn_line(line)
    except IOError:
        return 'No such file \'{0:s}\'.'.format(filename)

    return 'Learned from \'{0:s}\'.'.format(filename)
|
||||
|
||||
def markov_learn(self, connection, event, nick, userhost, what, admin_unlocked):
    """Teach the brain the single line given to the learn command."""

    found = self.learnre.search(what)
    if not found:
        return

    self._learn_line(found.group(1))
|
||||
|
||||
def markov_reply(self, connection, event, nick, userhost, what, admin_unlocked):
    """Produce a reply for the reply command, without learning anything.

    When text follows the command it is used to steer the reply; with no
    text the reply is entirely random. Returns None if what is not a
    reply command.
    """

    found = self.replyre.search(what)
    if not found:
        return None

    seed_text = found.group(2)
    if not seed_text:
        return self._reply()

    return self._reply_to_line(seed_text)
|
||||
|
||||
def _learn_line(self, line):
    """Record every word transition of the line in the brain.

    The line is split on whitespace and lowercased word by word. Each
    word is filed under the pair of words that came before it, beginning
    with the two start keywords, and the stop keyword is filed after the
    last word.
    """

    # everything that follows the head of the chain, in order, stop included
    followers = [word.lower() for word in line.split()]
    followers.append(self.stop)

    pair = (self.start1, self.start2)
    for follower in followers:
        self.brain.setdefault(pair, []).append(follower)
        # slide the two-word window along by one
        pair = (pair[1], follower)
|
||||
|
||||
def _reply(self, size=25):
    """Build a random sentence by walking the chains from the start keywords.

    size caps the number of generated words and must be greater than 3,
    otherwise an Exception is raised. The walk ends early when the stop
    keyword is drawn. Returns the words joined by single spaces.
    """

    # if the limit is too low, there's nothing to do
    if size <= 3:
        raise Exception("size is too small: %d" % size)

    stop = self.stop
    chain = [self.start1, self.start2]
    # the two seed keywords sit in the chain too, hence the extra 2
    max_length = size + 2

    # keep drawing a random follower of the last two words
    while len(chain) < max_length and chain[-1] != stop:
        candidates = self.brain[(chain[-2], chain[-1])]
        chain.append(random.choice(candidates))

    # strip the seed keywords, and the stop keyword if the walk reached it
    generated = chain[2:]
    if generated[-1] == stop:
        generated.pop()

    return ' '.join(generated)
|
||||
|
||||
def _reply_to_line(self, line, size=25):
    """Reply to a line, using some text in the line as a point in the chain.

    The chain is walked from the start keywords. A target word is picked at
    random from the line; whenever that target is a known follower of the
    last two words of the chain it is used as the next word and a fresh
    target is picked, otherwise a random follower is used instead.

    line is the text to reply to; it is split on whitespace and lowercased.
    size caps the number of generated words and must be greater than 3,
    otherwise an Exception is raised. Returns the generated words joined by
    single spaces.
    """

    # if the limit is too low, there's nothing to do
    if size <= 3:
        raise Exception("size is too small: %d" % size)

    # lowercase the candidates, because the brain only stores lowercased
    # words and a capitalised target could otherwise never be found
    words = [word.lower() for word in line.split()]

    # get a random word from the input. an empty or whitespace-only line has
    # no words to aim for; None is never a follower, so the walk is then
    # simply random
    target_word = random.choice(words) if words else None

    # start with an empty chain, and work from there
    gen_words = [self.start1, self.start2]

    # walk a chain, randomly, building the list of words
    while len(gen_words) < size + 2 and gen_words[-1] != self.stop:
        followers = self.brain[(gen_words[-2], gen_words[-1])]
        if target_word in followers:
            # use the chain that includes the target word, then aim for a
            # new word from the input
            gen_words.append(target_word)
            target_word = random.choice(words)
        else:
            gen_words.append(random.choice(followers))

    # chop off the seed data at the start
    gen_words = gen_words[2:]

    # chop off the end text, if it was the keyword indicating an end of chain
    if gen_words[-1] == self.stop:
        gen_words = gen_words[:-1]

    return ' '.join(gen_words)
|
||||
|
||||
# vi:tabstop=4:expandtab:autoindent
|
||||
# kate: indent-mode python;indent-width 4;replace-tabs on;
|
Loading…
Reference in New Issue