# Provenance: this module was introduced by commit
# 8dd223f7782c8e01f9f868dda120b0ebfe5905ae
# (Brian S. Stephan, Tue, 18 Jan 2011 22:30:59 -0600), as modules/Markov.py:
#
#   Markov: a module to implement a chatterbot via markov chains.
#
#   yeah, we have MegaHAL, but i can't find a good implementation in python
#   that actually works and is stable, so we'll implement a simple thing
#   ourselves. works pretty much like MegaHAL does, but without the string
#   corruption.
#
#   original code provided by ape, care of mike bloy

"""
Markov - Chatterbot via Markov chains for IRC
Copyright (C) 2010 Brian S. Stephan

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import cPickle
import os
import random
import re
import sys

from extlib import irclib

from Module import Module

class Markov(Module):

    """
    Create a chatterbot very similar to a MegaHAL, but simpler and
    implemented in pure Python. Proof of concept code from Ape.

    The "brain" is a dict mapping a pair of consecutive words to the list
    of words that have been seen following that pair. Sentences start at
    the (start1, start2) pair and end with the stop keyword.

    Ape wrote: based on this:
    http://uswaretech.com/blog/2009/06/pseudo-random-text-markov-chains-python/
    and this:
    http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/
    """

    def __init__(self, irc, config, server):
        """Create the Markov chainer, and load a pickled brain if available."""

        Module.__init__(self, irc, config, server)

        self.brain_filename = 'dr.botzo.markov'

        # set up some keywords for use in the chains --- don't change these
        # once you've created a brain
        self.start1 = '__start1'
        self.start2 = '__start2'
        self.stop = '__stop'

        # set up regexes, for replying to specific stuff
        trainpattern = r'!markov\s+train\s+(.*)$'
        learnpattern = r'!markov\s+learn\s+(.*)$'
        replypattern = r'!markov\s+reply(\s+(.*)$|$)'

        self.trainre = re.compile(trainpattern)
        self.learnre = re.compile(learnpattern)
        self.replyre = re.compile(replypattern)

        # NOTE: unpickling can execute arbitrary code, so the brain file must
        # only ever be one written by save(). Text mode is kept on purpose so
        # brains written by earlier versions of this module stay readable.
        try:
            with open(self.brain_filename, 'r') as brainfile:
                self.brain = cPickle.load(brainfile)
        except IOError:
            # no brain on disk yet, start from scratch
            self.brain = {}

        # the generators always begin at the start pair, so it has to exist;
        # seed it only when missing so reloading never piles up extra stops
        start_key = (self.start1, self.start2)
        if start_key not in self.brain:
            self.brain[start_key] = [self.stop]

    def register_handlers(self, server):
        """Handle pubmsg/privmsg, to learn and/or reply to IRC events.

        The handlers are attached to self.server; the server argument is
        accepted for interface compatibility and is not used here.
        """

        self.server.add_global_handler('pubmsg', self.learn_from_irc_event)
        self.server.add_global_handler('privmsg', self.learn_from_irc_event)

    def unregister_handlers(self):
        """Remove the handlers installed by register_handlers."""

        self.server.remove_global_handler('pubmsg', self.learn_from_irc_event)
        self.server.remove_global_handler('privmsg', self.learn_from_irc_event)

    def save(self):
        """Pickle the brain upon save."""

        with open(self.brain_filename, 'w') as brainfile:
            cPickle.dump(self.brain, brainfile)

    def learn_from_irc_event(self, connection, event):
        """Learn from IRC events."""

        what = ''.join(event.arguments()[0])

        # don't learn from commands
        if self.trainre.search(what) or self.learnre.search(what) or self.replyre.search(what):
            return

        self._learn_line(what)

    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle commands and inputs.

        Returns the text to say in response, or None when the line is
        neither a !markov command nor a mention of the bot's nickname.
        """

        if self.trainre.search(what):
            return self.markov_train(connection, event, nick, userhost, what, admin_unlocked)
        elif self.learnre.search(what):
            return self.markov_learn(connection, event, nick, userhost, what, admin_unlocked)
        elif self.replyre.search(what):
            return self.markov_reply(connection, event, nick, userhost, what, admin_unlocked)

        # not a command, so see if i'm being mentioned. the nickname is
        # escaped because IRC nicks may contain regex metacharacters
        # such as [ ] \ ^ { } |
        nick_pattern = re.escape(connection.get_nickname())
        if re.search(nick_pattern, what, re.IGNORECASE) is None:
            return None

        addressed = re.match('^' + nick_pattern + r'[:,]\s+(.*)', what)
        if addressed:
            # i was addressed directly, so respond, addressing the speaker
            return '{0:s}: {1:s}'.format(nick, self._reply_to_line(addressed.group(1)))

        # i wasn't addressed directly, so just respond
        return '{0:s}'.format(self._reply_to_line(what))

    def markov_train(self, connection, event, nick, userhost, what, admin_unlocked):
        """Learn lines from a file. Good for initializing a brain.

        Only acts when admin_unlocked is true; otherwise returns None.
        Returns a status message naming the file on success or failure.
        """

        match = self.trainre.search(what)
        if match and admin_unlocked:
            filename = match.group(1)

            try:
                with open(filename, 'r') as trainfile:
                    for line in trainfile:
                        self._learn_line(line)
            except IOError:
                return 'No such file \'{0:s}\'.'.format(filename)

            return 'Learned from \'{0:s}\'.'.format(filename)

    def markov_learn(self, connection, event, nick, userhost, what, admin_unlocked):
        """Learn one line, as provided to the command."""

        match = self.learnre.search(what)
        if match:
            line = match.group(1)
            self._learn_line(line)

    def markov_reply(self, connection, event, nick, userhost, what, admin_unlocked):
        """Generate a reply to one line, without learning it."""

        match = self.replyre.search(what)
        if match:
            if match.group(2):
                line = match.group(2)
                return self._reply_to_line(line)
            else:
                return self._reply()

    def _learn_line(self, line):
        """Create Markov chains from the provided line.

        Words are stored lower-cased. An empty line only records that a
        sentence may end immediately after the start pair.
        """

        # set up the head of the chain
        w1 = self.start1
        w2 = self.start2

        # for each word pair, add the next word to the dictionary
        for word in line.split():
            word = word.lower()
            self.brain.setdefault((w1, w2), []).append(word)
            w1, w2 = w2, word

        # cap the end of the chain
        self.brain.setdefault((w1, w2), []).append(self.stop)

    def _reply(self, size=25):
        """Generate a totally random string from the chains, of specified limit of words.

        Raises ValueError if size is 3 or less.
        """

        return self._walk_chain(size)

    def _reply_to_line(self, line, size=25):
        """Reply to a line, using some text in the line as a point in the chain.

        The words of the line are lower-cased before use, since that is how
        _learn_line stores them. A line without any words produces the same
        kind of output as _reply. Raises ValueError if size is 3 or less.
        """

        return self._walk_chain(size, line.lower().split())

    def _walk_chain(self, size, words=None):
        """Walk the brain from the start pair and return the generated text.

        size is the maximum number of words to generate. words, if given and
        non-empty, is a list of preferred words: whenever the current
        preferred word is a valid next step it is taken, and a new preferred
        word is drawn at random from the list.
        """

        # if the limit is too low, there's nothing to do
        if size <= 3:
            raise ValueError("size is too small: %d" % size)

        target_word = random.choice(words) if words else None

        # start with an empty chain, and work from there
        gen_words = [self.start1, self.start2]

        # walk a chain, randomly, building the list of words
        while len(gen_words) < size + 2 and gen_words[-1] != self.stop:
            candidates = self.brain.get((gen_words[-2], gen_words[-1]))
            if not candidates:
                # dead end (should not happen with a consistent brain)
                break

            if target_word is not None and target_word in candidates:
                # use the chain that includes the target word, then pick
                # a new word to aim for
                gen_words.append(target_word)
                target_word = random.choice(words)
            else:
                gen_words.append(random.choice(candidates))

        # chop off the seed data at the start
        gen_words = gen_words[2:]

        # chop off the end text, if it was the keyword indicating an end of chain
        if gen_words and gen_words[-1] == self.stop:
            gen_words = gen_words[:-1]

        return ' '.join(gen_words)

# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;