2011-01-18 22:30:59 -06:00
|
|
|
"""
|
|
|
|
Markov - Chatterbot via Markov chains for IRC
|
|
|
|
Copyright (C) 2010 Brian S. Stephan
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import cPickle
|
2011-05-01 10:31:20 -05:00
|
|
|
from datetime import datetime
|
2011-01-18 22:30:59 -06:00
|
|
|
import os
|
|
|
|
import random
|
|
|
|
import re
|
2011-02-24 20:39:32 -06:00
|
|
|
import sqlite3
|
2011-01-18 22:30:59 -06:00
|
|
|
import sys
|
|
|
|
|
2011-05-01 10:31:20 -05:00
|
|
|
from dateutil.parser import *
|
|
|
|
from dateutil.relativedelta import *
|
2011-01-18 22:30:59 -06:00
|
|
|
from extlib import irclib
|
|
|
|
|
|
|
|
from Module import Module
|
|
|
|
|
|
|
|
class Markov(Module):

    """
    Create a chatterbot very similar to a MegaHAL, but simpler and
    implemented in pure Python. Proof of concept code from Ape.

    Chains are kept in the sqlite table markov_chain as rows mapping a
    pair of consecutive words (k1, k2) to the word that followed them (v),
    optionally tagged with the context (the IRC target) they were learned in.

    Ape wrote: based on this:
    http://uswaretech.com/blog/2009/06/pseudo-random-text-markov-chains-python/
    and this:
    http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/
    """
|
2011-01-20 14:15:10 -06:00
|
|
|
|
2011-04-30 15:43:59 -05:00
|
|
|
def timer_interval(self):
    """Return the timer interval used by this module.

    The value is 30 -- presumably seconds between timer_do() calls, which
    would match the 30-second window of the shut-up checks (TODO confirm
    against the Module base class).
    """

    interval = 30
    return interval
|
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
def __init__(self, irc, config, server):
    """Set up chain sentinels, command regexes and chatter-tracking state.

    Arguments are passed through unchanged to Module.__init__, which is
    called last, after this module's own attributes exist.
    """

    # set up some keywords for use in the chains --- don't change these
    # once you've created a brain
    self.start1 = '__start1'
    self.start2 = '__start2'
    self.stop = '__stop'

    # set up regexes, for replying to specific stuff; raw strings keep
    # \s and \d from being treated as (invalid) string escapes
    trainpattern = r'^!markov\s+train\s+(.*)$'
    learnpattern = r'^!markov\s+learn\s+(.*)$'
    replypattern = r'^!markov\s+reply(\s+min=(\d+))?(\s+max=(\d+))?(\s+(.*)$|$)'

    self.trainre = re.compile(trainpattern)
    self.learnre = re.compile(learnpattern)
    self.replyre = re.compile(replypattern)

    # True while the bot is silencing itself (see _do_shut_up_checks)
    self.shut_up = False
    # (nick, datetime) pairs for lines seen; '.self.said.' marks our own
    self.lines_seen = []

    Module.__init__(self, irc, config, server)
|
|
|
|
|
|
|
|
def db_init(self):
    """Create the markov chain tables, or upgrade an older schema.

    The currently registered schema version is looked up with
    db_module_registered(); every migration newer than that version is
    then applied in order (1 through 6). Each migration commits on
    success. On sqlite3.Error it rolls back, prints the error and
    re-raises.
    """

    version = self.db_module_registered(self.__class__.__name__)

    # version 1: the chain table itself
    if version is None:
        db = self.get_db()
        try:
            db.execute('''
                CREATE TABLE markov_chain (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    k1 TEXT NOT NULL,
                    k2 TEXT NOT NULL,
                    v TEXT NOT NULL
                )''')
            sql = 'INSERT INTO drbotzo_modules VALUES (?,?)'
            db.execute(sql, (self.__class__.__name__, 1))
            db.commit()
            version = 1

            # _learn_line requires a target; the old one-argument call
            # raised TypeError right after the table was created
            self._learn_line('', None)
        except sqlite3.Error as e:
            db.rollback()
            print("sqlite error: " + str(e))
            raise

    # version 2: per-row context plus the context lookup tables
    if version < 2:
        db = self.get_db()
        try:
            db.execute('''
                ALTER TABLE markov_chain
                ADD COLUMN context TEXT DEFAULT NULL''')
            db.execute('''
                CREATE TABLE markov_context (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    context TEXT NOT NULL
                )''')
            db.execute('''
                CREATE TABLE markov_target_to_context_map (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    target TEXT NOT NULL,
                    context_id INTEGER NOT NULL,
                    FOREIGN KEY(context_id) REFERENCES markov_context(id)
                )''')
            db.execute('UPDATE drbotzo_modules SET version = ? WHERE module = ?',
                       (2, self.__class__.__name__))
            db.commit()
            version = 2
        except sqlite3.Error as e:
            db.rollback()
            print('sqlite error: ' + str(e))
            raise

    # version 3: index on the key pair
    if version < 3:
        db = self.get_db()
        try:
            db.execute('''
                CREATE INDEX markov_chain_keys_index
                ON markov_chain (k1, k2)''')
            db.execute('UPDATE drbotzo_modules SET version = ? WHERE module = ?',
                       (3, self.__class__.__name__))
            db.commit()
            version = 3
        except sqlite3.Error as e:
            db.rollback()
            print('sqlite error: ' + str(e))
            raise

    # version 4: no schema change, only the version number is bumped
    if version < 4:
        db = self.get_db()
        try:
            db.execute('UPDATE drbotzo_modules SET version = ? WHERE module = ?',
                       (4, self.__class__.__name__))
            db.commit()
            version = 4
        except sqlite3.Error as e:
            db.rollback()
            print('sqlite error: ' + str(e))
            raise

    # version 5: table of chatter targets; from here on the version is
    # recorded through db_register_module_version() after the commit
    if version < 5:
        db = self.get_db()
        try:
            version = 5
            db.execute('''
                CREATE TABLE markov_chatter_target (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    target TEXT NOT NULL
                )''')
            db.commit()
            self.db_register_module_version(self.__class__.__name__, version)
        except sqlite3.Error as e:
            db.rollback()
            print('sqlite error: ' + str(e))
            raise

    # version 6: index covering the key pair and the context
    if version < 6:
        db = self.get_db()
        try:
            version = 6
            db.execute('''
                CREATE INDEX markov_chain_keys_and_context_index
                ON markov_chain (k1, k2, context)''')
            db.commit()
            self.db_register_module_version(self.__class__.__name__, version)
        except sqlite3.Error as e:
            db.rollback()
            print('sqlite error: ' + str(e))
            raise
|
2011-01-18 22:30:59 -06:00
|
|
|
|
rewrite recursion/alias code for the 500th time.
more of a moving of the code, actually, it now exists in (an overridden)
_handle_event, so that recursions happen against irc events directly,
rather than an already partially interpreted object.
with this change, modules don't need to implement do() nor do we have a
need for the internal_bus, which was doing an additional walk of the
modules after the irc event was already handled and turned into text. now
the core event handler does the recursion scans.
to support this, we bring back the old replypath trick and use it again,
so we know when to send a privmsg reply and when to return text so that
it may be chained in recursion. this feels old hat by now, but if you
haven't been following along, you should really look at the diff.
that's the meat of the change. the rest is updating modules to use
self.reply() and reimplementing (un)register_handlers where appropriate
2011-02-17 01:08:45 -06:00
|
|
|
def register_handlers(self):
    """Attach the reply and learning callbacks to pubmsg and privmsg.

    The reply handler (on_pub_or_privmsg) is registered with this
    module's priority(); the learning handler is registered without an
    explicit priority.
    """

    message_events = ('pubmsg', 'privmsg')

    for event_type in message_events:
        self.server.add_global_handler(event_type, self.on_pub_or_privmsg, self.priority())

    for event_type in message_events:
        self.server.add_global_handler(event_type, self.learn_from_irc_event)
|
|
|
|
|
|
|
|
def unregister_handlers(self):
    """Detach the callbacks that register_handlers() attached.

    Removal happens in the same order as registration: the reply handler
    for pubmsg and privmsg first, then the learning handler for both.
    """

    message_events = ('pubmsg', 'privmsg')

    for callback in (self.on_pub_or_privmsg, self.learn_from_irc_event):
        for event_type in message_events:
            self.server.remove_global_handler(event_type, callback)
|
|
|
|
|
|
|
|
def learn_from_irc_event(self, connection, event):
    """Learn from IRC events.

    Strips a leading "<our nick>: " or "<our nick>, " address from the
    text, records that a line was seen (for the shut-up checks), and
    feeds the line to _learn_line() with the event target as context.
    Lines that are !markov train/learn/reply commands are not learned.
    """

    what = ''.join(event.arguments()[0])
    my_nick = connection.get_nickname()
    # IRC nicks may contain regex metacharacters ([ ] \ ^ { } |), so the
    # nick has to be escaped before it is spliced into a pattern
    what = re.sub('^' + re.escape(my_nick) + r'[:,]\s+', '', what)
    target = event.target()
    nick = irclib.nm_to_n(event.source())

    self.lines_seen.append((nick, datetime.now()))
    # kept so _do_shut_up_checks() has a connection to send messages on
    self.connection = connection

    # don't learn from commands
    if self.trainre.search(what) or self.learnre.search(what) or self.replyre.search(what):
        return

    self._learn_line(what, target)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
|
|
|
def do(self, connection, event, nick, userhost, what, admin_unlocked):
    """Handle commands and inputs.

    !markov train / learn / reply commands are dispatched to their
    handlers and the result is sent through self.reply(). Any other text
    that mentions the bot's nick gets a generated line in response,
    prefixed with the speaker's nick when the bot was addressed directly
    ("<nick>: ..." or "<nick>, ..."). Nothing is generated while
    self.shut_up is set; in that case, and when the nick is not
    mentioned, None is returned.
    """

    target = event.target()

    if self.trainre.search(what):
        return self.reply(connection, event, self.markov_train(connection, event, nick, userhost, what, admin_unlocked))
    elif self.learnre.search(what):
        return self.reply(connection, event, self.markov_learn(connection, event, nick, userhost, what, admin_unlocked))
    elif self.replyre.search(what) and not self.shut_up:
        return self.reply(connection, event, self.markov_reply(connection, event, nick, userhost, what, admin_unlocked))

    if self.shut_up:
        return None

    # not a command, so see if i'm being mentioned. the nick is escaped
    # because IRC nicks may contain regex metacharacters ([ ] \ ^ { } |)
    my_nick = re.escape(connection.get_nickname())
    if re.search(my_nick, what, re.IGNORECASE) is None:
        return None

    addressed = re.match('^' + my_nick + r'[:,]\s+(.*)', what)
    self.lines_seen.append(('.self.said.', datetime.now()))

    if addressed:
        # i was addressed directly, so respond, addressing the speaker
        return self.reply(connection, event, '{0:s}: {1:s}'.format(nick, self._generate_line(target, line=addressed.group(1))))

    # i wasn't addressed directly, so just respond
    return self.reply(connection, event, '{0:s}'.format(self._generate_line(target, line=what)))
|
2011-01-18 22:30:59 -06:00
|
|
|
|
|
|
|
def markov_train(self, connection, event, nick, userhost, what, admin_unlocked):
    """Learn lines from a file. Good for initializing a brain.

    Only acts when `what` is a "!markov train <filename>" command and
    admin_unlocked is true; otherwise returns None. Returns a status
    string naming the file on success, or a "No such file" string when an
    IOError occurs.
    """

    match = self.trainre.search(what)
    if not (match and admin_unlocked):
        return None

    filename = match.group(1)

    try:
        # 'with' closes the file even if learning a line fails
        with open(filename, 'r') as training_file:
            for line in training_file:
                # lines from a file have no IRC target; a None context is
                # stored as NULL, which chain lookups match for any target
                self._learn_line(line, None)
    except IOError:
        return 'No such file \'{0:s}\'.'.format(filename)

    return 'Learned from \'{0:s}\'.'.format(filename)
|
|
|
|
|
|
|
|
def markov_learn(self, connection, event, nick, userhost, what, admin_unlocked):
    """Learn the single line given to a "!markov learn <text>" command.

    The text is learned with the event target as its context. Returns the
    learned text (for weird chaining purposes), or None when `what` is
    not a learn command.
    """

    where = event.target()
    found = self.learnre.search(what)
    if not found:
        return None

    text = found.group(1)
    self._learn_line(text, where)

    # hand back what was learned so callers can chain on it
    return text
|
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
def markov_reply(self, connection, event, nick, userhost, what, admin_unlocked):
    """Generate a reply for a "!markov reply" command, without learning.

    The command may carry "min=N" and "max=N" options (defaults 15 and
    100) followed by optional seed text. Records that the bot spoke and
    returns the generated line, or None when `what` is not a reply
    command.
    """

    where = event.target()
    parsed = self.replyre.search(what)
    if not parsed:
        return None

    min_size = int(parsed.group(2)) if parsed.group(2) else 15
    max_size = int(parsed.group(4)) if parsed.group(4) else 100

    # group 5 is the empty string when no seed text followed the options
    seed = parsed.group(6) if parsed.group(5) != '' else ''

    self.lines_seen.append(('.self.said.', datetime.now()))
    return self._generate_line(where, line=seed, min_size=min_size, max_size=max_size)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2011-04-30 15:43:59 -05:00
|
|
|
def timer_do(self):
    """Run the periodic work: currently only the shut-up checks.

    Presumably invoked by the Module timer every timer_interval() --
    TODO confirm against the base class.
    """

    run_checks = self._do_shut_up_checks
    run_checks()
|
|
|
|
|
|
|
|
def _do_shut_up_checks(self):
    """Check to see if we've been talking too much, and shut up if so.

    Resets self.shut_up, then looks at the lines seen during the last 30
    seconds: if there were at least 15 and at least 8 of them were our
    own ('.self.said.'), self.shut_up is set and every chatter target is
    told so. Entries that have aged out of the window are dropped from
    the front of self.lines_seen so the list does not grow forever.
    """

    self.shut_up = False

    now = datetime.now()
    last_30_sec_lines = []
    stale_prefix = 0

    # iterate over a copy; lines may be appended while the timer runs
    for (nick, then) in list(self.lines_seen):
        age = now - then
        # a timedelta is normalised, so days == 0 means 0 <= age < 1 day;
        # entries stamped in the future (days < 0) are ignored
        if age.days == 0 and age.seconds < 30:
            last_30_sec_lines.append((nick, then))
        elif not last_30_sec_lines:
            stale_prefix += 1

    # lines are appended in time order, so expired ones sit at the front;
    # deleting only that prefix leaves any freshly appended line alone
    del self.lines_seen[:stale_prefix]

    if len(last_30_sec_lines) >= 15:
        lines_i_said = len([who for (who, _) in last_30_sec_lines if who == '.self.said.'])
        if lines_i_said >= 8:
            self.shut_up = True
            targets = self._get_chatter_targets()
            for t in targets:
                self.sendmsg(self.connection, t, 'shutting up for 30 seconds due to last 30 seconds of activity')
|
2011-04-30 15:43:59 -05:00
|
|
|
|
2011-06-15 12:29:18 -05:00
|
|
|
def _learn_line(self, line, target=None):
    """Create Markov chains from the provided line.

    The line is split on whitespace and stored in markov_chain as
    overlapping (k1, k2) -> v rows, beginning with the start sentinels
    and ending with the stop sentinel. Keys and words are lowercased.

    line -- text to learn; byte strings are decoded as UTF-8
    target -- context stored with every row (the IRC target). Defaults to
        None, which is stored as NULL, because some callers have no
        target to give.

    Returns `line` unchanged when it contains no words, otherwise None.
    Rolls back, prints and re-raises on sqlite3.Error.
    """

    def to_key(value):
        # only byte strings need decoding; calling .decode() on text that
        # is already unicode fails for non-ASCII input
        if isinstance(value, bytes):
            value = value.decode('utf-8', 'replace')
        return value.lower()

    context = target

    words = line.split()
    if not words:
        return line

    # set up the head of the chain, followed by every word of the line
    keys = [to_key(self.start1), to_key(self.start2)]
    keys.extend(to_key(word) for word in words)

    # each row maps two consecutive keys to what came next; the final
    # row maps the last two words to the stop sentinel
    followers = keys[2:] + [self.stop]
    rows = [(k1, k2, v, context) for (k1, k2, v) in zip(keys, keys[1:], followers)]

    # fetched outside the try so the except clause can always roll back
    db = self.get_db()
    try:
        cur = db.cursor()
        statement = 'INSERT INTO markov_chain (k1, k2, v, context) VALUES (?, ?, ?, ?)'
        cur.executemany(statement, rows)
        db.commit()
    except sqlite3.Error as e:
        db.rollback()
        print("sqlite error: " + str(e))
        raise
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2011-06-15 12:29:18 -05:00
|
|
|
def _generate_line(self, target, line='', min_size=15, max_size=100):
    """Reply to a line, using some text in the line as a point in the chain.

    target -- context (IRC target) used when looking up chains
    line -- optional text whose words steer the walk; a word from it is
        used whenever it is one of the candidates for the next step
    min_size -- while the chain (including the two start sentinels) is
        shorter than this, the stop sentinel is avoided if any other
        candidate exists; must not exceed 20
    max_size -- upper bound on the number of generated words; must be
        greater than 3

    Returns the generated words joined by spaces, encoded as UTF-8.
    Raises ValueError (an Exception subclass) for out-of-range sizes.
    """

    # if the limit is too low, there's nothing to do
    if max_size <= 3:
        raise ValueError("max_size is too small: %d" % max_size)

    # if the min is too large, abort
    if min_size > 20:
        raise ValueError("min_size is too large: %d" % min_size)

    def to_key(word):
        # chain values are stored lowercased (see _learn_line), so seed
        # words must be normalised the same way or they never match
        if isinstance(word, bytes):
            word = word.decode('utf-8', 'replace')
        return word.lower()

    # get a random word from the input; a blank or whitespace-only line
    # has no words and therefore gives no target word
    words = [to_key(word) for word in line.split()]
    target_word = random.choice(words) if words else ''

    context = target

    # start with an empty chain, and work from there
    gen_words = [self.start1, self.start2]

    # walk a chain, randomly, building the list of words
    while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
        key_hits = self._retrieve_chains_for_key(gen_words[-2], gen_words[-1], context)

        # use the chain that includes the target word, if it is found
        if target_word != '' and target_word in key_hits:
            gen_words.append(target_word)
            # generate new word
            target_word = random.choice(words)
            continue

        non_stop_hits = [hit for hit in key_hits if hit != self.stop]
        if len(gen_words) < min_size and non_stop_hits:
            # too short to stop yet, so avoid the stop sentinel
            gen_words.append(random.choice(non_stop_hits))
        elif not key_hits:
            # dead end: nothing follows this key pair
            gen_words.append(self.stop)
        else:
            gen_words.append(random.choice(key_hits))

    # chop off the seed data at the start
    gen_words = gen_words[2:]

    # chop off the end text, if it was the keyword indicating an end of chain
    if gen_words and gen_words[-1] == self.stop:
        gen_words = gen_words[:-1]

    return ' '.join(gen_words).encode('utf-8', 'ignore')
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2011-06-14 22:10:57 -05:00
|
|
|
def _retrieve_chains_for_key(self, k1, k2, context):
    """Get the value(s) for a given key (a pair of strings).

    Only rows whose context equals `context` or is NULL are considered.
    For the start-of-sentence key (start1, start2) at most one value is
    returned, picked quasi-randomly; for any other key all matching
    values are returned. Returns a list, possibly empty. Prints and
    re-raises on sqlite3.Error.
    """

    base_query = 'SELECT v FROM markov_chain WHERE k1 = ? AND k2 = ? AND (context = ? OR context IS NULL)'

    try:
        db = self.get_db()

        if k1 == self.start1 and k2 == self.start2:
            # hack. get a quasi-random start from the database, in
            # a faster fashion than selecting all starts
            max_id = self._get_max_chain_id()
            if max_id is None:
                # empty brain: there is nothing to start a sentence with
                return []

            rand_id = random.randint(1, max_id)
            cursor = db.execute(base_query + ' AND id >= ? LIMIT 1',
                                (k1, k2, context, rand_id))
            results = cursor.fetchall()

            if not results:
                # the random id landed past the last usable start row;
                # look below it instead of reporting no start at all
                cursor = db.execute(base_query + ' AND id < ? LIMIT 1',
                                    (k1, k2, context, rand_id))
                results = cursor.fetchall()
        else:
            cursor = db.execute(base_query, (k1, k2, context))
            results = cursor.fetchall()

        # rows are read by column name, so the connection is expected to
        # use sqlite3.Row (or a compatible row factory)
        return [result['v'] for result in results]
    except sqlite3.Error as e:
        print('sqlite error: ' + str(e))
        raise
|
|
|
|
|
2011-05-01 09:47:45 -05:00
|
|
|
def _get_chatter_targets(self):
    """Get all possible chatter targets.

    Reads every `target` from markov_chatter_target over a private
    connection to the database file named by the 'database' option in
    the 'dr.botzo' config section. Returns a list of targets. Prints and
    re-raises on sqlite3.Error.
    """

    # need to create our own db object, since this is likely going to be in a new thread
    dbfile = self.config.get('dr.botzo', 'database')

    db = None
    try:
        db = sqlite3.connect(dbfile)
        db.row_factory = sqlite3.Row

        query = 'SELECT target FROM markov_chatter_target'
        cursor = db.execute(query)
        return [result['target'] for result in cursor.fetchall()]
    except sqlite3.Error as e:
        print('sqlite error: ' + str(e))
        raise
    finally:
        # this connection is private to the call; without closing it,
        # every call would leave another open handle on the database
        if db is not None:
            db.close()
|
|
|
|
|
|
|
|
def _get_one_chatter_target(self):
    """Pick one chatter target at random.

    Returns None when there are no chatter targets.
    """

    candidates = self._get_chatter_targets()
    if not candidates:
        return None

    index = random.randint(0, len(candidates) - 1)
    return candidates[index]
|
|
|
|
|
Markov: when looking up the start-of-sentence chain, get one random one
when finding a key for (__start1,__start2), instead of fetching all
(which can be a lot, in chatty channels and/or over time), get the
max ID in the table, pick a random ID between 1,max, and pick the
first id >= to it, and use that. just as random, nowhere near as
intensive.
2011-04-23 21:24:23 -05:00
|
|
|
def _get_max_chain_id(self):
    """Return the largest id in markov_chain, or None if it has no rows.

    Prints and re-raises on sqlite3.Error.
    """

    try:
        conn = self.get_db()
        newest = conn.execute('''
            SELECT id FROM markov_chain ORDER BY id DESC LIMIT 1
            ''').fetchone()
    except sqlite3.Error as err:
        print('sqlite error: ' + str(err))
        raise

    # fetchone() gives None when the table is empty
    return newest['id'] if newest else None
|
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
# vi:tabstop=4:expandtab:autoindent
|
|
|
|
# kate: indent-mode python;indent-width 4;replace-tabs on;
|