2011-01-18 22:30:59 -06:00
|
|
|
"""
|
|
|
|
Markov - Chatterbot via Markov chains for IRC
|
|
|
|
Copyright (C) 2010 Brian S. Stephan
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2013-02-07 23:51:41 -06:00
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
"""
|
|
|
|
|
2011-05-01 10:31:20 -05:00
|
|
|
from datetime import datetime
|
2011-01-18 22:30:59 -06:00
|
|
|
import random
|
|
|
|
import re
|
2011-06-20 21:18:55 -05:00
|
|
|
import thread
|
|
|
|
import time
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2013-02-07 23:51:41 -06:00
|
|
|
from dateutil.relativedelta import relativedelta
|
2012-07-27 02:18:01 -05:00
|
|
|
import MySQLdb as mdb
|
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
from extlib import irclib
|
|
|
|
|
|
|
|
from Module import Module
|
|
|
|
|
|
|
|
class Markov(Module):
|
|
|
|
|
2013-02-07 23:51:41 -06:00
|
|
|
"""Create a chatterbot very similar to a MegaHAL, but simpler and
|
2011-01-18 22:30:59 -06:00
|
|
|
implemented in pure Python. Proof of concept code from Ape.
|
2011-01-20 14:15:10 -06:00
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
Ape wrote: based on this:
|
|
|
|
http://uswaretech.com/blog/2009/06/pseudo-random-text-markov-chains-python/
|
|
|
|
and this:
|
|
|
|
http://code.activestate.com/recipes/194364-the-markov-chain-algorithm/
|
2013-02-07 23:51:41 -06:00
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
"""
|
2011-01-20 14:15:10 -06:00
|
|
|
|
2012-12-19 21:06:53 -06:00
|
|
|
def __init__(self, irc, config):
    """Set up the Markov chatterbot module.

    Defines the chain sentinel tokens, compiles the command regexes,
    initializes the chatter/shut-up bookkeeping, starts the background
    checker thread and registers line generation as an XML-RPC function.

    Keyword arguments:
    irc - the bot object; handed to Module.__init__ and used to register
          the XML-RPC function
    config - handed to Module.__init__

    """

    # sentinel tokens stored in the chains themselves --- don't change
    # these once you've created a brain
    self.start1 = '__start1'
    self.start2 = '__start2'
    self.stop = '__stop'

    # command patterns. raw strings, so \s and \d reach the regex engine
    # as written instead of relying on unknown-escape passthrough
    learnpattern = r'^!markov\s+learn\s+(.*)$'
    replypattern = r'^!markov\s+reply(\s+min=(\d+))?(\s+max=(\d+))?(\s+(.*)$|$)'

    self.learnre = re.compile(learnpattern)
    self.replyre = re.compile(replypattern)

    # when True, the bot does not reply (see _do_shut_up_checks)
    self.shut_up = False
    # (nick, datetime) pairs for lines seen; '.self.said.' marks our own
    self.lines_seen = []

    Module.__init__(self, irc, config)

    # 0 means "not scheduled yet" for both periodic checks
    self.next_shut_up_check = 0
    self.next_chatter_check = 0
    thread.start_new_thread(self.thread_do, ())

    irc.xmlrpc_register_function(self._generate_line,
                                 "markov_generate_line")
|
2012-04-05 21:24:41 -05:00
|
|
|
|
2011-02-24 20:39:32 -06:00
|
|
|
def db_init(self):
    """Create the markov tables and indexes, if not yet registered.

    Does nothing when db_module_registered() reports a version for this
    module. Otherwise creates the tables, commits, and registers the
    module at version 1. On a database error the transaction is rolled
    back, the error is logged, and the exception is re-raised.

    """

    version = self.db_module_registered(self.__class__.__name__)
    if version is not None:
        return

    db = self.get_db()
    # bound before the try so the finally clause never hits an unbound
    # name if creating the cursor itself fails
    cur = None
    try:
        version = 1
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute('''
            CREATE TABLE markov_chatter_target (
                id SERIAL,
                target VARCHAR(256) NOT NULL,
                chance INTEGER NOT NULL DEFAULT 99999
            ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
            ''')
        cur.execute('''
            CREATE TABLE markov_context (
                id SERIAL,
                context VARCHAR(256) NOT NULL
            ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
            ''')
        cur.execute('''
            CREATE TABLE markov_target_to_context_map (
                id SERIAL,
                target VARCHAR(256) NOT NULL,
                context_id BIGINT(20) UNSIGNED NOT NULL,
                FOREIGN KEY(context_id) REFERENCES markov_context(id)
            ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
            ''')
        cur.execute('''
            CREATE TABLE markov_chain (
                id SERIAL,
                k1 VARCHAR(128) NOT NULL,
                k2 VARCHAR(128) NOT NULL,
                v VARCHAR(128) NOT NULL,
                context_id BIGINT(20) UNSIGNED NOT NULL,
                FOREIGN KEY(context_id) REFERENCES markov_context(id)
            ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
            ''')
        cur.execute('''
            CREATE INDEX markov_chain_keys_and_context_id_index
            ON markov_chain (k1, k2, context_id)''')

        cur.execute('''
            CREATE INDEX markov_chain_value_and_context_id_index
            ON markov_chain (v, context_id)''')

        db.commit()
        self.db_register_module_version(self.__class__.__name__,
                                        version)
    except mdb.Error as e:
        db.rollback()
        self.log.error("database error trying to create tables")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
2011-01-18 22:30:59 -06:00
|
|
|
|
rewrite recursion/alias code for the 500th time.
more of a moving of the code, actually, it now exists in (an overridden)
_handle_event, so that recursions happen against irc events directly,
rather than an already partially interpreted object.
with this change, modules don't need to implement do() nor do we have a
need for the internal_bus, which was doing an additional walk of the
modules after the irc event was already handled and turned into text. now
the core event handler does the recursion scans.
to support this, we bring back the old replypath trick and use it again,
so we know when to send a privmsg reply and when to return text so that
it may be chained in recursion. this feels old hat by now, but if you
haven't been following along, you should really look at the diff.
that's the meat of the change. the rest is updating modules to use
self.reply() and reimplementing (un)register_handlers where appropriate
2011-02-17 01:08:45 -06:00
|
|
|
def register_handlers(self):
    """Hook the reply and learn handlers onto pubmsg and privmsg."""

    server = self.irc.server

    # the reply path is registered with this module's priority...
    for event_type in ('pubmsg', 'privmsg'):
        server.add_global_handler(event_type, self.on_pub_or_privmsg,
                                  self.priority())

    # ...the learn path with the handler's default priority
    for event_type in ('pubmsg', 'privmsg'):
        server.add_global_handler(event_type, self.learn_from_irc_event)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
|
|
|
def unregister_handlers(self):
    """Remove the reply and learn handlers from pubmsg and privmsg."""

    server = self.irc.server

    for handler in (self.on_pub_or_privmsg, self.learn_from_irc_event):
        for event_type in ('pubmsg', 'privmsg'):
            server.remove_global_handler(event_type, handler)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
|
|
|
def learn_from_irc_event(self, connection, event):
    """Learn from a pubmsg/privmsg IRC event.

    Strips a leading "mynick: " / "mynick, " address from the text,
    records that a line was seen, and feeds the text to _learn_line
    unless it is one of the !markov commands.

    Keyword arguments:
    connection - the IRC connection; used to look up our own nick
    event - the IRC event that carries the text, source and target

    """

    what = ''.join(event.arguments()[0])

    # the nick is escaped because IRC nicks may contain regex
    # metacharacters ([ ] \ ^ { | }), which would otherwise corrupt or
    # break the pattern
    my_nick = connection.get_nickname()
    what = re.sub('^' + re.escape(my_nick) + r'[:,]\s+', '', what)

    target = event.target()
    nick = irclib.nm_to_n(event.source())

    # for non-channel messages, the speaker is the target
    if not irclib.is_channel(target):
        target = nick

    self.lines_seen.append((nick, datetime.now()))

    # don't learn from commands
    if self.learnre.search(what) or self.replyre.search(what):
        return

    self._learn_line(what, target, event)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
|
|
|
def do(self, connection, event, nick, userhost, what, admin_unlocked):
    """Handle commands and inputs.

    Dispatches "!markov learn" and "!markov reply" to their handlers.
    Otherwise, unless shut up, replies with a generated line when our
    nick is mentioned in the text; when the line starts with
    "mynick: " or "mynick, ", the reply is addressed to the speaker.

    Returns whatever self.irc.reply() returns, or None when there is
    nothing to say.

    """

    target = event.target()

    if self.learnre.search(what):
        return self.irc.reply(event, self.markov_learn(event,
                              nick, userhost, what, admin_unlocked))
    elif self.replyre.search(what) and not self.shut_up:
        return self.irc.reply(event, self.markov_reply(event,
                              nick, userhost, what, admin_unlocked))

    if self.shut_up:
        return None

    # not a command, so see if i'm being mentioned. the nick is escaped
    # because IRC nicks may contain regex metacharacters ([ ] \ ^ { | })
    my_nick = re.escape(connection.get_nickname())
    if re.search(my_nick, what, re.IGNORECASE) is None:
        return None

    addressed = re.match('^' + my_nick + r'[:,]\s+(.*)', what)
    self.lines_seen.append(('.self.said.', datetime.now()))
    if addressed:
        # i was addressed directly, so respond, addressing the speaker
        return self.irc.reply(event, '{0:s}: {1:s}'.format(nick,
            self._generate_line(target, line=addressed.group(1))))

    # i wasn't addressed directly, so just respond
    return self.irc.reply(event,
                          '{0:s}'.format(self._generate_line(target, line=what)))
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2013-02-09 15:05:44 -06:00
|
|
|
def markov_learn(self, event, nick, userhost, what, admin_unlocked):
    """Handle "!markov learn <text>": learn the given text.

    Returns the learned text (handy for chaining commands), or None
    when what is not a learn command.

    """

    # for non-channel messages, the speaker is the target
    destination = event.target()
    if not irclib.is_channel(destination):
        destination = nick

    found = self.learnre.search(what)
    if not found:
        return None

    learned = found.group(1)
    self._learn_line(learned, destination, event)

    # return what was learned, for weird chaining purposes
    return learned
|
|
|
|
|
2013-02-09 15:05:44 -06:00
|
|
|
def markov_reply(self, event, nick, userhost, what, admin_unlocked):
    """Handle "!markov reply [min=N] [max=N] [text]" without learning.

    Returns the generated line, or None when what is not a reply
    command.

    """

    # for non-channel messages, the speaker is the target
    destination = event.target()
    if not irclib.is_channel(destination):
        destination = nick

    found = self.replyre.search(what)
    if not found:
        return None

    # defaults, overridden by min=/max= when given
    options = {'min_size': 15, 'max_size': 30}
    if found.group(2):
        options['min_size'] = int(found.group(2))
    if found.group(4):
        options['max_size'] = int(found.group(4))

    # only seed the generator when text followed the command
    if found.group(5) != '':
        options['line'] = found.group(6)

    self.lines_seen.append(('.self.said.', datetime.now()))
    return self._generate_line(destination, **options)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2011-06-20 21:18:55 -05:00
|
|
|
def thread_do(self):
    """Run the periodic checks until the module is shut down.

    Roughly once per second, runs the shut-up check and then the random
    chatter check. A failing check is logged and the loop keeps going,
    so one error (e.g. a re-raised database error) does not silently end
    the background thread.

    """

    checks = (self._do_shut_up_checks, self._do_random_chatter_check)

    while not self.is_shutdown:
        for check in checks:
            try:
                check()
            except Exception:
                # top of the thread: nothing above us would handle this
                self.log.exception("error in markov background check")

        # don't delay a shutdown that was requested during the checks
        if not self.is_shutdown:
            time.sleep(1)
|
2011-05-01 10:31:20 -05:00
|
|
|
|
2011-06-20 22:49:25 -05:00
|
|
|
def _do_random_chatter_check(self):
|
|
|
|
"""Randomly say something to a channel."""
|
|
|
|
|
|
|
|
# don't immediately potentially chatter, let the bot
|
|
|
|
# join channels first
|
|
|
|
if self.next_chatter_check == 0:
|
|
|
|
self.next_chatter_check = time.time() + 600
|
|
|
|
|
|
|
|
if self.next_chatter_check < time.time():
|
|
|
|
self.next_chatter_check = time.time() + 600
|
|
|
|
|
|
|
|
targets = self._get_chatter_targets()
|
|
|
|
for t in targets:
|
2011-10-21 16:59:57 -05:00
|
|
|
if t['chance'] > 0:
|
|
|
|
a = random.randint(1, t['chance'])
|
|
|
|
if a == 1:
|
2013-02-09 15:11:38 -06:00
|
|
|
self.sendmsg(t['target'], self._generate_line(t['target']))
|
2011-06-20 22:49:25 -05:00
|
|
|
|
2011-05-01 10:31:20 -05:00
|
|
|
def _do_shut_up_checks(self):
|
|
|
|
"""Check to see if we've been talking too much, and shut up if so."""
|
|
|
|
|
2011-06-20 21:18:55 -05:00
|
|
|
if self.next_shut_up_check < time.time():
|
|
|
|
self.shut_up = False
|
|
|
|
self.next_shut_up_check = time.time() + 30
|
2011-05-01 10:31:20 -05:00
|
|
|
|
2011-06-20 21:18:55 -05:00
|
|
|
last_30_sec_lines = []
|
2011-04-30 15:43:59 -05:00
|
|
|
|
2013-02-07 23:51:41 -06:00
|
|
|
for (nick, then) in self.lines_seen:
|
2011-06-20 21:18:55 -05:00
|
|
|
rdelta = relativedelta(datetime.now(), then)
|
2012-02-28 23:23:14 -06:00
|
|
|
if (rdelta.years == 0 and rdelta.months == 0 and rdelta.days == 0 and
|
|
|
|
rdelta.hours == 0 and rdelta.minutes == 0 and rdelta.seconds <= 29):
|
2013-02-07 23:51:41 -06:00
|
|
|
last_30_sec_lines.append((nick, then))
|
2011-04-30 15:43:59 -05:00
|
|
|
|
2011-09-20 01:20:27 -05:00
|
|
|
if len(last_30_sec_lines) >= 8:
|
2013-02-07 23:51:41 -06:00
|
|
|
lines_i_said = len(filter(lambda (a, b): a == '.self.said.', last_30_sec_lines))
|
2011-06-20 21:18:55 -05:00
|
|
|
if lines_i_said >= 8:
|
|
|
|
self.shut_up = True
|
|
|
|
targets = self._get_chatter_targets()
|
|
|
|
for t in targets:
|
2013-02-09 15:11:38 -06:00
|
|
|
self.sendmsg(t['target'],
|
2012-02-28 23:23:14 -06:00
|
|
|
'shutting up for 30 seconds due to last 30 seconds of activity')
|
2011-04-30 15:43:59 -05:00
|
|
|
|
2012-03-29 20:07:32 -05:00
|
|
|
def _learn_line(self, line, target, event):
|
2011-01-18 22:30:59 -06:00
|
|
|
"""Create Markov chains from the provided line."""
|
|
|
|
|
|
|
|
# set up the head of the chain
|
2011-02-24 20:39:32 -06:00
|
|
|
k1 = self.start1
|
|
|
|
k2 = self.start2
|
|
|
|
|
2012-02-28 23:23:14 -06:00
|
|
|
context_id = self._get_context_id_for_target(target)
|
2011-04-23 16:07:32 -05:00
|
|
|
|
2012-03-29 20:07:32 -05:00
|
|
|
# don't learn recursion
|
|
|
|
if not event._recursing:
|
2011-06-16 21:25:22 -05:00
|
|
|
words = line.split()
|
2012-07-29 17:46:14 -05:00
|
|
|
if len(words) == 0:
|
2011-06-16 21:25:22 -05:00
|
|
|
return line
|
|
|
|
|
2012-07-27 02:18:01 -05:00
|
|
|
db = self.get_db()
|
2011-06-16 21:25:22 -05:00
|
|
|
try:
|
2012-07-27 02:18:01 -05:00
|
|
|
cur = db.cursor(mdb.cursors.DictCursor)
|
|
|
|
statement = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (%s, %s, %s, %s)'
|
2011-06-16 21:25:22 -05:00
|
|
|
for word in words:
|
2012-07-27 16:34:57 -05:00
|
|
|
cur.execute(statement, (k1, k2, word, context_id))
|
2011-06-16 21:25:22 -05:00
|
|
|
k1, k2 = k2, word
|
2012-07-27 16:34:57 -05:00
|
|
|
cur.execute(statement, (k1, k2, self.stop, context_id))
|
2011-06-16 21:25:22 -05:00
|
|
|
|
|
|
|
db.commit()
|
2012-07-27 02:18:01 -05:00
|
|
|
except mdb.Error as e:
|
2011-06-16 21:25:22 -05:00
|
|
|
db.rollback()
|
2012-07-27 02:18:01 -05:00
|
|
|
self.log.error("database error learning line")
|
|
|
|
self.log.exception(e)
|
2011-06-16 21:25:22 -05:00
|
|
|
raise
|
2012-07-27 02:18:01 -05:00
|
|
|
finally: cur.close()
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2013-02-09 14:44:45 -06:00
|
|
|
def _generate_line(self, target, line='', min_size=15, max_size=30):
|
2013-02-07 23:51:41 -06:00
|
|
|
"""Create a line, optionally using some text in a seed as a point in
|
|
|
|
the chain.
|
2012-04-05 21:24:41 -05:00
|
|
|
|
|
|
|
Keyword arguments:
|
|
|
|
target - the target to retrieve the context for (i.e. a channel or nick)
|
|
|
|
line - the line to reply to, by picking a random word and seeding with it
|
2012-07-14 09:22:37 -05:00
|
|
|
min_size - the minimum desired size in words. not guaranteed
|
|
|
|
max_size - the maximum desired size in words. not guaranteed
|
2013-02-07 23:51:41 -06:00
|
|
|
|
2012-04-05 21:24:41 -05:00
|
|
|
"""
|
2011-01-18 22:30:59 -06:00
|
|
|
|
|
|
|
# if the limit is too low, there's nothing to do
|
2011-01-19 18:35:01 -06:00
|
|
|
if (max_size <= 3):
|
|
|
|
raise Exception("max_size is too small: %d" % max_size)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2011-01-19 18:44:07 -06:00
|
|
|
# if the min is too large, abort
|
|
|
|
if (min_size > 20):
|
|
|
|
raise Exception("min_size is too large: %d" % min_size)
|
|
|
|
|
2012-07-29 09:39:07 -05:00
|
|
|
seed_words = []
|
|
|
|
# shuffle the words in the input
|
|
|
|
seed_words = line.split()
|
|
|
|
random.shuffle(seed_words)
|
|
|
|
self.log.debug("seed words: {0:s}".format(seed_words))
|
|
|
|
|
|
|
|
# hit to generate a new seed word immediately if possible
|
|
|
|
seed_word = None
|
|
|
|
hit_word = None
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2012-02-28 23:23:14 -06:00
|
|
|
context_id = self._get_context_id_for_target(target)
|
2011-06-14 22:10:57 -05:00
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
# start with an empty chain, and work from there
|
2011-04-23 16:27:07 -05:00
|
|
|
gen_words = [self.start1, self.start2]
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2013-02-09 14:44:45 -06:00
|
|
|
# build a response by creating multiple sentences
|
|
|
|
while len(gen_words) < max_size + 2:
|
|
|
|
# if we're past the min and on a stop, we can end
|
|
|
|
if len(gen_words) > min_size + 2:
|
|
|
|
if gen_words[-1] == self.stop:
|
|
|
|
break
|
|
|
|
|
2012-07-29 09:39:07 -05:00
|
|
|
# pick a word from the shuffled seed words, if we need a new one
|
|
|
|
if seed_word == hit_word:
|
|
|
|
if len(seed_words) > 0:
|
|
|
|
seed_word = seed_words.pop()
|
2013-02-09 14:44:45 -06:00
|
|
|
self.log.debug("picked new seed word: "
|
|
|
|
"{0:s}".format(seed_word))
|
2012-07-29 09:39:07 -05:00
|
|
|
else:
|
|
|
|
seed_word = None
|
|
|
|
self.log.debug("ran out of seed words")
|
|
|
|
|
2013-02-09 14:44:45 -06:00
|
|
|
# if we have a stop, the word before it might need to be
|
|
|
|
# made to look like a sentence end
|
|
|
|
if gen_words[-1] == self.stop:
|
|
|
|
# chop off the stop, temporarily
|
|
|
|
gen_words = gen_words[:-1]
|
|
|
|
|
|
|
|
# we should have a real word, make it look like a
|
|
|
|
# sentence end
|
|
|
|
sentence_end = gen_words[-1]
|
|
|
|
eos_punctuation = ['!', '?', ',', '.']
|
|
|
|
if sentence_end[-1] not in eos_punctuation:
|
|
|
|
random.shuffle(eos_punctuation)
|
|
|
|
gen_words[-1] = sentence_end + eos_punctuation.pop()
|
|
|
|
self.log.debug("monkeyed with end of sentence, it's "
|
|
|
|
"now: {0:s}".format(gen_words[-1]))
|
|
|
|
|
|
|
|
# put the stop back on
|
|
|
|
gen_words.append(self.stop)
|
|
|
|
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
|
|
|
|
|
|
# first, see if we should start a new sentence. if so,
|
|
|
|
# work backwards
|
|
|
|
if gen_words[-1] in (self.start2, self.stop) and seed_word is not None:
|
|
|
|
# drop a stop, since we're starting another sentence
|
|
|
|
if gen_words[-1] == self.stop:
|
|
|
|
gen_words = gen_words[:-1]
|
|
|
|
|
|
|
|
# work backwards from seed_word
|
2011-10-16 20:19:51 -05:00
|
|
|
working_backwards = []
|
2013-02-08 01:15:32 -06:00
|
|
|
back_k2 = self._retrieve_random_k2_for_value(seed_word, context_id)
|
|
|
|
if back_k2:
|
2012-07-29 09:39:07 -05:00
|
|
|
found_word = seed_word
|
2013-02-08 02:02:44 -06:00
|
|
|
if back_k2 == self.start2:
|
|
|
|
self.log.debug("random further back was start2, swallowing")
|
|
|
|
else:
|
|
|
|
working_backwards.append(back_k2)
|
2012-07-29 09:39:07 -05:00
|
|
|
working_backwards.append(found_word)
|
|
|
|
self.log.debug("started working backwards with: {0:s}".format(found_word))
|
2012-07-28 13:32:58 -05:00
|
|
|
self.log.debug("working_backwards: {0:s}".format(" ".join(working_backwards)))
|
2012-07-29 09:39:07 -05:00
|
|
|
|
|
|
|
# now work backwards until we randomly bump into a start
|
2013-02-08 00:01:22 -06:00
|
|
|
# to steer the chainer away from spending too much time on
|
|
|
|
# the weaker-context reverse chaining, we make max_size
|
|
|
|
# a non-linear distribution, making it more likely that
|
|
|
|
# some time is spent on better forward chains
|
2013-02-09 14:44:45 -06:00
|
|
|
max_back = min(random.randint(1, max_size/2) + random.randint(1, max_size/2),
|
|
|
|
max_size/4)
|
2013-02-08 00:01:22 -06:00
|
|
|
self.log.debug("max_back: {0:d}".format(max_back))
|
2013-02-08 01:15:32 -06:00
|
|
|
while len(working_backwards) < max_back:
|
|
|
|
back_k2 = self._retrieve_random_k2_for_value(working_backwards[0], context_id)
|
|
|
|
if back_k2 == self.start2:
|
|
|
|
self.log.debug("random further back was start2, finishing")
|
2012-07-29 22:36:11 -05:00
|
|
|
break
|
2013-02-08 01:15:32 -06:00
|
|
|
elif back_k2:
|
|
|
|
working_backwards.insert(0, back_k2)
|
|
|
|
self.log.debug("added '{0:s}' to working_backwards".format(back_k2))
|
|
|
|
self.log.debug("working_backwards: {0:s}".format(" ".join(working_backwards)))
|
|
|
|
else:
|
|
|
|
self.log.debug("nothing (at all!?) further back, finishing")
|
2011-10-16 21:13:27 -05:00
|
|
|
break
|
2011-10-16 20:19:51 -05:00
|
|
|
|
2013-02-08 02:13:15 -06:00
|
|
|
gen_words += working_backwards
|
|
|
|
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
2013-02-09 14:44:45 -06:00
|
|
|
hit_word = gen_words[-1]
|
2011-01-18 22:30:59 -06:00
|
|
|
else:
|
2013-02-09 14:44:45 -06:00
|
|
|
# we are working forward, with either:
|
|
|
|
# * a pair of words (normal path, filling out a sentence)
|
|
|
|
# * start1, start2 (completely new chain, no seed words)
|
|
|
|
# * stop (new sentence in existing chain, no seed words)
|
|
|
|
self.log.debug("working forwards")
|
|
|
|
forw_v = None
|
|
|
|
if gen_words[-1] in (self.start2, self.stop):
|
|
|
|
# case 2 or 3 above, need to work forward on a beginning
|
|
|
|
# of a sentence (this is slow)
|
2012-07-29 09:39:07 -05:00
|
|
|
if gen_words[-1] == self.stop:
|
2013-02-09 14:44:45 -06:00
|
|
|
# remove the stop if it's there
|
2012-07-29 09:39:07 -05:00
|
|
|
gen_words = gen_words[:-1]
|
2012-07-28 13:55:54 -05:00
|
|
|
|
2013-02-09 14:44:45 -06:00
|
|
|
new_sentence = self._create_chain_with_k1_k2(self.start1,
|
|
|
|
self.start2,
|
2013-02-15 10:24:45 -06:00
|
|
|
3, context_id,
|
|
|
|
avoid_address=True)
|
2013-02-09 14:44:45 -06:00
|
|
|
|
|
|
|
if len(new_sentence) > 0:
|
|
|
|
self.log.debug("started new sentence "
|
|
|
|
"'{0:s}'".format(" ".join(new_sentence)))
|
|
|
|
gen_words += new_sentence
|
|
|
|
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
|
|
else:
|
|
|
|
# this is a problem. we started a sentence on
|
|
|
|
# start1,start2, and still didn't find anything. to
|
|
|
|
# avoid endlessly looping we need to abort here
|
2012-07-29 09:39:07 -05:00
|
|
|
break
|
2013-02-09 14:44:45 -06:00
|
|
|
else:
|
|
|
|
if seed_word:
|
|
|
|
self.log.debug("preferring: '{0:s}'".format(seed_word))
|
|
|
|
forw_v = self._retrieve_random_v_for_k1_and_k2_with_pref(gen_words[-2],
|
|
|
|
gen_words[-1],
|
|
|
|
seed_word,
|
|
|
|
context_id)
|
2012-07-29 09:39:07 -05:00
|
|
|
else:
|
2013-02-09 14:44:45 -06:00
|
|
|
forw_v = self._retrieve_random_v_for_k1_and_k2(gen_words[-2],
|
|
|
|
gen_words[-1],
|
|
|
|
context_id)
|
|
|
|
|
|
|
|
if forw_v:
|
|
|
|
gen_words.append(forw_v)
|
|
|
|
self.log.debug("added random word '{0:s}' to gen_words".format(forw_v))
|
|
|
|
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
|
|
hit_word = gen_words[-1]
|
|
|
|
else:
|
|
|
|
# append stop. this is an end to a sentence (since
|
|
|
|
# we had non-start words to begin with)
|
|
|
|
gen_words.append(self.stop)
|
|
|
|
self.log.debug("nothing found, added stop")
|
|
|
|
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
2013-02-08 02:11:29 -06:00
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
# chop off the seed data at the start
|
|
|
|
gen_words = gen_words[2:]
|
|
|
|
|
2013-05-03 16:03:07 -05:00
|
|
|
if len(gen_words):
|
|
|
|
# chop off the end text, if it was the keyword indicating an end of chain
|
|
|
|
if gen_words[-1] == self.stop:
|
|
|
|
gen_words = gen_words[:-1]
|
|
|
|
else:
|
|
|
|
self.log.warning("after all this we have an empty list of words. "
|
|
|
|
"there probably isn't any data for this context")
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2012-07-15 01:11:21 -05:00
|
|
|
return ' '.join(gen_words)
|
2011-01-18 22:30:59 -06:00
|
|
|
|
2013-02-09 14:44:45 -06:00
|
|
|
def _retrieve_random_v_for_k1_and_k2(self, k1, k2, context_id):
    """Get one v for a given k1,k2.

    Keyword arguments:
    k1, k2 - the pair of words to continue from
    context_id - the context to search in

    Returns the value, or None when there is no row for k1,k2 in the
    context. Database errors are logged and re-raised.

    """

    self.log.debug("searching with '{0:s}','{1:s}'".format(k1, k2))
    db = self.get_db()
    # bound before the try so the finally clause never hits an unbound
    # name if creating the cursor itself fails
    cur = None
    try:
        # r2 is a random point in the id range; rows at or after that
        # point sort first, then the lowest id wins
        query = '''
            SELECT v FROM markov_chain AS r1
            JOIN (
                SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id
            ) AS r2
            WHERE r1.k1 = %s
            AND r1.k2 = %s
            AND r1.context_id = %s
            ORDER BY r1.id >= r2.id DESC, r1.id ASC
            LIMIT 1
            '''
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(query, (k1, k2, context_id))
        result = cur.fetchone()
        if result:
            self.log.debug("found '{0:s}'".format(result['v']))
            return result['v']
        return None
    except mdb.Error as e:
        self.log.error("database error in _retrieve_random_v_for_k1_and_k2")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
2011-02-24 20:39:32 -06:00
|
|
|
|
2013-02-08 02:07:57 -06:00
|
|
|
def _retrieve_random_v_for_k1_and_k2_with_pref(self, k1, k2, prefer, context_id):
    """Get one v for a given k1,k2.

    Prefer that the result be prefer, if it's found.

    Keyword arguments:
    k1, k2 - the pair of words to continue from
    prefer - the value to favor among the candidates
    context_id - the context to search in

    Returns the value, or None when there is no row for k1,k2 in the
    context. Database errors are logged and re-raised.

    """

    self.log.debug("searching with '{0:s}','{1:s}', prefer "
                   "'{2:s}'".format(k1, k2, prefer))
    db = self.get_db()
    # bound before the try so the finally clause never hits an unbound
    # name if creating the cursor itself fails
    cur = None
    try:
        # r2 is a random point in the id range; rows at or after that
        # point sort first, then rows whose v equals prefer, then the
        # lowest id
        query = '''
            SELECT v FROM markov_chain AS r1
            JOIN (
                SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id
            ) AS r2
            WHERE r1.k1 = %s
            AND r1.k2 = %s
            AND r1.context_id = %s
            ORDER BY r1.id >= r2.id DESC, r1.v = %s DESC, r1.id ASC
            LIMIT 1
            '''
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(query, (k1, k2, context_id, prefer))
        result = cur.fetchone()
        if result:
            self.log.debug("found '{0:s}'".format(result['v']))
            return result['v']
        return None
    except mdb.Error as e:
        self.log.error("database error in _retrieve_random_v_for_k1_and_k2_with_pref")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
|
|
|
|
2013-02-08 01:15:32 -06:00
|
|
|
def _retrieve_random_k2_for_value(self, v, context_id):
    """Get one k2 for a given value.

    Keyword arguments:
    v - the value to find a preceding word for
    context_id - the context to search in

    Returns the k2, or None when v has no row in the context. Database
    errors are logged and re-raised.

    """

    db = self.get_db()
    # bound before the try so the finally clause never hits an unbound
    # name if creating the cursor itself fails
    cur = None
    try:
        # r2 is a random point in the id range; rows at or after that
        # point sort first, then the lowest id wins
        query = '''
            SELECT k2 FROM markov_chain AS r1
            JOIN (
                SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id
            ) AS r2
            WHERE r1.v = %s
            AND r1.context_id = %s
            ORDER BY r1.id >= r2.id DESC, r1.id ASC
            LIMIT 1
            '''
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(query, (v, context_id))
        result = cur.fetchone()
        if result:
            return result['k2']
        return None
    except mdb.Error as e:
        self.log.error("database error in _retrieve_random_k2_for_value")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
2011-10-16 20:19:51 -05:00
|
|
|
|
2013-02-15 10:24:45 -06:00
|
|
|
def _create_chain_with_k1_k2(self, k1, k2, length, context_id,
|
|
|
|
avoid_address=False):
|
2013-02-09 14:44:45 -06:00
|
|
|
"""Create a chain of the given length, using k1,k2.
|
|
|
|
|
|
|
|
k1,k2 does not appear in the resulting chain.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
chain = [k1, k2]
|
|
|
|
self.log.debug("creating chain for {0:s},{1:s}".format(k1, k2))
|
|
|
|
|
|
|
|
for _ in range(length):
|
|
|
|
v = self._retrieve_random_v_for_k1_and_k2(chain[-2],
|
|
|
|
chain[-1],
|
|
|
|
context_id)
|
|
|
|
if v:
|
|
|
|
chain.append(v)
|
|
|
|
|
2013-02-15 10:24:45 -06:00
|
|
|
# check for addresses (the "whoever:" in
|
|
|
|
# __start1 __start2 whoever: some words)
|
|
|
|
addressing_suffixes = [':', ',']
|
2013-05-03 16:03:07 -05:00
|
|
|
if len(chain) > 2 and chain[2][-1] in addressing_suffixes and avoid_address:
|
2013-02-15 10:24:45 -06:00
|
|
|
return chain[3:]
|
2013-05-03 16:03:07 -05:00
|
|
|
elif len(chain) > 2:
|
2013-02-15 10:24:45 -06:00
|
|
|
return chain[2:]
|
2013-05-03 16:03:07 -05:00
|
|
|
else:
|
|
|
|
return []
|
2013-02-09 14:44:45 -06:00
|
|
|
|
2011-05-01 09:47:45 -05:00
|
|
|
def _get_chatter_targets(self):
    """Get all possible chatter targets.

    Selects the target and chance columns of every row in
    markov_chatter_target and returns whatever the DictCursor's
    fetchall() produces.

    Any mdb.Error is logged and then re-raised to the caller.

    """

    # need to create our own db object, since this is likely going to be in a new thread
    db = self.get_db()

    # bound up front so the finally clause is safe even when
    # db.cursor() itself is what fails
    cur = None
    try:
        query = 'SELECT target, chance FROM markov_chatter_target'
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(query)
        return cur.fetchall()
    except mdb.Error as e:
        self.log.error("database error in _get_chatter_targets")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
2011-05-01 09:47:45 -05:00
|
|
|
|
2012-02-28 23:23:14 -06:00
|
|
|
def _get_context_id_for_target(self, target):
    """Get the context ID for the desired/input target.

    Looks the target up in markov_target_to_context_map joined to
    markov_context and returns the id of the first row found. When the
    target has no context yet, one is created through
    _add_context_for_target and the lookup is run again.

    Any mdb.Error is logged and then re-raised to the caller.

    """

    db = self.get_db()

    # bound up front so the finally clause is safe even when
    # db.cursor() itself is what fails
    cur = None
    try:
        query = '''
            SELECT mc.id FROM markov_context mc
            INNER JOIN markov_target_to_context_map mt
                ON mt.context_id = mc.id
            WHERE mt.target = %s
            '''
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(query, (target,))
        result = cur.fetchone()

        # NOTE(review): the connection is closed here, ahead of the
        # cursor close in the finally clause, and this is the only
        # method in view that closes its connection -- confirm the
        # ordering is intended.
        db.close()

        if result:
            return result['id']

        # auto-generate a context to keep things private
        self._add_context_for_target(target)
        return self._get_context_id_for_target(target)
    except mdb.Error as e:
        self.log.error("database error in _get_context_id_for_target")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
2012-03-19 00:12:29 -05:00
|
|
|
|
|
|
|
def _add_context_for_target(self, target):
    """Create a new context for the desired/input target.

    Inserts a markov_context row named after the target, maps the
    target to that row in markov_target_to_context_map, and commits.
    If either insert fails, the transaction is rolled back, the error
    is logged, and the mdb.Error is re-raised.

    Afterwards the new mapping is read back and the context id is
    returned. If it cannot be found, the creation is attempted again
    and the answer is taken from _get_context_id_for_target.

    """

    db = self.get_db()

    # step one: create the context and point the target at it.
    # cur is bound up front so the finally clause is safe even when
    # db.cursor() itself is what fails.
    cur = None
    try:
        statement = 'INSERT INTO markov_context (context) VALUES (%s)'
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(statement, (target,))
        statement = '''
            INSERT INTO markov_target_to_context_map (target, context_id)
            VALUES (%s, (SELECT id FROM markov_context WHERE context = %s))
            '''
        cur.execute(statement, (target, target))
        db.commit()
    except mdb.Error as e:
        db.rollback()
        self.log.error("database error in _add_context_for_target")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()

    # step two: read back the id of the context that was just mapped
    cur = None
    try:
        query = '''
            SELECT mc.id FROM markov_context mc
            INNER JOIN markov_target_to_context_map mt
                ON mt.context_id = mc.id
            WHERE mt.target = %s
            '''
        cur = db.cursor(mdb.cursors.DictCursor)
        cur.execute(query, (target,))
        result = cur.fetchone()
        if result:
            return result['id']

        # auto-generate a context to keep things private
        self._add_context_for_target(target)
        return self._get_context_id_for_target(target)
    except mdb.Error as e:
        # this handler used to name _get_context_id_for_target, which
        # sent anyone reading the log to the wrong method
        self.log.error("database error in _add_context_for_target")
        self.log.exception(e)
        raise
    finally:
        if cur is not None:
            cur.close()
|
Markov: when looking up the start-of-sentence chain, get one random one
when finding a key for (__start1,__start2), instead of fetching all
(which can be a lot, in chatty channels and/or over time), get the
max ID in the table, pick a random ID between 1,max, and pick the
first id >= it, and use that. just as random, nowhere near as
intensive.
2011-04-23 21:24:23 -05:00
|
|
|
|
2011-01-18 22:30:59 -06:00
|
|
|
# vi:tabstop=4:expandtab:autoindent
|
|
|
|
# kate: indent-mode python;indent-width 4;replace-tabs on;
|