Markov: track the context of said lines

a context is a meta-classification ('banter', 'secrets', whatever)
based on targets (channels or nicknames). when a line is being
learned from a known target, the chains are placed in that context.

this allows one brain to have multiple personalities, in a sense,
for large networks or cases where there is a more sanitized set of
channels alongside a couple of channels where everyone lets it rip.
a later enhancement would have sentence creation choose from both
context-less chains and chains whose context matches the current
target, but i need to go back to the drawing board on that one a bit.

ramble ramble ramble
This commit is contained in:
Brian S. Stephan 2011-04-23 16:07:32 -05:00
parent 4e7c19a02a
commit 305625044a
1 changed files with 68 additions and 8 deletions

View File

@ -87,6 +87,32 @@ class Markov(Module):
db.rollback()
print("sqlite error: " + str(e))
raise
if (version < 2):
db = self.get_db()
try:
db.execute('''
ALTER TABLE markov_chain
ADD COLUMN context TEXT DEFAULT NULL''')
db.execute('''
CREATE TABLE markov_context (
id INTEGER PRIMARY KEY AUTOINCREMENT,
context TEXT NOT NULL
)''')
db.execute('''
CREATE TABLE markov_target_to_context_map (
id INTEGER PRIMARY KEY AUTOINCREMENT,
target TEXT NOT NULL,
context_id INTEGER NOT NULL,
FOREIGN KEY(context_id) REFERENCES markov_context(id)
)''')
db.execute('UPDATE drbotzo_modules SET version = ? WHERE module = ?',
(2, self.__class__.__name__))
db.commit()
version = 2
except sqlite3.Error as e:
db.rollback()
print('sqlite error: ' + str(e))
raise
def register_handlers(self):
"""Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
@ -108,12 +134,13 @@ class Markov(Module):
what = ''.join(event.arguments()[0])
my_nick = connection.get_nickname()
what = re.sub('^' + my_nick + '[:,]\s+', '', what)
target = event.target()
# don't learn from commands
if self.trainre.search(what) or self.learnre.search(what) or self.replyre.search(what):
return
self._learn_line(what)
self._learn_line(what, target)
def do(self, connection, event, nick, userhost, what, admin_unlocked):
"""Handle commands and inputs."""
@ -154,10 +181,11 @@ class Markov(Module):
def markov_learn(self, connection, event, nick, userhost, what, admin_unlocked):
"""Learn one line, as provided to the command."""
target = event.target()
match = self.learnre.search(what)
if match:
line = match.group(1)
self._learn_line(line)
self._learn_line(line, target)
# return what was learned, for weird chaining purposes
return line
@ -181,13 +209,18 @@ class Markov(Module):
else:
return self._reply(min_size=min_size, max_size=max_size)
def _learn_line(self, line):
def _learn_line(self, line, target=None):
"""Create Markov chains from the provided line."""
# set up the head of the chain
k1 = self.start1
k2 = self.start2
# see if there's a context for this
context = None
if target:
context = self._get_context_for_target(target)
words = line.split()
if len(words) <= 0:
return line
@ -197,12 +230,20 @@ class Markov(Module):
try:
db = self.get_db()
cur = db.cursor()
statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
if context:
statement = 'INSERT INTO markov_chain (k1, k2, v, context) VALUES (?, ?, ?, ?)'
for word in words:
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
k1, k2 = k2, word
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
for word in words:
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower(), context))
k1, k2 = k2, word
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop, context))
else:
statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
for word in words:
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
k1, k2 = k2, word
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
db.commit()
except sqlite3.Error as e:
@ -336,5 +377,24 @@ class Markov(Module):
print('sqlite error: ' + str(e))
raise
def _get_context_for_target(self, target):
    """Look up the context mapped to a target (channel or nick).

    Queries markov_target_to_context_map for a row whose target equals
    the given one and returns that row's context_id. Returns None when
    no row matches. Any sqlite3 error is printed and then re-raised.
    """

    lookup_sql = (
        '\n'
        'SELECT context_id FROM markov_target_to_context_map\n'
        'WHERE target = ?\n'
    )

    try:
        row = self.get_db().execute(lookup_sql, (target,)).fetchone()
        # NOTE(review): name-based indexing presumably relies on get_db()
        # handing back a connection that uses sqlite3.Row -- confirm.
        return row['context_id'] if row else None
    except sqlite3.Error as e:
        print('sqlite error: ' + str(e))
        raise
# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;