Markov: track the context of said lines

A context is a meta-classification ('banter', 'secrets', whatever) based on targets (channels or nicknames). When a line is learned from a known target, its chains are placed in that target's context. This allows one brain to have multiple personalities, in a sense, for large networks or for cases where there is a more sanitized set of channels alongside a couple of channels where everyone lets it rip. A later enhancement would have sentence creation choose from context-less chains (and from contexts matching the current target), but I need to go back to the drawing board on that one a bit. ramble ramble ramble
2011-04-23 16:07:32 -05:00 · 2011-04-23 16:07:32 -05:00 · 305625044a
parent 4e7c19a02a
commit 305625044a
1 changed files with 68 additions and 8 deletions
--- a/modules/Markov.py
+++ b/modules/Markov.py
@ -87,6 +87,32 @@ class Markov(Module):
                db.rollback()
                print("sqlite error: " + str(e))
                raise
        if (version < 2):
            db = self.get_db()
            try:
                db.execute('''
                    ALTER TABLE markov_chain
                        ADD COLUMN context TEXT DEFAULT NULL''')
                db.execute('''
                    CREATE TABLE markov_context (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        context TEXT NOT NULL
                    )''')
                db.execute('''
                    CREATE TABLE markov_target_to_context_map (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        target TEXT NOT NULL,
                        context_id INTEGER NOT NULL,
                        FOREIGN KEY(context_id) REFERENCES markov_context(id)
                    )''')
                db.execute('UPDATE drbotzo_modules SET version = ? WHERE module = ?',
                    (2, self.__class__.__name__))
                db.commit()
                version = 2
            except sqlite3.Error as e:
                db.rollback()
                print('sqlite error: ' + str(e))
                raise
    def register_handlers(self):
        """Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
@ -108,12 +134,13 @@ class Markov(Module):
        what = ''.join(event.arguments()[0])
        my_nick = connection.get_nickname()
        what = re.sub('^' + my_nick + '[:,]\s+', '', what)
        target = event.target()
        # don't learn from commands
        if self.trainre.search(what) or self.learnre.search(what) or self.replyre.search(what):
            return
-        self._learn_line(what)
+        self._learn_line(what, target)
    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle commands and inputs."""
@ -154,10 +181,11 @@ class Markov(Module):
    def markov_learn(self, connection, event, nick, userhost, what, admin_unlocked):
        """Learn one line, as provided to the command."""
        target = event.target()
        match = self.learnre.search(what)
        if match:
            line = match.group(1)
-            self._learn_line(line)
+            self._learn_line(line, target)
            # return what was learned, for weird chaining purposes
            return line
@ -181,13 +209,18 @@ class Markov(Module):
            else:
                return self._reply(min_size=min_size, max_size=max_size)
-    def _learn_line(self, line):
+    def _learn_line(self, line, target=None):
        """Create Markov chains from the provided line."""
        # set up the head of the chain
        k1 = self.start1
        k2 = self.start2
        # see if there's a context for this
        context = None
        if target:
            context = self._get_context_for_target(target)
        words = line.split()
        if len(words) <= 0:
            return line
@ -197,12 +230,20 @@ class Markov(Module):
        try:
            db = self.get_db()
            cur = db.cursor()
-            statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
+            if context:
                statement = 'INSERT INTO markov_chain (k1, k2, v, context) VALUES (?, ?, ?, ?)'
-            for word in words:
+                for word in words:
-                cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
+                    cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower(), context))
-                k1, k2 = k2, word
+                    k1, k2 = k2, word
-            cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
+                cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop, context))
            else:
                statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
                for word in words:
                    cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
                    k1, k2 = k2, word
                cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
            db.commit()
        except sqlite3.Error as e:
@ -336,5 +377,24 @@ class Markov(Module):
            print('sqlite error: ' + str(e))
            raise
    def _get_context_for_target(self, target):
        """Get the context for a channel/nick, if defined."""
        try:
            db = self.get_db()
            query = '''
                SELECT context_id FROM markov_target_to_context_map
                    WHERE target = ?
                '''
            cursor = db.execute(query, (target,))
            result = cursor.fetchone()
            if result:
                return result['context_id']
            else:
                return None
        except sqlite3.Error as e:
            print('sqlite error: ' + str(e))
            raise
 # vi:tabstop=4:expandtab:autoindent
 # kate: indent-mode python;indent-width 4;replace-tabs on;