From 87073d7fd39fdd110eab48101e305255aef94ced Mon Sep 17 00:00:00 2001
From: "Brian S. Stephan" <bss@incorporeal.org>
Date: Thu, 24 Feb 2011 21:06:29 -0600
Subject: [PATCH] Markov: cache the first word in markov chains

This eliminates the expensive database hit on every request for a line.
The cache is loaded when the module loads, and learning a new line adds
its first word to the list. Seemed like a pretty good compromise.
---
 modules/Markov.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/modules/Markov.py b/modules/Markov.py
index 5ab03ac..c324a7c 100644
--- a/modules/Markov.py
+++ b/modules/Markov.py
@@ -59,6 +59,9 @@ class Markov(Module):
 
         Module.__init__(self, irc, config, server)
 
+        # load the existing chain starts from the database
+        self.starts = self._get_chain_beginnings()
+
     def db_init(self):
         """Create the markov chain table."""
 
@@ -183,12 +186,15 @@ class Markov(Module):
         k1 = self.start1
         k2 = self.start2
 
+        words = line.split()
+        self.starts.append(words[0])
+
         try:
             db = self.get_db()
             cur = db.cursor()
             statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
 
-            for word in line.split():
+            for word in words:
                 cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
                 k1, k2 = k2, word
             cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
@@ -211,7 +217,7 @@ class Markov(Module):
             raise Exception("min_size is too large: %d" % min_size)
 
         # start with an empty chain, and work from there
-        gen_words = [self.start1, self.start2]
+        gen_words = [self.start1, self.start2, random.choice(self.starts)]
 
         # set up the number of times we've tried to hit the specified minimum
         min_search_tries = 0
@@ -262,7 +268,7 @@ class Markov(Module):
         target_word = words[random.randint(0, len(words)-1)]
 
         # start with an empty chain, and work from there
-        gen_words = [self.start1, self.start2]
+        gen_words = [self.start1, self.start2, random.choice(self.starts)]
 
         # walk a chain, randomly, building the list of words
         while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
@@ -305,5 +311,23 @@ class Markov(Module):
             print('sqlite error: ' + str(e))
             raise
 
+    def _get_chain_beginnings(self):
+        """Return every word stored as the first (real) word of a chain.
+
+        One entry per matching row, so duplicates are kept. Any sqlite3.Error
+        is printed and re-raised.
+        """
+
+        # bind the start markers as parameters: SQLite resolves double-quoted
+        # text as an identifier before falling back to a string literal
+        query = 'SELECT v FROM markov_chain WHERE k1 = ? AND k2 = ?'
+        try:
+            db = self.get_db()
+            rows = db.execute(query, ('__start1', '__start2')).fetchall()
+            return [row['v'] for row in rows]
+        except sqlite3.Error as e:
+            print('sqlite error: ' + str(e))
+            raise
+
 # vi:tabstop=4:expandtab:autoindent
 # kate: indent-mode python;indent-width 4;replace-tabs on;