From 87073d7fd39fdd110eab48101e305255aef94ced Mon Sep 17 00:00:00 2001
From: "Brian S. Stephan"
Date: Thu, 24 Feb 2011 21:06:29 -0600
Subject: [PATCH] Markov: cache the first word in markov chains

this eliminates the expensive database hit on every request for a line.
the cache is loaded when the module loads and learning new lines should
add the appropriate word to the list. seemed like a pretty good
compromise
---
 modules/Markov.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/modules/Markov.py b/modules/Markov.py
index 5ab03ac..c324a7c 100644
--- a/modules/Markov.py
+++ b/modules/Markov.py
@@ -59,6 +59,9 @@ class Markov(Module):
 
         Module.__init__(self, irc, config, server)
 
+        # load the existing chain starts from the database
+        self.starts = self._get_chain_beginnings()
+
     def db_init(self):
         """Create the markov chain table."""
 
@@ -183,12 +186,15 @@ class Markov(Module):
         k1 = self.start1
         k2 = self.start2
 
+        words = line.split()
+        self.starts.append(words[0])
+
         try:
             db = self.get_db()
             cur = db.cursor()
 
             statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
-            for word in line.split():
+            for word in words:
                 cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
                 k1, k2 = k2, word
             cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
@@ -211,7 +217,7 @@ class Markov(Module):
             raise Exception("min_size is too large: %d" % min_size)
 
         # start with an empty chain, and work from there
-        gen_words = [self.start1, self.start2]
+        gen_words = [self.start1, self.start2, random.choice(self.starts)]
 
         # set up the number of times we've tried to hit the specified minimum
         min_search_tries = 0
@@ -262,7 +268,7 @@ class Markov(Module):
         target_word = words[random.randint(0, len(words)-1)]
 
         # start with an empty chain, and work from there
-        gen_words = [self.start1, self.start2]
+        gen_words = [self.start1, self.start2, random.choice(self.starts)]
 
         # walk a chain, randomly, building the list of words
         while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
@@ -305,5 +311,23 @@ class Markov(Module):
             print('sqlite error: ' + str(e))
             raise
 
+    def _get_chain_beginnings(self):
+        """Get all of the first (real) words in the brain."""
+
+        values = []
+        try:
+            db = self.get_db()
+            query = 'SELECT v FROM markov_chain WHERE k1 = "__start1" AND k2 = "__start2"'
+            cursor = db.execute(query)
+            results = cursor.fetchall()
+
+            for result in results:
+                values.append(result['v'])
+
+            return values
+        except sqlite3.Error as e:
+            print('sqlite error: ' + str(e))
+            raise
+
 # vi:tabstop=4:expandtab:autoindent
 # kate: indent-mode python;indent-width 4;replace-tabs on;