Markov: cache the first word in markov chains
This eliminates the expensive database hit on every request for a line. The cache is loaded when the module loads, and learning new lines should add the appropriate first word to the cached list. This seemed like a pretty good compromise.
This commit is contained in:
parent
3d6ede5155
commit
87073d7fd3
|
@ -59,6 +59,9 @@ class Markov(Module):
|
|||
|
||||
Module.__init__(self, irc, config, server)
|
||||
|
||||
# load the existing chain starts from the database
|
||||
self.starts = self._get_chain_beginnings()
|
||||
|
||||
def db_init(self):
|
||||
"""Create the markov chain table."""
|
||||
|
||||
|
@ -183,12 +186,15 @@ class Markov(Module):
|
|||
k1 = self.start1
|
||||
k2 = self.start2
|
||||
|
||||
words = line.split()
|
||||
self.starts.append(words[0])
|
||||
|
||||
try:
|
||||
db = self.get_db()
|
||||
cur = db.cursor()
|
||||
statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
|
||||
|
||||
for word in line.split():
|
||||
for word in words:
|
||||
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
|
||||
k1, k2 = k2, word
|
||||
cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
|
||||
|
@ -211,7 +217,7 @@ class Markov(Module):
|
|||
raise Exception("min_size is too large: %d" % min_size)
|
||||
|
||||
# start with an empty chain, and work from there
|
||||
gen_words = [self.start1, self.start2]
|
||||
gen_words = [self.start1, self.start2, random.choice(self.starts)]
|
||||
|
||||
# set up the number of times we've tried to hit the specified minimum
|
||||
min_search_tries = 0
|
||||
|
@ -262,7 +268,7 @@ class Markov(Module):
|
|||
target_word = words[random.randint(0, len(words)-1)]
|
||||
|
||||
# start with an empty chain, and work from there
|
||||
gen_words = [self.start1, self.start2]
|
||||
gen_words = [self.start1, self.start2, random.choice(self.starts)]
|
||||
|
||||
# walk a chain, randomly, building the list of words
|
||||
while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
|
||||
|
@ -305,5 +311,23 @@ class Markov(Module):
|
|||
print('sqlite error: ' + str(e))
|
||||
raise
|
||||
|
||||
def _get_chain_beginnings(self):
    """Get all of the first (real) words in the brain.

    Selects the ``v`` column of every ``markov_chain`` row whose key pair
    is the two start-of-chain markers (``__start1``, ``__start2``).

    Returns:
        A list of the selected ``v`` values, one per matching row;
        duplicates are kept. Empty when no row matches.

    Raises:
        sqlite3.Error: re-raised after being printed when the lookup fails.
    """

    # Bind the markers as parameters rather than writing them as
    # double-quoted tokens in the SQL text: SQLite parses "..." as an
    # identifier and only falls back to a string literal through a legacy
    # quirk that can be switched off, which would break the query.
    query = 'SELECT v FROM markov_chain WHERE k1 = ? AND k2 = ?'

    try:
        db = self.get_db()
        cursor = db.execute(query, ('__start1', '__start2'))

        # NOTE(review): name-based access assumes get_db() returns a
        # connection whose rows support it (e.g. sqlite3.Row) - confirm.
        return [row['v'] for row in cursor.fetchall()]
    except sqlite3.Error as e:
        print('sqlite error: ' + str(e))
        raise
|
||||
|
||||
# vi:tabstop=4:expandtab:autoindent
|
||||
# kate: indent-mode python;indent-width 4;replace-tabs on;
|
||||
|
|
Loading…
Reference in New Issue