Markov: index on (v, context) and other enhancements for the last commit

Reduce some possibilities for infinite loops, and add an index, together with the old <= id trick,
to speed up the search for backwards chains.
This commit is contained in:
Brian S. Stephan 2011-10-16 21:13:27 -05:00
parent 42962bc48d
commit cda1d43606

View File

@ -201,6 +201,21 @@ class Markov(Module):
db.close() db.close()
print('sqlite error: ' + str(e)) print('sqlite error: ' + str(e))
raise raise
if (version < 8):
db = self.get_db()
try:
version = 8
db.execute('''
CREATE INDEX markov_chain_value_and_context_index
ON markov_chain (v, context)''')
db.commit()
db.close()
self.db_register_module_version(self.__class__.__name__, version)
except sqlite3.Error as e:
db.rollback()
db.close()
print('sqlite error: ' + str(e))
raise
def register_handlers(self): def register_handlers(self):
"""Handle pubmsg/privmsg, to learn and/or reply to IRC events.""" """Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
@ -422,6 +437,8 @@ class Markov(Module):
# we'll just pick a word and work backwards # we'll just pick a word and work backwards
if gen_words[-1] == self.start2 and target_word != '': if gen_words[-1] == self.start2 and target_word != '':
working_backwards = [] working_backwards = []
key_hits = self._retrieve_k2_for_value(target_word, context)
if len(key_hits):
working_backwards.append(target_word) working_backwards.append(target_word)
# generate new word # generate new word
found_word = '' found_word = ''
@ -432,11 +449,15 @@ class Markov(Module):
if target_word in key_hits: if target_word in key_hits:
found_word = target_word found_word = target_word
# generate new word # generate new word
target_word = words[random.randint(0, len(words)-1)] if len(filter(lambda a: a != target_word, words)) > 1 and False:
# if we have more than one target word, get a new one (otherwise give up)
target_word = random.choice(filter(lambda a: a != target_word, words))
else:
target_word = ''
else: else:
found_word = random.choice(filter(lambda a: a != self.stop, key_hits)) found_word = random.choice(filter(lambda a: a != self.stop, key_hits))
if found_word == self.start2: if found_word == self.start2 or len(working_backwards) >= max_size + 2:
gen_words = gen_words + working_backwards gen_words = gen_words + working_backwards
break break
else: else:
@ -499,8 +520,10 @@ class Markov(Module):
values = [] values = []
try: try:
db = self.get_db() db = self.get_db()
query = 'SELECT k2 FROM markov_chain WHERE v = ? AND (context = ? OR context IS NULL)' max_id = self._get_max_chain_id()
cursor = db.execute(query, (v,context)) rand_id = random.randint(1,max_id)
query = 'SELECT k2 FROM markov_chain WHERE v = ? AND (context = ? OR context IS NULL) AND id >= {0:d} UNION SELECT k2 FROM markov_chain WHERE v = ? AND (context = ? OR context IS NULL) AND id < {1:d} LIMIT 1'.format(rand_id, rand_id)
cursor = db.execute(query, (v,context,v,context))
results = cursor.fetchall() results = cursor.fetchall()
for result in results: for result in results: