Markov: implement a min_size, which tries to make a chain of at least min_size words.

Note that this isn't guaranteed: if the chain is such that the
current tuple has nowhere to go but to the end of the line, then
it will follow it --- it doesn't try to go back and rebuild a different
chain or anything.
This commit is contained in:
Brian S. Stephan 2011-01-19 18:44:07 -06:00
parent ac0429569e
commit 7c05f60ffd

View File

@ -166,19 +166,26 @@ class Markov(Module):
# cap the end of the chain
self.brain.setdefault((w1, w2), []).append(self.stop)
def _reply(self, max_size=100):
def _reply(self, min_size=15, max_size=100):
"""Generate a totally random string from the chains, of specified limit of words."""
# if the limit is too low, there's nothing to do
if (max_size <= 3):
raise Exception("max_size is too small: %d" % max_size)
# if the min is too large, abort
if (min_size > 20):
raise Exception("min_size is too large: %d" % min_size)
# start with an empty chain, and work from there
gen_words = [self.start1, self.start2]
# walk a chain, randomly, building the list of words
while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
if len(gen_words) < min_size and len(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])) > 0:
gen_words.append(random.choice(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])))
else:
gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
# chop off the seed data at the start
gen_words = gen_words[2:]
@ -189,13 +196,17 @@ class Markov(Module):
return ' '.join(gen_words)
def _reply_to_line(self, line, max_size=100):
def _reply_to_line(self, line, min_size=15, max_size=100):
"""Reply to a line, using some text in the line as a point in the chain."""
# if the limit is too low, there's nothing to do
if (max_size <= 3):
raise Exception("max_size is too small: %d" % max_size)
# if the min is too large, abort
if (min_size > 20):
raise Exception("min_size is too large: %d" % min_size)
# get a random word from the input
words = line.split()
target_word = words[random.randint(0, len(words)-1)]
@ -211,7 +222,10 @@ class Markov(Module):
# generate new word
target_word = words[random.randint(0, len(words)-1)]
else:
gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
if len(gen_words) < min_size and len(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])) > 0:
gen_words.append(random.choice(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])))
else:
gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
# chop off the seed data at the start
gen_words = gen_words[2:]