Markov: improve min_size by implementing min_search_tries

If the end of a chain has been reached via __end, but min_size
has not been satisfied, discard the last couple of elements in the
chain and try again. Use min_search_tries so we don't do this
forever.
This commit is contained in:
Brian S. Stephan 2011-01-25 20:42:52 -06:00
parent 7b4b86dc0d
commit 28f450ab5d
1 changed files with 16 additions and 0 deletions

View File

@@ -191,12 +191,28 @@ class Markov(Module):
# start with an empty chain, and work from there
gen_words = [self.start1, self.start2]
# set up the number of times we've tried to hit the specified minimum
min_search_tries = 0
# walk a chain, randomly, building the list of words
while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
if len(gen_words) < min_size and len(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])) > 0:
# we aren't at min size yet and we have at least one chain path
# that isn't (yet) the end. take one of those.
gen_words.append(random.choice(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])))
min_search_tries = 0
elif len(gen_words) < min_size and min_search_tries <= 10:
# we aren't at min size yet and the only path we currently have is
# an end, but we haven't retried much yet, so chop the last two words
# off our current chain and try again.
gen_words = gen_words[0:len(gen_words)-2]
min_search_tries = min_search_tries + 1
else:
# either we have hit our min size requirement, or we haven't but
# we also exhausted min_search_tries. either way, just pick a word
# at random, knowing it may be the end of the chain
gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
min_search_tries = 0
# chop off the seed data at the start
gen_words = gen_words[2:]