Markov: improve min_size by implementing min_search_tries

If the end of a chain has been reached via __end but min_size has not been satisfied, discard the last couple of elements in the chain and try again. Use min_search_tries so we don't do this forever.
2011-01-25 20:42:52 -06:00 · 2011-01-25 20:42:52 -06:00 · 28f450ab5d
commit 28f450ab5d
parent 7b4b86dc0d
1 changed files with 16 additions and 0 deletions
--- a/modules/Markov.py
+++ b/modules/Markov.py
@ -191,12 +191,28 @@ class Markov(Module):
        # start with an empty chain, and work from there
        gen_words = [self.start1, self.start2]

+        # set up the number of times we've tried to hit the specified minimum
+        min_search_tries = 0
+
        # walk a chain, randomly, building the list of words
        while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
            if len(gen_words) < min_size and len(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])) > 0:
+                # we aren't at min size yet and we have at least one chain path
+                # that isn't (yet) the end. take one of those.
                gen_words.append(random.choice(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])))
+                min_search_tries = 0
+            elif len(gen_words) < min_size and min_search_tries <= 10:
+                # we aren't at min size yet and the only path we currently have is
+                # an end, but we haven't retried much yet, so chop off our current
+                # chain and try again.
+                gen_words = gen_words[0:len(gen_words)-2]
+                min_search_tries = min_search_tries + 1
            else:
+                # either we have hit our min size requirement, or we haven't but
+                # we also exhausted min_search_tries. either way, just pick a word
+                # at random, knowing it may be the end of the chain
                gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
+                min_search_tries = 0

        # chop off the seed data at the start
        gen_words = gen_words[2:]