Markov: implement a min_size, which tries to make a chain of at least min_size words.

Note that this isn't guaranteed: if the chain is such that the current tuple has nowhere to go but to the end of the line, then it will follow it --- it doesn't try to go back and rebuild a different chain or anything.
2011-01-19 18:44:07 -06:00 · 2011-01-19 18:44:07 -06:00 · 7c05f60ffd
commit 7c05f60ffd
parent ac0429569e
1 changed files with 18 additions and 4 deletions
--- a/modules/Markov.py
+++ b/modules/Markov.py
@ -166,19 +166,26 @@ class Markov(Module):
        # cap the end of the chain
        self.brain.setdefault((w1, w2), []).append(self.stop)

-    def _reply(self, max_size=100):
+    def _reply(self, min_size=15, max_size=100):
        """Generate a totally random string from the chains, of specified limit of words."""

        # if the limit is too low, there's nothing to do
        if (max_size <= 3):
            raise Exception("max_size is too small: %d" % max_size)

+        # if the min is too large, abort
+        if (min_size > 20):
+            raise Exception("min_size is too large: %d" % min_size)
+
        # start with an empty chain, and work from there
        gen_words = [self.start1, self.start2]

        # walk a chain, randomly, building the list of words
        while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
-            gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
+            if len(gen_words) < min_size and len(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])) > 0:
+                gen_words.append(random.choice(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])))
+            else:
+                gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))

        # chop off the seed data at the start
        gen_words = gen_words[2:]
@ -189,13 +196,17 @@ class Markov(Module):

        return ' '.join(gen_words)

-    def _reply_to_line(self, line, max_size=100):
+    def _reply_to_line(self, line, min_size=15, max_size=100):
        """Reply to a line, using some text in the line as a point in the chain."""

        # if the limit is too low, there's nothing to do
        if (max_size <= 3):
            raise Exception("max_size is too small: %d" % max_size)

+        # if the min is too large, abort
+        if (min_size > 20):
+            raise Exception("min_size is too large: %d" % min_size)
+
        # get a random word from the input
        words = line.split()
        target_word = words[random.randint(0, len(words)-1)]
@ -211,7 +222,10 @@ class Markov(Module):
                # generate new word
                target_word = words[random.randint(0, len(words)-1)]
            else:
-                gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))
+                if len(gen_words) < min_size and len(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])) > 0:
+                    gen_words.append(random.choice(filter(lambda a: a != self.stop, self.brain[(gen_words[-2], gen_words[-1])])))
+                else:
+                    gen_words.append(random.choice(self.brain[(gen_words[-2], gen_words[-1])]))

        # chop off the seed data at the start
        gen_words = gen_words[2:]