Markov: try to avoid "nick:" starts when doing extra chaining

When starting another sentence because the main one is too short, do a bit of work in an attempt to avoid "nick: blah" starts, since they're fairly common. Instead, we just ignore the "nick:" and start with "blah blah".
2012-07-29 15:43:15 -05:00 · 2012-07-29 15:43:15 -05:00 · 26ec854c67
commit 26ec854c67
parent ad1de23a7c
1 changed files with 27 additions and 6 deletions
--- a/modules/Markov.py
+++ b/modules/Markov.py
@ -414,17 +414,38 @@ class Markov(Module):
                        gen_words[-1] = sentence_end + eos_punctuation.pop()
                        self.log.debug("monkeyed with end of sentence, it's now: {0:s}".format(gen_words[:-1]))

+                    new_chain_words = []
                    # new word 1
                    key_hits = self._retrieve_chains_for_key(self.start1, self.start2, context_id)
-                    gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
-
+                    new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
                    # the database is probably empty if we got a stop from this
-                    if gen_words[-1] == self.stop:
+                    if gen_words[0] == self.stop:
                        break
+                    # new word 2
+                    key_hits = self._retrieve_chains_for_key(self.start2, new_chain_words[0], context_id)
+                    new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
+                    if gen_words[1] != self.stop:
+                        # two valid words, try for a third and check for "foo:"
+
+                        # new word 3 (which we may need below)
+                        key_hits = self._retrieve_chains_for_key(new_chain_words[0], new_chain_words[1], context_id)
+                        new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
+
+                        # if the first word is "foo:", start with the second
+                        addressing_suffixes = [':', ',']
+                        if gen_words[0][-1] in addressing_suffixes:
+                            gen_words += new_chain_words[1:]
+                            self.log.debug("appending following anti-address " \
+                                           "new_chain_words: {0:s}".format(new_chain_words[1:]))
+                        else:
+                            gen_words += new_chain_words[0:]
+                            self.log.debug("appending following extended " \
+                                           "new_chain_words: {0:s}".format(new_chain_words[0:]))
                    else:
-                        # new word 2
-                        key_hits = self._retrieve_chains_for_key(self.start2, gen_words[-1], context_id)
-                        gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
+                        # well, we got one word out of this... let's go with it
+                        # and let the loop check if we need more
+                        self.log.debug("appending following short new_chain_words: {0:s}".format(new_chain_words))
+                        gen_words += new_chain_words

        # chop off the seed data at the start
        gen_words = gen_words[2:]