From 26ec854c67b0e94696235847d6875acfbf9abfb7 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sun, 29 Jul 2012 15:43:15 -0500 Subject: [PATCH] Markov: try to avoid "nick:" starts to extra chaining when starting another sentence because the main one is too short, do a bit of work in an attempt to avoid "nick: blah" starts, since they're fairly common. instead we just ignore nick: and start with "blah blah" --- modules/Markov.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/modules/Markov.py b/modules/Markov.py index c0349ea..b35e8f7 100644 --- a/modules/Markov.py +++ b/modules/Markov.py @@ -414,17 +414,38 @@ class Markov(Module): gen_words[-1] = sentence_end + eos_punctuation.pop() self.log.debug("monkeyed with end of sentence, it's now: {0:s}".format(gen_words[:-1])) + new_chain_words = [] # new word 1 key_hits = self._retrieve_chains_for_key(self.start1, self.start2, context_id) - gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size)) - + new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size)) # the database is probably empty if we got a stop from this - if gen_words[-1] == self.stop: + if gen_words[0] == self.stop: break + # new word 2 + key_hits = self._retrieve_chains_for_key(self.start2, new_chain_words[0], context_id) + new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size)) + if gen_words[1] != self.stop: + # two valid words, try for a third and check for "foo:" + + # new word 3 (which we may need below) + key_hits = self._retrieve_chains_for_key(new_chain_words[0], new_chain_words[1], context_id) + new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size)) + + # if the first word is "foo:", start with the second + addressing_suffixes = [':', ','] + if gen_words[0][-1] in addressing_suffixes: + gen_words += new_chain_words[1:] + self.log.debug("appending following anti-address " \ + "new_chain_words: {0:s}".format(new_chain_words[1:])) + else: + gen_words += new_chain_words[0:] + self.log.debug("appending following extended " \ + "new_chain_words: {0:s}".format(new_chain_words[0:])) else: - # new word 2 - key_hits = self._retrieve_chains_for_key(self.start2, gen_words[-1], context_id) - gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size)) + # well, we got one word out of this... let's go with it + # and let the loop check if we need more + self.log.debug("appending following short new_chain_words: {0:s}".format(new_chain_words)) + gen_words += new_chain_words # chop off the seed data at the start gen_words = gen_words[2:]