Markov: try to avoid "nick:" starts when doing extra chaining

When starting another sentence because the main one is too short,
do a bit of work in an attempt to avoid "nick: blah" starts, since
they're fairly common. Instead, we just ignore the "nick:" and start
with "blah blah".
This commit is contained in:
Brian S. Stephan 2012-07-29 15:43:15 -05:00
parent ad1de23a7c
commit 26ec854c67
1 changed files with 27 additions and 6 deletions

View File

@ -414,17 +414,38 @@ class Markov(Module):
gen_words[-1] = sentence_end + eos_punctuation.pop()
self.log.debug("monkeyed with end of sentence, it's now: {0:s}".format(gen_words[:-1]))
new_chain_words = []
# new word 1
key_hits = self._retrieve_chains_for_key(self.start1, self.start2, context_id)
gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
# the database is probably empty if we got a stop from this
if gen_words[-1] == self.stop:
if gen_words[0] == self.stop:
break
# new word 2
key_hits = self._retrieve_chains_for_key(self.start2, new_chain_words[0], context_id)
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
if gen_words[1] != self.stop:
# two valid words, try for a third and check for "foo:"
# new word 3 (which we may need below)
key_hits = self._retrieve_chains_for_key(new_chain_words[0], new_chain_words[1], context_id)
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
# if the first word is "foo:", start with the second
addressing_suffixes = [':', ',']
if gen_words[0][-1] in addressing_suffixes:
gen_words += new_chain_words[1:]
self.log.debug("appending following anti-address " \
"new_chain_words: {0:s}".format(new_chain_words[1:]))
else:
gen_words += new_chain_words[0:]
self.log.debug("appending following extended " \
"new_chain_words: {0:s}".format(new_chain_words[0:]))
else:
# new word 2
key_hits = self._retrieve_chains_for_key(self.start2, gen_words[-1], context_id)
gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
# well, we got one word out of this... let's go with it
# and let the loop check if we need more
self.log.debug("appending following short new_chain_words: {0:s}".format(new_chain_words))
gen_words += new_chain_words
# chop off the seed data at the start
gen_words = gen_words[2:]