Markov: try to avoid "nick:" starts when doing extra chaining
When starting another sentence because the main one is too short, do a bit of work in an attempt to avoid "nick: blah" starts, since they're fairly common. Instead, we just ignore "nick:" and start with "blah blah".
This commit is contained in:
parent
ad1de23a7c
commit
26ec854c67
@ -414,17 +414,38 @@ class Markov(Module):
|
||||
gen_words[-1] = sentence_end + eos_punctuation.pop()
|
||||
self.log.debug("monkeyed with end of sentence, it's now: {0:s}".format(gen_words[:-1]))
|
||||
|
||||
new_chain_words = []
|
||||
# new word 1
|
||||
key_hits = self._retrieve_chains_for_key(self.start1, self.start2, context_id)
|
||||
gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||
|
||||
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||
# the database is probably empty if we got a stop from this
|
||||
if gen_words[-1] == self.stop:
|
||||
if gen_words[0] == self.stop:
|
||||
break
|
||||
# new word 2
|
||||
key_hits = self._retrieve_chains_for_key(self.start2, new_chain_words[0], context_id)
|
||||
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||
if gen_words[1] != self.stop:
|
||||
# two valid words, try for a third and check for "foo:"
|
||||
|
||||
# new word 3 (which we may need below)
|
||||
key_hits = self._retrieve_chains_for_key(new_chain_words[0], new_chain_words[1], context_id)
|
||||
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||
|
||||
# if the first word is "foo:", start with the second
|
||||
addressing_suffixes = [':', ',']
|
||||
if gen_words[0][-1] in addressing_suffixes:
|
||||
gen_words += new_chain_words[1:]
|
||||
self.log.debug("appending following anti-address " \
|
||||
"new_chain_words: {0:s}".format(new_chain_words[1:]))
|
||||
else:
|
||||
gen_words += new_chain_words[0:]
|
||||
self.log.debug("appending following extended " \
|
||||
"new_chain_words: {0:s}".format(new_chain_words[0:]))
|
||||
else:
|
||||
# new word 2
|
||||
key_hits = self._retrieve_chains_for_key(self.start2, gen_words[-1], context_id)
|
||||
gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||
# well, we got one word out of this... let's go with it
|
||||
# and let the loop check if we need more
|
||||
self.log.debug("appending following short new_chain_words: {0:s}".format(new_chain_words))
|
||||
gen_words += new_chain_words
|
||||
|
||||
# chop off the seed data at the start
|
||||
gen_words = gen_words[2:]
|
||||
|
Loading…
Reference in New Issue
Block a user