Markov: try to avoid "nick:" starts when chaining extra sentences
When starting another sentence because the main one is too short, do a bit of work in an attempt to avoid "nick: blah" starts, since they're fairly common. Instead, we just ignore the "nick:" and start with "blah blah".
This commit is contained in:
parent
ad1de23a7c
commit
26ec854c67
|
@ -414,17 +414,38 @@ class Markov(Module):
|
||||||
gen_words[-1] = sentence_end + eos_punctuation.pop()
|
gen_words[-1] = sentence_end + eos_punctuation.pop()
|
||||||
self.log.debug("monkeyed with end of sentence, it's now: {0:s}".format(gen_words[:-1]))
|
self.log.debug("monkeyed with end of sentence, it's now: {0:s}".format(gen_words[:-1]))
|
||||||
|
|
||||||
|
new_chain_words = []
|
||||||
# new word 1
|
# new word 1
|
||||||
key_hits = self._retrieve_chains_for_key(self.start1, self.start2, context_id)
|
key_hits = self._retrieve_chains_for_key(self.start1, self.start2, context_id)
|
||||||
gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||||
|
|
||||||
# the database is probably empty if we got a stop from this
|
# the database is probably empty if we got a stop from this
|
||||||
if gen_words[-1] == self.stop:
|
if gen_words[0] == self.stop:
|
||||||
break
|
break
|
||||||
|
# new word 2
|
||||||
|
key_hits = self._retrieve_chains_for_key(self.start2, new_chain_words[0], context_id)
|
||||||
|
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||||
|
if gen_words[1] != self.stop:
|
||||||
|
# two valid words, try for a third and check for "foo:"
|
||||||
|
|
||||||
|
# new word 3 (which we may need below)
|
||||||
|
key_hits = self._retrieve_chains_for_key(new_chain_words[0], new_chain_words[1], context_id)
|
||||||
|
new_chain_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
||||||
|
|
||||||
|
# if the first word is "foo:", start with the second
|
||||||
|
addressing_suffixes = [':', ',']
|
||||||
|
if gen_words[0][-1] in addressing_suffixes:
|
||||||
|
gen_words += new_chain_words[1:]
|
||||||
|
self.log.debug("appending following anti-address " \
|
||||||
|
"new_chain_words: {0:s}".format(new_chain_words[1:]))
|
||||||
|
else:
|
||||||
|
gen_words += new_chain_words[0:]
|
||||||
|
self.log.debug("appending following extended " \
|
||||||
|
"new_chain_words: {0:s}".format(new_chain_words[0:]))
|
||||||
else:
|
else:
|
||||||
# new word 2
|
# well, we got one word out of this... let's go with it
|
||||||
key_hits = self._retrieve_chains_for_key(self.start2, gen_words[-1], context_id)
|
# and let the loop check if we need more
|
||||||
gen_words.append(self._get_suitable_word_from_choices(key_hits, gen_words, min_size))
|
self.log.debug("appending following short new_chain_words: {0:s}".format(new_chain_words))
|
||||||
|
gen_words += new_chain_words
|
||||||
|
|
||||||
# chop off the seed data at the start
|
# chop off the seed data at the start
|
||||||
gen_words = gen_words[2:]
|
gen_words = gen_words[2:]
|
||||||
|
|
Loading…
Reference in New Issue