markov: tweaks to the sentence generator

This tunes the sentence generator a bit, trying to favor complete
sentences over the min/max word counts, which are still heeded, just not
as militantly. This *should* create more interesting chains, especially
with topics, without really breaking things, but it certainly needs some
testing before we can tell whether it's actually right.
This commit is contained in:
Brian S. Stephan 2016-06-30 22:21:02 -05:00
parent 70032dc42b
commit 897f29c8d4
1 changed files with 45 additions and 12 deletions

View File

@ -9,14 +9,16 @@ from markov.models import MarkovContext, MarkovState, MarkovTarget
log = logging.getLogger('markov.lib')
def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3, max_tries=5):
"""String multiple sentences together into a coherent sentence."""
tries = 0
sentences = 0
line = []
while tries < 5:
line += generate_longish_sentence(context, topics=topics, max_words=max_words)
min_words_per_sentence = min_words / max_sentences
while tries < max_tries:
line += generate_longish_sentence(context, topics=topics, min_words=min_words_per_sentence,
max_words=max_words, max_tries=max_tries)
sentences += 1
if sentences >= max_sentences:
return line
@ -32,22 +34,23 @@ def generate_line(context, topics=None, min_words=15, max_words=30, max_sentence
return line
def generate_longish_sentence(context, topics=None, min_words=15, max_words=30, max_tries=5):
    """Generate a Markov chain, but throw away the short ones unless we get desperate.

    Calls generate_sentence() up to ``max_tries`` times and returns the first
    result whose length is at least ``min_words``. If every attempt comes up
    short, the last attempt is returned as-is rather than generating yet
    another chain.

    :param context: passed straight through to generate_sentence()
    :param topics: optional topics, passed straight through to generate_sentence()
    :param min_words: shortest acceptable result, measured with len()
    :param max_words: passed straight through to generate_sentence()
    :param max_tries: how many sentences to generate before giving up
    :returns: whatever generate_sentence() produced (presumably a list of
        words -- confirm against generate_sentence), or "" if max_tries
        allows no attempts at all
    """
    sent = ""
    for _ in range(max_tries):
        sent = generate_sentence(context, topics=topics, min_words=min_words, max_words=max_words)
        if len(sent) >= min_words:
            return sent

    # if we got here, we need to just give up and settle for the last attempt
    return sent
def generate_sentence(context, topics=None, max_words=30):
def generate_sentence(context, topics=None, min_words=15, max_words=30):
"""Generate a Markov chain."""
words = []
@ -64,18 +67,48 @@ def generate_sentence(context, topics=None, max_words=30):
while len(words) <= max_words and words[0] != MarkovState._start2:
log.debug("looking backwards for '{0:s}'".format(words[0]))
new_states = MarkovState.objects.filter(context=context, v=words[0])
words.insert(0, get_word_out_of_states(new_states, backwards=True))
# if we find a start, use it
if MarkovState._start2 in new_states:
log.debug("found a start2 in the results, intentionally picking it")
words.insert(0, MarkovState._start2)
else:
words.insert(0, get_word_out_of_states(new_states, backwards=True))
log.debug("picked %s", words[0])
# if we didn't get topic stuff, we need to start (forwards) here
# if what we found is too long, abandon it, sadly
if len(words) > max_words:
log.debug("%s is too long, i'm going to give up on it", words)
words.clear()
# if we didn't get topic stuff, we need to start (forwards) here, otherwise we use
# what we already put together (obviously)
if len(words) == 0:
words = [MarkovState._start1, MarkovState._start2]
i = len(words)
while len(words) <= max_words and words[-1] != MarkovState._stop:
while words[-1] != MarkovState._stop:
log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
log.debug("states retrieved")
words.append(get_word_out_of_states(new_states))
# try to find states that are in our targets
if topics and len(topics):
target_hits = list(set(words).intersection(set(topics)))
else:
target_hits = []
# if we're over min_words, and got a stop naturally, use it
if len(words) > min_words and MarkovState._stop in new_states:
log.debug("found a stop in the results, intentionally picking it")
words.append(MarkovState._stop)
elif len(target_hits) > 0:
target_hit = random.choice(target_hits)
log.debug("found a topic hit %s, using it", target_hit)
topics.remove(target_hit)
words.append(target_hit)
else:
words.append(get_word_out_of_states(new_states))
log.debug("picked %s", words[-1])
i += 1
words = [word for word in words if word not in