markov: tweaks to the sentence generator
This tunes the sentence generator a bit, trying to favor complete sentences over the min/max word counts, which are still heeded, just not as strictly. This *should* create more interesting chains, especially with topics, without really breaking things, but it certainly needs some testing before we can tell whether it's actually right.
This commit is contained in:
parent
70032dc42b
commit
897f29c8d4
|
@ -9,14 +9,16 @@ from markov.models import MarkovContext, MarkovState, MarkovTarget
|
|||
log = logging.getLogger('markov.lib')
|
||||
|
||||
|
||||
def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
|
||||
def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3, max_tries=5):
|
||||
"""String multiple sentences together into a coherent sentence."""
|
||||
|
||||
tries = 0
|
||||
sentences = 0
|
||||
line = []
|
||||
while tries < 5:
|
||||
line += generate_longish_sentence(context, topics=topics, max_words=max_words)
|
||||
min_words_per_sentence = min_words / max_sentences
|
||||
while tries < max_tries:
|
||||
line += generate_longish_sentence(context, topics=topics, min_words=min_words_per_sentence,
|
||||
max_words=max_words, max_tries=max_tries)
|
||||
sentences += 1
|
||||
if sentences >= max_sentences:
|
||||
return line
|
||||
|
@ -32,22 +34,23 @@ def generate_line(context, topics=None, min_words=15, max_words=30, max_sentence
|
|||
return line
|
||||
|
||||
|
||||
def generate_longish_sentence(context, topics=None, min_words=4, max_words=30):
|
||||
def generate_longish_sentence(context, topics=None, min_words=15, max_words=30, max_tries=5):
|
||||
"""Generate a Markov chain, but throw away the short ones unless we get desperate."""
|
||||
|
||||
sent = ""
|
||||
tries = 0
|
||||
while tries < 5:
|
||||
sent = generate_sentence(context, topics=topics, max_words=max_words)
|
||||
while tries < max_tries:
|
||||
sent = generate_sentence(context, topics=topics, min_words=min_words, max_words=max_words)
|
||||
if len(sent) >= min_words:
|
||||
return sent
|
||||
|
||||
tries += 1
|
||||
|
||||
# if we got here, we need to just give up
|
||||
return generate_sentence(context)
|
||||
return sent
|
||||
|
||||
|
||||
def generate_sentence(context, topics=None, max_words=30):
|
||||
def generate_sentence(context, topics=None, min_words=15, max_words=30):
|
||||
"""Generate a Markov chain."""
|
||||
|
||||
words = []
|
||||
|
@ -64,18 +67,48 @@ def generate_sentence(context, topics=None, max_words=30):
|
|||
while len(words) <= max_words and words[0] != MarkovState._start2:
|
||||
log.debug("looking backwards for '{0:s}'".format(words[0]))
|
||||
new_states = MarkovState.objects.filter(context=context, v=words[0])
|
||||
words.insert(0, get_word_out_of_states(new_states, backwards=True))
|
||||
# if we find a start, use it
|
||||
if MarkovState._start2 in new_states:
|
||||
log.debug("found a start2 in the results, intentionally picking it")
|
||||
words.insert(0, MarkovState._start2)
|
||||
else:
|
||||
words.insert(0, get_word_out_of_states(new_states, backwards=True))
|
||||
log.debug("picked %s", words[0])
|
||||
|
||||
# if we didn't get topic stuff, we need to start (forwards) here
|
||||
# if what we found is too long, abandon it, sadly
|
||||
if len(words) > max_words:
|
||||
log.debug("%s is too long, i'm going to give up on it", words)
|
||||
words.clear()
|
||||
|
||||
# if we didn't get topic stuff, we need to start (forwards) here, otherwise we use
|
||||
# what we already put together (obviously)
|
||||
if len(words) == 0:
|
||||
words = [MarkovState._start1, MarkovState._start2]
|
||||
|
||||
i = len(words)
|
||||
while len(words) <= max_words and words[-1] != MarkovState._stop:
|
||||
while words[-1] != MarkovState._stop:
|
||||
log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
|
||||
new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
|
||||
log.debug("states retrieved")
|
||||
words.append(get_word_out_of_states(new_states))
|
||||
|
||||
# try to find states that are in our targets
|
||||
if topics and len(topics):
|
||||
target_hits = list(set(words).intersection(set(topics)))
|
||||
else:
|
||||
target_hits = []
|
||||
|
||||
# if we're over min_words, and got a stop naturally, use it
|
||||
if len(words) > min_words and MarkovState._stop in new_states:
|
||||
log.debug("found a stop in the results, intentionally picking it")
|
||||
words.append(MarkovState._stop)
|
||||
elif len(target_hits) > 0:
|
||||
target_hit = random.choice(target_hits)
|
||||
log.debug("found a topic hit %s, using it", target_hit)
|
||||
topics.remove(target_hit)
|
||||
words.append(target_hit)
|
||||
else:
|
||||
words.append(get_word_out_of_states(new_states))
|
||||
log.debug("picked %s", words[-1])
|
||||
i += 1
|
||||
|
||||
words = [word for word in words if word not in
|
||||
|
|
Loading…
Reference in New Issue