diff --git a/dr_botzo/markov/lib.py b/dr_botzo/markov/lib.py index a03bab6..12b794a 100644 --- a/dr_botzo/markov/lib.py +++ b/dr_botzo/markov/lib.py @@ -9,14 +9,16 @@ from markov.models import MarkovContext, MarkovState, MarkovTarget log = logging.getLogger('markov.lib') -def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3): +def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3, max_tries=5): """String multiple sentences together into a coherent sentence.""" tries = 0 sentences = 0 line = [] - while tries < 5: - line += generate_longish_sentence(context, topics=topics, max_words=max_words) + min_words_per_sentence = min_words / max_sentences + while tries < max_tries: + line += generate_longish_sentence(context, topics=topics, min_words=min_words_per_sentence, + max_words=max_words, max_tries=max_tries) sentences += 1 if sentences >= max_sentences: return line @@ -32,22 +34,23 @@ def generate_line(context, topics=None, min_words=15, max_words=30, max_sentence return line -def generate_longish_sentence(context, topics=None, min_words=4, max_words=30): +def generate_longish_sentence(context, topics=None, min_words=15, max_words=30, max_tries=5): """Generate a Markov chain, but throw away the short ones unless we get desperate.""" + sent = "" tries = 0 - while tries < 5: - sent = generate_sentence(context, topics=topics, max_words=max_words) + while tries < max_tries: + sent = generate_sentence(context, topics=topics, min_words=min_words, max_words=max_words) if len(sent) >= min_words: return sent tries += 1 # if we got here, we need to just give up - return generate_sentence(context) + return sent -def generate_sentence(context, topics=None, max_words=30): +def generate_sentence(context, topics=None, min_words=15, max_words=30): """Generate a Markov chain.""" words = [] @@ -64,18 +67,48 @@ def generate_sentence(context, topics=None, max_words=30): while len(words) <= max_words and words[0] != MarkovState._start2: log.debug("looking backwards for '{0:s}'".format(words[0])) new_states = MarkovState.objects.filter(context=context, v=words[0]) - words.insert(0, get_word_out_of_states(new_states, backwards=True)) + # if we find a start, use it + if MarkovState._start2 in new_states: + log.debug("found a start2 in the results, intentionally picking it") + words.insert(0, MarkovState._start2) + else: + words.insert(0, get_word_out_of_states(new_states, backwards=True)) + log.debug("picked %s", words[0]) - # if we didn't get topic stuff, we need to start (forwards) here + # if what we found is too long, abandon it, sadly + if len(words) > max_words: + log.debug("%s is too long, i'm going to give up on it", words) + words.clear() + + # if we didn't get topic stuff, we need to start (forwards) here, otherwise we use + # what we already put together (obviously) if len(words) == 0: words = [MarkovState._start1, MarkovState._start2] i = len(words) - while len(words) <= max_words and words[-1] != MarkovState._stop: + while words[-1] != MarkovState._stop: log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1])) new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1]) log.debug("states retrieved") - words.append(get_word_out_of_states(new_states)) + + # try to find states that are in our targets + if topics and len(topics): + target_hits = list(set(words).intersection(set(topics))) + else: + target_hits = [] + + # if we're over min_words, and got a stop naturally, use it + if len(words) > min_words and MarkovState._stop in new_states: + log.debug("found a stop in the results, intentionally picking it") + words.append(MarkovState._stop) + elif len(target_hits) > 0: + target_hit = random.choice(target_hits) + log.debug("found a topic hit %s, using it", target_hit) + topics.remove(target_hit) + words.append(target_hit) + else: + words.append(get_word_out_of_states(new_states)) + log.debug("picked %s", words[-1]) i += 1 words = [word for word in words if word not in