This shouldn't have happened, but I'm guessing some previous crash put buggy data into my database, so let's be careful and handle it. A k1,k2 pair could have had any value for v, but not knowing what else to do in this corner case, we'll just use a stop and let the caller decide whether to keep going.
166 lines
5.5 KiB
166 lines
5.5 KiB
import logging
import random
from django.db.models import Sum
from markov.models import MarkovContext, MarkovState, MarkovTarget
log = logging.getLogger('markov.lib')
def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
    """String multiple sentences together into one line of words.

    Sentences from generate_longish_sentence() are concatenated until either
    ``max_sentences`` sentences have been added or the line holds at least
    ``min_words`` words. At most five sentences are attempted.

    Args:
        context: passed through to generate_longish_sentence().
        topics: optional topic words, passed through unchanged.
        min_words: line length (in words) at which the line is returned.
        max_words: per-sentence cap, passed through unchanged.
        max_sentences: number of sentences after which the line is returned
            regardless of its length.

    Returns:
        The accumulated list of words; may be shorter than ``min_words`` (or
        even empty) if the attempts run out.
    """
    punctuation = (',', '.', '!')
    line = []

    # one sentence is added per attempt, so the attempt number is also the sentence count
    for sentences in range(1, 6):
        line += generate_longish_sentence(context, topics=topics, max_words=max_words)

        if sentences >= max_sentences or len(line) >= min_words:
            return line

        # another sentence will follow, so make sure the current one ends with
        # punctuation; skip this when nothing was generated at all, since
        # indexing into an empty line would raise IndexError
        if line and not line[-1].endswith(punctuation):
            line[-1] += random.choice(punctuation)

    # if we got here, we need to give up
    return line
def generate_longish_sentence(context, topics=None, min_words=4, max_words=30):
    """Generate a Markov chain, but throw away the short ones unless we get desperate.

    Up to five sentences are generated; the first one with at least
    ``min_words`` words is returned. If none qualifies, one more sentence is
    generated and returned whatever its length.

    Args:
        context: passed through to generate_sentence().
        topics: optional topic words for the length-checked attempts.
        min_words: minimum number of words for a sentence to be accepted.
        max_words: cap passed to every generate_sentence() call.

    Returns:
        A list of words, possibly shorter than ``min_words``.
    """
    for _attempt in range(5):
        sent = generate_sentence(context, topics=topics, max_words=max_words)
        if len(sent) >= min_words:
            return sent

    # if we got here, we need to just give up and take whatever comes out; the
    # caller's max_words still applies to this last-resort sentence.
    # NOTE(review): topics are not passed on this fallback, as before --
    # presumably to get an unconstrained sentence; confirm that is intended.
    return generate_sentence(context, max_words=max_words)
def generate_sentence(context, topics=None, max_words=30):
    """Generate a single Markov chain as a list of words.

    If ``topics`` is given, one topic word is picked at random. When some state
    in ``context`` has that word as its value, the chain is seeded with it and
    extended backwards until the second start marker is reached or the chain
    grows past ``max_words``. Otherwise the chain starts from the two start
    markers. The chain is then extended forwards from its last two words until
    the stop marker is drawn or the chain grows past ``max_words``.

    Args:
        context: the context whose MarkovState rows are queried.
        topics: optional sequence of candidate words to build the chain around.
        max_words: both loops run while ``len(words) <= max_words``; the count
            includes marker words, which are stripped from the result.

    Returns:
        The chain as a list of words with start/stop markers removed; may be
        empty.
    """
    words = []

    # if we have topics, try to work from it and work backwards
    if topics:
        topic_word = random.choice(topics)
        log.debug("looking for topic '{0:s}'".format(topic_word))
        new_states = MarkovState.objects.filter(context=context, v=topic_word)

        # exists() avoids loading every matching row just to test for presence
        if new_states.exists():
            log.debug("found '{0:s}', starting backwards".format(topic_word))
            words.insert(0, topic_word)
            while len(words) <= max_words and words[0] != MarkovState._start2:
                log.debug("looking backwards for '{0:s}'".format(words[0]))
                new_states = MarkovState.objects.filter(context=context, v=words[0])
                words.insert(0, get_word_out_of_states(new_states, backwards=True))

    # if we didn't get topic stuff, we need to start (forwards) here
    if not words:
        words = [MarkovState._start1, MarkovState._start2]

    # i tracks len(words); words[i-2], words[i-1] is the key pair for the next word
    i = len(words)
    while len(words) <= max_words and words[-1] != MarkovState._stop:
        log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
        new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
        log.debug("states retrieved")
        # actually extend the chain -- without this the loop never progresses
        words.append(get_word_out_of_states(new_states))
        i += 1

    markers = (MarkovState._start1, MarkovState._start2, MarkovState._stop)
    return [word for word in words if word not in markers]
def get_or_create_target_context(target_name):
    """Return the context for a provided nick/channel, creating missing ones.

    The name is lowercased before lookup. If no MarkovTarget with that name
    exists, a MarkovContext of the same name is fetched or created and a new
    target pointing at it is created. If the target exists but reading its
    context raises MarkovContext.DoesNotExist, a context of the same name is
    fetched or created, attached to the target, and saved.

    Args:
        target_name: nick or channel name; matched case-insensitively by
            lowercasing.

    Returns:
        The context associated with the target.
    """
    target_name = target_name.lower()

    # find the stuff, or create it
    try:
        target = MarkovTarget.objects.get(name=target_name)
    except MarkovTarget.DoesNotExist:
        # we need to create a context and a target, and we have to make the context first
        # make a context --- lacking a good idea, just create one with this target name
        # until configured otherwise
        context, _created = MarkovContext.objects.get_or_create(name=target_name)
        target, _created = MarkovTarget.objects.get_or_create(name=target_name, context=context)
        return target.context

    try:
        return target.context
    except MarkovContext.DoesNotExist:
        # make a context --- lacking a good idea, just create one with this target name
        # until configured otherwise
        context, _created = MarkovContext.objects.get_or_create(name=target_name)
        target.context = context
        # persist the repair so the target does not stay broken in the database
        target.save()
        return target.context
def get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states, weighted by state count.

    A number between 1 and the sum of all ``count`` values is drawn, and the
    states are walked with a running total until it reaches that number, so a
    state is chosen with probability proportional to its count.

    Args:
        states: queryset of MarkovState rows to choose from. If it is empty,
            all states whose value is the stop marker are used instead.
        backwards: when true, return the chosen state's ``k2`` (the word
            preceding its value) instead of its ``v``.

    Returns:
        The chosen word. If there is nothing to choose from (no rows, or a
        zero count total), the terminating marker for the walk direction is
        returned: the second start marker when ``backwards``, otherwise the
        stop marker.
    """
    # work around possible broken data, where a k1,k2 should have a value but doesn't
    if len(states) == 0:
        states = MarkovState.objects.filter(v=MarkovState._stop)

    count_sum = states.aggregate(Sum('count'))['count__sum']
    if not count_sum:
        # Sum() yields None over zero rows; rather than crash in randint(),
        # end the chain and let the caller decide what to do with it
        return MarkovState._start2 if backwards else MarkovState._stop

    # draw from 1..sum so each state's share is exactly its count; starting
    # at 0 would give the first state one extra chance
    hit = random.randint(1, count_sum)
    log.debug("sum: {0:d} hit: {1:d}".format(count_sum, hit))

    new_word = ''
    running = 0
    for state in states.iterator():
        running += state.count
        if running >= hit:
            new_word = state.k2 if backwards else state.v
            # stop at the first state that covers the hit; continuing would
            # always end up selecting the last state
            break

    log.debug("found '{0:s}'".format(new_word))
    return new_word
def learn_line(line, context):
"""Create a bunch of MarkovStates for a given line of text."""
log.debug("learning %s...", line[:40])
words = line.split()
words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop]
for word in words:
if len(word) > MarkovState._meta.get_field('k1').max_length:
for i, word in enumerate(words):
log.debug("'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2]))
state, created = MarkovState.objects.get_or_create(context=context,
state.count += 1
if i > len(words) - 4: