From c371df5eb0e16d05c7428b5e046d2ea284fb32b5 Mon Sep 17 00:00:00 2001
From: "Brian S. Stephan"
Date: Fri, 15 May 2015 08:36:17 -0500
Subject: [PATCH] markov: move shared methods into lib.py

---
Review notes (below the "---" marker, so not part of the commit message):

* lib.py is no longer a verbatim copy of the helpers removed from views.py
  and Markov.py. get_word_out_of_states() returns '' instead of raising when
  there are no states to pick from, and draws its hit from 1..sum rather than
  0..sum; generate_sentence() stops when no word is found; generate_line()
  no longer indexes into an empty line; generate_longish_sentence() passes
  max_words to its last attempt; get_or_create_target_context() creates the
  context before a new target, as the removed Markov.py helper did;
  learn_line() loops over range() instead of breaking out of enumerate().
* The commit id on the first line and the blob id on the lib.py index line
  are those of the original revision of this patch.
* The line breaks and indentation of this patch were restored by hand, so
  check the context lines of the views.py and Markov.py hunks against the
  tree before applying.

 dr_botzo/markov/lib.py   | 201 +++++++++++++++++++++++++++++++++++++++
 dr_botzo/markov/views.py | 136 +-------------------------
 ircbot/modules/Markov.py |  70 ++++----------
 3 files changed, 223 insertions(+), 184 deletions(-)
 create mode 100644 dr_botzo/markov/lib.py

diff --git a/dr_botzo/markov/lib.py b/dr_botzo/markov/lib.py
new file mode 100644
index 0000000..6c8e310
--- /dev/null
+++ b/dr_botzo/markov/lib.py
@@ -0,0 +1,201 @@
+import logging
+import random
+
+from django.db.models import Sum
+
+from markov.models import MarkovContext, MarkovState, MarkovTarget
+
+
+log = logging.getLogger('markov.lib')
+
+
+def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
+    """String multiple sentences together into a coherent sentence.
+
+    Sentences are appended until the line has at least min_words words, max_sentences
+    sentences have been used, or five attempts have been made. The line is returned
+    as a list of words, which is empty if nothing could be generated.
+    """
+
+    tries = 0
+    sentences = 0
+    line = []
+    while tries < 5:
+        line += generate_longish_sentence(context, topics=topics, max_words=max_words)
+        sentences += 1
+        if sentences >= max_sentences:
+            return line
+        if len(line) >= min_words:
+            return line
+
+        # another sentence is coming, so punctuate the end of this one --- the line is
+        # still empty if nothing has been generated, so check before indexing into it
+        if line and not line[-1].endswith((',', '.', '!')):
+            line[-1] += random.choice([',', '.', '!'])
+
+        tries += 1
+
+    # if we got here, we need to give up
+    return line
+
+
+def generate_longish_sentence(context, topics=None, min_words=4, max_words=30):
+    """Generate a Markov chain, but throw away the short ones unless we get desperate.
+
+    Up to five sentences are generated, and the first with at least min_words words
+    is returned. If none are long enough, one last sentence is generated without
+    topics and returned regardless of its length.
+    """
+
+    tries = 0
+    while tries < 5:
+        sent = generate_sentence(context, topics=topics, max_words=max_words)
+        if len(sent) >= min_words:
+            return sent
+
+        tries += 1
+
+    # if we got here, we need to just give up, but the word limit still applies
+    return generate_sentence(context, max_words=max_words)
+
+
+def generate_sentence(context, topics=None, max_words=30):
+    """Generate a Markov chain.
+
+    If topics are provided, one is picked at random and removed from the list --- this
+    changes the caller's list, so repeated calls with it try different topics. If that
+    word is known in this context, the chain is built backwards from it towards the
+    start of a sentence before being continued forwards. The sentence is returned as
+    a list of words with the start and stop markers removed.
+    """
+
+    words = []
+    # if we have topics, try to work from it and work backwards
+    if topics:
+        topic_word = random.choice(topics)
+        topics.remove(topic_word)
+        log.debug(u"looking for topic '{0:s}'".format(topic_word))
+        new_states = MarkovState.objects.filter(context=context, v=topic_word)
+
+        if len(new_states) > 0:
+            log.debug(u"found '{0:s}', starting backwards".format(topic_word))
+            words.insert(0, topic_word)
+            while len(words) <= max_words and words[0] != MarkovState._start2:
+                log.debug(u"looking backwards for '{0:s}'".format(words[0]))
+                new_states = MarkovState.objects.filter(context=context, v=words[0])
+                prev_word = get_word_out_of_states(new_states, backwards=True)
+                if not prev_word:
+                    # nothing leads to this word, so the chain can't go back any further
+                    break
+                words.insert(0, prev_word)
+
+    # if we didn't get topic stuff, we need to start (forwards) here
+    if len(words) == 0:
+        words = [MarkovState._start1, MarkovState._start2]
+
+    i = len(words)
+    while len(words) <= max_words and words[-1] != MarkovState._stop:
+        log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
+        new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
+        log.debug(u"states retrieved")
+        next_word = get_word_out_of_states(new_states)
+        if not next_word:
+            # nothing is known to follow this pair (e.g. the context is empty), so stop
+            break
+        words.append(next_word)
+        i += 1
+
+    words = [word for word in words if word not in
+             (MarkovState._start1, MarkovState._start2, MarkovState._stop)]
+
+    return words
+
+
+def get_or_create_target_context(target_name):
+    """Return the context for a provided nick/channel, creating missing ones."""
+
+    # find the stuff, or create it
+    try:
+        target = MarkovTarget.objects.get(name=target_name)
+    except MarkovTarget.DoesNotExist:
+        # a new target needs a context, and the context has to be made first so that the
+        # target is never saved without one --- lacking a good idea, just create one with
+        # this target name until configured otherwise
+        context, c = MarkovContext.objects.get_or_create(name=target_name)
+        target, c = MarkovTarget.objects.get_or_create(name=target_name, context=context)
+
+        return target.context
+
+    try:
+        return target.context
+    except MarkovContext.DoesNotExist:
+        # make a context --- lacking a good idea, just create one with this target name until configured otherwise
+        context, c = MarkovContext.objects.get_or_create(name=target_name)
+        target.context = context
+        target.save()
+
+        return target.context
+
+
+def get_word_out_of_states(states, backwards=False):
+    """Pick one random word out of the given states.
+
+    Each state is weighted by its count. The word returned is the state's v, or its k2
+    if backwards is set. If there are no states to pick from, '' is returned.
+    """
+
+    new_word = ''
+    running = 0
+    count_sum = states.aggregate(Sum('count'))['count__sum']
+    if not count_sum:
+        # the sum is None when there are no states, which leaves nothing to pick from
+        log.debug(u"no states to pick a word out of")
+        return new_word
+
+    # pick from 1 to the sum so that each state is hit in proportion to its count --- a
+    # hit of 0 would just be an extra chance for whichever state comes first
+    hit = random.randint(1, count_sum)
+
+    log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit))
+
+    states_itr = states.iterator()
+    for state in states_itr:
+        running += state.count
+        if running >= hit:
+            if backwards:
+                new_word = state.k2
+            else:
+                new_word = state.v
+
+            break
+
+    log.debug(u"found '{0:s}'".format(new_word))
+    return new_word
+
+
+def learn_line(line, context):
+    """Create a bunch of MarkovStates for a given line of text.
+
+    The line is split on whitespace and wrapped in the start and stop markers, and the
+    count of each (k1, k2) -> v state found in it is incremented. Nothing at all is
+    learned if any word is too long for the k1 field.
+    """
+
+    log.debug(u"learning %s...", line[:40])
+
+    words = line.split()
+    words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop]
+
+    max_length = MarkovState._meta.get_field('k1').max_length
+    if any(len(word) > max_length for word in words):
+        return
+
+    # every word but the last two begins a (k1, k2) -> v triple
+    for i in range(len(words) - 2):
+        log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2]))
+        state, created = MarkovState.objects.get_or_create(context=context,
+                                                           k1=words[i],
+                                                           k2=words[i+1],
+                                                           v=words[i+2])
+        state.count += 1
+        state.save()
diff --git a/dr_botzo/markov/views.py b/dr_botzo/markov/views.py
index 13b283a..66417d0 100644
--- a/dr_botzo/markov/views.py
+++ b/dr_botzo/markov/views.py
@@ -1,16 +1,15 @@
 """Manipulate Markov data via the Django site."""
 
 import logging
-import random
 import time
 
 from django.contrib.auth.decorators import permission_required
-from django.db.models import Sum
 from django.http import HttpResponse
 from django.shortcuts import get_object_or_404, render
 
 from markov.forms import LogUploadForm, TeachLineForm
-from markov.models import MarkovContext, MarkovTarget, MarkovState
+import markov.lib as markovlib
+from markov.models import MarkovContext
 
 
 log = logging.getLogger('markov.views')
@@ -27,7 +26,7 @@ def context_index(request, context_id):
 
     start_t = time.time()
     context = get_object_or_404(MarkovContext, pk=context_id)
-    chain = u" ".join(_generate_line(context))
+    chain = u" ".join(markovlib.generate_line(context))
     end_t = time.time()
 
     return render(request, 'context.html', {'chain': chain,
@@ -66,7 +65,7 @@ def import_file(request):
 
                 # this is a line we probably care about now
                 what = [x for x in what.rstrip().split(' ') if x not in strips]
-                _learn_line(' '.join(what), context)
+                markovlib.learn_line(' '.join(what), context)
 
             log.debug(set(whos))
     else:
@@ -86,133 +85,8 @@ def teach_line(request):
             context = form.cleaned_data['context']
             strips = form.cleaned_data['strip_prefixes'].split(' ')
             what = [x for x in line.rstrip().split(' ') if x not in strips]
-            _learn_line(' '.join(what), context)
+            markovlib.learn_line(' '.join(what), context)
     else:
         form = TeachLineForm()
 
     return render(request, 'teach_line.html', {'form': form})
-
-
-def _generate_sentence(context, topics=None, max_words=30):
-    """Generate a Markov chain."""
-
-    words = []
-    # if we have topics, try to work from it and work backwards
-    if topics:
-        topic_word = random.choice(topics)
-        topics.remove(topic_word)
-        log.debug(u"looking for topic '{0:s}'".format(topic_word))
-        new_states = MarkovState.objects.filter(context=context, v=topic_word)
-
-        if len(new_states) > 0:
-            log.debug(u"found '{0:s}', starting backwards".format(topic_word))
-            words.insert(0, topic_word)
-            while len(words) <= max_words and words[0] != MarkovState._start2:
-                log.debug(u"looking backwards for '{0:s}'".format(words[0]))
-                new_states = MarkovState.objects.filter(context=context, v=words[0])
-                words.insert(0, _get_word_out_of_states(new_states, backwards=True))
-
-    # if we didn't get topic stuff, we need to start (forwards) here
-    if len(words) == 0:
-        words = [MarkovState._start1, MarkovState._start2]
-
-    i = len(words)
-    while len(words) <= max_words and words[-1] != MarkovState._stop:
-        log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
-        new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
-        log.debug(u"states retrieved")
-        words.append(_get_word_out_of_states(new_states))
-        i += 1
-
-    words = [word for word in words if word not in
-             (MarkovState._start1, MarkovState._start2, MarkovState._stop)]
-
-    return words
-
-
-def _generate_longish_sentence(context, topics=None, min_words=4, max_words=30):
-    """Generate a Markov chain, but throw away the short ones unless we get desperate."""
-
-    tries = 0
-    while tries < 5:
-        sent = _generate_sentence(context, topics=topics, max_words=max_words)
-        if len(sent) >= min_words:
-            return sent
-
-        tries += 1
-
-    # if we got here, we need to just give up
-    return _generate_sentence(context)
-
-
-def _generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
-    """String multiple sentences together into a coherent sentence."""
-
-    tries = 0
-    sentences = 0
-    line = []
-    while tries < 5:
-        line += _generate_longish_sentence(context, topics=topics, max_words=max_words)
-        sentences += 1
-        if sentences >= max_sentences:
-            return line
-        if len(line) >= min_words:
-            return line
-        else:
-            if line[-1][-1] not in [',', '.', '!']:
-                line[-1] += random.choice([',', '.', '!'])
-
-        tries += 1
-
-    # if we got here, we need to give up
-    return line
-
-
-def _get_word_out_of_states(states, backwards=False):
-    """Pick one random word out of the given states."""
-
-    new_word = ''
-    running = 0
-    count_sum = states.aggregate(Sum('count'))['count__sum']
-    hit = random.randint(0, count_sum)
-
-    log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit))
-
-    states_itr = states.iterator()
-    for state in states_itr:
-        running += state.count
-        if running >= hit:
-            if backwards:
-                new_word = state.k2
-            else:
-                new_word = state.v
-
-            break
-
-    log.debug(u"found '{0:s}'".format(new_word))
-    return new_word
-
-
-def _learn_line(line, context):
-    """Create a bunch of MarkovStates for a given line of text."""
-
-    log.debug(u"learning {0:.40s}...".format(line))
-
-    words = line.split()
-    words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop]
-
-    for word in words:
-        if len(word) > MarkovState._meta.get_field('k1').max_length:
-            return
-
-    for i, word in enumerate(words):
-        log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2]))
-        state, created = MarkovState.objects.get_or_create(context=context,
-                                                           k1=words[i],
-                                                           k2=words[i+1],
-                                                           v=words[i+2])
-        state.count += 1
-        state.save()
-
-        if i > len(words) - 4:
-            break
diff --git a/ircbot/modules/Markov.py b/ircbot/modules/Markov.py
index beb9dc8..d92620b 100644
--- a/ircbot/modules/Markov.py
+++ b/ircbot/modules/Markov.py
@@ -25,8 +25,8 @@ import time
 
 from dateutil.relativedelta import relativedelta
 
+import markov.lib as markovlib
 from markov.models import MarkovContext, MarkovState, MarkovTarget
-from markov.views import _generate_line, _learn_line
 
 from extlib import irclib
 
@@ -66,10 +66,6 @@ class Markov(Module):
         self.next_chatter_check = 0
         thread.start_new_thread(self.thread_do, ())
 
-        # TODO: bring this back somehow
-        #irc.xmlrpc_register_function(self._generate_line,
-        #                             "markov_generate_line")
-
     def register_handlers(self):
         """Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
 
@@ -111,8 +107,8 @@ class Markov(Module):
             return
 
         if not event._recursing:
-            context = _get_or_create_target_context(target)
-            _learn_line(what, context)
+            context = markovlib.get_or_create_target_context(target)
+            markovlib.learn_line(what, context)
 
     def do(self, connection, event, nick, userhost, what, admin_unlocked):
         """Handle commands and inputs."""
@@ -129,7 +125,7 @@ class Markov(Module):
         if not self.shut_up:
             # not a command, so see if i'm being mentioned
             if re.search(connection.get_nickname(), what, re.IGNORECASE) is not None:
-                context = _get_or_create_target_context(target)
+                context = markovlib.get_or_create_target_context(target)
 
                 addressed_pattern = '^' + connection.get_nickname() + '[:,]\s+(.*)'
                 addressed_re = re.compile(addressed_pattern)
@@ -140,15 +136,15 @@ class Markov(Module):
 
                     self.lines_seen.append(('.self.said.', datetime.now()))
                     return self.irc.reply(event, u"{0:s}: {1:s}".format(nick,
-                        u" ".join(_generate_line(context, topics=topics, max_sentences=1))))
+                        u" ".join(markovlib.generate_line(context, topics=topics, max_sentences=1))))
                 else:
                     # i wasn't addressed directly, so just respond
                     topics = [x for x in what.split(' ') if len(x) >= 3]
 
                     self.lines_seen.append(('.self.said.', datetime.now()))
-                    return self.irc.reply(event, u"{0:s}".format(u" ".join(_generate_line(context,
-                                                                                          topics=topics,
-                                                                                          max_sentences=1))))
+                    return self.irc.reply(event, u"{0:s}".format(u" ".join(markovlib.generate_line(context,
+                                                                                                   topics=topics,
+                                                                                                   max_sentences=1))))
 
     def markov_learn(self, event, nick, userhost, what, admin_unlocked):
         """Learn one line, as provided to the command."""
@@ -161,8 +157,8 @@ class Markov(Module):
         match = self.learnre.search(what)
         if match:
             line = match.group(1)
-            context = _get_or_create_target_context(target)
-            _learn_line(line, context)
+            context = markovlib.get_or_create_target_context(target)
+            markovlib.learn_line(line, context)
 
             # return what was learned, for weird chaining purposes
             return line
@@ -179,7 +175,7 @@ class Markov(Module):
         if match:
             min_size = 15
             max_size = 30
-            context = _get_or_create_target_context(target)
+            context = markovlib.get_or_create_target_context(target)
 
             if match.group(2):
                 min_size = int(match.group(2))
@@ -191,14 +187,14 @@ class Markov(Module):
                 topics = [x for x in line.split(' ') if len(x) >= 3]
 
                 self.lines_seen.append(('.self.said.', datetime.now()))
-                return u" ".join(_generate_line(context, topics=topics,
-                                                min_words=min_size, max_words=max_size,
-                                                max_sentences=1))
+                return u" ".join(markovlib.generate_line(context, topics=topics,
+                                                         min_words=min_size, max_words=max_size,
+                                                         max_sentences=1))
             else:
                 self.lines_seen.append(('.self.said.', datetime.now()))
-                return u" ".join(_generate_line(context, min_words=min_size,
-                                                max_words=max_size,
-                                                max_sentences=1))
+                return u" ".join(markovlib.generate_line(context, min_words=min_size,
+                                                         max_words=max_size,
+                                                         max_sentences=1))
 
     def thread_do(self):
         """Do various things."""
@@ -237,35 +233,3 @@ class Markov(Module):
                     for t in targets:
                         self.sendmsg(t['target'],
                                      'shutting up for 30 seconds due to last 30 seconds of activity')
-
-def _get_or_create_target_context(target_name):
-    """Return the context for a provided nick/channel, creating missing ones."""
-
-    # find the stuff, or create it
-    try:
-        target = MarkovTarget.objects.get(name=target_name)
-        return target.context
-    except MarkovContext.DoesNotExist:
-        # make a context
-        context = MarkovContext()
-        context.name = target_name
-        context.save()
-
-        target.context = context
-        target.save()
-
-        return target.context
-    except MarkovTarget.DoesNotExist:
-        # first we need to make a context for this
-        context = MarkovContext()
-        context.name = target_name
-        context.save()
-
-        target = MarkovTarget()
-        target.name = target_name
-        target.context = context
-        target.save()
-
-        return target.context
-
-# vi:tabstop=4:expandtab:autoindent