markov: move shared methods into lib.py
This commit is contained in:
parent
1570bc28f9
commit
c371df5eb0
150
dr_botzo/markov/lib.py
Normal file
150
dr_botzo/markov/lib.py
Normal file
@ -0,0 +1,150 @@
|
||||
import logging
|
||||
import random
|
||||
|
||||
from django.db.models import Sum
|
||||
|
||||
from markov.models import MarkovContext, MarkovState, MarkovTarget
|
||||
|
||||
|
||||
log = logging.getLogger('markov.lib')
|
||||
|
||||
|
||||
def generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
    """Chain generated sentences together until the line is long enough.

    Sentences produced by generate_longish_sentence() are appended to the
    line until one of these holds: ``max_sentences`` sentences have been
    used, the line has at least ``min_words`` words, or five sentences have
    been generated.

    context -- the context to generate from (callers pass a MarkovContext)
    topics -- optional list of words, handed through to the sentence generator
    min_words -- stop adding sentences once the line has this many words
    max_words -- per-sentence word limit handed to the sentence generator
    max_sentences -- maximum number of sentences to chain together

    Returns the line as a list of words (possibly empty if nothing could be
    generated).
    """

    punctuation = (',', '.', '!')
    line = []

    # at most five sentences are ever generated, whatever the limits say
    for sentences in range(1, 6):
        line += generate_longish_sentence(context, topics=topics, max_words=max_words)

        if sentences >= max_sentences or len(line) >= min_words:
            return line

        # another sentence is going to follow, so make sure the current one is
        # terminated; guard against an empty line or an empty last word, which
        # would otherwise raise IndexError
        if line and line[-1] and line[-1][-1] not in punctuation:
            line[-1] += random.choice(punctuation)

    # if we got here, we need to give up
    return line
|
||||
|
||||
|
||||
def generate_longish_sentence(context, topics=None, min_words=4, max_words=30):
    """Generate a Markov chain, but throw away the short ones unless we get desperate.

    Up to five sentences are generated; the first one with at least
    ``min_words`` words is returned.  If none qualifies, one more sentence is
    generated without topics and returned whatever its length.

    context -- the context to generate from
    topics -- optional list of words, handed through to generate_sentence()
    min_words -- minimum acceptable sentence length, in words
    max_words -- word limit handed to generate_sentence()

    Returns the sentence as a list of words.
    """

    for _ in range(5):
        sent = generate_sentence(context, topics=topics, max_words=max_words)
        if len(sent) >= min_words:
            return sent

    # if we got here, we need to just give up --- take anything, but still
    # honor the caller's word limit rather than falling back to the default
    return generate_sentence(context, max_words=max_words)
|
||||
|
||||
|
||||
def generate_sentence(context, topics=None, max_words=30):
    """Generate a Markov chain.

    If ``topics`` is non-empty, one word is picked from it at random and, when
    that word is known in the context, the chain is grown backwards from it
    until the start marker is reached (or the chain gets too long).  The chain
    is then grown forwards, from the topic chain or from the start markers,
    until the stop marker is reached (or the chain gets too long).

    context -- the context whose MarkovStates are queried
    topics -- optional list of candidate topic words; the chosen word is
              REMOVED from this list, so a later call with the same list
              will not pick it again
    max_words -- growth stops once the chain holds more than this many entries

    Returns the chain as a list of words, with start/stop markers removed.
    """

    words = []

    # if we have topics, try to work from it and work backwards
    if topics:
        topic_word = random.choice(topics)
        topics.remove(topic_word)
        log.debug(u"looking for topic '{0:s}'".format(topic_word))

        # only existence matters here, so don't fetch every matching row
        if MarkovState.objects.filter(context=context, v=topic_word).exists():
            log.debug(u"found '{0:s}', starting backwards".format(topic_word))
            words.insert(0, topic_word)
            while len(words) <= max_words and words[0] != MarkovState._start2:
                log.debug(u"looking backwards for '{0:s}'".format(words[0]))
                new_states = MarkovState.objects.filter(context=context, v=words[0])
                previous_word = get_word_out_of_states(new_states, backwards=True)
                if not previous_word:
                    # nothing leads to this word; stop rather than pad with blanks
                    break
                words.insert(0, previous_word)

    if len(words) == 0:
        # if we didn't get topic stuff, we need to start (forwards) here
        words = [MarkovState._start1, MarkovState._start2]
    elif len(words) == 1 and words[0] == MarkovState._start2:
        # the topic was the start marker itself; complete the starting pair so
        # the forward lookup below has two real keys to work with
        words.insert(0, MarkovState._start1)

    while len(words) <= max_words and words[-1] != MarkovState._stop:
        # with a single entry (only possible when max_words is 0, in which
        # case this loop never runs) there is no pair to look up
        k1, k2 = words[-2], words[-1]
        log.debug(u"looking for '{0:s}','{1:s}'".format(k1, k2))
        new_states = MarkovState.objects.filter(context=context, k1=k1, k2=k2)
        log.debug(u"states retrieved")
        next_word = get_word_out_of_states(new_states)
        if not next_word:
            # nothing follows this pair (e.g. nothing has been learned yet)
            break
        words.append(next_word)

    markers = (MarkovState._start1, MarkovState._start2, MarkovState._stop)
    return [word for word in words if word not in markers]
|
||||
|
||||
|
||||
def get_or_create_target_context(target_name):
    """Return the context for a provided nick/channel, creating missing ones.

    target_name -- the name of the MarkovTarget to look up

    If the target does not exist, a context named after the target is fetched
    or created first and the new target is attached to it.  If the target
    exists but has no context, one is attached the same way.

    Returns the target's MarkovContext.
    """

    try:
        target = MarkovTarget.objects.get(name=target_name)
    except MarkovTarget.DoesNotExist:
        # make a context --- lacking a good idea, just create one with this
        # target name until configured otherwise.  it is made before the
        # target so the target is never inserted without one (presumably the
        # relation is required at insert time; confirm against the model)
        context, _ = MarkovContext.objects.get_or_create(name=target_name)
        target, _ = MarkovTarget.objects.get_or_create(name=target_name,
                                                       defaults={'context': context})
        return target.context

    try:
        context = target.context
    except MarkovContext.DoesNotExist:
        context = None

    # an unset relation may surface either as DoesNotExist or as None,
    # depending on how the field is declared; repair both the same way
    if context is None:
        context, _ = MarkovContext.objects.get_or_create(name=target_name)
        target.context = context
        target.save()

    return context
|
||||
|
||||
|
||||
def get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states.

    The pick is weighted by each state's ``count``: a number between 1 and the
    sum of all counts is drawn, and the first state whose running total
    reaches it is selected.

    states -- a queryset of MarkovStates to choose from
    backwards -- if True return the selected state's ``k2``, otherwise its ``v``

    Returns the chosen word, or '' if there is nothing to choose from.
    """

    new_word = ''

    # the aggregate is None when the queryset is empty
    count_sum = states.aggregate(Sum('count'))['count__sum']
    if not count_sum:
        log.debug(u"found '{0:s}'".format(new_word))
        return new_word

    # randint() includes both ends; starting at 1 (not 0) keeps the first
    # state from getting one extra chance of being selected
    hit = random.randint(1, count_sum)
    log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit))

    running = 0
    for state in states.iterator():
        running += state.count
        if running >= hit:
            new_word = state.k2 if backwards else state.v
            break

    log.debug(u"found '{0:s}'".format(new_word))
    return new_word
|
||||
|
||||
|
||||
def learn_line(line, context):
    """Record the word transitions of one line of text as MarkovStates.

    The line is split on whitespace and wrapped in the start/stop markers;
    every run of three consecutive entries (k1, k2 -> v) has its state in the
    given context fetched or created, and that state's count is incremented.

    line -- the text to learn
    context -- the context the states belong to

    Returns None.  Nothing is learned if any word is longer than the ``k1``
    field allows.
    """

    log.debug(u"learning %s...", line[:40])

    tokens = [MarkovState._start1, MarkovState._start2]
    tokens.extend(line.split())
    tokens.append(MarkovState._stop)

    # one oversized word rejects the whole line
    longest_allowed = MarkovState._meta.get_field('k1').max_length
    if any(len(token) > longest_allowed for token in tokens):
        return

    # slide a three-entry window across the line
    for first, second, following in zip(tokens, tokens[1:], tokens[2:]):
        log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(first, second, following))
        state, _ = MarkovState.objects.get_or_create(context=context,
                                                     k1=first,
                                                     k2=second,
                                                     v=following)
        state.count += 1
        state.save()
|
@ -1,16 +1,15 @@
|
||||
"""Manipulate Markov data via the Django site."""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
from django.contrib.auth.decorators import permission_required
|
||||
from django.db.models import Sum
|
||||
from django.http import HttpResponse
|
||||
from django.shortcuts import get_object_or_404, render
|
||||
|
||||
from markov.forms import LogUploadForm, TeachLineForm
|
||||
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
||||
import markov.lib as markovlib
|
||||
from markov.models import MarkovContext
|
||||
|
||||
|
||||
log = logging.getLogger('markov.views')
|
||||
@ -27,7 +26,7 @@ def context_index(request, context_id):
|
||||
|
||||
start_t = time.time()
|
||||
context = get_object_or_404(MarkovContext, pk=context_id)
|
||||
chain = u" ".join(_generate_line(context))
|
||||
chain = u" ".join(markovlib.generate_line(context))
|
||||
end_t = time.time()
|
||||
|
||||
return render(request, 'context.html', {'chain': chain,
|
||||
@ -66,7 +65,7 @@ def import_file(request):
|
||||
|
||||
# this is a line we probably care about now
|
||||
what = [x for x in what.rstrip().split(' ') if x not in strips]
|
||||
_learn_line(' '.join(what), context)
|
||||
markovlib.learn_line(' '.join(what), context)
|
||||
|
||||
log.debug(set(whos))
|
||||
else:
|
||||
@ -86,133 +85,8 @@ def teach_line(request):
|
||||
context = form.cleaned_data['context']
|
||||
strips = form.cleaned_data['strip_prefixes'].split(' ')
|
||||
what = [x for x in line.rstrip().split(' ') if x not in strips]
|
||||
_learn_line(' '.join(what), context)
|
||||
markovlib.learn_line(' '.join(what), context)
|
||||
else:
|
||||
form = TeachLineForm()
|
||||
|
||||
return render(request, 'teach_line.html', {'form': form})
|
||||
|
||||
|
||||
def _generate_sentence(context, topics=None, max_words=30):
|
||||
"""Generate a Markov chain."""
|
||||
|
||||
words = []
|
||||
# if we have topics, try to work from it and work backwards
|
||||
if topics:
|
||||
topic_word = random.choice(topics)
|
||||
topics.remove(topic_word)
|
||||
log.debug(u"looking for topic '{0:s}'".format(topic_word))
|
||||
new_states = MarkovState.objects.filter(context=context, v=topic_word)
|
||||
|
||||
if len(new_states) > 0:
|
||||
log.debug(u"found '{0:s}', starting backwards".format(topic_word))
|
||||
words.insert(0, topic_word)
|
||||
while len(words) <= max_words and words[0] != MarkovState._start2:
|
||||
log.debug(u"looking backwards for '{0:s}'".format(words[0]))
|
||||
new_states = MarkovState.objects.filter(context=context, v=words[0])
|
||||
words.insert(0, _get_word_out_of_states(new_states, backwards=True))
|
||||
|
||||
# if we didn't get topic stuff, we need to start (forwards) here
|
||||
if len(words) == 0:
|
||||
words = [MarkovState._start1, MarkovState._start2]
|
||||
|
||||
i = len(words)
|
||||
while len(words) <= max_words and words[-1] != MarkovState._stop:
|
||||
log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
|
||||
new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
|
||||
log.debug(u"states retrieved")
|
||||
words.append(_get_word_out_of_states(new_states))
|
||||
i += 1
|
||||
|
||||
words = [word for word in words if word not in
|
||||
(MarkovState._start1, MarkovState._start2, MarkovState._stop)]
|
||||
|
||||
return words
|
||||
|
||||
|
||||
def _generate_longish_sentence(context, topics=None, min_words=4, max_words=30):
|
||||
"""Generate a Markov chain, but throw away the short ones unless we get desperate."""
|
||||
|
||||
tries = 0
|
||||
while tries < 5:
|
||||
sent = _generate_sentence(context, topics=topics, max_words=max_words)
|
||||
if len(sent) >= min_words:
|
||||
return sent
|
||||
|
||||
tries += 1
|
||||
|
||||
# if we got here, we need to just give up
|
||||
return _generate_sentence(context)
|
||||
|
||||
|
||||
def _generate_line(context, topics=None, min_words=15, max_words=30, max_sentences=3):
|
||||
"""String multiple sentences together into a coherent sentence."""
|
||||
|
||||
tries = 0
|
||||
sentences = 0
|
||||
line = []
|
||||
while tries < 5:
|
||||
line += _generate_longish_sentence(context, topics=topics, max_words=max_words)
|
||||
sentences += 1
|
||||
if sentences >= max_sentences:
|
||||
return line
|
||||
if len(line) >= min_words:
|
||||
return line
|
||||
else:
|
||||
if line[-1][-1] not in [',', '.', '!']:
|
||||
line[-1] += random.choice([',', '.', '!'])
|
||||
|
||||
tries += 1
|
||||
|
||||
# if we got here, we need to give up
|
||||
return line
|
||||
|
||||
|
||||
def _get_word_out_of_states(states, backwards=False):
|
||||
"""Pick one random word out of the given states."""
|
||||
|
||||
new_word = ''
|
||||
running = 0
|
||||
count_sum = states.aggregate(Sum('count'))['count__sum']
|
||||
hit = random.randint(0, count_sum)
|
||||
|
||||
log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit))
|
||||
|
||||
states_itr = states.iterator()
|
||||
for state in states_itr:
|
||||
running += state.count
|
||||
if running >= hit:
|
||||
if backwards:
|
||||
new_word = state.k2
|
||||
else:
|
||||
new_word = state.v
|
||||
|
||||
break
|
||||
|
||||
log.debug(u"found '{0:s}'".format(new_word))
|
||||
return new_word
|
||||
|
||||
|
||||
def _learn_line(line, context):
|
||||
"""Create a bunch of MarkovStates for a given line of text."""
|
||||
|
||||
log.debug(u"learning {0:.40s}...".format(line))
|
||||
|
||||
words = line.split()
|
||||
words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop]
|
||||
|
||||
for word in words:
|
||||
if len(word) > MarkovState._meta.get_field('k1').max_length:
|
||||
return
|
||||
|
||||
for i, word in enumerate(words):
|
||||
log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2]))
|
||||
state, created = MarkovState.objects.get_or_create(context=context,
|
||||
k1=words[i],
|
||||
k2=words[i+1],
|
||||
v=words[i+2])
|
||||
state.count += 1
|
||||
state.save()
|
||||
|
||||
if i > len(words) - 4:
|
||||
break
|
||||
|
@ -25,8 +25,8 @@ import time
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import markov.lib as markovlib
|
||||
from markov.models import MarkovContext, MarkovState, MarkovTarget
|
||||
from markov.views import _generate_line, _learn_line
|
||||
|
||||
from extlib import irclib
|
||||
|
||||
@ -66,10 +66,6 @@ class Markov(Module):
|
||||
self.next_chatter_check = 0
|
||||
thread.start_new_thread(self.thread_do, ())
|
||||
|
||||
# TODO: bring this back somehow
|
||||
#irc.xmlrpc_register_function(self._generate_line,
|
||||
# "markov_generate_line")
|
||||
|
||||
def register_handlers(self):
|
||||
"""Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
|
||||
|
||||
@ -111,8 +107,8 @@ class Markov(Module):
|
||||
return
|
||||
|
||||
if not event._recursing:
|
||||
context = _get_or_create_target_context(target)
|
||||
_learn_line(what, context)
|
||||
context = markovlib.get_or_create_target_context(target)
|
||||
markovlib.learn_line(what, context)
|
||||
|
||||
def do(self, connection, event, nick, userhost, what, admin_unlocked):
|
||||
"""Handle commands and inputs."""
|
||||
@ -129,7 +125,7 @@ class Markov(Module):
|
||||
if not self.shut_up:
|
||||
# not a command, so see if i'm being mentioned
|
||||
if re.search(connection.get_nickname(), what, re.IGNORECASE) is not None:
|
||||
context = _get_or_create_target_context(target)
|
||||
context = markovlib.get_or_create_target_context(target)
|
||||
|
||||
addressed_pattern = '^' + connection.get_nickname() + '[:,]\s+(.*)'
|
||||
addressed_re = re.compile(addressed_pattern)
|
||||
@ -140,15 +136,15 @@ class Markov(Module):
|
||||
|
||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||
return self.irc.reply(event, u"{0:s}: {1:s}".format(nick,
|
||||
u" ".join(_generate_line(context, topics=topics, max_sentences=1))))
|
||||
u" ".join(markovlib.generate_line(context, topics=topics, max_sentences=1))))
|
||||
else:
|
||||
# i wasn't addressed directly, so just respond
|
||||
topics = [x for x in what.split(' ') if len(x) >= 3]
|
||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||
|
||||
return self.irc.reply(event, u"{0:s}".format(u" ".join(_generate_line(context,
|
||||
topics=topics,
|
||||
max_sentences=1))))
|
||||
return self.irc.reply(event, u"{0:s}".format(u" ".join(markovlib.generate_line(context,
|
||||
topics=topics,
|
||||
max_sentences=1))))
|
||||
|
||||
def markov_learn(self, event, nick, userhost, what, admin_unlocked):
|
||||
"""Learn one line, as provided to the command."""
|
||||
@ -161,8 +157,8 @@ class Markov(Module):
|
||||
match = self.learnre.search(what)
|
||||
if match:
|
||||
line = match.group(1)
|
||||
context = _get_or_create_target_context(target)
|
||||
_learn_line(line, context)
|
||||
context = markovlib.get_or_create_target_context(target)
|
||||
markovlib.learn_line(line, context)
|
||||
|
||||
# return what was learned, for weird chaining purposes
|
||||
return line
|
||||
@ -179,7 +175,7 @@ class Markov(Module):
|
||||
if match:
|
||||
min_size = 15
|
||||
max_size = 30
|
||||
context = _get_or_create_target_context(target)
|
||||
context = markovlib.get_or_create_target_context(target)
|
||||
|
||||
if match.group(2):
|
||||
min_size = int(match.group(2))
|
||||
@ -191,14 +187,14 @@ class Markov(Module):
|
||||
topics = [x for x in line.split(' ') if len(x) >= 3]
|
||||
|
||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||
return u" ".join(_generate_line(context, topics=topics,
|
||||
min_words=min_size, max_words=max_size,
|
||||
max_sentences=1))
|
||||
return u" ".join(markovlib.generate_line(context, topics=topics,
|
||||
min_words=min_size, max_words=max_size,
|
||||
max_sentences=1))
|
||||
else:
|
||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||
return u" ".join(_generate_line(context, min_words=min_size,
|
||||
max_words=max_size,
|
||||
max_sentences=1))
|
||||
return u" ".join(markovlib.generate_line(context, min_words=min_size,
|
||||
max_words=max_size,
|
||||
max_sentences=1))
|
||||
|
||||
def thread_do(self):
|
||||
"""Do various things."""
|
||||
@ -237,35 +233,3 @@ class Markov(Module):
|
||||
for t in targets:
|
||||
self.sendmsg(t['target'],
|
||||
'shutting up for 30 seconds due to last 30 seconds of activity')
|
||||
|
||||
def _get_or_create_target_context(target_name):
|
||||
"""Return the context for a provided nick/channel, creating missing ones."""
|
||||
|
||||
# find the stuff, or create it
|
||||
try:
|
||||
target = MarkovTarget.objects.get(name=target_name)
|
||||
return target.context
|
||||
except MarkovContext.DoesNotExist:
|
||||
# make a context
|
||||
context = MarkovContext()
|
||||
context.name = target_name
|
||||
context.save()
|
||||
|
||||
target.context = context
|
||||
target.save()
|
||||
|
||||
return target.context
|
||||
except MarkovTarget.DoesNotExist:
|
||||
# first we need to make a context for this
|
||||
context = MarkovContext()
|
||||
context.name = target_name
|
||||
context.save()
|
||||
|
||||
target = MarkovTarget()
|
||||
target.name = target_name
|
||||
target.context = context
|
||||
target.save()
|
||||
|
||||
return target.context
|
||||
|
||||
# vi:tabstop=4:expandtab:autoindent
|
||||
|
Loading…
Reference in New Issue
Block a user