219 lines
6.8 KiB
Python
219 lines
6.8 KiB
Python
"""
|
|
markov/views.py --- manipulate markov data
|
|
|
|
"""
|
|
|
|
import logging
|
|
import random
|
|
import time
|
|
|
|
from django.contrib.auth.decorators import permission_required
|
|
from django.db.models import Sum
|
|
from django.http import HttpResponse
|
|
from django.shortcuts import get_object_or_404, render
|
|
|
|
from markov.forms import LogUploadForm, TeachLineForm
|
|
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
|
|
|
|
|
log = logging.getLogger('dr_botzo.markov')
|
|
|
|
|
|
def index(request):
    """Return an empty HTTP response; this view has no content yet."""
    placeholder = HttpResponse()
    return placeholder
|
|
|
|
|
|
def context_index(request, context_id):
    """Render the page for one Markov context with a freshly generated chain.

    The context is looked up by primary key (a missing one yields a 404), a
    sentence is generated from it, and the template receives the joined
    chain, the context object, and the seconds spent between the two
    time.time() readings.
    """
    began = time.time()
    markov_context = get_object_or_404(MarkovContext, pk=context_id)
    generated_words = _generate_sentence(markov_context)
    chain_text = ' '.join(generated_words)
    finished = time.time()

    template_vars = {
        'chain': chain_text,
        'context': markov_context,
        'elapsed': finished - began,
    }
    return render(request, 'markov/context.html', template_vars)
|
|
|
|
|
|
# NOTE(review): the permission string carries no app label; Django permission
# names are normally "app_label.codename" -- confirm this is intended.
@permission_required('import_log_file', raise_exception=True)
def import_file(request):
    """Accept a file upload and turn it into markov stuff.

    On a valid POST, each line of the uploaded file is decoded as UTF-8 and
    split on tabs into (timestamp, nick, message). Lines whose nick field is
    one of a fixed set of markers, or is listed in the form's comma-separated
    'ignore_nicks', are skipped. The remaining messages have any word found in
    the space-separated 'strip_prefixes' removed and are then learned into the
    chosen context. Lines that cannot be decoded or do not have three
    tab-separated fields are skipped instead of aborting the import.

    Any other request method renders an empty upload form.

    Current file formats supported:
    * weechat

    """

    if request.method == 'POST':
        form = LogUploadForm(request.POST, request.FILES)
        if form.is_valid():
            log_file = request.FILES['log_file']
            context = form.cleaned_data['context']
            ignores = form.cleaned_data['ignore_nicks'].split(',')
            strips = form.cleaned_data['strip_prefixes'].split(' ')

            # distinct nicks that contributed lines, for the debug log only
            whos = set()
            for line in log_file:
                try:
                    (_timestamp, who, what) = line.decode('utf-8').split('\t', 2)
                except ValueError:
                    # covers both a line with fewer than three fields and
                    # undecodable bytes (UnicodeDecodeError is a ValueError)
                    log.debug(u"skipping unparseable log line")
                    continue

                # presumably weechat's join/part/notice/action prefixes,
                # i.e. not something a person said -- TODO confirm
                if who in ('-->', '<--', '--', ' *'):
                    continue

                if who in ignores:
                    continue

                whos.add(who)

                # this is a line we probably care about now
                what = [x for x in what.rstrip().split(' ') if x not in strips]
                _learn_line(' '.join(what), context)

            log.debug(whos)
    else:
        form = LogUploadForm()

    return render(request, 'markov/import_file.html', {'form': form})
|
|
|
|
|
|
@permission_required('teach_line', raise_exception=True)
def teach_line(request):
    """Learn a single line submitted through the teach-line form.

    A valid POST has the words listed in the space-separated
    'strip_prefixes' field removed from the submitted line, and the result
    is learned into the selected context. Any other method shows a blank
    form. The form is rendered in every case.
    """
    if request.method != 'POST':
        form = TeachLineForm()
    else:
        form = TeachLineForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            raw_line = cleaned['line']
            target_context = cleaned['context']
            prefixes = cleaned['strip_prefixes'].split(' ')
            kept_words = [
                token for token in raw_line.rstrip().split(' ')
                if token not in prefixes
            ]
            _learn_line(' '.join(kept_words), target_context)

    return render(request, 'markov/teach_line.html', {'form': form})
|
|
|
|
|
|
def _generate_line(context, topics=None, max_words=30):
    """Generate a Markov chain.

    If topics are given, one is picked at random and, when the context has a
    state producing that word, the chain is first grown backwards from it
    until the second start marker is reached. Otherwise the chain starts from
    the two start markers. It is then grown forwards, two words of history at
    a time, until the stop marker is produced.

    Args:
        context: the MarkovContext whose states are used.
        topics: optional list of candidate topic words. The chosen word is
            removed from this list in place, so a caller retrying with the
            same list picks from the remaining topics.
        max_words: growth in either direction stops once the chain,
            including markers, is longer than this.

    Returns:
        The list of generated words with start/stop markers removed; it may
        be empty.
    """

    words = []
    # if we have topics, try to work from it and work backwards
    if topics:
        topic_word = random.choice(topics)
        topics.remove(topic_word)
        log.debug(u"looking for topic '{0:s}'".format(topic_word))
        new_states = MarkovState.objects.filter(context=context, v=topic_word)

        # exists() only asks the database whether a row matches, rather than
        # loading every matching state just to count them
        if new_states.exists():
            log.debug(u"found '{0:s}', starting backwards".format(topic_word))
            words.insert(0, topic_word)
            while len(words) <= max_words and words[0] != MarkovState._start2:
                log.debug(u"looking backwards for '{0:s}'".format(words[0]))
                new_states = MarkovState.objects.filter(context=context, v=words[0])
                previous_word = _get_word_out_of_states(new_states, backwards=True)
                if not previous_word:
                    # an empty word means no state was picked; stop here
                    # instead of searching backwards from ''
                    break
                words.insert(0, previous_word)

    # if we didn't get topic stuff, we need to start (forwards) here
    if len(words) == 0:
        words = [MarkovState._start1, MarkovState._start2]

    i = len(words)
    while len(words) <= max_words and words[-1] != MarkovState._stop:
        log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
        new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
        log.debug(u"states retrieved")
        next_word = _get_word_out_of_states(new_states)
        if not next_word:
            # dead end: nothing follows this pair, so end the chain rather
            # than padding it with empty words up to max_words
            break
        words.append(next_word)
        i += 1

    markers = (MarkovState._start1, MarkovState._start2, MarkovState._stop)
    words = [word for word in words if word not in markers]

    return words
|
|
|
|
|
|
def _generate_longish_line(context, topics=None, min_words=4, max_words=30):
    """Generate a Markov chain, but throw away the short ones unless we get desperate.

    Up to five lines are generated with the given topics; the first one with
    at least min_words words is returned. If none qualifies, one last line is
    generated without topics and returned whatever its length.

    Args:
        context: the MarkovContext to generate from.
        topics: optional list of topic words, passed through to
            _generate_line (which removes the topic it picks from the list).
        min_words: minimum acceptable line length during the five attempts.
        max_words: length cap passed to every _generate_line call.

    Returns:
        A list of words; it may be shorter than min_words.
    """

    for _ in range(5):
        line = _generate_line(context, topics=topics, max_words=max_words)
        if len(line) >= min_words:
            return line

    # if we got here, we need to just give up; still honor the caller's
    # max_words so the fallback cannot exceed the requested cap
    return _generate_line(context, max_words=max_words)
|
|
|
|
|
|
def _generate_sentence(context, topics=None, min_words=15, max_words=30):
    """String multiple lines together into a coherent sentence.

    Lines are generated and concatenated, at most five times, until the
    sentence has at least min_words words. After each round that falls short,
    a random ',', '.' or '!' is appended to the last word of the line just
    added, so the joined lines read as separate clauses.

    Args:
        context: the MarkovContext to generate from.
        topics: optional list of topic words, passed to each line generation.
        min_words: target minimum number of words in the sentence.
        max_words: per-line length cap passed to _generate_longish_line.

    Returns:
        The list of words collected so far; after five short rounds it may
        be shorter than min_words, or empty.
    """

    sentence = []
    for _ in range(5):
        line = _generate_longish_line(context, topics=topics, max_words=max_words)
        sentence += line
        if len(sentence) >= min_words:
            return sentence

        # only punctuate when this round actually added words: an empty
        # line would otherwise index into an empty sentence (IndexError) or
        # stack a second punctuation mark on the same word
        if line:
            sentence[-1] += random.choice([',', '.', '!'])

    # if we got here, we need to give up
    return sentence
|
|
|
|
|
|
def _get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states.

    The choice is weighted by each state's count: a number between 1 and the
    total count is drawn, and the states are walked while accumulating their
    counts until the running total reaches it.

    Args:
        states: a MarkovState queryset to choose from.
        backwards: when true, return the chosen state's k2 (the word before
            its value) instead of its v.

    Returns:
        The chosen word, or '' when the queryset is empty or its counts sum
        to zero.
    """

    new_word = ''
    running = 0
    count_sum = states.aggregate(Sum('count'))['count__sum']

    # the aggregate is None when no rows match; there is nothing to pick
    if not count_sum:
        log.debug(u"no states to choose from")
        return new_word

    # draw from 1..sum, not 0..sum: with 0 included, both 0 and 1 land on
    # the first state and it is picked more often than its count warrants
    hit = random.randint(1, count_sum)

    log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit))

    for state in states.iterator():
        running += state.count
        if running >= hit:
            new_word = state.k2 if backwards else state.v
            break

    log.debug(u"found '{0:s}'".format(new_word))
    return new_word
|
|
|
|
|
|
def _learn_line(line, context):
    """Record every consecutive word triple of a line as a MarkovState.

    The line is split on whitespace and wrapped in the two start markers and
    the stop marker. If any resulting word is longer than the k1 field
    allows, nothing is learned. Otherwise each (k1, k2, v) window is fetched
    or created for the context and its count is incremented.
    """
    log.debug(u"learning {0:.40s}...".format(line))

    tokens = line.split()
    tokens = [MarkovState._start1, MarkovState._start2] + tokens + [MarkovState._stop]

    # one oversized word rejects the whole line
    longest_allowed = MarkovState._meta.get_field('k1').max_length
    if any(len(token) > longest_allowed for token in tokens):
        return

    # slide a three-word window over the marker-wrapped line
    for first, second, third in zip(tokens, tokens[1:], tokens[2:]):
        log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(first, second, third))
        state, _created = MarkovState.objects.get_or_create(
            context=context,
            k1=first,
            k2=second,
            v=third,
        )
        state.count += 1
        state.save()
|
|
|
|
# vi:tabstop=4:expandtab:autoindent
|