Markov: generate sentences from Markov chains
Also add a Django view and template so a generated sentence can be viewed per context.
This commit is contained in:
parent
ad2044a178
commit
9c08a203c7
@ -7,6 +7,7 @@ from django.conf.urls import patterns, url
|
||||
|
||||
# URL routes for the markov app. The view names are given as strings and are
# looked up relative to the 'markov.views' prefix passed to patterns().
urlpatterns = patterns(
    'markov.views',
    # app landing page
    url(r'^$', 'index', name='markov_index'),
    # per-context page; context_id is captured as a run of digits
    url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'),
    # log file upload
    url(r'^import/$', 'import_file', name='markov_import_file'),
)
|
||||
|
||||
|
114
markov/views.py
114
markov/views.py
@ -4,10 +4,12 @@ markov/views.py --- manipulate markov data
|
||||
"""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
from django.contrib.auth.decorators import permission_required
|
||||
from django.http import HttpResponse
|
||||
from django.shortcuts import render
|
||||
from django.shortcuts import get_object_or_404, render
|
||||
|
||||
from markov.forms import LogUploadForm
|
||||
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
||||
@ -22,6 +24,19 @@ def index(request):
|
||||
return HttpResponse()
|
||||
|
||||
|
||||
def context_index(request, context_id):
    """Render one generated sentence for the requested Markov context.

    Responds with a 404 when no MarkovContext has ``context_id`` as its
    primary key. The template receives the generated text, the context
    object, and the wall-clock seconds spent on lookup plus generation.
    """

    started = time.time()
    markov_context = get_object_or_404(MarkovContext, pk=context_id)
    sentence_words = _generate_sentence(markov_context)
    chain = ' '.join(sentence_words)
    # measured before rendering, so template time is not included
    elapsed = time.time() - started

    template_vars = {
        'chain': chain,
        'context': markov_context,
        'elapsed': elapsed,
    }
    return render(request, 'markov/context.html', template_vars)
|
||||
|
||||
|
||||
@permission_required('import_log_file', raise_exception=True)
|
||||
def import_file(request):
|
||||
"""Accept a file upload and turn it into markov stuff.
|
||||
@ -60,6 +75,103 @@ def import_file(request):
|
||||
return render(request, 'markov/import_file.html', {'form': form})
|
||||
|
||||
|
||||
def _generate_line(context, topics=None, max_words=30):
    """Generate a Markov chain and return it as a list of words.

    If ``topics`` is non-empty, one topic word is chosen at random and removed
    from the list (the caller's list is modified). When that word is known in
    ``context``, the chain is first grown backwards from it until the start
    marker is reached, then forwards as usual. Otherwise the chain starts from
    the start markers.

    Growth in either direction stops at ``max_words``, at the relevant
    start/stop marker, or as soon as no matching state yields a word. The
    start and stop markers are stripped from the returned list, which may
    therefore be empty.
    """

    words = []
    # if we have topics, try to work from it and work backwards
    if topics:
        topic_word = random.choice(topics)
        topics.remove(topic_word)
        log.debug(u"looking for topic '{0:s}'".format(topic_word))
        new_states = MarkovState.objects.filter(context=context, v=topic_word)

        # exists() only asks the database whether a row is there, instead of
        # loading every matching state just to count them
        if new_states.exists():
            log.debug(u"found '{0:s}', starting backwards".format(topic_word))
            words.insert(0, topic_word)
            while len(words) <= max_words and words[0] != MarkovState._start2:
                log.debug(u"looking backwards for '{0:s}'".format(words[0]))
                new_states = MarkovState.objects.filter(context=context, v=words[0])
                previous_word = _get_word_out_of_states(new_states, backwards=True)
                if not previous_word:
                    # dead end: nothing leads to this word, so stop rather
                    # than padding the front of the chain with empty strings
                    break
                words.insert(0, previous_word)

    # if we didn't get topic stuff, we need to start (forwards) here; the
    # forward walk needs a (k1, k2) pair, so a lone word is not enough
    if len(words) < 2:
        words = [MarkovState._start1, MarkovState._start2]

    while len(words) <= max_words and words[-1] != MarkovState._stop:
        log.debug(u"looking for '{0:s}','{1:s}'".format(words[-2], words[-1]))
        new_states = MarkovState.objects.filter(context=context, k1=words[-2], k2=words[-1])
        next_word = _get_word_out_of_states(new_states)
        if not next_word:
            # dead end: no state follows this pair, so stop rather than
            # appending empty strings (and querying) until max_words
            break
        words.append(next_word)

    markers = (MarkovState._start1, MarkovState._start2, MarkovState._stop)
    return [word for word in words if word not in markers]


def _generate_longish_line(context, topics=None, min_words=4, max_words=30):
    """Generate a Markov chain, but throw away the short ones unless we get desperate.

    Makes up to five attempts at a line of at least ``min_words`` words. If
    none is long enough, one last line is generated without topics and
    returned regardless of its length.
    """

    for _ in range(5):
        line = _generate_line(context, topics=topics, max_words=max_words)
        if len(line) >= min_words:
            return line

    # if we got here, we need to just give up; topics are dropped for this
    # last attempt, but the caller's length cap still applies
    return _generate_line(context, max_words=max_words)


def _generate_sentence(context, topics=None, min_words=15, max_words=30):
    """String multiple lines together into a coherent sentence.

    Lines are appended until the sentence has at least ``min_words`` words or
    five lines have been tried. After each line that leaves the sentence too
    short, a random ',', '.' or '!' is attached to the last word. Returns the
    list of words, which is empty if no line produced any.
    """

    sentence = []
    for _ in range(5):
        line = _generate_longish_line(context, topics=topics, max_words=max_words)
        sentence += line
        if len(sentence) >= min_words:
            return sentence

        # only punctuate after a line that actually added words: an empty
        # first line leaves nothing to punctuate, and an empty later line
        # would otherwise stack a second mark on the same word
        if line:
            sentence[-1] += random.choice([',', '.', '!'])

    # if we got here, we need to give up
    return sentence
|
||||
|
||||
|
||||
def _get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states, weighted by count.

    ``states`` is an iterable of objects with ``count``, ``k2`` and ``v``
    attributes. The candidate word of each state is ``k2`` when ``backwards``
    is true and ``v`` otherwise. Returns '' when ``states`` is empty.
    """

    running = 0
    weighted_words = []
    for state in states:
        running += state.count
        candidate = state.k2 if backwards else state.v
        # store the cumulative count so one draw can be mapped to a word
        weighted_words.append((running, candidate))

    # a list has no 's' format spec on Python 3, so let logging render it
    log.debug(u"%s", weighted_words)

    if not weighted_words:
        # nothing to choose from
        return ''

    # randint is inclusive at both ends: drawing from 0 would give the first
    # word one extra unit of weight, so draw from 1..running instead
    hit = random.randint(1, running) if running > 0 else 0
    log.debug(u"hit: {0:d}".format(hit))

    new_word = ''
    for weight, word in weighted_words:
        new_word = word
        if weight >= hit:
            break

    log.debug(u"found '{0:s}'".format(new_word))
    return new_word
|
||||
|
||||
|
||||
def _learn_line(line, context):
|
||||
"""Create a bunch of MarkovStates for a given line of text."""
|
||||
|
||||
|
11
templates/markov/context.html
Normal file
11
templates/markov/context.html
Normal file
@ -0,0 +1,11 @@
|
||||
{% extends 'base.html' %}

{% block title %}context: {{ context.name }}{% endblock %}

{% block content %}
{# one generated sentence for this context, and how long it took to build #}
<p>{{ chain }}</p>
{# elapsed is a raw float of seconds; round it for display #}
<p>in: {{ elapsed|floatformat:3 }}s</p>
{% endblock %}

<!--
vi:tabstop=4:expandtab:autoindent
-->
|
Loading…
Reference in New Issue
Block a user