From 9c08a203c77f93918d95cdf279ce99c1cb8cc711 Mon Sep 17 00:00:00 2001
From: "Brian S. Stephan"
Date: Sat, 5 Apr 2014 14:26:06 -0500
Subject: [PATCH] Markov: generate markov chains into sentences

some view stuff to get at it through django while we're here
---
 markov/urls.py                |   1 +
 markov/views.py               | 114 +++++++++++++++++++++++++++++++++-
 templates/markov/context.html |  11 ++++
 3 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 templates/markov/context.html

diff --git a/markov/urls.py b/markov/urls.py
index 39fcab8..7d3ad75 100644
--- a/markov/urls.py
+++ b/markov/urls.py
@@ -7,6 +7,7 @@ from django.conf.urls import patterns, url
 
 urlpatterns = patterns('markov.views',
     url(r'^$', 'index', name='markov_index'),
+    url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'),
     url(r'^import/$', 'import_file', name='markov_import_file'),
 )
 
diff --git a/markov/views.py b/markov/views.py
index 3b1e57c..da442fc 100644
--- a/markov/views.py
+++ b/markov/views.py
@@ -4,10 +4,12 @@ markov/views.py --- manipulate markov data
 """
 
 import logging
+import random
+import time
 
 from django.contrib.auth.decorators import permission_required
 from django.http import HttpResponse
-from django.shortcuts import render
+from django.shortcuts import get_object_or_404, render
 
 from markov.forms import LogUploadForm
 from markov.models import MarkovContext, MarkovTarget, MarkovState
@@ -22,6 +24,19 @@ def index(request):
     return HttpResponse()
 
 
+def context_index(request, context_id):
+    """Show a sentence generated from the given context, or 404, with the time it took."""
+
+    start_t = time.time()
+    context = get_object_or_404(MarkovContext, pk=context_id)
+    chain = ' '.join(_generate_sentence(context))
+    end_t = time.time()
+
+    return render(request, 'markov/context.html', {'chain': chain,
+                                                   'context': context,
+                                                   'elapsed': end_t - start_t})
+
+
 @permission_required('import_log_file', raise_exception=True)
 def import_file(request):
     """Accept a file upload and turn it into markov stuff.
@@ -60,6 +75,144 @@ def import_file(request):
     return render(request, 'markov/import_file.html', {'form': form})
 
 
+def _generate_line(context, topics=None, max_words=30):
+    """Generate a Markov chain as a list of words.
+
+    If topics is given, one of them is picked at random and removed from
+    the list (the caller's list is modified). When that word is known in
+    this context, the chain is first grown backwards from it until the
+    start marker is reached and then forwards; otherwise it is grown
+    forwards from the start markers.
+
+    Growing stops at the stop marker, once max_words is exceeded, or when
+    no word can be found to continue with. The start and stop markers are
+    stripped from the returned list.
+    """
+
+    words = []
+    # if we have topics, try to work from it and work backwards
+    if topics:
+        topic_word = random.choice(topics)
+        topics.remove(topic_word)
+        log.debug(u"looking for topic '{0:s}'".format(topic_word))
+        new_states = MarkovState.objects.filter(context=context, v=topic_word)
+
+        # exists() asks the database instead of loading every matching row
+        if new_states.exists():
+            log.debug(u"found '{0:s}', starting backwards".format(topic_word))
+            words.insert(0, topic_word)
+            while len(words) <= max_words and words[0] != MarkovState._start2:
+                log.debug(u"looking backwards for '{0:s}'".format(words[0]))
+                new_states = MarkovState.objects.filter(context=context, v=words[0])
+                prev_word = _get_word_out_of_states(new_states, backwards=True)
+                if not prev_word:
+                    # dead end; stop instead of padding the chain with ''
+                    break
+                words.insert(0, prev_word)
+
+    # if we didn't get topic stuff, we need to start (forwards) here
+    if not words:
+        words = [MarkovState._start1, MarkovState._start2]
+
+    # the forward lookup is keyed on the last two words, so it needs two
+    while 2 <= len(words) <= max_words and words[-1] != MarkovState._stop:
+        k1, k2 = words[-2], words[-1]
+        log.debug(u"looking for '{0:s}','{1:s}'".format(k1, k2))
+        new_states = MarkovState.objects.filter(context=context, k1=k1, k2=k2)
+        next_word = _get_word_out_of_states(new_states)
+        if not next_word:
+            # dead end; stop instead of padding the chain with ''
+            break
+        words.append(next_word)
+
+    words = [word for word in words if word not in
+             (MarkovState._start1, MarkovState._start2, MarkovState._stop)]
+
+    return words
+
+
+def _generate_longish_line(context, topics=None, min_words=4, max_words=30):
+    """Generate a Markov chain, but throw away the short ones unless we get desperate.
+
+    Up to five chains are generated and the first one with at least
+    min_words words is returned. If all of them are too short, one more
+    chain is generated without topics and returned whatever its length.
+    """
+
+    for _ in range(5):
+        line = _generate_line(context, topics=topics, max_words=max_words)
+        if len(line) >= min_words:
+            return line
+
+    # if we got here, we need to just give up
+    return _generate_line(context, max_words=max_words)
+
+
+def _generate_sentence(context, topics=None, min_words=15, max_words=30):
+    """String multiple lines together into a coherent sentence.
+
+    Lines are added until the sentence has at least min_words words or
+    five lines have been used. A line that leaves the sentence too short
+    gets a random ',', '.' or '!' attached to its last word. max_words
+    limits each line, not the whole sentence. Returns a list of words,
+    which is empty when no words could be generated.
+    """
+
+    sentence = []
+    for _ in range(5):
+        line = _generate_longish_line(context, topics=topics, max_words=max_words)
+        sentence += line
+        if len(sentence) >= min_words:
+            return sentence
+
+        # an empty line leaves nothing new to punctuate (and sentence may be
+        # empty too, where sentence[-1] would raise IndexError)
+        if line:
+            sentence[-1] += random.choice([',', '.', '!'])
+
+    # if we got here, we need to give up
+    return sentence
+
+
+def _get_word_out_of_states(states, backwards=False):
+    """Pick one random word out of the given states.
+
+    Each state offers its v, or its k2 when backwards is true, and is
+    chosen with a chance proportional to its count. Returns '' when there
+    is nothing to choose from.
+    """
+
+    running = 0
+    weighted_words = []
+    for state in states:
+        running += state.count
+        word = state.k2 if backwards else state.v
+        weighted_words.append((running, word))
+
+    # let logging do the formatting; '{0:s}'.format() rejects a list on py3
+    log.debug(u"%s", weighted_words)
+
+    if running < 1:
+        # no states (or no positive counts), so there is no range to draw from
+        log.debug(u"found ''")
+        return ''
+
+    # draw from 1..running; starting at 0 would give the first word one
+    # extra chance on top of its count
+    hit = random.randint(1, running)
+    log.debug(u"hit: {0:d}".format(hit))
+
+    new_word = ''
+    for weight, word in weighted_words:
+        new_word = word
+
+        if weight >= hit:
+            break
+
+    log.debug(u"found '{0:s}'".format(new_word))
+    return new_word
+
+
 def _learn_line(line, context):
     """Create a bunch of MarkovStates for a given line of text."""
 
diff --git a/templates/markov/context.html b/templates/markov/context.html
new file mode 100644
index 0000000..5112050
--- /dev/null
+++ b/templates/markov/context.html
@@ -0,0 +1,11 @@
+{% extends 'base.html' %}
+
+{% block title %}context: {{ context.name }}{% endblock %}
+
+{% block content %}
+

{{ chain }}

+

in: {{ elapsed }}s

+{% endblock %} +