Markov: generate markov chains into sentences

some view stuff to get at it through django while we're here
2014-04-05 14:26:06 -05:00 · 2014-04-05 14:26:06 -05:00 · 9c08a203c7
commit 9c08a203c7
parent ad2044a178
3 changed files with 125 additions and 1 deletion
--- a/markov/urls.py
+++ b/markov/urls.py
@ -7,6 +7,7 @@ from django.conf.urls import patterns, url

 urlpatterns = patterns('markov.views',
    url(r'^$', 'index', name='markov_index'),
+    # the digits captured by the named group are handed to context_index as
+    # its context_id keyword argument
+    url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'),
    url(r'^import/$', 'import_file', name='markov_import_file'),
 )

--- a/markov/views.py
+++ b/markov/views.py
@ -4,10 +4,12 @@ markov/views.py --- manipulate markov data
 """

 import logging
+import random
+import time

 from django.contrib.auth.decorators import permission_required
 from django.http import HttpResponse
-from django.shortcuts import render
+from django.shortcuts import get_object_or_404, render

 from markov.forms import LogUploadForm
 from markov.models import MarkovContext, MarkovTarget, MarkovState
@ -22,6 +24,19 @@ def index(request):
    return HttpResponse()


+def context_index(request, context_id):
+    """Render one generated sentence for the requested Markov context.
+
+    Responds with a 404 when no MarkovContext has the given primary key. The
+    template also receives the number of seconds spent on the lookup and the
+    sentence generation.
+    """

+    began = time.time()
+    markov_context = get_object_or_404(MarkovContext, pk=context_id)
+    generated_words = _generate_sentence(markov_context)
+    chain = ' '.join(generated_words)
+    elapsed = time.time() - began

+    template_vars = {
+        'chain': chain,
+        'context': markov_context,
+        'elapsed': elapsed,
+    }
+    return render(request, 'markov/context.html', template_vars)
+
+
@permission_required('import_log_file', raise_exception=True)
 def import_file(request):
    """Accept a file upload and turn it into markov stuff.
@ -60,6 +75,103 @@ def import_file(request):
    return render(request, 'markov/import_file.html', {'form': form})


+def _generate_line(context, topics=None, max_words=30):
+    """Build one list of words by walking the stored Markov states.
+
+    When topics are given, one is picked at random and removed from the
+    caller's list. If the context has states producing that word, the chain
+    is first grown backwards from it and then completed forwards; otherwise
+    the chain starts from the start markers. The start and stop marker words
+    are stripped from the returned list.
+    """

+    chain = []
+    # a topic, when available, seeds the chain and is extended backwards first
+    if topics:
+        seed = random.choice(topics)
+        topics.remove(seed)
+        log.debug(u"looking for topic '{0:s}'".format(seed))
+        seed_states = MarkovState.objects.filter(context=context, v=seed)

+        if len(seed_states) > 0:
+            log.debug(u"found '{0:s}', starting backwards".format(seed))
+            chain = [seed]
+            while len(chain) <= max_words and chain[0] != MarkovState._start2:
+                head = chain[0]
+                log.debug(u"looking backwards for '{0:s}'".format(head))
+                predecessors = MarkovState.objects.filter(context=context, v=head)
+                chain.insert(0, _get_word_out_of_states(predecessors, backwards=True))

+    # no usable topic: begin a plain forwards chain from the start markers
+    if len(chain) == 0:
+        chain = [MarkovState._start1, MarkovState._start2]

+    # cursor always equals len(chain); the two words before it form the key
+    cursor = len(chain)
+    while len(chain) <= max_words and chain[-1] != MarkovState._stop:
+        key1 = chain[cursor - 2]
+        key2 = chain[cursor - 1]
+        log.debug(u"looking for '{0:s}','{1:s}'".format(key1, key2))
+        followers = MarkovState.objects.filter(context=context, k1=key1, k2=key2)
+        chain.append(_get_word_out_of_states(followers))
+        cursor += 1

+    markers = (MarkovState._start1, MarkovState._start2, MarkovState._stop)
+    return [word for word in chain if word not in markers]
+
+
+def _generate_longish_line(context, topics=None, min_words=4, max_words=30):
+    """Generate a Markov chain, but throw away the short ones unless we get desperate.
+
+    Up to five candidate lines are generated with the given topics; the first
+    one holding at least min_words words is returned. If none qualifies, one
+    more line is generated without topics and returned whatever its length.
+    Every attempt, including the last one, is limited by max_words.
+    """

+    for _ in range(5):
+        line = _generate_line(context, topics=topics, max_words=max_words)
+        if len(line) >= min_words:
+            return line

+    # if we got here, we need to just give up: drop the topics, but keep
+    # honoring the caller's max_words limit
+    return _generate_line(context, max_words=max_words)
+
+
+def _generate_sentence(context, topics=None, min_words=15, max_words=30):
+    """String multiple lines together into a coherent sentence.
+
+    Lines are appended until the sentence holds at least min_words words or
+    five lines have been tried. Between lines, a random punctuation mark is
+    attached to the last word of the line just added. max_words limits each
+    individual line, not the sentence as a whole.
+
+    Returns the list of words, which may be shorter than min_words (or empty)
+    if generation kept producing short lines.
+    """

+    sentence = []
+    for _ in range(5):
+        line = _generate_longish_line(context, topics=topics, max_words=max_words)
+        sentence += line
+        if len(sentence) >= min_words:
+            return sentence

+        # only punctuate after a line that actually added words: an empty
+        # line would otherwise raise IndexError on an empty sentence, or pile
+        # a second punctuation mark onto an already-terminated word
+        if line:
+            sentence[-1] += random.choice([',', '.', '!'])

+    # if we got here, we need to give up
+    return sentence
+
+
+def _get_word_out_of_states(states, backwards=False):
+    """Pick one random word out of the given states, weighted by state count.
+
+    states is an iterable of objects exposing count, k2 and v attributes.
+    When backwards is true the candidate word of each state is its k2,
+    otherwise its v. Returns an empty string when there are no states.
+    """

+    running = 0
+    weighted_words = []
+    for state in states:
+        running += state.count
+        word = state.k2 if backwards else state.v
+        # store the cumulative count so a single draw can be mapped to a word
+        weighted_words.append((running, word))

+    # !s instead of :s, a list does not accept a string format spec on
+    # newer Python versions
+    log.debug(u"{0!s}".format(weighted_words))

+    if not weighted_words:
+        log.debug(u"found '{0:s}'".format(''))
+        return ''

+    # draw from 1..running: randint is inclusive on both ends, so including 0
+    # would always select the first word and give it one extra unit of weight
+    hit = random.randint(1, running) if running > 0 else 0
+    log.debug(u"hit: {0:d}".format(hit))

+    new_word = ''
+    for weight, word in weighted_words:
+        new_word = word

+        if weight >= hit:
+            break

+    log.debug(u"found '{0:s}'".format(new_word))
+    return new_word
+
+
 def _learn_line(line, context):
    """Create a bunch of MarkovStates for a given line of text."""

--- a/templates/markov/context.html
+++ b/templates/markov/context.html
@ -0,0 +1,11 @@
+{% extends 'base.html' %}
+{# rendered by the context_index view, which supplies 'chain', 'context' and 'elapsed' #}

+{% block title %}context: {{ context.name }}{% endblock %}

+{% block content %}
+<p>{{ chain }}</p>
+<p>in: {{ elapsed }}s</p>
+{% endblock %}
+<!--
+    vi:tabstop=4:expandtab:autoindent
+ -->