From 77e52acc7518169a723dc18790632c7251d43b7c Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 10:52:29 -0500 Subject: [PATCH] Markov: first cut of markov in django this is just a basic port of the tables into django models right now. there's some serious slowness in the state creation that i need to fix before this does anything, but i want to get this in a real database on a real linode before i go too much further, so here it is --- dr_botzo/settings.py | 1 + dr_botzo/urls.py | 1 + markov/__init__.py | 0 markov/admin.py | 9 +++ markov/forms.py | 24 ++++++ ...001_markov_context_and_target_and_state.py | 80 ++++++++++++++++++ markov/migrations/__init__.py | 0 markov/models.py | 59 ++++++++++++++ markov/urls.py | 13 +++ markov/views.py | 81 +++++++++++++++++++ templates/markov/import_file.html | 16 ++++ 11 files changed, 284 insertions(+) create mode 100644 markov/__init__.py create mode 100644 markov/admin.py create mode 100644 markov/forms.py create mode 100644 markov/migrations/0001_markov_context_and_target_and_state.py create mode 100644 markov/migrations/__init__.py create mode 100644 markov/models.py create mode 100644 markov/urls.py create mode 100644 markov/views.py create mode 100644 templates/markov/import_file.html diff --git a/dr_botzo/settings.py b/dr_botzo/settings.py index c1e9f17..5f6f495 100644 --- a/dr_botzo/settings.py +++ b/dr_botzo/settings.py @@ -38,6 +38,7 @@ INSTALLED_APPS = ( 'django.contrib.staticfiles', 'django_extensions', 'south', + 'markov', 'races', 'seen', ) diff --git a/dr_botzo/urls.py b/dr_botzo/urls.py index c39a3b2..622ff6b 100644 --- a/dr_botzo/urls.py +++ b/dr_botzo/urls.py @@ -6,6 +6,7 @@ admin.autodiscover() urlpatterns = patterns('', url(r'^$', 'dr_botzo.views.home', name='home'), + url(r'^markov/', include('markov.urls')), url(r'^races/', include('races.urls')), url(r'^admin/', include(admin.site.urls)), diff --git a/markov/__init__.py b/markov/__init__.py new file mode 100644 index 0000000..e69de29 
diff --git a/markov/admin.py b/markov/admin.py new file mode 100644 index 0000000..d822365 --- /dev/null +++ b/markov/admin.py @@ -0,0 +1,9 @@ +from django.contrib import admin + +from markov.models import MarkovContext, MarkovTarget, MarkovState + +admin.site.register(MarkovContext) +admin.site.register(MarkovTarget) +admin.site.register(MarkovState) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/forms.py b/markov/forms.py new file mode 100644 index 0000000..6aca9b9 --- /dev/null +++ b/markov/forms.py @@ -0,0 +1,24 @@ +""" +markov/forms.py --- forms for manipulating markov data + +""" + +import logging + +from django.forms import Form, CharField, FileField, ModelChoiceField + +from markov.models import MarkovContext + +log = logging.getLogger('dr_botzo.markov') + + +class LogUploadForm(Form): + + """Accept a file upload that will be imported into Markov stuff.""" + + log_file = FileField(help_text="Weechat log format.") + context = ModelChoiceField(queryset=MarkovContext.objects.all()) + ignore = CharField(help_text="Comma-separated list of nicks to ignore.", + required=False) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/migrations/0001_markov_context_and_target_and_state.py b/markov/migrations/0001_markov_context_and_target_and_state.py new file mode 100644 index 0000000..80b8314 --- /dev/null +++ b/markov/migrations/0001_markov_context_and_target_and_state.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'MarkovContext' + db.create_table(u'markov_markovcontext', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('name', self.gf('django.db.models.fields.CharField')(max_length=32)), + )) + db.send_create_signal(u'markov', ['MarkovContext']) + + # Adding model 'MarkovTarget' + 
db.create_table(u'markov_markovtarget', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('target', self.gf('django.db.models.fields.CharField')(max_length=64)), + ('context', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])), + ('chatter_chance', self.gf('django.db.models.fields.IntegerField')(default=0)), + )) + db.send_create_signal(u'markov', ['MarkovTarget']) + + # Adding model 'MarkovState' + db.create_table(u'markov_markovstate', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('k1', self.gf('django.db.models.fields.CharField')(max_length=128)), + ('k2', self.gf('django.db.models.fields.CharField')(max_length=128)), + ('v', self.gf('django.db.models.fields.CharField')(max_length=128)), + ('count', self.gf('django.db.models.fields.IntegerField')(default=0)), + ('context', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])), + )) + db.send_create_signal(u'markov', ['MarkovState']) + + # Adding unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v'] + db.create_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v']) + + + def backwards(self, orm): + # Removing unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v'] + db.delete_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v']) + + # Deleting model 'MarkovContext' + db.delete_table(u'markov_markovcontext') + + # Deleting model 'MarkovTarget' + db.delete_table(u'markov_markovtarget') + + # Deleting model 'MarkovState' + db.delete_table(u'markov_markovstate') + + + models = { + u'markov.markovcontext': { + 'Meta': {'object_name': 'MarkovContext'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '32'}) + }, + u'markov.markovstate': { + 'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'}, + 'context': 
('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + 'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'v': ('django.db.models.fields.CharField', [], {'max_length': '128'}) + }, + u'markov.markovtarget': { + 'Meta': {'object_name': 'MarkovTarget'}, + 'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'target': ('django.db.models.fields.CharField', [], {'max_length': '64'}) + } + } + + complete_apps = ['markov'] \ No newline at end of file diff --git a/markov/migrations/__init__.py b/markov/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markov/models.py b/markov/models.py new file mode 100644 index 0000000..8a568f6 --- /dev/null +++ b/markov/models.py @@ -0,0 +1,59 @@ +""" +markov/models.py --- save brain pieces for chaining + +""" + +import logging + +from django.db import models + + +log = logging.getLogger('dr_botzo.markov') + + +class MarkovContext(models.Model): + + """Define contexts for Markov chains.""" + + name = models.CharField(max_length=32) + + def __unicode__(self): + """String representation.""" + + return u"{0:s}".format(self.name) + + +class MarkovTarget(models.Model): + + """Define IRC targets that relate to a context, and can occasionally be talked to.""" + + target = models.CharField(max_length=64) + context = models.ForeignKey(MarkovContext) + + chatter_chance = models.IntegerField(default=0) + + +class MarkovState(models.Model): + + """One element in a Markov chain, some text or something.""" + + _start1 = 
'__start1' + _start2 = '__start2' + _stop = '__stop' + + k1 = models.CharField(max_length=128) + k2 = models.CharField(max_length=128) + v = models.CharField(max_length=128) + + count = models.IntegerField(default=0) + context = models.ForeignKey(MarkovContext) + + class Meta: + unique_together = ('context', 'k1', 'k2', 'v') + + def __unicode__(self): + """String representation.""" + + return u"{0:s},{1:s} -> {2:s} (count: {3:d})".format(self.k1, self.k2, self.v, self.count) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/urls.py b/markov/urls.py new file mode 100644 index 0000000..39fcab8 --- /dev/null +++ b/markov/urls.py @@ -0,0 +1,13 @@ +""" +markov/urls.py --- url patterns for markov stuff + +""" + +from django.conf.urls import patterns, url + +urlpatterns = patterns('markov.views', + url(r'^$', 'index', name='markov_index'), + url(r'^import/$', 'import_file', name='markov_import_file'), +) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/views.py b/markov/views.py new file mode 100644 index 0000000..c25df84 --- /dev/null +++ b/markov/views.py @@ -0,0 +1,81 @@ +""" +markov/views.py --- manipulate markov data + +""" + +import logging + +from django.http import HttpResponse +from django.shortcuts import render + +from markov.forms import LogUploadForm +from markov.models import MarkovContext, MarkovTarget, MarkovState + + +log = logging.getLogger('dr_botzo.markov') + + +def index(request): + """Display nothing, for the moment.""" + + return HttpResponse() + + +def import_file(request): + """Accept a file upload and turn it into markov stuff. 
+ + Current file formats supported: + * weechat + + """ + + if request.method == 'POST': + form = LogUploadForm(request.POST, request.FILES) + if form.is_valid(): + log_file = request.FILES['log_file'] + context = form.cleaned_data['context'] + ignores = [nick.strip() for nick in form.cleaned_data['ignore'].split(',')] # tolerate "a, b" + + whos = [] + for line in log_file: + (timestamp, who, what) = line.split('\t', 2) # the message itself may contain tabs + + if who in ('-->', '<--', '--', ' *'): + continue + + if who in ignores: + continue + + whos.append(who) + + # this is a line we probably care about now + _learn_line(what.rstrip(), context) + + log.debug(set(whos)) + else: + form = LogUploadForm() + + return render(request, 'markov/import_file.html', {'form': form}) + + +def _learn_line(line, context): + """Create a bunch of MarkovStates for a given line of text.""" + + log.debug("learning {0:.40s}...".format(line)) + + words = line.split() + words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop] + + for i, word in enumerate(words): + log.debug("{0:s},{1:s} -> {2:s}".format(words[i], words[i+1], words[i+2])) + state, created = MarkovState.objects.get_or_create(context=context, + k1=words[i], + k2=words[i+1], + v=words[i+2]) + state.count += 1 + state.save() + + if i > len(words) - 4: + break + +# vi:tabstop=4:expandtab:autoindent diff --git a/templates/markov/import_file.html b/templates/markov/import_file.html new file mode 100644 index 0000000..4d07627 --- /dev/null +++ b/templates/markov/import_file.html @@ -0,0 +1,16 @@ +{% extends 'base.html' %} + +{% block title %}markov import{% endblock %} + +{% block content %} +
+ {% csrf_token %} + + {{ form }} +
+ +
+{% endblock %} +