Markov: first cut of markov in django
This is just a basic port of the tables into Django models right now. There's some serious slowness in the state creation that I need to fix before this does anything, but I want to get this into a real database on a real Linode before I go too much further, so here it is.
This commit is contained in:
parent
197f9908e6
commit
77e52acc75
|
@ -38,6 +38,7 @@ INSTALLED_APPS = (
|
||||||
'django.contrib.staticfiles',
|
'django.contrib.staticfiles',
|
||||||
'django_extensions',
|
'django_extensions',
|
||||||
'south',
|
'south',
|
||||||
|
'markov',
|
||||||
'races',
|
'races',
|
||||||
'seen',
|
'seen',
|
||||||
)
|
)
|
||||||
|
|
|
@ -6,6 +6,7 @@ admin.autodiscover()
|
||||||
urlpatterns = patterns('',
|
urlpatterns = patterns('',
|
||||||
url(r'^$', 'dr_botzo.views.home', name='home'),
|
url(r'^$', 'dr_botzo.views.home', name='home'),
|
||||||
|
|
||||||
|
url(r'^markov/', include('markov.urls')),
|
||||||
url(r'^races/', include('races.urls')),
|
url(r'^races/', include('races.urls')),
|
||||||
|
|
||||||
url(r'^admin/', include(admin.site.urls)),
|
url(r'^admin/', include(admin.site.urls)),
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
from django.contrib import admin
|
||||||
|
|
||||||
|
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
||||||
|
|
||||||
|
# register all of the markov models with the admin site in one call, each
# with the default admin options
admin.site.register([MarkovContext, MarkovTarget, MarkovState])
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,24 @@
|
||||||
|
"""
|
||||||
|
markov/forms.py --- forms for manipulating markov data
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.forms import Form, CharField, FileField, ModelChoiceField
|
||||||
|
|
||||||
|
from markov.models import MarkovContext
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.markov')
|
||||||
|
|
||||||
|
|
||||||
|
class LogUploadForm(Form):

    """Collect a log file upload and the options used to import it."""

    # the uploaded log itself
    log_file = FileField(help_text="Weechat log format.")

    # one of the existing MarkovContext rows, picked by the user
    context = ModelChoiceField(queryset=MarkovContext.objects.all())

    # optional; may be left blank
    ignore = CharField(required=False,
                       help_text="Comma-separated list of nicks to ignore.")
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,80 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from south.utils import datetime_utils as datetime
|
||||||
|
from south.db import db
|
||||||
|
from south.v2 import SchemaMigration
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(SchemaMigration):

    """South schema migration creating the tables for the three markov models."""

    def forwards(self, orm):
        """Create the context, target, and state tables plus the state unique constraint."""

        # table for model 'MarkovContext'
        db.create_table(u'markov_markovcontext', (
            (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=32)),
        ))
        db.send_create_signal(u'markov', ['MarkovContext'])

        # table for model 'MarkovTarget'
        db.create_table(u'markov_markovtarget', (
            (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('target', self.gf('django.db.models.fields.CharField')(max_length=64)),
            ('context', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])),
            ('chatter_chance', self.gf('django.db.models.fields.IntegerField')(default=0)),
        ))
        db.send_create_signal(u'markov', ['MarkovTarget'])

        # table for model 'MarkovState'
        db.create_table(u'markov_markovstate', (
            (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('k1', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('k2', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('v', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('count', self.gf('django.db.models.fields.IntegerField')(default=0)),
            ('context', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])),
        ))
        db.send_create_signal(u'markov', ['MarkovState'])

        # unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v']
        db.create_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v'])

    def backwards(self, orm):
        """Drop the state unique constraint and then all three tables."""

        # unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v']
        db.delete_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v'])

        # table for model 'MarkovContext'
        db.delete_table(u'markov_markovcontext')

        # table for model 'MarkovTarget'
        db.delete_table(u'markov_markovtarget')

        # table for model 'MarkovState'
        db.delete_table(u'markov_markovstate')

    # frozen model definitions that South uses to build the ``orm`` argument
    models = {
        u'markov.markovcontext': {
            'Meta': {'object_name': 'MarkovContext'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '32'})
        },
        u'markov.markovstate': {
            'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'},
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'v': ('django.db.models.fields.CharField', [], {'max_length': '128'})
        },
        u'markov.markovtarget': {
            'Meta': {'object_name': 'MarkovTarget'},
            'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'target': ('django.db.models.fields.CharField', [], {'max_length': '64'})
        }
    }

    complete_apps = ['markov']
|
|
@ -0,0 +1,59 @@
|
||||||
|
"""
|
||||||
|
markov/models.py --- save brain pieces for chaining
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.markov')
|
||||||
|
|
||||||
|
|
||||||
|
class MarkovContext(models.Model):

    """A named context that Markov targets and states are attached to."""

    name = models.CharField(max_length=32)

    def __unicode__(self):
        """Render the context as its name."""

        label = self.name
        return u"{0:s}".format(label)
|
||||||
|
|
||||||
|
|
||||||
|
class MarkovTarget(models.Model):

    """Define IRC targets that relate to a context, and can occasionally be talked to."""

    # the IRC target's name
    target = models.CharField(max_length=64)
    # the context this target belongs to
    context = models.ForeignKey(MarkovContext)

    # integer chance setting for chatter; defaults to 0
    chatter_chance = models.IntegerField(default=0)

    def __unicode__(self):
        """String representation."""

        # gives the admin a readable label, like the other markov models have
        return u"{0:s} -> {1:s}".format(self.target, self.context.name)
|
||||||
|
|
||||||
|
|
||||||
|
class MarkovState(models.Model):

    """A single chain link: a pair of keys, the value that follows them, and a count."""

    # sentinel words placed before and after the real words of a learned line
    _start1 = '__start1'
    _start2 = '__start2'
    _stop = '__stop'

    # the two leading words and the word that follows them
    k1 = models.CharField(max_length=128)
    k2 = models.CharField(max_length=128)
    v = models.CharField(max_length=128)

    # incremented each time this k1, k2 -> v transition is learned
    count = models.IntegerField(default=0)
    context = models.ForeignKey(MarkovContext)

    class Meta:
        # one row per transition per context
        unique_together = ('context', 'k1', 'k2', 'v')

    def __unicode__(self):
        """Render the transition and its count as text."""

        template = u"{0:s},{1:s} -> {2:s} (count: {3:d})"
        return template.format(self.k1, self.k2, self.v, self.count)
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""
|
||||||
|
markov/urls.py --- url patterns for markov stuff
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.conf.urls import patterns, url
|
||||||
|
|
||||||
|
# the view names below are given relative to the 'markov.views' prefix
urlpatterns = patterns(
    'markov.views',
    url(r'^$', 'index', name='markov_index'),
    url(r'^import/$', 'import_file', name='markov_import_file'),
)
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,81 @@
|
||||||
|
"""
|
||||||
|
markov/views.py --- manipulate markov data
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.http import HttpResponse
|
||||||
|
from django.shortcuts import render
|
||||||
|
|
||||||
|
from markov.forms import LogUploadForm
|
||||||
|
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.markov')
|
||||||
|
|
||||||
|
|
||||||
|
def index(request):
    """Answer with an empty response; there is nothing to show yet."""

    empty_response = HttpResponse()
    return empty_response
|
||||||
|
|
||||||
|
|
||||||
|
def import_file(request):
    """Accept a file upload and turn it into markov stuff.

    Current file formats supported:
    * weechat (one tab-separated timestamp, who, what triple per line)

    On a valid POST, every line of the upload that is not a weechat
    join/part/notice/action marker and was not said by an ignored nick is
    handed to _learn_line() for the chosen context. Lines that do not have all
    three fields are skipped. In every case the upload form is rendered, bound
    to the submitted data when there was a POST.

    """

    if request.method == 'POST':
        form = LogUploadForm(request.POST, request.FILES)
        if form.is_valid():
            log_file = form.cleaned_data['log_file']
            context = form.cleaned_data['context']

            # strip whitespace so that "nick1, nick2" ignores both nicks, and
            # drop the empty entries left behind by blank input or stray commas
            ignores = set(nick.strip()
                          for nick in form.cleaned_data['ignore'].split(',')
                          if nick.strip())

            whos = set()
            for line in log_file:
                # split at most twice, so tabs inside the message itself stay
                # in the message rather than breaking the unpacking below
                fields = line.split('\t', 2)
                if len(fields) != 3:
                    # blank or otherwise malformed line, nothing to learn
                    continue
                who, what = fields[1], fields[2]

                if who in ('-->', '<--', '--', ' *') or who in ignores:
                    continue

                whos.add(who)

                # this is a line we probably care about now
                _learn_line(what.rstrip(), context)

            # everyone whose lines were learned from
            log.debug(whos)
    else:
        form = LogUploadForm()

    return render(request, 'markov/import_file.html', {'form': form})
|
||||||
|
|
||||||
|
|
||||||
|
def _learn_line(line, context):
    """Create a bunch of MarkovStates for a given line of text.

    The line is split on whitespace and wrapped in the start/stop sentinels.
    For each run of three consecutive words (k1, k2, v), the matching
    MarkovState in the given context is fetched or created and its count is
    incremented by one. A line with no words in it is ignored.

    """

    # lazy logging arguments: nothing is formatted unless debug is enabled
    log.debug("learning %.40s...", line)

    words = line.split()
    if not words:
        # learning an empty line would only record a start -> stop transition
        return

    words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop]

    # slide a three-word window over the padded line
    for k1, k2, v in zip(words, words[1:], words[2:]):
        log.debug("%s,%s -> %s", k1, k2, v)
        state, created = MarkovState.objects.get_or_create(context=context,
                                                           k1=k1, k2=k2, v=v)
        state.count += 1
        state.save()
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,16 @@
|
||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block title %}markov import{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<form id="markov_import_file_form" enctype="multipart/form-data" action="{% url 'markov_import_file' %}" method="post">
|
||||||
|
{% csrf_token %}
|
||||||
|
<table>
|
||||||
|
{{ form }}
|
||||||
|
</table>
|
||||||
|
<input class="submit-button" type="submit" value="Import"/>
|
||||||
|
</form>
|
||||||
|
{% endblock %}
|
||||||
|
<!--
|
||||||
|
vi:tabstop=4:expandtab:autoindent
|
||||||
|
-->
|
Loading…
Reference in New Issue