113 lines
4.2 KiB
Python
113 lines
4.2 KiB
Python
"""Manage Markov models and administrative commands."""
|
|
|
|
import logging
|
|
|
|
from django.contrib import admin
|
|
from django.contrib.auth.decorators import permission_required
|
|
from django.shortcuts import render
|
|
|
|
from markov.forms import LogUploadForm, TeachLineForm
|
|
import markov.lib as markovlib
|
|
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
|
|
|
|
|
# Module-level logger used by the admin views in this file.
log = logging.getLogger('markov.admin')

# Expose the markov models in the admin with the default ModelAdmin.
# AdminSite.register accepts an iterable of models and registers each in order.
admin.site.register([MarkovContext, MarkovTarget, MarkovState])
|
|
|
|
|
|
def _increment_state(context, k1, k2, v):
    """Fetch or create the MarkovState for (context, k1, k2, v) and add one to its count."""
    state, _created = MarkovState.objects.get_or_create(context=context, k1=k1, k2=k2, v=v)
    state.count += 1
    state.save()


def _learn_weechat_log(text_file, context, ignores, strips):
    """Learn every chat line of an uploaded weechat log.

    Each line is decoded as UTF-8 and split on tabs into three fields
    (timestamp, who, what). Lines whose ``who`` field is one of
    '-->', '<--', '--', ' *' or is listed in ``ignores`` are skipped, as are
    lines that do not have all three fields. Words of ``what`` that appear in
    ``strips`` are dropped before the line is handed to markovlib.learn_line.
    """
    whos = set()
    for raw_line in text_file:
        log.debug(raw_line)
        fields = raw_line.decode('utf-8').split('\t', 2)
        if len(fields) != 3:
            # blank or truncated line: unpacking it would raise ValueError and
            # abort the whole import, so skip it instead
            log.debug("skipping malformed line")
            continue
        _timestamp, who, what = fields

        if who in ('-->', '<--', '--', ' *'):
            continue

        if who in ignores:
            continue

        whos.add(who)

        # this is a line we probably care about now
        words = [x for x in what.rstrip().split(' ') if x not in strips]
        markovlib.learn_line(' '.join(words), context)
        log.debug("learned")

    log.debug(whos)


def _learn_raw_text(text_file, context):
    """Learn an uploaded plain-text file word by word.

    The chain state (k1, k2) carries across line breaks; it is only reset to
    the start markers after a word ending in '.', '?' or '!', at which point a
    stop state is recorded as well. Empty tokens (from repeated spaces or
    blank lines) are ignored.
    """
    k1 = MarkovState._start1
    k2 = MarkovState._start2
    for raw_line in text_file:
        for word in raw_line.decode('utf-8').rstrip().split(' '):
            log.info(word)
            if not word:
                continue

            _increment_state(context, k1, k2, word)

            if word.endswith(('.', '?', '!')):
                # sentence boundary: record the stop transition and start over
                _increment_state(context, k2, word, MarkovState._stop)
                k1 = MarkovState._start1
                k2 = MarkovState._start2
            else:
                k1, k2 = k2, word


@permission_required('import_text_file', raise_exception=True)
def import_file(request):
    """Accept a file upload and turn it into markov stuff.

    Current file formats supported:
    * weechat
    * raw text

    On GET an empty upload form is rendered. On POST the form is validated;
    when valid, the uploaded file is learned according to the selected format
    and a fresh, empty form is rendered. When invalid, the bound form is
    rendered again so its errors can be shown.
    """
    if request.method == 'POST':
        form = LogUploadForm(request.POST, request.FILES)
        if form.is_valid():
            file_format = form.cleaned_data['text_file_format']
            if file_format == LogUploadForm.FILE_FORMAT_WEECHAT:
                text_file = request.FILES['text_file']
                context = form.cleaned_data['context']
                ignores = form.cleaned_data['ignore_nicks'].split(',')
                strips = form.cleaned_data['strip_prefixes'].split(' ')

                _learn_weechat_log(text_file, context, ignores, strips)
                form = LogUploadForm()
            elif file_format == LogUploadForm.FILE_FORMAT_RAW_TEXT:
                text_file = request.FILES['text_file']
                context = form.cleaned_data['context']

                _learn_raw_text(text_file, context)
                # reset the form after a successful import, as the weechat
                # branch does
                form = LogUploadForm()
    else:
        form = LogUploadForm()

    return render(request, 'markov/import_file.html', {'form': form})
|
|
|
|
|
|
@permission_required('teach_line', raise_exception=True)
def teach_line(request):
    """Teach one line directly.

    GET renders an empty form. A valid POST drops every word listed in the
    form's strip prefixes, passes the remaining text to markovlib.learn_line
    for the chosen context, and renders a fresh form. An invalid POST renders
    the bound form again.
    """
    template_name = 'markov/teach_line.html'

    if request.method != 'POST':
        return render(request, template_name, {'form': TeachLineForm()})

    submitted = TeachLineForm(request.POST)
    if not submitted.is_valid():
        return render(request, template_name, {'form': submitted})

    text = submitted.cleaned_data['line']
    target_context = submitted.cleaned_data['context']
    unwanted = submitted.cleaned_data['strip_prefixes'].split(' ')

    kept_words = []
    for token in text.rstrip().split(' '):
        if token not in unwanted:
            kept_words.append(token)
    markovlib.learn_line(' '.join(kept_words), target_context)

    return render(request, template_name, {'form': TeachLineForm()})
|
|
|
|
|
|
# Hook the custom views into the admin site.
# NOTE(review): register_view is not part of stock Django's AdminSite;
# presumably the project swaps in an extended admin site -- confirm.
admin.site.register_view(
    'markov/importfile/',
    "Markov - Import log file",
    view=import_file,
    urlname='markov_import_file',
)
admin.site.register_view(
    'markov/teach/',
    "Markov - Teach line",
    view=teach_line,
    urlname='markov_teach_line',
)
|