markov: import generic text files
This commit is contained in:
parent
8fded6ba6c
commit
6e21416791
@ -19,7 +19,7 @@ admin.site.register(MarkovTarget)
|
|||||||
admin.site.register(MarkovState)
|
admin.site.register(MarkovState)
|
||||||
|
|
||||||
|
|
||||||
@permission_required('import_log_file', raise_exception=True)
|
@permission_required('import_text_file', raise_exception=True)
|
||||||
def import_file(request):
|
def import_file(request):
|
||||||
"""Accept a file upload and turn it into markov stuff.
|
"""Accept a file upload and turn it into markov stuff.
|
||||||
|
|
||||||
@ -30,31 +30,58 @@ def import_file(request):
|
|||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
form = LogUploadForm(request.POST, request.FILES)
|
form = LogUploadForm(request.POST, request.FILES)
|
||||||
if form.is_valid():
|
if form.is_valid():
|
||||||
log_file = request.FILES['log_file']
|
if form.cleaned_data['text_file_format'] == LogUploadForm.FILE_FORMAT_WEECHAT:
|
||||||
context = form.cleaned_data['context']
|
text_file = request.FILES['text_file']
|
||||||
ignores = form.cleaned_data['ignore_nicks'].split(',')
|
context = form.cleaned_data['context']
|
||||||
strips = form.cleaned_data['strip_prefixes'].split(' ')
|
ignores = form.cleaned_data['ignore_nicks'].split(',')
|
||||||
|
strips = form.cleaned_data['strip_prefixes'].split(' ')
|
||||||
|
|
||||||
whos = []
|
whos = []
|
||||||
for line in log_file:
|
for line in text_file:
|
||||||
log.debug(line)
|
log.debug(line)
|
||||||
(timestamp, who, what) = line.decode('utf-8').split('\t', 2)
|
(timestamp, who, what) = line.decode('utf-8').split('\t', 2)
|
||||||
|
|
||||||
if who in ('-->', '<--', '--', ' *'):
|
if who in ('-->', '<--', '--', ' *'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if who in ignores:
|
if who in ignores:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
whos.append(who)
|
whos.append(who)
|
||||||
|
|
||||||
# this is a line we probably care about now
|
# this is a line we probably care about now
|
||||||
what = [x for x in what.rstrip().split(' ') if x not in strips]
|
what = [x for x in what.rstrip().split(' ') if x not in strips]
|
||||||
markovlib.learn_line(' '.join(what), context)
|
markovlib.learn_line(' '.join(what), context)
|
||||||
log.debug("learned")
|
log.debug("learned")
|
||||||
|
|
||||||
log.debug(set(whos))
|
log.debug(set(whos))
|
||||||
form = LogUploadForm()
|
form = LogUploadForm()
|
||||||
|
elif form.cleaned_data['text_file_format'] == LogUploadForm.FILE_FORMAT_RAW_TEXT:
|
||||||
|
text_file = request.FILES['text_file']
|
||||||
|
context = form.cleaned_data['context']
|
||||||
|
|
||||||
|
k1 = MarkovState._start1
|
||||||
|
k2 = MarkovState._start2
|
||||||
|
for line in text_file:
|
||||||
|
for word in [x for x in line.decode('utf-8') .rstrip().split(' ')]:
|
||||||
|
log.info(word)
|
||||||
|
if word:
|
||||||
|
state, created = MarkovState.objects.get_or_create(context=context, k1=k1,
|
||||||
|
k2=k2, v=word)
|
||||||
|
state.count += 1
|
||||||
|
state.save()
|
||||||
|
|
||||||
|
if word[-1] in ['.', '?', '!']:
|
||||||
|
state, created = MarkovState.objects.get_or_create(context=context, k1=k2,
|
||||||
|
k2=word, v=MarkovState._stop)
|
||||||
|
state.count += 1
|
||||||
|
state.save()
|
||||||
|
|
||||||
|
k1 = MarkovState._start1
|
||||||
|
k2 = MarkovState._start2
|
||||||
|
else:
|
||||||
|
k1 = k2
|
||||||
|
k2 = word
|
||||||
else:
|
else:
|
||||||
form = LogUploadForm()
|
form = LogUploadForm()
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from django.forms import Form, CharField, FileField, ModelChoiceField
|
from django.forms import Form, CharField, ChoiceField, FileField, ModelChoiceField
|
||||||
|
|
||||||
from markov.models import MarkovContext
|
from markov.models import MarkovContext
|
||||||
|
|
||||||
@ -13,11 +13,20 @@ class LogUploadForm(Form):
|
|||||||
|
|
||||||
"""Accept a file upload that will be imported into Markov stuff."""
|
"""Accept a file upload that will be imported into Markov stuff."""
|
||||||
|
|
||||||
log_file = FileField(help_text="Weechat log format.")
|
FILE_FORMAT_WEECHAT = 'WEECHAT'
|
||||||
|
FILE_FORMAT_RAW_TEXT = 'RAW'
|
||||||
|
|
||||||
|
FILE_FORMAT_CHOICES = (
|
||||||
|
(FILE_FORMAT_WEECHAT, "Weechat"),
|
||||||
|
(FILE_FORMAT_RAW_TEXT, "Raw text file"),
|
||||||
|
)
|
||||||
|
|
||||||
|
text_file = FileField()
|
||||||
|
text_file_format = ChoiceField(choices=FILE_FORMAT_CHOICES)
|
||||||
context = ModelChoiceField(queryset=MarkovContext.objects.all())
|
context = ModelChoiceField(queryset=MarkovContext.objects.all())
|
||||||
ignore_nicks = CharField(help_text="Comma-separated list of nicks to ignore.",
|
ignore_nicks = CharField(help_text="Comma-separated list of nicks to ignore. For Weechat logs.",
|
||||||
required=False)
|
required=False)
|
||||||
strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip.",
|
strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip. For Weechat logs.",
|
||||||
required=False)
|
required=False)
|
||||||
|
|
||||||
|
|
||||||
|
18
dr_botzo/markov/migrations/0003_auto_20161112_2348.py
Normal file
18
dr_botzo/markov/migrations/0003_auto_20161112_2348.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Set the ``markovstate`` model permissions.

    After this migration the model options list two permissions:
    ``import_text_file`` and ``teach_line``.
    """

    dependencies = [
        ('markov', '0002_auto_20150514_2317'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='markovstate',
            options={
                # Kept as a set (same value as before, written as a literal).
                'permissions': {
                    ('import_text_file', 'Can import states from a text file'),
                    ('teach_line', 'Can teach lines'),
                },
            },
        ),
    ]
|
@ -59,7 +59,7 @@ class MarkovState(models.Model):
|
|||||||
['context', 'v'],
|
['context', 'v'],
|
||||||
]
|
]
|
||||||
permissions = {
|
permissions = {
|
||||||
('import_log_file', "Can import states from a log file"),
|
('import_text_file', "Can import states from a text file"),
|
||||||
('teach_line', "Can teach lines"),
|
('teach_line', "Can teach lines"),
|
||||||
}
|
}
|
||||||
unique_together = ('context', 'k1', 'k2', 'v')
|
unique_together = ('context', 'k1', 'k2', 'v')
|
||||||
|
Loading…
Reference in New Issue
Block a user