From c193b7f4beb96cf8a99a02b8e6c9090f04d47311 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 18:00:45 -0500 Subject: [PATCH] Markov: support ignoring prefixes in import/learn This is so we can ignore/strip "botname: "-style prefixes from lines. --- markov/forms.py | 8 ++++++-- markov/views.py | 10 +++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/markov/forms.py b/markov/forms.py index c18935c..00a80e4 100644 --- a/markov/forms.py +++ b/markov/forms.py @@ -18,8 +18,10 @@ class LogUploadForm(Form): log_file = FileField(help_text="Weechat log format.") context = ModelChoiceField(queryset=MarkovContext.objects.all()) - ignore = CharField(help_text="Comma-separated list of nicks to ignore.", - required=False) + ignore_nicks = CharField(help_text="Comma-separated list of nicks to ignore.", + required=False) + strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip.", + required=False) class TeachLineForm(Form): @@ -28,5 +30,7 @@ class TeachLineForm(Form): context = ModelChoiceField(queryset=MarkovContext.objects.all()) line = CharField() + strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip.", + required=False) # vi:tabstop=4:expandtab:autoindent diff --git a/markov/views.py b/markov/views.py index 09b4413..da83050 100644 --- a/markov/views.py +++ b/markov/views.py @@ -51,7 +51,8 @@ def import_file(request): if form.is_valid(): log_file = request.FILES['log_file'] context = form.cleaned_data['context'] - ignores = form.cleaned_data['ignore'].split(',') + ignores = form.cleaned_data['ignore_nicks'].split(',') + strips = form.cleaned_data['strip_prefixes'].split(' ') whos = [] for line in log_file: @@ -66,7 +67,8 @@ def import_file(request): whos.append(who) # this is a line we probably care about now - _learn_line(what.rstrip(), context) + what = [x for x in what.rstrip().split(' ') if x not in strips] + _learn_line(' '.join(what), context) log.debug(set(whos)) else: @@ -84,7 
+86,9 @@ def teach_line(request): if form.is_valid(): line = form.cleaned_data['line'] context = form.cleaned_data['context'] - _learn_line(line.rstrip(), context) + strips = form.cleaned_data['strip_prefixes'].split(' ') + what = [x for x in line.rstrip().split(' ') if x not in strips] + _learn_line(' '.join(what), context) else: form = TeachLineForm()