Markov: support ignoring prefixes in import/learn

This lets us ignore/strip "botname: "-style prefixes from lines before they are learned.
2014-04-05 18:00:45 -05:00 · 2014-04-05 18:00:45 -05:00 · c193b7f4be
commit c193b7f4be
parent 1fc13b011d
2 changed files with 13 additions and 5 deletions
--- a/markov/forms.py
+++ b/markov/forms.py
@ -18,8 +18,10 @@ class LogUploadForm(Form):

    log_file = FileField(help_text="Weechat log format.")
    context = ModelChoiceField(queryset=MarkovContext.objects.all())
-    ignore = CharField(help_text="Comma-separated list of nicks to ignore.",
-                       required=False)
+    ignore_nicks = CharField(help_text="Comma-separated list of nicks to ignore.",
+                             required=False)
+    strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip.",
+                               required=False)


 class TeachLineForm(Form):
@ -28,5 +30,7 @@ class TeachLineForm(Form):

    context = ModelChoiceField(queryset=MarkovContext.objects.all())
    line = CharField()
+    strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip.",
+                               required=False)

 # vi:tabstop=4:expandtab:autoindent
--- a/markov/views.py
+++ b/markov/views.py
@ -51,7 +51,8 @@ def import_file(request):
        if form.is_valid():
            log_file = request.FILES['log_file']
            context = form.cleaned_data['context']
-            ignores = form.cleaned_data['ignore'].split(',')
+            ignores = form.cleaned_data['ignore_nicks'].split(',')
+            strips = form.cleaned_data['strip_prefixes'].split(' ')

            whos = []
            for line in log_file:
@ -66,7 +67,8 @@ def import_file(request):
                whos.append(who)

                # this is a line we probably care about now
-                _learn_line(what.rstrip(), context)
+                what = [x for x in what.rstrip().split(' ') if x not in strips]
+                _learn_line(' '.join(what), context)

            log.debug(set(whos))
    else:
@ -84,7 +86,9 @@ def teach_line(request):
        if form.is_valid():
            line = form.cleaned_data['line']
            context = form.cleaned_data['context']
-            _learn_line(line.rstrip(), context)
+            strips = form.cleaned_data['strip_prefixes'].split(' ')
+            what = [x for x in line.rstrip().split(' ') if x not in strips]
+            _learn_line(' '.join(what), context)
    else:
        form = TeachLineForm()