From f9c7388bae6d4820f9b4ba2a1d7e195f79dab51a Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 10:49:23 -0500 Subject: [PATCH 01/13] just return a blank page for the index i'll keep the index.html around for when we eventually use it, though --- dr_botzo/views.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dr_botzo/views.py b/dr_botzo/views.py index b69d91a..af16b5d 100644 --- a/dr_botzo/views.py +++ b/dr_botzo/views.py @@ -1,8 +1,15 @@ +""" +dr_botzo/views.py --- various random views + +""" + +from django.http import HttpResponse from django.shortcuts import render -def home(request): - """Site index, nothing special (or at all).""" - return render(request, 'index.html', {}) +def home(request): + """Site index, nothing special (or at all, right now).""" + + return HttpResponse() # vi:tabstop=4:expandtab:autoindent From c1fadb7b742efd4300a4058d9875306658aec69c Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 10:50:01 -0500 Subject: [PATCH 02/13] Races: add some logging stuff for later use --- races/models.py | 10 ++++++++++ races/views.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/races/models.py b/races/models.py index b943993..bbe879e 100644 --- a/races/models.py +++ b/races/models.py @@ -1,7 +1,17 @@ +""" +races/models.py --- models for managing competitive races + +""" + +import logging + from django.db import models from django.utils import timezone +log = logging.getLogger('dr_botzo.races') + + class Race(models.Model): """Track a race.""" diff --git a/races/views.py b/races/views.py index eed7ee7..b8f97b3 100644 --- a/races/views.py +++ b/races/views.py @@ -1,8 +1,18 @@ +""" +races/views.py --- display race statuses and whatnot + +""" + +import logging + from django.shortcuts import get_object_or_404, render from races.models import Race, Racer, RaceUpdate +log = logging.getLogger('dr_botzo.races') + + def index(request): """Display a list of races.""" From 
197f9908e6b71909bbc58a956e7c42df502f0afd Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 10:50:33 -0500 Subject: [PATCH 03/13] .gitignore *.log --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8e891ab..93bedde 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.facts *.json +*.log *.pyc *.sqlite3 *.swo From 77e52acc7518169a723dc18790632c7251d43b7c Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 10:52:29 -0500 Subject: [PATCH 04/13] Markov: first cut of markov in django this is just a basic port of the tables into django models right now. there's some serious slowness in the state creation that i need to fix before this does anything, but i want to get this in a real database on a real linode before i go too much further, so here it is --- dr_botzo/settings.py | 1 + dr_botzo/urls.py | 1 + markov/__init__.py | 0 markov/admin.py | 9 +++ markov/forms.py | 24 ++++++ ...001_markov_context_and_target_and_state.py | 80 ++++++++++++++++++ markov/migrations/__init__.py | 0 markov/models.py | 59 ++++++++++++++ markov/urls.py | 13 +++ markov/views.py | 81 +++++++++++++++++++ templates/markov/import_file.html | 16 ++++ 11 files changed, 284 insertions(+) create mode 100644 markov/__init__.py create mode 100644 markov/admin.py create mode 100644 markov/forms.py create mode 100644 markov/migrations/0001_markov_context_and_target_and_state.py create mode 100644 markov/migrations/__init__.py create mode 100644 markov/models.py create mode 100644 markov/urls.py create mode 100644 markov/views.py create mode 100644 templates/markov/import_file.html diff --git a/dr_botzo/settings.py b/dr_botzo/settings.py index c1e9f17..5f6f495 100644 --- a/dr_botzo/settings.py +++ b/dr_botzo/settings.py @@ -38,6 +38,7 @@ INSTALLED_APPS = ( 'django.contrib.staticfiles', 'django_extensions', 'south', + 'markov', 'races', 'seen', ) diff --git a/dr_botzo/urls.py b/dr_botzo/urls.py index c39a3b2..622ff6b 
100644 --- a/dr_botzo/urls.py +++ b/dr_botzo/urls.py @@ -6,6 +6,7 @@ admin.autodiscover() urlpatterns = patterns('', url(r'^$', 'dr_botzo.views.home', name='home'), + url(r'^markov/', include('markov.urls')), url(r'^races/', include('races.urls')), url(r'^admin/', include(admin.site.urls)), diff --git a/markov/__init__.py b/markov/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markov/admin.py b/markov/admin.py new file mode 100644 index 0000000..d822365 --- /dev/null +++ b/markov/admin.py @@ -0,0 +1,9 @@ +from django.contrib import admin + +from markov.models import MarkovContext, MarkovTarget, MarkovState + +admin.site.register(MarkovContext) +admin.site.register(MarkovTarget) +admin.site.register(MarkovState) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/forms.py b/markov/forms.py new file mode 100644 index 0000000..6aca9b9 --- /dev/null +++ b/markov/forms.py @@ -0,0 +1,24 @@ +""" +markov/forms.py --- forms for manipulating markov data + +""" + +import logging + +from django.forms import Form, CharField, FileField, ModelChoiceField + +from markov.models import MarkovContext + +log = logging.getLogger('dr_botzo.markov') + + +class LogUploadForm(Form): + + """Accept a file upload that will be imported into Markov stuff.""" + + log_file = FileField(help_text="Weechat log format.") + context = ModelChoiceField(queryset=MarkovContext.objects.all()) + ignore = CharField(help_text="Comma-separated list of nicks to ignore.", + required=False) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/migrations/0001_markov_context_and_target_and_state.py b/markov/migrations/0001_markov_context_and_target_and_state.py new file mode 100644 index 0000000..80b8314 --- /dev/null +++ b/markov/migrations/0001_markov_context_and_target_and_state.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class 
Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'MarkovContext' + db.create_table(u'markov_markovcontext', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('name', self.gf('django.db.models.fields.CharField')(max_length=32)), + )) + db.send_create_signal(u'markov', ['MarkovContext']) + + # Adding model 'MarkovTarget' + db.create_table(u'markov_markovtarget', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('target', self.gf('django.db.models.fields.CharField')(max_length=64)), + ('context', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])), + ('chatter_chance', self.gf('django.db.models.fields.IntegerField')(default=0)), + )) + db.send_create_signal(u'markov', ['MarkovTarget']) + + # Adding model 'MarkovState' + db.create_table(u'markov_markovstate', ( + (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('k1', self.gf('django.db.models.fields.CharField')(max_length=128)), + ('k2', self.gf('django.db.models.fields.CharField')(max_length=128)), + ('v', self.gf('django.db.models.fields.CharField')(max_length=128)), + ('count', self.gf('django.db.models.fields.IntegerField')(default=0)), + ('context', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])), + )) + db.send_create_signal(u'markov', ['MarkovState']) + + # Adding unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v'] + db.create_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v']) + + + def backwards(self, orm): + # Removing unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v'] + db.delete_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v']) + + # Deleting model 'MarkovContext' + db.delete_table(u'markov_markovcontext') + + # Deleting model 'MarkovTarget' + db.delete_table(u'markov_markovtarget') + + # Deleting model 'MarkovState' + db.delete_table(u'markov_markovstate') + + 
+ models = { + u'markov.markovcontext': { + 'Meta': {'object_name': 'MarkovContext'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '32'}) + }, + u'markov.markovstate': { + 'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'}, + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + 'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'v': ('django.db.models.fields.CharField', [], {'max_length': '128'}) + }, + u'markov.markovtarget': { + 'Meta': {'object_name': 'MarkovTarget'}, + 'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'target': ('django.db.models.fields.CharField', [], {'max_length': '64'}) + } + } + + complete_apps = ['markov'] \ No newline at end of file diff --git a/markov/migrations/__init__.py b/markov/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/markov/models.py b/markov/models.py new file mode 100644 index 0000000..8a568f6 --- /dev/null +++ b/markov/models.py @@ -0,0 +1,59 @@ +""" +markov/models.py --- save brain pieces for chaining + +""" + +import logging + +from django.db import models + + +log = logging.getLogger('dr_botzo.markov') + + +class MarkovContext(models.Model): + + """Define contexts for Markov chains.""" + + name = models.CharField(max_length=32) + + def __unicode__(self): + """String representation.""" + + return u"{0:s}".format(self.name) + + +class 
MarkovTarget(models.Model): + + """Define IRC targets that relate to a context, and can occasionally be talked to.""" + + target = models.CharField(max_length=64) + context = models.ForeignKey(MarkovContext) + + chatter_chance = models.IntegerField(default=0) + + +class MarkovState(models.Model): + + """One element in a Markov chain, some text or something.""" + + _start1 = '__start1' + _start2 = '__start2' + _stop = '__stop' + + k1 = models.CharField(max_length=128) + k2 = models.CharField(max_length=128) + v = models.CharField(max_length=128) + + count = models.IntegerField(default=0) + context = models.ForeignKey(MarkovContext) + + class Meta: + unique_together = ('context', 'k1', 'k2', 'v') + + def __unicode__(self): + """String representation.""" + + return u"{0:s},{1:s} -> {2:s} (count: {3:d})".format(self.k1, self.k2, self.v, self.count) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/urls.py b/markov/urls.py new file mode 100644 index 0000000..39fcab8 --- /dev/null +++ b/markov/urls.py @@ -0,0 +1,13 @@ +""" +markov/urls.py --- url patterns for markov stuff + +""" + +from django.conf.urls import patterns, url + +urlpatterns = patterns('markov.views', + url(r'^$', 'index', name='markov_index'), + url(r'^import/$', 'import_file', name='markov_import_file'), +) + +# vi:tabstop=4:expandtab:autoindent diff --git a/markov/views.py b/markov/views.py new file mode 100644 index 0000000..c25df84 --- /dev/null +++ b/markov/views.py @@ -0,0 +1,81 @@ +""" +markov/views.py --- manipulate markov data + +""" + +import logging + +from django.http import HttpResponse +from django.shortcuts import render + +from markov.forms import LogUploadForm +from markov.models import MarkovContext, MarkovTarget, MarkovState + + +log = logging.getLogger('dr_botzo.markov') + + +def index(request): + """Display nothing, for the moment.""" + + return HttpResponse() + + +def import_file(request): + """Accept a file upload and turn it into markov stuff. 
+ + Current file formats supported: + * weechat + + """ + + if request.method == 'POST': + form = LogUploadForm(request.POST, request.FILES) + if form.is_valid(): + log_file = request.FILES['log_file'] + context = form.cleaned_data['context'] + ignores = form.cleaned_data['ignore'].split(',') + + whos = [] + for line in log_file: + (timestamp, who, what) = line.split('\t') + + if who in ('-->', '<--', '--', ' *'): + continue + + if who in ignores: + continue + + whos.append(who) + + # this is a line we probably care about now + _learn_line(what.rstrip(), context) + + log.debug(set(whos)) + else: + form = LogUploadForm() + + return render(request, 'markov/import_file.html', {'form': form}) + + +def _learn_line(line, context): + """Create a bunch of MarkovStates for a given line of text.""" + + log.debug("learning {0:.40s}...".format(line)) + + words = line.split() + words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop] + + for i, word in enumerate(words): + log.debug("{0:s},{1:s} -> {2:s}".format(words[i], words[i+1], words[i+2])) + state, created = MarkovState.objects.get_or_create(context=context, + k1=words[i], + k2=words[i+1], + v=words[i+2]) + state.count += 1 + state.save() + + if i > len(words) - 4: + break + +# vi:tabstop=4:expandtab:autoindent diff --git a/templates/markov/import_file.html b/templates/markov/import_file.html new file mode 100644 index 0000000..4d07627 --- /dev/null +++ b/templates/markov/import_file.html @@ -0,0 +1,16 @@ +{% extends 'base.html' %} + +{% block title %}markov import{% endblock %} + +{% block content %} +
+ {% csrf_token %} + + {{ form }} +
+ +
+{% endblock %} + From 399d4198240db36fd9d383057dd9a1ec1c8e2699 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 11:09:03 -0500 Subject: [PATCH 05/13] Markov: heed k1/k2/v length limit when importing --- markov/views.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/markov/views.py b/markov/views.py index c25df84..715cdb5 100644 --- a/markov/views.py +++ b/markov/views.py @@ -66,6 +66,10 @@ def _learn_line(line, context): words = line.split() words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop] + for word in words: + if len(word) > MarkovState._meta.get_field('k1').max_length: + return + for i, word in enumerate(words): log.debug("{0:s},{1:s} -> {2:s}".format(words[i], words[i+1], words[i+2])) state, created = MarkovState.objects.get_or_create(context=context, From acfcd574b53b9a459af0004dad0b782daebe1637 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 11:20:59 -0500 Subject: [PATCH 06/13] Markov: add a permission for importing from a log --- markov/models.py | 3 +++ markov/views.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/markov/models.py b/markov/models.py index 8a568f6..1e38219 100644 --- a/markov/models.py +++ b/markov/models.py @@ -49,6 +49,9 @@ class MarkovState(models.Model): context = models.ForeignKey(MarkovContext) class Meta: + permissions = { + ('import_log_file', "Can import states from a log file"), + } unique_together = ('context', 'k1', 'k2', 'v') def __unicode__(self): diff --git a/markov/views.py b/markov/views.py index 715cdb5..662a186 100644 --- a/markov/views.py +++ b/markov/views.py @@ -5,6 +5,7 @@ markov/views.py --- manipulate markov data import logging +from django.contrib.auth.decorators import permission_required from django.http import HttpResponse from django.shortcuts import render @@ -21,6 +22,7 @@ def index(request): return HttpResponse() +@permission_required('import_log_file', raise_exception=True) def import_file(request): """Accept a 
file upload and turn it into markov stuff. From ad2044a1781df4d79166f047b2711257412879e4 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 14:24:30 -0500 Subject: [PATCH 07/13] Markov: unicode debugging lines --- markov/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markov/views.py b/markov/views.py index 662a186..3b1e57c 100644 --- a/markov/views.py +++ b/markov/views.py @@ -63,7 +63,7 @@ def import_file(request): def _learn_line(line, context): """Create a bunch of MarkovStates for a given line of text.""" - log.debug("learning {0:.40s}...".format(line)) + log.debug(u"learning {0:.40s}...".format(line)) words = line.split() words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop] @@ -73,7 +73,7 @@ def _learn_line(line, context): return for i, word in enumerate(words): - log.debug("{0:s},{1:s} -> {2:s}".format(words[i], words[i+1], words[i+2])) + log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2])) state, created = MarkovState.objects.get_or_create(context=context, k1=words[i], k2=words[i+1], From 9c08a203c77f93918d95cdf279ce99c1cb8cc711 Mon Sep 17 00:00:00 2001 From: "Brian S. 
Stephan" Date: Sat, 5 Apr 2014 14:26:06 -0500 Subject: [PATCH 08/13] Markov: generate markov chains into sentences some view stuff to get at it through django while we're here --- markov/urls.py | 1 + markov/views.py | 114 +++++++++++++++++++++++++++++++++- templates/markov/context.html | 11 ++++ 3 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 templates/markov/context.html diff --git a/markov/urls.py b/markov/urls.py index 39fcab8..7d3ad75 100644 --- a/markov/urls.py +++ b/markov/urls.py @@ -7,6 +7,7 @@ from django.conf.urls import patterns, url urlpatterns = patterns('markov.views', url(r'^$', 'index', name='markov_index'), + url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'), url(r'^import/$', 'import_file', name='markov_import_file'), ) diff --git a/markov/views.py b/markov/views.py index 3b1e57c..da442fc 100644 --- a/markov/views.py +++ b/markov/views.py @@ -4,10 +4,12 @@ markov/views.py --- manipulate markov data """ import logging +import random +import time from django.contrib.auth.decorators import permission_required from django.http import HttpResponse -from django.shortcuts import render +from django.shortcuts import get_object_or_404, render from markov.forms import LogUploadForm from markov.models import MarkovContext, MarkovTarget, MarkovState @@ -22,6 +24,19 @@ def index(request): return HttpResponse() +def context_index(request, context_id): + """Display the context index for the given context.""" + + start_t = time.time() + context = get_object_or_404(MarkovContext, pk=context_id) + chain = ' '.join(_generate_sentence(context)) + end_t = time.time() + + return render(request, 'markov/context.html', {'chain': chain, + 'context': context, + 'elapsed': end_t - start_t}) + + @permission_required('import_log_file', raise_exception=True) def import_file(request): """Accept a file upload and turn it into markov stuff. 
@@ -60,6 +75,103 @@ def import_file(request): return render(request, 'markov/import_file.html', {'form': form}) +def _generate_line(context, topics=None, max_words=30): + """Generate a Markov chain.""" + + words = [] + # if we have topics, try to work from it and work backwards + if topics: + topic_word = random.choice(topics) + topics.remove(topic_word) + log.debug(u"looking for topic '{0:s}'".format(topic_word)) + new_states = MarkovState.objects.filter(context=context, v=topic_word) + + if len(new_states) > 0: + log.debug(u"found '{0:s}', starting backwards".format(topic_word)) + words.insert(0, topic_word) + while len(words) <= max_words and words[0] != MarkovState._start2: + log.debug(u"looking backwards for '{0:s}'".format(words[0])) + new_states = MarkovState.objects.filter(context=context, v=words[0]) + words.insert(0, _get_word_out_of_states(new_states, backwards=True)) + + # if we didn't get topic stuff, we need to start (forwards) here + if len(words) == 0: + words = [MarkovState._start1, MarkovState._start2] + + i = len(words) + while len(words) <= max_words and words[-1] != MarkovState._stop: + log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1])) + new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1]) + words.append(_get_word_out_of_states(new_states)) + i += 1 + + words = [word for word in words if word not in + (MarkovState._start1, MarkovState._start2, MarkovState._stop)] + + return words + + +def _generate_longish_line(context, topics=None, min_words=4, max_words=30): + """Generate a Markov chain, but throw away the short ones unless we get desperate.""" + + tries = 0 + while tries < 5: + line = _generate_line(context, topics=topics, max_words=max_words) + if len(line) >= min_words: + return line + + tries += 1 + + # if we got here, we need to just give up + return _generate_line(context) + + +def _generate_sentence(context, topics=None, min_words=15, max_words=30): + """String multiple lines 
together into a coherent sentence.""" + + tries = 0 + sentence = [] + while tries < 5: + sentence += _generate_longish_line(context, topics=topics, max_words=max_words) + if len(sentence) >= min_words: + return sentence + else: + sentence[-1] += random.choice([',', '.', '!']) + + tries += 1 + + # if we got here, we need to give up + return sentence + + +def _get_word_out_of_states(states, backwards=False): + """Pick one random word out of the given states.""" + + running = 0 + weighted_words = [] + for state in states: + running += state.count + if backwards: + weighted_words.append((running, state.k2)) + else: + weighted_words.append((running, state.v)) + + log.debug(u"{0:s}".format(weighted_words)) + + hit = random.randint(0, running) + log.debug(u"hit: {0:d}".format(hit)) + + new_word = '' + for weight, word in weighted_words: + new_word = word + + if weight >= hit: + break + + log.debug(u"found '{0:s}'".format(new_word)) + return new_word + + def _learn_line(line, context): """Create a bunch of MarkovStates for a given line of text.""" diff --git a/templates/markov/context.html b/templates/markov/context.html new file mode 100644 index 0000000..5112050 --- /dev/null +++ b/templates/markov/context.html @@ -0,0 +1,11 @@ +{% extends 'base.html' %} + +{% block title %}context: {{ context.name }}{% endblock %} + +{% block content %} +

{{ chain }}

+

in: {{ elapsed }}s

+{% endblock %} + From 169f73cb360579bbcc8c7d3a642c85efb4c2d023 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 14:53:05 -0500 Subject: [PATCH 09/13] Markov: view/form/etc for teaching lines --- markov/forms.py | 8 ++++++++ markov/models.py | 1 + markov/urls.py | 1 + markov/views.py | 18 +++++++++++++++++- templates/markov/teach_line.html | 16 ++++++++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 templates/markov/teach_line.html diff --git a/markov/forms.py b/markov/forms.py index 6aca9b9..c18935c 100644 --- a/markov/forms.py +++ b/markov/forms.py @@ -21,4 +21,12 @@ class LogUploadForm(Form): ignore = CharField(help_text="Comma-separated list of nicks to ignore.", required=False) + +class TeachLineForm(Form): + + """Accept a line that will be imported into Markov stuff.""" + + context = ModelChoiceField(queryset=MarkovContext.objects.all()) + line = CharField() + # vi:tabstop=4:expandtab:autoindent diff --git a/markov/models.py b/markov/models.py index 1e38219..480d4e9 100644 --- a/markov/models.py +++ b/markov/models.py @@ -51,6 +51,7 @@ class MarkovState(models.Model): class Meta: permissions = { ('import_log_file', "Can import states from a log file"), + ('teach_line', "Can teach lines"), } unique_together = ('context', 'k1', 'k2', 'v') diff --git a/markov/urls.py b/markov/urls.py index 7d3ad75..1daf766 100644 --- a/markov/urls.py +++ b/markov/urls.py @@ -9,6 +9,7 @@ urlpatterns = patterns('markov.views', url(r'^$', 'index', name='markov_index'), url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'), url(r'^import/$', 'import_file', name='markov_import_file'), + url(r'^teach/$', 'teach_line', name='markov_teach_line'), ) # vi:tabstop=4:expandtab:autoindent diff --git a/markov/views.py b/markov/views.py index da442fc..320e112 100644 --- a/markov/views.py +++ b/markov/views.py @@ -11,7 +11,7 @@ from django.contrib.auth.decorators import permission_required from django.http import 
HttpResponse from django.shortcuts import get_object_or_404, render -from markov.forms import LogUploadForm +from markov.forms import LogUploadForm, TeachLineForm from markov.models import MarkovContext, MarkovTarget, MarkovState @@ -75,6 +75,22 @@ def import_file(request): return render(request, 'markov/import_file.html', {'form': form}) +@permission_required('teach_line', raise_exception=True) +def teach_line(request): + """Teach one line directly.""" + + if request.method == 'POST': + form = TeachLineForm(request.POST) + if form.is_valid(): + line = form.cleaned_data['line'] + context = form.cleaned_data['context'] + _learn_line(line.rstrip(), context) + else: + form = TeachLineForm() + + return render(request, 'markov/teach_line.html', {'form': form}) + + def _generate_line(context, topics=None, max_words=30): """Generate a Markov chain.""" diff --git a/templates/markov/teach_line.html b/templates/markov/teach_line.html new file mode 100644 index 0000000..e1164e4 --- /dev/null +++ b/templates/markov/teach_line.html @@ -0,0 +1,16 @@ +{% extends 'base.html' %} + +{% block title %}markov teach{% endblock %} + +{% block content %} +
+ {% csrf_token %} + + {{ form }} +
+ +
+{% endblock %} + From 6a50db0e3d0de286b56e0ada1cc59ef295bf23b8 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 15:55:06 -0500 Subject: [PATCH 10/13] Markov: field changes: uniqueness, max_length --- ...002_unique_target_name_and_context_name.py | 60 +++++++++++++++++++ .../0003_markov_context_max_length_64.py | 44 ++++++++++++++ markov/models.py | 9 ++- 3 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 markov/migrations/0002_unique_target_name_and_context_name.py create mode 100644 markov/migrations/0003_markov_context_max_length_64.py diff --git a/markov/migrations/0002_unique_target_name_and_context_name.py b/markov/migrations/0002_unique_target_name_and_context_name.py new file mode 100644 index 0000000..23dffe9 --- /dev/null +++ b/markov/migrations/0002_unique_target_name_and_context_name.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Deleting field 'MarkovTarget.target' + db.delete_column(u'markov_markovtarget', 'target') + + # Adding field 'MarkovTarget.name' + db.add_column(u'markov_markovtarget', 'name', + self.gf('django.db.models.fields.CharField')(default='', unique=True, max_length=64), + keep_default=False) + + # Adding unique constraint on 'MarkovContext', fields ['name'] + db.create_unique(u'markov_markovcontext', ['name']) + + + def backwards(self, orm): + # Removing unique constraint on 'MarkovContext', fields ['name'] + db.delete_unique(u'markov_markovcontext', ['name']) + + # Adding field 'MarkovTarget.target' + db.add_column(u'markov_markovtarget', 'target', + self.gf('django.db.models.fields.CharField')(default='', max_length=64), + keep_default=False) + + # Deleting field 'MarkovTarget.name' + db.delete_column(u'markov_markovtarget', 'name') + + + models = { + u'markov.markovcontext': { + 
'Meta': {'object_name': 'MarkovContext'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'}) + }, + u'markov.markovstate': { + 'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'}, + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + 'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'v': ('django.db.models.fields.CharField', [], {'max_length': '128'}) + }, + u'markov.markovtarget': { + 'Meta': {'object_name': 'MarkovTarget'}, + 'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}) + } + } + + complete_apps = ['markov'] \ No newline at end of file diff --git a/markov/migrations/0003_markov_context_max_length_64.py b/markov/migrations/0003_markov_context_max_length_64.py new file mode 100644 index 0000000..31ec152 --- /dev/null +++ b/markov/migrations/0003_markov_context_max_length_64.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + + # Changing field 'MarkovContext.name' + db.alter_column(u'markov_markovcontext', 'name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=64)) + + def 
backwards(self, orm): + + # Changing field 'MarkovContext.name' + db.alter_column(u'markov_markovcontext', 'name', self.gf('django.db.models.fields.CharField')(max_length=32, unique=True)) + + models = { + u'markov.markovcontext': { + 'Meta': {'object_name': 'MarkovContext'}, + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}) + }, + u'markov.markovstate': { + 'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'}, + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + 'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'v': ('django.db.models.fields.CharField', [], {'max_length': '128'}) + }, + u'markov.markovtarget': { + 'Meta': {'object_name': 'MarkovTarget'}, + 'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}), + 'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}), + u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}) + } + } + + complete_apps = ['markov'] \ No newline at end of file diff --git a/markov/models.py b/markov/models.py index 480d4e9..c86845c 100644 --- a/markov/models.py +++ b/markov/models.py @@ -15,7 +15,7 @@ class MarkovContext(models.Model): """Define contexts for Markov chains.""" - name = models.CharField(max_length=32) + name = models.CharField(max_length=64, unique=True) def __unicode__(self): """String representation.""" @@ -27,11 +27,16 @@ class MarkovTarget(models.Model): """Define IRC targets that 
relate to a context, and can occasionally be talked to.""" - target = models.CharField(max_length=64) + name = models.CharField(max_length=64, unique=True) context = models.ForeignKey(MarkovContext) chatter_chance = models.IntegerField(default=0) + def __unicode__(self): + """String representation.""" + + return u"{0:s}".format(self.name) + class MarkovState(models.Model): From f1ff281da29221971bb81db1f911a45071b84319 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 15:55:33 -0500 Subject: [PATCH 11/13] Markov: port IRC bot module to django models work in progress, not everything works yet, but the important stuff should --- ircbot/modules/Markov.py | 554 ++++----------------------------------- 1 file changed, 51 insertions(+), 503 deletions(-) diff --git a/ircbot/modules/Markov.py b/ircbot/modules/Markov.py index a367423..1d7faf4 100644 --- a/ircbot/modules/Markov.py +++ b/ircbot/modules/Markov.py @@ -24,7 +24,9 @@ import thread import time from dateutil.relativedelta import relativedelta -import MySQLdb as mdb + +from markov.models import MarkovContext, MarkovState, MarkovTarget +from markov.views import _generate_sentence, _learn_line from extlib import irclib @@ -48,12 +50,6 @@ class Markov(Module): """ - # set up some keywords for use in the chains --- don't change these - # once you've created a brain - self.start1 = '__start1' - self.start2 = '__start2' - self.stop = '__stop' - # set up regexes, for replying to specific stuff learnpattern = '^!markov\s+learn\s+(.*)$' replypattern = '^!markov\s+reply(\s+min=(\d+))?(\s+max=(\d+))?(\s+(.*)$|$)' @@ -70,66 +66,9 @@ class Markov(Module): self.next_chatter_check = 0 thread.start_new_thread(self.thread_do, ()) - irc.xmlrpc_register_function(self._generate_line, - "markov_generate_line") - - def db_init(self): - """Create the markov chain table.""" - - version = self.db_module_registered(self.__class__.__name__) - if version == None: - db = self.get_db() - try: - version = 1 - cur = 
db.cursor(mdb.cursors.DictCursor) - cur.execute(''' - CREATE TABLE markov_chatter_target ( - id SERIAL, - target VARCHAR(256) NOT NULL, - chance INTEGER NOT NULL DEFAULT 99999 - ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin - ''') - cur.execute(''' - CREATE TABLE markov_context ( - id SERIAL, - context VARCHAR(256) NOT NULL - ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin - ''') - cur.execute(''' - CREATE TABLE markov_target_to_context_map ( - id SERIAL, - target VARCHAR(256) NOT NULL, - context_id BIGINT(20) UNSIGNED NOT NULL, - FOREIGN KEY(context_id) REFERENCES markov_context(id) - ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin - ''') - cur.execute(''' - CREATE TABLE markov_chain ( - id SERIAL, - k1 VARCHAR(128) NOT NULL, - k2 VARCHAR(128) NOT NULL, - v VARCHAR(128) NOT NULL, - context_id BIGINT(20) UNSIGNED NOT NULL, - FOREIGN KEY(context_id) REFERENCES markov_context(id) - ) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin - ''') - cur.execute(''' - CREATE INDEX markov_chain_keys_and_context_id_index - ON markov_chain (k1, k2, context_id)''') - - cur.execute(''' - CREATE INDEX markov_chain_value_and_context_id_index - ON markov_chain (v, context_id)''') - - db.commit() - self.db_register_module_version(self.__class__.__name__, - version) - except mdb.Error as e: - db.rollback() - self.log.error("database error trying to create tables") - self.log.exception(e) - raise - finally: cur.close() + # TODO: bring this back somehow + #irc.xmlrpc_register_function(self._generate_line, + # "markov_generate_line") def register_handlers(self): """Handle pubmsg/privmsg, to learn and/or reply to IRC events.""" @@ -171,7 +110,9 @@ class Markov(Module): if self.learnre.search(what) or self.replyre.search(what): return - self._learn_line(what, target, event) + if not event._recursing: + context = _get_or_create_target_context(target) + _learn_line(what, context) def do(self, connection, event, nick, userhost, what, admin_unlocked): """Handle commands and 
inputs.""" @@ -188,18 +129,25 @@ class Markov(Module): if not self.shut_up: # not a command, so see if i'm being mentioned if re.search(connection.get_nickname(), what, re.IGNORECASE) is not None: + context = _get_or_create_target_context(target) + addressed_pattern = '^' + connection.get_nickname() + '[:,]\s+(.*)' addressed_re = re.compile(addressed_pattern) if addressed_re.match(what): # i was addressed directly, so respond, addressing # the speaker + topics = [x for x in addressed_re.match(what).group(1).split(' ') if len(x) >= 3] + self.lines_seen.append(('.self.said.', datetime.now())) return self.irc.reply(event, '{0:s}: {1:s}'.format(nick, - self._generate_line(target, line=addressed_re.match(what).group(1)))) + ' '.join(_generate_sentence(context, topics=topics)))) else: # i wasn't addressed directly, so just respond + topics = [x for x in what.split(' ') if len(x) >= 3] self.lines_seen.append(('.self.said.', datetime.now())) - return self.irc.reply(event, '{0:s}'.format(self._generate_line(target, line=what))) + + return self.irc.reply(event, '{0:s}'.format(' '.join(_generate_sentence(context, + topics=topics)))) def markov_learn(self, event, nick, userhost, what, admin_unlocked): """Learn one line, as provided to the command.""" @@ -212,7 +160,8 @@ class Markov(Module): match = self.learnre.search(what) if match: line = match.group(1) - self._learn_line(line, target, event) + context = _get_or_create_target_context(target) + _learn_line(line, context) # return what was learned, for weird chaining purposes return line @@ -229,6 +178,7 @@ class Markov(Module): if match: min_size = 15 max_size = 30 + context = _get_or_create_target_context(target) if match.group(2): min_size = int(match.group(2)) @@ -237,11 +187,13 @@ class Markov(Module): if match.group(5) != '': line = match.group(6) + topics = [x for x in line.split(' ') if len(x) >= 3] + self.lines_seen.append(('.self.said.', datetime.now())) - return self._generate_line(target, line=line, 
min_size=min_size, max_size=max_size) + return ' '.join(_generate_sentence(context, topics=topics, min_words=min_size, max_words=max_size)) else: self.lines_seen.append(('.self.said.', datetime.now())) - return self._generate_line(target, min_size=min_size, max_size=max_size) + return ' '.join(_generate_sentence(context, min_words=min_size, max_words=max_size)) def thread_do(self): """Do various things.""" @@ -254,20 +206,8 @@ class Markov(Module): def _do_random_chatter_check(self): """Randomly say something to a channel.""" - # don't immediately potentially chatter, let the bot - # join channels first - if self.next_chatter_check == 0: - self.next_chatter_check = time.time() + 600 - - if self.next_chatter_check < time.time(): - self.next_chatter_check = time.time() + 600 - - targets = self._get_chatter_targets() - for t in targets: - if t['chance'] > 0: - a = random.randint(1, t['chance']) - if a == 1: - self.sendmsg(t['target'], self._generate_line(t['target'])) + # TODO: make this do stuff again + return def _do_shut_up_checks(self): """Check to see if we've been talking too much, and shut up if so.""" @@ -293,426 +233,34 @@ class Markov(Module): self.sendmsg(t['target'], 'shutting up for 30 seconds due to last 30 seconds of activity') - def _learn_line(self, line, target, event): - """Create Markov chains from the provided line.""" +def _get_or_create_target_context(target_name): + """Return the context for a provided nick/channel, creating missing ones.""" - # set up the head of the chain - k1 = self.start1 - k2 = self.start2 + # find the stuff, or create it + try: + target = MarkovTarget.objects.get(name=target_name) + return target.context + except MarkovContext.DoesNotExist: + # make a context + context = MarkovContext() + context.name = target_name + context.save() - context_id = self._get_context_id_for_target(target) + target.context = context + target.save() - # don't learn recursion - if not event._recursing: - words = line.split() - if len(words) == 
0: - return line + return target.context + except MarkovTarget.DoesNotExist: + # first we need to make a context for this + context = MarkovContext() + context.name = target_name + context.save() - db = self.get_db() - try: - cur = db.cursor(mdb.cursors.DictCursor) - statement = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (%s, %s, %s, %s)' - for word in words: - cur.execute(statement, (k1, k2, word, context_id)) - k1, k2 = k2, word - cur.execute(statement, (k1, k2, self.stop, context_id)) + target = MarkovTarget() + target.name = target_name + target.context = context + target.save() - db.commit() - except mdb.Error as e: - db.rollback() - self.log.error("database error learning line") - self.log.exception(e) - raise - finally: cur.close() - - def _generate_line(self, target, line='', min_size=15, max_size=30): - """Create a line, optionally using some text in a seed as a point in - the chain. - - Keyword arguments: - target - the target to retrieve the context for (i.e. a channel or nick) - line - the line to reply to, by picking a random word and seeding with it - min_size - the minimum desired size in words. not guaranteed - max_size - the maximum desired size in words. 
not guaranteed - - """ - - # if the limit is too low, there's nothing to do - if (max_size <= 3): - raise Exception("max_size is too small: %d" % max_size) - - # if the min is too large, abort - if (min_size > 20): - raise Exception("min_size is too large: %d" % min_size) - - seed_words = [] - # shuffle the words in the input - seed_words = line.split() - random.shuffle(seed_words) - self.log.debug("seed words: {0:s}".format(seed_words)) - - # hit to generate a new seed word immediately if possible - seed_word = None - hit_word = None - - context_id = self._get_context_id_for_target(target) - - # start with an empty chain, and work from there - gen_words = [self.start1, self.start2] - - # build a response by creating multiple sentences - while len(gen_words) < max_size + 2: - # if we're past the min and on a stop, we can end - if len(gen_words) > min_size + 2: - if gen_words[-1] == self.stop: - break - - # pick a word from the shuffled seed words, if we need a new one - if seed_word == hit_word: - if len(seed_words) > 0: - seed_word = seed_words.pop() - self.log.debug("picked new seed word: " - "{0:s}".format(seed_word)) - else: - seed_word = None - self.log.debug("ran out of seed words") - - # if we have a stop, the word before it might need to be - # made to look like a sentence end - if gen_words[-1] == self.stop: - # chop off the stop, temporarily - gen_words = gen_words[:-1] - - # we should have a real word, make it look like a - # sentence end - sentence_end = gen_words[-1] - eos_punctuation = ['!', '?', ',', '.'] - if sentence_end[-1] not in eos_punctuation: - random.shuffle(eos_punctuation) - gen_words[-1] = sentence_end + eos_punctuation.pop() - self.log.debug("monkeyed with end of sentence, it's " - "now: {0:s}".format(gen_words[-1])) - - # put the stop back on - gen_words.append(self.stop) - self.log.debug("gen_words: {0:s}".format(" ".join(gen_words))) - - # first, see if we should start a new sentence. 
if so, - # work backwards - if gen_words[-1] in (self.start2, self.stop) and seed_word is not None and 0 == 1: - # drop a stop, since we're starting another sentence - if gen_words[-1] == self.stop: - gen_words = gen_words[:-1] - - # work backwards from seed_word - working_backwards = [] - back_k2 = self._retrieve_random_k2_for_value(seed_word, context_id) - if back_k2: - found_word = seed_word - if back_k2 == self.start2: - self.log.debug("random further back was start2, swallowing") - else: - working_backwards.append(back_k2) - working_backwards.append(found_word) - self.log.debug("started working backwards with: {0:s}".format(found_word)) - self.log.debug("working_backwards: {0:s}".format(" ".join(working_backwards))) - - # now work backwards until we randomly bump into a start - # to steer the chainer away from spending too much time on - # the weaker-context reverse chaining, we make max_size - # a non-linear distribution, making it more likely that - # some time is spent on better forward chains - max_back = min(random.randint(1, max_size/2) + random.randint(1, max_size/2), - max_size/4) - self.log.debug("max_back: {0:d}".format(max_back)) - while len(working_backwards) < max_back: - back_k2 = self._retrieve_random_k2_for_value(working_backwards[0], context_id) - if back_k2 == self.start2: - self.log.debug("random further back was start2, finishing") - break - elif back_k2: - working_backwards.insert(0, back_k2) - self.log.debug("added '{0:s}' to working_backwards".format(back_k2)) - self.log.debug("working_backwards: {0:s}".format(" ".join(working_backwards))) - else: - self.log.debug("nothing (at all!?) 
further back, finishing") - break - - gen_words += working_backwards - self.log.debug("gen_words: {0:s}".format(" ".join(gen_words))) - hit_word = gen_words[-1] - else: - # we are working forward, with either: - # * a pair of words (normal path, filling out a sentence) - # * start1, start2 (completely new chain, no seed words) - # * stop (new sentence in existing chain, no seed words) - self.log.debug("working forwards") - forw_v = None - if gen_words[-1] in (self.start2, self.stop): - # case 2 or 3 above, need to work forward on a beginning - # of a sentence (this is slow) - if gen_words[-1] == self.stop: - # remove the stop if it's there - gen_words = gen_words[:-1] - - new_sentence = self._create_chain_with_k1_k2(self.start1, - self.start2, - 3, context_id, - avoid_address=True) - - if len(new_sentence) > 0: - self.log.debug("started new sentence " - "'{0:s}'".format(" ".join(new_sentence))) - gen_words += new_sentence - self.log.debug("gen_words: {0:s}".format(" ".join(gen_words))) - else: - # this is a problem. we started a sentence on - # start1,start2, and still didn't find anything. to - # avoid endlessly looping we need to abort here - break - else: - if seed_word: - self.log.debug("preferring: '{0:s}'".format(seed_word)) - forw_v = self._retrieve_random_v_for_k1_and_k2_with_pref(gen_words[-2], - gen_words[-1], - seed_word, - context_id) - else: - forw_v = self._retrieve_random_v_for_k1_and_k2(gen_words[-2], - gen_words[-1], - context_id) - - if forw_v: - gen_words.append(forw_v) - self.log.debug("added random word '{0:s}' to gen_words".format(forw_v)) - self.log.debug("gen_words: {0:s}".format(" ".join(gen_words))) - hit_word = gen_words[-1] - else: - # append stop. 
this is an end to a sentence (since - # we had non-start words to begin with) - gen_words.append(self.stop) - self.log.debug("nothing found, added stop") - self.log.debug("gen_words: {0:s}".format(" ".join(gen_words))) - - # chop off the seed data at the start - gen_words = gen_words[2:] - - if len(gen_words): - # chop off the end text, if it was the keyword indicating an end of chain - if gen_words[-1] == self.stop: - gen_words = gen_words[:-1] - else: - self.log.warning("after all this we have an empty list of words. " - "there probably isn't any data for this context") - - return ' '.join(gen_words) - - def _retrieve_random_v_for_k1_and_k2(self, k1, k2, context_id): - """Get one v for a given k1,k2.""" - - self.log.debug("searching with '{0:s}','{1:s}'".format(k1, k2)) - values = [] - db = self.get_db() - try: - query = ''' - SELECT v FROM markov_chain AS r1 - JOIN ( - SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id - ) AS r2 - WHERE r1.k1 = %s - AND r1.k2 = %s - AND r1.context_id = %s - ORDER BY r1.id >= r2.id DESC, r1.id ASC - LIMIT 1 - ''' - cur = db.cursor(mdb.cursors.DictCursor) - cur.execute(query, (k1, k2, context_id)) - result = cur.fetchone() - if result: - self.log.debug("found '{0:s}'".format(result['v'])) - return result['v'] - except mdb.Error as e: - self.log.error("database error in _retrieve_random_v_for_k1_and_k2") - self.log.exception(e) - raise - finally: cur.close() - - def _retrieve_random_v_for_k1_and_k2_with_pref(self, k1, k2, prefer, context_id): - """Get one v for a given k1,k2. - - Prefer that the result be prefer, if it's found. 
- - """ - - self.log.debug("searching with '{0:s}','{1:s}', prefer " - "'{2:s}'".format(k1, k2, prefer)) - values = [] - db = self.get_db() - try: - query = ''' - SELECT v FROM markov_chain AS r1 - JOIN ( - SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id - ) AS r2 - WHERE r1.k1 = %s - AND r1.k2 = %s - AND r1.context_id = %s - ORDER BY r1.id >= r2.id DESC, r1.v = %s DESC, r1.id ASC - LIMIT 1 - ''' - cur = db.cursor(mdb.cursors.DictCursor) - cur.execute(query, (k1, k2, context_id, prefer)) - result = cur.fetchone() - if result: - self.log.debug("found '{0:s}'".format(result['v'])) - return result['v'] - except mdb.Error as e: - self.log.error("database error in _retrieve_random_v_for_k1_and_k2_with_pref") - self.log.exception(e) - raise - finally: cur.close() - - def _retrieve_random_k2_for_value(self, v, context_id): - """Get one k2 for a given value.""" - - values = [] - db = self.get_db() - try: - query = ''' - SELECT k2 FROM markov_chain AS r1 - JOIN ( - SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id - ) AS r2 - WHERE r1.v = %s - AND r1.context_id = %s - ORDER BY r1.id >= r2.id DESC, r1.id ASC - LIMIT 1 - ''' - cur = db.cursor(mdb.cursors.DictCursor) - cur.execute(query, (v, context_id)) - result = cur.fetchone() - if result: - return result['k2'] - except mdb.Error as e: - self.log.error("database error in _retrieve_random_k2_for_value") - self.log.exception(e) - raise - finally: cur.close() - - def _create_chain_with_k1_k2(self, k1, k2, length, context_id, - avoid_address=False): - """Create a chain of the given length, using k1,k2. - - k1,k2 does not appear in the resulting chain. 
- - """ - - chain = [k1, k2] - self.log.debug("creating chain for {0:s},{1:s}".format(k1, k2)) - - for _ in range(length): - v = self._retrieve_random_v_for_k1_and_k2(chain[-2], - chain[-1], - context_id) - if v: - chain.append(v) - - # check for addresses (the "whoever:" in - # __start1 __start2 whoever: some words) - addressing_suffixes = [':', ','] - if len(chain) > 2 and chain[2][-1] in addressing_suffixes and avoid_address: - return chain[3:] - elif len(chain) > 2: - return chain[2:] - else: - return [] - - def _get_chatter_targets(self): - """Get all possible chatter targets.""" - - db = self.get_db() - try: - # need to create our own db object, since this is likely going to be in a new thread - query = 'SELECT target, chance FROM markov_chatter_target' - cur = db.cursor(mdb.cursors.DictCursor) - cur.execute(query) - results = cur.fetchall() - return results - except mdb.Error as e: - self.log.error("database error in _get_chatter_targets") - self.log.exception(e) - raise - finally: cur.close() - - def _get_context_id_for_target(self, target): - """Get the context ID for the desired/input target.""" - - db = self.get_db() - try: - query = ''' - SELECT mc.id FROM markov_context mc - INNER JOIN markov_target_to_context_map mt - ON mt.context_id = mc.id - WHERE mt.target = %s - ''' - cur = db.cursor(mdb.cursors.DictCursor) - cur.execute(query, (target,)) - result = cur.fetchone() - db.close() - if result: - return result['id'] - else: - # auto-generate a context to keep things private - self._add_context_for_target(target) - return self._get_context_id_for_target(target) - except mdb.Error as e: - self.log.error("database error in _get_context_id_for_target") - self.log.exception(e) - raise - finally: cur.close() - - def _add_context_for_target(self, target): - """Create a new context for the desired/input target.""" - - db = self.get_db() - try: - statement = 'INSERT INTO markov_context (context) VALUES (%s)' - cur = db.cursor(mdb.cursors.DictCursor) - 
cur.execute(statement, (target,)) - statement = ''' - INSERT INTO markov_target_to_context_map (target, context_id) - VALUES (%s, (SELECT id FROM markov_context WHERE context = %s)) - ''' - cur.execute(statement, (target, target)) - db.commit() - except mdb.Error as e: - db.rollback() - self.log.error("database error in _add_context_for_target") - self.log.exception(e) - raise - finally: cur.close() - - try: - query = ''' - SELECT mc.id FROM markov_context mc - INNER JOIN markov_target_to_context_map mt - ON mt.context_id = mc.id - WHERE mt.target = %s - ''' - cur = db.cursor(mdb.cursors.DictCursor) - cur.execute(query, (target,)) - result = cur.fetchone() - if result: - return result['id'] - else: - # auto-generate a context to keep things private - self._add_context_for_target(target) - return self._get_context_id_for_target(target) - except mdb.Error as e: - self.log.error("database error in _get_context_id_for_target") - self.log.exception(e) - raise - finally: cur.close() + return target.context # vi:tabstop=4:expandtab:autoindent -# kate: indent-mode python;indent-width 4;replace-tabs on; From 97070d495f166ed6f5037bfa9a90f6c62bfe0092 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 16:18:48 -0500 Subject: [PATCH 12/13] Markov: assuming file uploads are utf8 --- markov/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markov/views.py b/markov/views.py index 320e112..baa586b 100644 --- a/markov/views.py +++ b/markov/views.py @@ -55,7 +55,7 @@ def import_file(request): whos = [] for line in log_file: - (timestamp, who, what) = line.split('\t') + (timestamp, who, what) = line.decode('utf-8').split('\t') if who in ('-->', '<--', '--', ' *'): continue From 54efe617d0c4ea8846c709fa6ea2ba83b4677637 Mon Sep 17 00:00:00 2001 From: "Brian S. 
Stephan" Date: Sat, 5 Apr 2014 16:39:44 -0500 Subject: [PATCH 13/13] Markov: only split two \ts on file import --- markov/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markov/views.py b/markov/views.py index baa586b..09b4413 100644 --- a/markov/views.py +++ b/markov/views.py @@ -55,7 +55,7 @@ def import_file(request): whos = [] for line in log_file: - (timestamp, who, what) = line.decode('utf-8').split('\t') + (timestamp, who, what) = line.decode('utf-8').split('\t', 2) if who in ('-->', '<--', '--', ' *'): continue