collapsing all of dr_botzo one directory

This commit is contained in:
2017-02-04 11:48:55 -06:00
parent 38d14bb0d2
commit cd23f062a9
194 changed files with 0 additions and 0 deletions

0
markov/__init__.py Normal file
View File

112
markov/admin.py Normal file
View File

@@ -0,0 +1,112 @@
"""Manage Markov models and administrative commands."""
import logging
from django.contrib import admin
from django.contrib.auth.decorators import permission_required
from django.shortcuts import render
from markov.forms import LogUploadForm, TeachLineForm
import markov.lib as markovlib
from markov.models import MarkovContext, MarkovTarget, MarkovState
# module-level logger for the admin import/teach views
log = logging.getLogger('markov.admin')

# expose the markov models in the standard Django admin
admin.site.register(MarkovContext)
admin.site.register(MarkovTarget)
admin.site.register(MarkovState)
@permission_required('import_text_file', raise_exception=True)
def import_file(request):
    """Accept a file upload and turn it into markov stuff.

    Current file formats supported:

    * weechat log
    * raw text

    Renders the upload form on GET, and re-renders it (fresh on success,
    with errors on failure) after a POST.
    """
    if request.method == 'POST':
        form = LogUploadForm(request.POST, request.FILES)
        if form.is_valid():
            if form.cleaned_data['text_file_format'] == LogUploadForm.FILE_FORMAT_WEECHAT:
                text_file = request.FILES['text_file']
                context = form.cleaned_data['context']
                # strip whitespace around each nick so "a, b" matches "b";
                # drop empty entries (the field may legitimately be blank)
                ignores = [x.strip() for x in form.cleaned_data['ignore_nicks'].split(',') if x.strip()]
                strips = form.cleaned_data['strip_prefixes'].split(' ')
                whos = []
                for line in text_file:
                    log.debug(line)
                    # weechat plain-text log format: timestamp\tnick\tmessage
                    (timestamp, who, what) = line.decode('utf-8').split('\t', 2)

                    # skip join/part/quit/server/action marker lines
                    if who in ('-->', '<--', '--', ' *'):
                        continue
                    if who in ignores:
                        continue
                    whos.append(who)

                    # this is a line we probably care about now
                    what = [x for x in what.rstrip().split(' ') if x not in strips]
                    markovlib.learn_line(' '.join(what), context)
                    log.debug("learned")

                log.debug(set(whos))
                form = LogUploadForm()
            elif form.cleaned_data['text_file_format'] == LogUploadForm.FILE_FORMAT_RAW_TEXT:
                text_file = request.FILES['text_file']
                context = form.cleaned_data['context']
                # learn the raw stream word by word, maintaining the
                # (k1, k2) -> word chain state across lines ourselves
                k1 = MarkovState._start1
                k2 = MarkovState._start2
                for line in text_file:
                    for word in line.decode('utf-8').rstrip().split(' '):
                        log.info(word)
                        if word:
                            state, created = MarkovState.objects.get_or_create(context=context, k1=k1,
                                                                               k2=k2, v=word)
                            state.count += 1
                            state.save()
                            if word[-1] in ['.', '?', '!']:
                                # sentence end; record a stop state and reset the keys
                                state, created = MarkovState.objects.get_or_create(context=context, k1=k2,
                                                                                   k2=word, v=MarkovState._stop)
                                state.count += 1
                                state.save()
                                k1 = MarkovState._start1
                                k2 = MarkovState._start2
                            else:
                                k1 = k2
                                k2 = word
    else:
        form = LogUploadForm()

    return render(request, 'markov/import_file.html', {'form': form})
@permission_required('teach_line', raise_exception=True)
def teach_line(request):
    """Teach one line directly."""
    # bound form on POST, blank form on GET; an unbound form is never valid,
    # so the learn branch only runs for a valid submission
    form = TeachLineForm(request.POST) if request.method == 'POST' else TeachLineForm()
    if form.is_valid():
        text = form.cleaned_data['line']
        context = form.cleaned_data['context']
        prefixes = form.cleaned_data['strip_prefixes'].split(' ')

        # drop the configured prefixes before learning the line
        tokens = [word for word in text.rstrip().split(' ') if word not in prefixes]
        markovlib.learn_line(' '.join(tokens), context)

        # present a fresh form after a successful submission
        form = TeachLineForm()

    return render(request, 'markov/teach_line.html', {'form': form})
# hook the custom views into the admin index
# (register_view is provided by django-adminplus — presumably; verify against the project's admin site class)
admin.site.register_view('markov/importfile/', "Markov - Import log file", view=import_file,
                         urlname='markov_import_file')
admin.site.register_view('markov/teach/', "Markov - Teach line", view=teach_line, urlname='markov_teach_line')

40
markov/forms.py Normal file
View File

@@ -0,0 +1,40 @@
"""Forms for manipulating markov data."""
import logging
from django.forms import Form, CharField, ChoiceField, FileField, ModelChoiceField
from markov.models import MarkovContext
log = logging.getLogger('markov.forms')
class LogUploadForm(Form):
    """Accept a file upload that will be imported into Markov stuff."""

    # supported source formats for the uploaded file
    FILE_FORMAT_WEECHAT = 'WEECHAT'
    FILE_FORMAT_RAW_TEXT = 'RAW'
    FILE_FORMAT_CHOICES = (
        (FILE_FORMAT_WEECHAT, "Weechat"),
        (FILE_FORMAT_RAW_TEXT, "Raw text file"),
    )

    # the file to import
    text_file = FileField()
    # which parser to run the file through
    text_file_format = ChoiceField(choices=FILE_FORMAT_CHOICES)
    # the MarkovContext the learned states belong to
    context = ModelChoiceField(queryset=MarkovContext.objects.all())
    ignore_nicks = CharField(help_text="Comma-separated list of nicks to ignore. For Weechat logs.",
                             required=False)
    strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip. For Weechat logs.",
                               required=False)
class TeachLineForm(Form):
    """Accept a line that will be imported into Markov stuff."""

    # the MarkovContext the learned states belong to
    context = ModelChoiceField(queryset=MarkovContext.objects.all())
    # the text to learn
    line = CharField()
    strip_prefixes = CharField(help_text="Space-separated list of line prefixes to strip.",
                               required=False)

108
markov/ircplugin.py Normal file
View File

@@ -0,0 +1,108 @@
import logging
import re
import irc.client
from ircbot.lib import Plugin, reply_destination_for_event
from ircbot.models import IrcChannel
import markov.lib as markovlib
log = logging.getLogger('markov.ircplugin')
class Markov(Plugin):
    """Build Markov chains and reply with them."""

    def start(self):
        """Set up the handlers."""
        self.connection.add_global_handler('pubmsg', self.handle_chatter, -20)
        self.connection.add_global_handler('privmsg', self.handle_chatter, -20)
        self.connection.reactor.add_global_regex_handler(['pubmsg', 'privmsg'],
                                                         r'^!markov\s+reply(\s+min=(\d+))?(\s+max=(\d+))?(\s+(.*)$|$)',
                                                         self.handle_reply, -20)

        super(Markov, self).start()

    def stop(self):
        """Tear down handlers."""
        self.connection.remove_global_handler('pubmsg', self.handle_chatter)
        self.connection.remove_global_handler('privmsg', self.handle_chatter)
        self.connection.reactor.remove_global_regex_handler(['pubmsg', 'privmsg'], self.handle_reply)

        super(Markov, self).stop()

    def handle_reply(self, connection, event, match):
        """Generate a reply to one line, without learning it.

        Optional min=/max= arguments bound the reply length; any remaining
        text is used as topic words to steer the chain.
        """
        target = reply_destination_for_event(event)
        min_size = 15
        max_size = 30

        context = markovlib.get_or_create_target_context(target)

        if match.group(2):
            min_size = int(match.group(2))
        if match.group(4):
            max_size = int(match.group(4))

        if match.group(5) != '':
            line = match.group(6)
            # only words of 3+ characters make interesting topics
            topics = [x for x in line.split(' ') if len(x) >= 3]
            return self.bot.reply(event, " ".join(markovlib.generate_line(context, topics=topics,
                                                                          min_words=min_size, max_words=max_size)))
        else:
            return self.bot.reply(event, " ".join(markovlib.generate_line(context, min_words=min_size,
                                                                          max_words=max_size)))

    def handle_chatter(self, connection, event):
        """Learn from IRC chatter, and reply when mentioned or addressed."""
        what = event.arguments[0]
        my_nick = connection.get_nickname()
        # escape the nick before interpolating it into regexes --- IRC nicks
        # may legally contain metacharacters such as [ ] \ ^ { } |
        escaped_nick = re.escape(my_nick)
        trimmed_what = re.sub(r'^{0:s}[:,]\s+'.format(escaped_nick), '', what)
        nick = irc.client.NickMask(event.source).nick
        target = reply_destination_for_event(event)

        # check to see whether or not we should learn from this channel
        channel = None
        if irc.client.is_channel(target):
            channel, c = IrcChannel.objects.get_or_create(name=target)
        if channel and not channel.markov_learn_from_channel:
            log.debug("not learning from %s as i've been told to ignore it", channel)
        else:
            # learn the line, unless we're re-handling our own generated event
            recursing = getattr(event, '_recursing', False)
            if not recursing:
                log.debug("learning %s", trimmed_what)
                context = markovlib.get_or_create_target_context(target)
                markovlib.learn_line(trimmed_what, context)

        log.debug("searching '%s' for '%s'", what, my_nick)
        if re.search(escaped_nick, what, re.IGNORECASE) is not None:
            context = markovlib.get_or_create_target_context(target)
            addressed_pattern = r'^{0:s}[:,]\s+(.*)'.format(escaped_nick)
            addressed_re = re.compile(addressed_pattern)
            if addressed_re.match(what):
                # i was addressed directly, so respond, addressing
                # the speaker
                topics = [x for x in addressed_re.match(what).group(1).split(' ') if len(x) >= 3]
                return self.bot.reply(event, "{0:s}: {1:s}"
                                      "".format(nick, " ".join(markovlib.generate_line(context, topics=topics))))
            else:
                # i wasn't addressed directly, so just respond
                topics = [x for x in what.split(' ') if len(x) >= 3]
                return self.bot.reply(event, "{0:s}"
                                      "".format(" ".join(markovlib.generate_line(context, topics=topics))))


plugin = Markov

208
markov/lib.py Normal file
View File

@@ -0,0 +1,208 @@
import logging
import random
from django.db.models import Sum
from markov.models import MarkovContext, MarkovState, MarkovTarget
log = logging.getLogger('markov.lib')
def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bias=2, max_tries=5):
    """String multiple sentences together into a coherent sentence."""
    words = []
    # each constituent sentence only needs to cover a fraction of the target
    per_sentence_minimum = min_words / sentence_bias

    for _ in range(max_tries):
        words += generate_longish_sentence(context, topics=topics, min_words=per_sentence_minimum,
                                           max_words=max_words, max_tries=max_tries)
        if len(words) >= min_words:
            return words

        # not long enough yet --- terminate the current fragment with
        # punctuation (if it has none) before appending another sentence
        if words and words[-1][-1] not in (',', '.', '!', '?', ':'):
            words[-1] += random.choice(['?', '.', '!'])

    # out of tries; hand back whatever we accumulated
    return words
def generate_longish_sentence(context, topics=None, min_words=15, max_words=30, max_tries=100):
    """Generate a Markov chain, but throw away the short ones unless we get desperate."""
    sentence = ""
    for _ in range(max_tries):
        sentence = generate_sentence(context, topics=topics, min_words=min_words, max_words=max_words)
        if len(sentence) >= min_words:
            log.debug("found a longish sentence, %s", sentence)
            return sentence
        log.debug("%s isn't long enough, going to try again", sentence)

    # exhausted our tries; settle for the last (short) attempt
    return sentence
def generate_sentence(context, topics=None, min_words=15, max_words=30):
    """Generate a Markov chain.

    Args:
        context: the MarkovContext to pull states from
        topics: optional (mutable) list of words to try to steer the chain toward;
            used topics are removed from the list
        min_words: avoid stopping the chain before this many words
        max_words: abandon chains that grow beyond this many words

    Returns:
        A list of words; may be empty if generation was abandoned.
    """
    words = []

    # if we have topics, try to seed from one and walk backwards to a start
    if topics:
        topic_word = random.choice(topics)
        topics.remove(topic_word)
        log.debug("looking for topic '{0:s}'".format(topic_word))
        new_states = MarkovState.objects.filter(context=context, v=topic_word)
        if len(new_states) > 0:
            log.debug("found '{0:s}', starting backwards".format(topic_word))
            words.insert(0, topic_word)
            while len(words) <= max_words and words[0] != MarkovState._start2:
                log.debug("looking backwards for '{0:s}'".format(words[0]))
                new_states = MarkovState.objects.filter(context=context, v=words[0])
                # if we can step straight back to a sentence start, do so.
                # NOTE: this must be a filter; `string in queryset` compares
                # the string against model instances and is always False
                if new_states.filter(k2=MarkovState._start2).exists():
                    log.debug("found a start2 in the results, intentionally picking it")
                    words.insert(0, MarkovState._start2)
                else:
                    words.insert(0, get_word_out_of_states(new_states, backwards=True))
                log.debug("picked %s", words[0])

            # if what we found is too long, abandon it, sadly
            if len(words) > max_words:
                log.debug("%s is too long, i'm going to give up on it", words)
                words.clear()

    # if we didn't get topic stuff, we need to start (forwards) here, otherwise we use
    # what we already put together (obviously)
    if len(words) == 0:
        words = [MarkovState._start1, MarkovState._start2]

    i = len(words)
    while words[-1] != MarkovState._stop:
        log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
        new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
        log.debug("states retrieved")

        # look for remaining topic words among the *candidate next states*,
        # so a picked hit is always a valid transition
        if topics and len(topics):
            target_hits = list(set(new_states.values_list('v', flat=True)).intersection(set(topics)))
        else:
            target_hits = []

        if len(words) > min_words and new_states.filter(v=MarkovState._stop).exists():
            # if we're over min_words, and got a stop naturally, use it
            log.debug("found a stop in the results, intentionally picking it")
            words.append(MarkovState._stop)
        elif len(target_hits) > 0:
            # if there's a target word in the states, pick it
            target_hit = random.choice(target_hits)
            log.debug("found a topic hit %s, using it", target_hit)
            topics.remove(target_hit)
            words.append(target_hit)
        elif len(words) <= min_words:
            # if we still need more words, intentionally avoid stop
            words.append(get_word_out_of_states(new_states.exclude(v=MarkovState._stop)))
            log.debug("picked (stop avoidance) %s", words[-1])
        else:
            words.append(get_word_out_of_states(new_states))
            log.debug("picked %s", words[-1])
        i += 1

    # strip the bookkeeping markers before returning
    words = [word for word in words if word not in
             (MarkovState._start1, MarkovState._start2, MarkovState._stop)]

    # if what we found is too long, abandon it, sadly
    if len(words) > max_words:
        log.debug("%s is too long, i'm going to give up on it", words)
        words.clear()

    return words
def get_or_create_target_context(target_name):
    """Return the context for a provided nick/channel, creating missing ones."""
    name = target_name.lower()

    try:
        target = MarkovTarget.objects.get(name=name)
    except MarkovTarget.DoesNotExist:
        # no target yet --- build a context first (named after the target, for
        # lack of a better idea, until configured otherwise), then the target
        context, _ = MarkovContext.objects.get_or_create(name=name)
        target, _ = MarkovTarget.objects.get_or_create(name=name, context=context)
        return target.context

    try:
        return target.context
    except MarkovContext.DoesNotExist:
        # target exists but its context is missing; fabricate one the same way
        context, _ = MarkovContext.objects.get_or_create(name=name)
        target.context = context
        target.save()
        return target.context
def get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states, weighted by state count.

    Args:
        states: queryset of MarkovState candidates
        backwards: if True, return the k2 key (walking the chain backwards)
            rather than the value

    Returns:
        The chosen word, or '' if nothing was hit.
    """
    # work around possible broken data, where a k1,k2 should have a value but doesn't
    if len(states) == 0:
        states = MarkovState.objects.filter(v=MarkovState._stop)

    new_word = ''
    running = 0
    count_sum = states.aggregate(Sum('count'))['count__sum']
    # draw in [1, count_sum]: randint is inclusive at both ends, so including
    # 0 would over-select the first state (any 0 draw lands on it).
    # guard count_sum being None/0 (empty or all-zero-count states)
    hit = random.randint(1, count_sum) if count_sum else 0
    log.debug("sum: {0:d} hit: {1:d}".format(count_sum or 0, hit))
    for state in states.iterator():
        running += state.count
        if running >= hit:
            new_word = state.k2 if backwards else state.v
            break

    log.debug("found '{0:s}'".format(new_word))
    return new_word
def learn_line(line, context):
    """Create a bunch of MarkovStates for a given line of text."""
    log.debug("learning %s...", line[:40])

    tokens = line.split()
    chain = [MarkovState._start1, MarkovState._start2] + tokens + [MarkovState._stop]

    # refuse to learn a line containing any word too wide for the key columns
    key_width = MarkovState._meta.get_field('k1').max_length
    if any(len(token) > key_width for token in chain):
        return

    # walk every (k1, k2) -> v trigram in the padded chain
    for k1, k2, v in zip(chain, chain[1:], chain[2:]):
        log.debug("'{0:s}','{1:s}' -> '{2:s}'".format(k1, k2, v))
        state, _ = MarkovState.objects.get_or_create(context=context,
                                                     k1=k1,
                                                     k2=k2,
                                                     v=v)
        state.count += 1
        state.save()

View File

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
from django.db import models, migrations
class Migration(migrations.Migration):
    """Initial schema for the markov app: MarkovContext, MarkovState, MarkovTarget."""

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='MarkovContext',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('name', models.CharField(unique=True, max_length=64)),
            ],
            options={
            },
            bases=(models.Model,),
        ),
        migrations.CreateModel(
            name='MarkovState',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('k1', models.CharField(max_length=128)),
                ('k2', models.CharField(max_length=128)),
                ('v', models.CharField(max_length=128)),
                ('count', models.IntegerField(default=0)),
                ('context', models.ForeignKey(to='markov.MarkovContext')),
            ],
            options={
                'permissions': set([('teach_line', 'Can teach lines'), ('import_log_file', 'Can import states from a log file')]),
            },
            bases=(models.Model,),
        ),
        migrations.CreateModel(
            name='MarkovTarget',
            fields=[
                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
                ('name', models.CharField(unique=True, max_length=64)),
                ('chatter_chance', models.IntegerField(default=0)),
                ('context', models.ForeignKey(to='markov.MarkovContext')),
            ],
            options={
            },
            bases=(models.Model,),
        ),
        # (context, k1, k2, v) rows are counted, not duplicated
        migrations.AlterUniqueTogether(
            name='markovstate',
            unique_together=set([('context', 'k1', 'k2', 'v')]),
        ),
        # indexes matching the forward (k1,k2) and backward (v) lookups
        migrations.AlterIndexTogether(
            name='markovstate',
            index_together=set([('context', 'k1', 'k2'), ('context', 'v')]),
        ),
    ]

View File

@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
from django.db import models, migrations
class Migration(migrations.Migration):
    """Widen context/target names from 64 to 200 characters."""

    dependencies = [
        ('markov', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='markovcontext',
            name='name',
            field=models.CharField(unique=True, max_length=200),
        ),
        migrations.AlterField(
            model_name='markovtarget',
            name='name',
            field=models.CharField(unique=True, max_length=200),
        ),
    ]

View File

@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Rename the import permission to import_text_file (was import_log_file)."""

    dependencies = [
        ('markov', '0002_auto_20150514_2317'),
    ]

    operations = [
        migrations.AlterModelOptions(
            name='markovstate',
            options={'permissions': set([('import_text_file', 'Can import states from a text file'), ('teach_line', 'Can teach lines')])},
        ),
    ]

View File

70
markov/models.py Normal file
View File

@@ -0,0 +1,70 @@
"""
markov/models.py --- save brain pieces for chaining
"""
import logging
from django.db import models
log = logging.getLogger('markov.models')
class MarkovContext(models.Model):
    """Define contexts for Markov chains."""

    # unique human-readable name for this corpus of states
    name = models.CharField(max_length=200, unique=True)

    def __str__(self):
        """String representation."""
        return "{0:s}".format(self.name)
class MarkovTarget(models.Model):
    """Define IRC targets that relate to a context, and can occasionally be talked to."""

    # nick or channel name this target represents
    name = models.CharField(max_length=200, unique=True)
    # the corpus this target reads from / writes to
    context = models.ForeignKey(MarkovContext)
    # chance of unprompted chatter (presumably a percentage; confirm against callers)
    chatter_chance = models.IntegerField(default=0)

    def __str__(self):
        """String representation."""
        return "{0:s} -> {1:s}".format(self.name, self.context.name)
class MarkovState(models.Model):
    """One element in a Markov chain, some text or something."""

    # sentinel tokens marking chain boundaries
    _start1 = '__start1'
    _start2 = '__start2'
    _stop = '__stop'

    # second-order chain: (k1, k2) -> v, weighted by count
    k1 = models.CharField(max_length=128)
    k2 = models.CharField(max_length=128)
    v = models.CharField(max_length=128)
    # number of times this transition has been observed
    count = models.IntegerField(default=0)
    context = models.ForeignKey(MarkovContext)

    class Meta:
        # indexes matching the forward (k1,k2) and backward (v) lookups
        index_together = [
            ['context', 'k1', 'k2'],
            ['context', 'v'],
        ]
        permissions = {
            ('import_text_file', "Can import states from a text file"),
            ('teach_line', "Can teach lines"),
        }
        # transitions are counted, not duplicated
        unique_together = ('context', 'k1', 'k2', 'v')

    def __str__(self):
        """String representation."""
        return "{0:s},{1:s} -> {2:s} (count: {3:d})".format(self.k1, self.k2, self.v, self.count)

View File

@@ -0,0 +1,8 @@
{% extends 'base.html' %}
{% block title %}context: {{ context.name }}{% endblock %}
{% block content %}
<p>{{ chain }}</p>
<p>in: {{ elapsed }}s</p>
{% endblock %}

View File

@@ -0,0 +1,15 @@
{% extends 'adminplus/index.html' %}
{% block title %}Markov - Import log file{% endblock %}
{% block content %}
<div id="content-main">
<form id="markov_import_file_form" enctype="multipart/form-data" action="{% url 'admin:markov_import_file' %}" method="post">
{% csrf_token %}
<table>
{{ form }}
</table>
<input class="submit-button" type="submit" value="Import"/>
</form>
</div>
{% endblock %}

View File

@@ -0,0 +1,15 @@
{% extends 'adminplus/index.html' %}
{% block title %}Markov - Teach line{% endblock %}
{% block content %}
<div id="content-main">
<form id="markov_teach_line_form" action="{% url 'admin:markov_teach_line' %}" method="post">
{% csrf_token %}
<table>
{{ form }}
</table>
<input class="submit-button" type="submit" value="Teach"/>
</form>
</div>
{% endblock %}

9
markov/urls.py Normal file
View File

@@ -0,0 +1,9 @@
"""URL patterns for markov stuff."""
from django.conf.urls import patterns, url
from django.views.generic import TemplateView
# NOTE(review): patterns() and string view references are deprecated and removed
# in newer Django; this should eventually become a plain list of url() entries
# with imported view callables --- confirm the project's Django version first.
urlpatterns = patterns('markov.views',
    url(r'^$', TemplateView.as_view(template_name='index.html'), name='markov_index'),
    url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'),
)

30
markov/views.py Normal file
View File

@@ -0,0 +1,30 @@
"""Manipulate Markov data via the Django site."""
import logging
import time
from django.http import HttpResponse
from django.shortcuts import get_object_or_404, render
import markov.lib as markovlib
from markov.models import MarkovContext
log = logging.getLogger('markov.views')
def index(request):
    """Display nothing, for the moment."""
    # intentionally empty response until there's something worth showing
    response = HttpResponse()
    return response
def context_index(request, context_id):
    """Display the context index for the given context."""
    started = time.time()
    context = get_object_or_404(MarkovContext, pk=context_id)
    # generate one sample chain from this context and time the generation
    chain = " ".join(markovlib.generate_line(context))
    elapsed = time.time() - started
    return render(request, 'markov/context.html',
                  {'chain': chain, 'context': context, 'elapsed': elapsed})