Merge branch 'master' of git.incorporeal.org:dr.botzo
This commit is contained in:
commit
1fc13b011d
|
@ -1,5 +1,6 @@
|
||||||
*.facts
|
*.facts
|
||||||
*.json
|
*.json
|
||||||
|
*.log
|
||||||
*.pyc
|
*.pyc
|
||||||
*.sqlite3
|
*.sqlite3
|
||||||
*.swo
|
*.swo
|
||||||
|
|
|
@ -38,6 +38,7 @@ INSTALLED_APPS = (
|
||||||
'django.contrib.staticfiles',
|
'django.contrib.staticfiles',
|
||||||
'django_extensions',
|
'django_extensions',
|
||||||
'south',
|
'south',
|
||||||
|
'markov',
|
||||||
'races',
|
'races',
|
||||||
'seen',
|
'seen',
|
||||||
)
|
)
|
||||||
|
|
|
@ -6,6 +6,7 @@ admin.autodiscover()
|
||||||
urlpatterns = patterns('',
|
urlpatterns = patterns('',
|
||||||
url(r'^$', 'dr_botzo.views.home', name='home'),
|
url(r'^$', 'dr_botzo.views.home', name='home'),
|
||||||
|
|
||||||
|
url(r'^markov/', include('markov.urls')),
|
||||||
url(r'^races/', include('races.urls')),
|
url(r'^races/', include('races.urls')),
|
||||||
|
|
||||||
url(r'^admin/', include(admin.site.urls)),
|
url(r'^admin/', include(admin.site.urls)),
|
||||||
|
|
|
@ -1,8 +1,15 @@
|
||||||
|
"""
|
||||||
|
dr_botzo/views.py --- various random views
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.http import HttpResponse
|
||||||
from django.shortcuts import render
|
from django.shortcuts import render
|
||||||
|
|
||||||
def home(request):
|
|
||||||
"""Site index, nothing special (or at all)."""
|
|
||||||
|
|
||||||
return render(request, 'index.html', {})
|
def home(request):
|
||||||
|
"""Site index, nothing special (or at all, right now)."""
|
||||||
|
|
||||||
|
return HttpResponse()
|
||||||
|
|
||||||
# vi:tabstop=4:expandtab:autoindent
|
# vi:tabstop=4:expandtab:autoindent
|
||||||
|
|
|
@ -24,7 +24,9 @@ import thread
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
import MySQLdb as mdb
|
|
||||||
|
from markov.models import MarkovContext, MarkovState, MarkovTarget
|
||||||
|
from markov.views import _generate_sentence, _learn_line
|
||||||
|
|
||||||
from extlib import irclib
|
from extlib import irclib
|
||||||
|
|
||||||
|
@ -48,12 +50,6 @@ class Markov(Module):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# set up some keywords for use in the chains --- don't change these
|
|
||||||
# once you've created a brain
|
|
||||||
self.start1 = '__start1'
|
|
||||||
self.start2 = '__start2'
|
|
||||||
self.stop = '__stop'
|
|
||||||
|
|
||||||
# set up regexes, for replying to specific stuff
|
# set up regexes, for replying to specific stuff
|
||||||
learnpattern = '^!markov\s+learn\s+(.*)$'
|
learnpattern = '^!markov\s+learn\s+(.*)$'
|
||||||
replypattern = '^!markov\s+reply(\s+min=(\d+))?(\s+max=(\d+))?(\s+(.*)$|$)'
|
replypattern = '^!markov\s+reply(\s+min=(\d+))?(\s+max=(\d+))?(\s+(.*)$|$)'
|
||||||
|
@ -70,66 +66,9 @@ class Markov(Module):
|
||||||
self.next_chatter_check = 0
|
self.next_chatter_check = 0
|
||||||
thread.start_new_thread(self.thread_do, ())
|
thread.start_new_thread(self.thread_do, ())
|
||||||
|
|
||||||
irc.xmlrpc_register_function(self._generate_line,
|
# TODO: bring this back somehow
|
||||||
"markov_generate_line")
|
#irc.xmlrpc_register_function(self._generate_line,
|
||||||
|
# "markov_generate_line")
|
||||||
def db_init(self):
|
|
||||||
"""Create the markov chain table."""
|
|
||||||
|
|
||||||
version = self.db_module_registered(self.__class__.__name__)
|
|
||||||
if version == None:
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
version = 1
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute('''
|
|
||||||
CREATE TABLE markov_chatter_target (
|
|
||||||
id SERIAL,
|
|
||||||
target VARCHAR(256) NOT NULL,
|
|
||||||
chance INTEGER NOT NULL DEFAULT 99999
|
|
||||||
) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
|
|
||||||
''')
|
|
||||||
cur.execute('''
|
|
||||||
CREATE TABLE markov_context (
|
|
||||||
id SERIAL,
|
|
||||||
context VARCHAR(256) NOT NULL
|
|
||||||
) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
|
|
||||||
''')
|
|
||||||
cur.execute('''
|
|
||||||
CREATE TABLE markov_target_to_context_map (
|
|
||||||
id SERIAL,
|
|
||||||
target VARCHAR(256) NOT NULL,
|
|
||||||
context_id BIGINT(20) UNSIGNED NOT NULL,
|
|
||||||
FOREIGN KEY(context_id) REFERENCES markov_context(id)
|
|
||||||
) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
|
|
||||||
''')
|
|
||||||
cur.execute('''
|
|
||||||
CREATE TABLE markov_chain (
|
|
||||||
id SERIAL,
|
|
||||||
k1 VARCHAR(128) NOT NULL,
|
|
||||||
k2 VARCHAR(128) NOT NULL,
|
|
||||||
v VARCHAR(128) NOT NULL,
|
|
||||||
context_id BIGINT(20) UNSIGNED NOT NULL,
|
|
||||||
FOREIGN KEY(context_id) REFERENCES markov_context(id)
|
|
||||||
) ENGINE=InnoDB CHARACTER SET utf8 COLLATE utf8_bin
|
|
||||||
''')
|
|
||||||
cur.execute('''
|
|
||||||
CREATE INDEX markov_chain_keys_and_context_id_index
|
|
||||||
ON markov_chain (k1, k2, context_id)''')
|
|
||||||
|
|
||||||
cur.execute('''
|
|
||||||
CREATE INDEX markov_chain_value_and_context_id_index
|
|
||||||
ON markov_chain (v, context_id)''')
|
|
||||||
|
|
||||||
db.commit()
|
|
||||||
self.db_register_module_version(self.__class__.__name__,
|
|
||||||
version)
|
|
||||||
except mdb.Error as e:
|
|
||||||
db.rollback()
|
|
||||||
self.log.error("database error trying to create tables")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def register_handlers(self):
|
def register_handlers(self):
|
||||||
"""Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
|
"""Handle pubmsg/privmsg, to learn and/or reply to IRC events."""
|
||||||
|
@ -171,7 +110,9 @@ class Markov(Module):
|
||||||
if self.learnre.search(what) or self.replyre.search(what):
|
if self.learnre.search(what) or self.replyre.search(what):
|
||||||
return
|
return
|
||||||
|
|
||||||
self._learn_line(what, target, event)
|
if not event._recursing:
|
||||||
|
context = _get_or_create_target_context(target)
|
||||||
|
_learn_line(what, context)
|
||||||
|
|
||||||
def do(self, connection, event, nick, userhost, what, admin_unlocked):
|
def do(self, connection, event, nick, userhost, what, admin_unlocked):
|
||||||
"""Handle commands and inputs."""
|
"""Handle commands and inputs."""
|
||||||
|
@ -188,18 +129,25 @@ class Markov(Module):
|
||||||
if not self.shut_up:
|
if not self.shut_up:
|
||||||
# not a command, so see if i'm being mentioned
|
# not a command, so see if i'm being mentioned
|
||||||
if re.search(connection.get_nickname(), what, re.IGNORECASE) is not None:
|
if re.search(connection.get_nickname(), what, re.IGNORECASE) is not None:
|
||||||
|
context = _get_or_create_target_context(target)
|
||||||
|
|
||||||
addressed_pattern = '^' + connection.get_nickname() + '[:,]\s+(.*)'
|
addressed_pattern = '^' + connection.get_nickname() + '[:,]\s+(.*)'
|
||||||
addressed_re = re.compile(addressed_pattern)
|
addressed_re = re.compile(addressed_pattern)
|
||||||
if addressed_re.match(what):
|
if addressed_re.match(what):
|
||||||
# i was addressed directly, so respond, addressing
|
# i was addressed directly, so respond, addressing
|
||||||
# the speaker
|
# the speaker
|
||||||
|
topics = [x for x in addressed_re.match(what).group(1).split(' ') if len(x) >= 3]
|
||||||
|
|
||||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||||
return self.irc.reply(event, '{0:s}: {1:s}'.format(nick,
|
return self.irc.reply(event, '{0:s}: {1:s}'.format(nick,
|
||||||
self._generate_line(target, line=addressed_re.match(what).group(1))))
|
' '.join(_generate_sentence(context, topics=topics))))
|
||||||
else:
|
else:
|
||||||
# i wasn't addressed directly, so just respond
|
# i wasn't addressed directly, so just respond
|
||||||
|
topics = [x for x in what.split(' ') if len(x) >= 3]
|
||||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||||
return self.irc.reply(event, '{0:s}'.format(self._generate_line(target, line=what)))
|
|
||||||
|
return self.irc.reply(event, '{0:s}'.format(' '.join(_generate_sentence(context,
|
||||||
|
topics=topics))))
|
||||||
|
|
||||||
def markov_learn(self, event, nick, userhost, what, admin_unlocked):
|
def markov_learn(self, event, nick, userhost, what, admin_unlocked):
|
||||||
"""Learn one line, as provided to the command."""
|
"""Learn one line, as provided to the command."""
|
||||||
|
@ -212,7 +160,8 @@ class Markov(Module):
|
||||||
match = self.learnre.search(what)
|
match = self.learnre.search(what)
|
||||||
if match:
|
if match:
|
||||||
line = match.group(1)
|
line = match.group(1)
|
||||||
self._learn_line(line, target, event)
|
context = _get_or_create_target_context(target)
|
||||||
|
_learn_line(line, context)
|
||||||
|
|
||||||
# return what was learned, for weird chaining purposes
|
# return what was learned, for weird chaining purposes
|
||||||
return line
|
return line
|
||||||
|
@ -229,6 +178,7 @@ class Markov(Module):
|
||||||
if match:
|
if match:
|
||||||
min_size = 15
|
min_size = 15
|
||||||
max_size = 30
|
max_size = 30
|
||||||
|
context = _get_or_create_target_context(target)
|
||||||
|
|
||||||
if match.group(2):
|
if match.group(2):
|
||||||
min_size = int(match.group(2))
|
min_size = int(match.group(2))
|
||||||
|
@ -237,11 +187,13 @@ class Markov(Module):
|
||||||
|
|
||||||
if match.group(5) != '':
|
if match.group(5) != '':
|
||||||
line = match.group(6)
|
line = match.group(6)
|
||||||
|
topics = [x for x in line.split(' ') if len(x) >= 3]
|
||||||
|
|
||||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||||
return self._generate_line(target, line=line, min_size=min_size, max_size=max_size)
|
return ' '.join(_generate_sentence(context, topics=topics, min_words=min_size, max_words=max_size))
|
||||||
else:
|
else:
|
||||||
self.lines_seen.append(('.self.said.', datetime.now()))
|
self.lines_seen.append(('.self.said.', datetime.now()))
|
||||||
return self._generate_line(target, min_size=min_size, max_size=max_size)
|
return ' '.join(_generate_sentence(context, min_words=min_size, max_words=max_size))
|
||||||
|
|
||||||
def thread_do(self):
|
def thread_do(self):
|
||||||
"""Do various things."""
|
"""Do various things."""
|
||||||
|
@ -254,20 +206,8 @@ class Markov(Module):
|
||||||
def _do_random_chatter_check(self):
|
def _do_random_chatter_check(self):
|
||||||
"""Randomly say something to a channel."""
|
"""Randomly say something to a channel."""
|
||||||
|
|
||||||
# don't immediately potentially chatter, let the bot
|
# TODO: make this do stuff again
|
||||||
# join channels first
|
return
|
||||||
if self.next_chatter_check == 0:
|
|
||||||
self.next_chatter_check = time.time() + 600
|
|
||||||
|
|
||||||
if self.next_chatter_check < time.time():
|
|
||||||
self.next_chatter_check = time.time() + 600
|
|
||||||
|
|
||||||
targets = self._get_chatter_targets()
|
|
||||||
for t in targets:
|
|
||||||
if t['chance'] > 0:
|
|
||||||
a = random.randint(1, t['chance'])
|
|
||||||
if a == 1:
|
|
||||||
self.sendmsg(t['target'], self._generate_line(t['target']))
|
|
||||||
|
|
||||||
def _do_shut_up_checks(self):
|
def _do_shut_up_checks(self):
|
||||||
"""Check to see if we've been talking too much, and shut up if so."""
|
"""Check to see if we've been talking too much, and shut up if so."""
|
||||||
|
@ -293,426 +233,34 @@ class Markov(Module):
|
||||||
self.sendmsg(t['target'],
|
self.sendmsg(t['target'],
|
||||||
'shutting up for 30 seconds due to last 30 seconds of activity')
|
'shutting up for 30 seconds due to last 30 seconds of activity')
|
||||||
|
|
||||||
def _learn_line(self, line, target, event):
|
def _get_or_create_target_context(target_name):
|
||||||
"""Create Markov chains from the provided line."""
|
"""Return the context for a provided nick/channel, creating missing ones."""
|
||||||
|
|
||||||
# set up the head of the chain
|
# find the stuff, or create it
|
||||||
k1 = self.start1
|
try:
|
||||||
k2 = self.start2
|
target = MarkovTarget.objects.get(name=target_name)
|
||||||
|
return target.context
|
||||||
|
except MarkovContext.DoesNotExist:
|
||||||
|
# make a context
|
||||||
|
context = MarkovContext()
|
||||||
|
context.name = target_name
|
||||||
|
context.save()
|
||||||
|
|
||||||
context_id = self._get_context_id_for_target(target)
|
target.context = context
|
||||||
|
target.save()
|
||||||
|
|
||||||
# don't learn recursion
|
return target.context
|
||||||
if not event._recursing:
|
except MarkovTarget.DoesNotExist:
|
||||||
words = line.split()
|
# first we need to make a context for this
|
||||||
if len(words) == 0:
|
context = MarkovContext()
|
||||||
return line
|
context.name = target_name
|
||||||
|
context.save()
|
||||||
|
|
||||||
db = self.get_db()
|
target = MarkovTarget()
|
||||||
try:
|
target.name = target_name
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
target.context = context
|
||||||
statement = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (%s, %s, %s, %s)'
|
target.save()
|
||||||
for word in words:
|
|
||||||
cur.execute(statement, (k1, k2, word, context_id))
|
|
||||||
k1, k2 = k2, word
|
|
||||||
cur.execute(statement, (k1, k2, self.stop, context_id))
|
|
||||||
|
|
||||||
db.commit()
|
return target.context
|
||||||
except mdb.Error as e:
|
|
||||||
db.rollback()
|
|
||||||
self.log.error("database error learning line")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def _generate_line(self, target, line='', min_size=15, max_size=30):
|
|
||||||
"""Create a line, optionally using some text in a seed as a point in
|
|
||||||
the chain.
|
|
||||||
|
|
||||||
Keyword arguments:
|
|
||||||
target - the target to retrieve the context for (i.e. a channel or nick)
|
|
||||||
line - the line to reply to, by picking a random word and seeding with it
|
|
||||||
min_size - the minimum desired size in words. not guaranteed
|
|
||||||
max_size - the maximum desired size in words. not guaranteed
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# if the limit is too low, there's nothing to do
|
|
||||||
if (max_size <= 3):
|
|
||||||
raise Exception("max_size is too small: %d" % max_size)
|
|
||||||
|
|
||||||
# if the min is too large, abort
|
|
||||||
if (min_size > 20):
|
|
||||||
raise Exception("min_size is too large: %d" % min_size)
|
|
||||||
|
|
||||||
seed_words = []
|
|
||||||
# shuffle the words in the input
|
|
||||||
seed_words = line.split()
|
|
||||||
random.shuffle(seed_words)
|
|
||||||
self.log.debug("seed words: {0:s}".format(seed_words))
|
|
||||||
|
|
||||||
# hit to generate a new seed word immediately if possible
|
|
||||||
seed_word = None
|
|
||||||
hit_word = None
|
|
||||||
|
|
||||||
context_id = self._get_context_id_for_target(target)
|
|
||||||
|
|
||||||
# start with an empty chain, and work from there
|
|
||||||
gen_words = [self.start1, self.start2]
|
|
||||||
|
|
||||||
# build a response by creating multiple sentences
|
|
||||||
while len(gen_words) < max_size + 2:
|
|
||||||
# if we're past the min and on a stop, we can end
|
|
||||||
if len(gen_words) > min_size + 2:
|
|
||||||
if gen_words[-1] == self.stop:
|
|
||||||
break
|
|
||||||
|
|
||||||
# pick a word from the shuffled seed words, if we need a new one
|
|
||||||
if seed_word == hit_word:
|
|
||||||
if len(seed_words) > 0:
|
|
||||||
seed_word = seed_words.pop()
|
|
||||||
self.log.debug("picked new seed word: "
|
|
||||||
"{0:s}".format(seed_word))
|
|
||||||
else:
|
|
||||||
seed_word = None
|
|
||||||
self.log.debug("ran out of seed words")
|
|
||||||
|
|
||||||
# if we have a stop, the word before it might need to be
|
|
||||||
# made to look like a sentence end
|
|
||||||
if gen_words[-1] == self.stop:
|
|
||||||
# chop off the stop, temporarily
|
|
||||||
gen_words = gen_words[:-1]
|
|
||||||
|
|
||||||
# we should have a real word, make it look like a
|
|
||||||
# sentence end
|
|
||||||
sentence_end = gen_words[-1]
|
|
||||||
eos_punctuation = ['!', '?', ',', '.']
|
|
||||||
if sentence_end[-1] not in eos_punctuation:
|
|
||||||
random.shuffle(eos_punctuation)
|
|
||||||
gen_words[-1] = sentence_end + eos_punctuation.pop()
|
|
||||||
self.log.debug("monkeyed with end of sentence, it's "
|
|
||||||
"now: {0:s}".format(gen_words[-1]))
|
|
||||||
|
|
||||||
# put the stop back on
|
|
||||||
gen_words.append(self.stop)
|
|
||||||
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
||||||
|
|
||||||
# first, see if we should start a new sentence. if so,
|
|
||||||
# work backwards
|
|
||||||
if gen_words[-1] in (self.start2, self.stop) and seed_word is not None and 0 == 1:
|
|
||||||
# drop a stop, since we're starting another sentence
|
|
||||||
if gen_words[-1] == self.stop:
|
|
||||||
gen_words = gen_words[:-1]
|
|
||||||
|
|
||||||
# work backwards from seed_word
|
|
||||||
working_backwards = []
|
|
||||||
back_k2 = self._retrieve_random_k2_for_value(seed_word, context_id)
|
|
||||||
if back_k2:
|
|
||||||
found_word = seed_word
|
|
||||||
if back_k2 == self.start2:
|
|
||||||
self.log.debug("random further back was start2, swallowing")
|
|
||||||
else:
|
|
||||||
working_backwards.append(back_k2)
|
|
||||||
working_backwards.append(found_word)
|
|
||||||
self.log.debug("started working backwards with: {0:s}".format(found_word))
|
|
||||||
self.log.debug("working_backwards: {0:s}".format(" ".join(working_backwards)))
|
|
||||||
|
|
||||||
# now work backwards until we randomly bump into a start
|
|
||||||
# to steer the chainer away from spending too much time on
|
|
||||||
# the weaker-context reverse chaining, we make max_size
|
|
||||||
# a non-linear distribution, making it more likely that
|
|
||||||
# some time is spent on better forward chains
|
|
||||||
max_back = min(random.randint(1, max_size/2) + random.randint(1, max_size/2),
|
|
||||||
max_size/4)
|
|
||||||
self.log.debug("max_back: {0:d}".format(max_back))
|
|
||||||
while len(working_backwards) < max_back:
|
|
||||||
back_k2 = self._retrieve_random_k2_for_value(working_backwards[0], context_id)
|
|
||||||
if back_k2 == self.start2:
|
|
||||||
self.log.debug("random further back was start2, finishing")
|
|
||||||
break
|
|
||||||
elif back_k2:
|
|
||||||
working_backwards.insert(0, back_k2)
|
|
||||||
self.log.debug("added '{0:s}' to working_backwards".format(back_k2))
|
|
||||||
self.log.debug("working_backwards: {0:s}".format(" ".join(working_backwards)))
|
|
||||||
else:
|
|
||||||
self.log.debug("nothing (at all!?) further back, finishing")
|
|
||||||
break
|
|
||||||
|
|
||||||
gen_words += working_backwards
|
|
||||||
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
||||||
hit_word = gen_words[-1]
|
|
||||||
else:
|
|
||||||
# we are working forward, with either:
|
|
||||||
# * a pair of words (normal path, filling out a sentence)
|
|
||||||
# * start1, start2 (completely new chain, no seed words)
|
|
||||||
# * stop (new sentence in existing chain, no seed words)
|
|
||||||
self.log.debug("working forwards")
|
|
||||||
forw_v = None
|
|
||||||
if gen_words[-1] in (self.start2, self.stop):
|
|
||||||
# case 2 or 3 above, need to work forward on a beginning
|
|
||||||
# of a sentence (this is slow)
|
|
||||||
if gen_words[-1] == self.stop:
|
|
||||||
# remove the stop if it's there
|
|
||||||
gen_words = gen_words[:-1]
|
|
||||||
|
|
||||||
new_sentence = self._create_chain_with_k1_k2(self.start1,
|
|
||||||
self.start2,
|
|
||||||
3, context_id,
|
|
||||||
avoid_address=True)
|
|
||||||
|
|
||||||
if len(new_sentence) > 0:
|
|
||||||
self.log.debug("started new sentence "
|
|
||||||
"'{0:s}'".format(" ".join(new_sentence)))
|
|
||||||
gen_words += new_sentence
|
|
||||||
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
||||||
else:
|
|
||||||
# this is a problem. we started a sentence on
|
|
||||||
# start1,start2, and still didn't find anything. to
|
|
||||||
# avoid endlessly looping we need to abort here
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
if seed_word:
|
|
||||||
self.log.debug("preferring: '{0:s}'".format(seed_word))
|
|
||||||
forw_v = self._retrieve_random_v_for_k1_and_k2_with_pref(gen_words[-2],
|
|
||||||
gen_words[-1],
|
|
||||||
seed_word,
|
|
||||||
context_id)
|
|
||||||
else:
|
|
||||||
forw_v = self._retrieve_random_v_for_k1_and_k2(gen_words[-2],
|
|
||||||
gen_words[-1],
|
|
||||||
context_id)
|
|
||||||
|
|
||||||
if forw_v:
|
|
||||||
gen_words.append(forw_v)
|
|
||||||
self.log.debug("added random word '{0:s}' to gen_words".format(forw_v))
|
|
||||||
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
||||||
hit_word = gen_words[-1]
|
|
||||||
else:
|
|
||||||
# append stop. this is an end to a sentence (since
|
|
||||||
# we had non-start words to begin with)
|
|
||||||
gen_words.append(self.stop)
|
|
||||||
self.log.debug("nothing found, added stop")
|
|
||||||
self.log.debug("gen_words: {0:s}".format(" ".join(gen_words)))
|
|
||||||
|
|
||||||
# chop off the seed data at the start
|
|
||||||
gen_words = gen_words[2:]
|
|
||||||
|
|
||||||
if len(gen_words):
|
|
||||||
# chop off the end text, if it was the keyword indicating an end of chain
|
|
||||||
if gen_words[-1] == self.stop:
|
|
||||||
gen_words = gen_words[:-1]
|
|
||||||
else:
|
|
||||||
self.log.warning("after all this we have an empty list of words. "
|
|
||||||
"there probably isn't any data for this context")
|
|
||||||
|
|
||||||
return ' '.join(gen_words)
|
|
||||||
|
|
||||||
def _retrieve_random_v_for_k1_and_k2(self, k1, k2, context_id):
|
|
||||||
"""Get one v for a given k1,k2."""
|
|
||||||
|
|
||||||
self.log.debug("searching with '{0:s}','{1:s}'".format(k1, k2))
|
|
||||||
values = []
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
query = '''
|
|
||||||
SELECT v FROM markov_chain AS r1
|
|
||||||
JOIN (
|
|
||||||
SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id
|
|
||||||
) AS r2
|
|
||||||
WHERE r1.k1 = %s
|
|
||||||
AND r1.k2 = %s
|
|
||||||
AND r1.context_id = %s
|
|
||||||
ORDER BY r1.id >= r2.id DESC, r1.id ASC
|
|
||||||
LIMIT 1
|
|
||||||
'''
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(query, (k1, k2, context_id))
|
|
||||||
result = cur.fetchone()
|
|
||||||
if result:
|
|
||||||
self.log.debug("found '{0:s}'".format(result['v']))
|
|
||||||
return result['v']
|
|
||||||
except mdb.Error as e:
|
|
||||||
self.log.error("database error in _retrieve_random_v_for_k1_and_k2")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def _retrieve_random_v_for_k1_and_k2_with_pref(self, k1, k2, prefer, context_id):
|
|
||||||
"""Get one v for a given k1,k2.
|
|
||||||
|
|
||||||
Prefer that the result be prefer, if it's found.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.log.debug("searching with '{0:s}','{1:s}', prefer "
|
|
||||||
"'{2:s}'".format(k1, k2, prefer))
|
|
||||||
values = []
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
query = '''
|
|
||||||
SELECT v FROM markov_chain AS r1
|
|
||||||
JOIN (
|
|
||||||
SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id
|
|
||||||
) AS r2
|
|
||||||
WHERE r1.k1 = %s
|
|
||||||
AND r1.k2 = %s
|
|
||||||
AND r1.context_id = %s
|
|
||||||
ORDER BY r1.id >= r2.id DESC, r1.v = %s DESC, r1.id ASC
|
|
||||||
LIMIT 1
|
|
||||||
'''
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(query, (k1, k2, context_id, prefer))
|
|
||||||
result = cur.fetchone()
|
|
||||||
if result:
|
|
||||||
self.log.debug("found '{0:s}'".format(result['v']))
|
|
||||||
return result['v']
|
|
||||||
except mdb.Error as e:
|
|
||||||
self.log.error("database error in _retrieve_random_v_for_k1_and_k2_with_pref")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def _retrieve_random_k2_for_value(self, v, context_id):
|
|
||||||
"""Get one k2 for a given value."""
|
|
||||||
|
|
||||||
values = []
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
query = '''
|
|
||||||
SELECT k2 FROM markov_chain AS r1
|
|
||||||
JOIN (
|
|
||||||
SELECT (RAND() * (SELECT MAX(id) FROM markov_chain)) AS id
|
|
||||||
) AS r2
|
|
||||||
WHERE r1.v = %s
|
|
||||||
AND r1.context_id = %s
|
|
||||||
ORDER BY r1.id >= r2.id DESC, r1.id ASC
|
|
||||||
LIMIT 1
|
|
||||||
'''
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(query, (v, context_id))
|
|
||||||
result = cur.fetchone()
|
|
||||||
if result:
|
|
||||||
return result['k2']
|
|
||||||
except mdb.Error as e:
|
|
||||||
self.log.error("database error in _retrieve_random_k2_for_value")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def _create_chain_with_k1_k2(self, k1, k2, length, context_id,
|
|
||||||
avoid_address=False):
|
|
||||||
"""Create a chain of the given length, using k1,k2.
|
|
||||||
|
|
||||||
k1,k2 does not appear in the resulting chain.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
chain = [k1, k2]
|
|
||||||
self.log.debug("creating chain for {0:s},{1:s}".format(k1, k2))
|
|
||||||
|
|
||||||
for _ in range(length):
|
|
||||||
v = self._retrieve_random_v_for_k1_and_k2(chain[-2],
|
|
||||||
chain[-1],
|
|
||||||
context_id)
|
|
||||||
if v:
|
|
||||||
chain.append(v)
|
|
||||||
|
|
||||||
# check for addresses (the "whoever:" in
|
|
||||||
# __start1 __start2 whoever: some words)
|
|
||||||
addressing_suffixes = [':', ',']
|
|
||||||
if len(chain) > 2 and chain[2][-1] in addressing_suffixes and avoid_address:
|
|
||||||
return chain[3:]
|
|
||||||
elif len(chain) > 2:
|
|
||||||
return chain[2:]
|
|
||||||
else:
|
|
||||||
return []
|
|
||||||
|
|
||||||
def _get_chatter_targets(self):
|
|
||||||
"""Get all possible chatter targets."""
|
|
||||||
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
# need to create our own db object, since this is likely going to be in a new thread
|
|
||||||
query = 'SELECT target, chance FROM markov_chatter_target'
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(query)
|
|
||||||
results = cur.fetchall()
|
|
||||||
return results
|
|
||||||
except mdb.Error as e:
|
|
||||||
self.log.error("database error in _get_chatter_targets")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def _get_context_id_for_target(self, target):
|
|
||||||
"""Get the context ID for the desired/input target."""
|
|
||||||
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
query = '''
|
|
||||||
SELECT mc.id FROM markov_context mc
|
|
||||||
INNER JOIN markov_target_to_context_map mt
|
|
||||||
ON mt.context_id = mc.id
|
|
||||||
WHERE mt.target = %s
|
|
||||||
'''
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(query, (target,))
|
|
||||||
result = cur.fetchone()
|
|
||||||
db.close()
|
|
||||||
if result:
|
|
||||||
return result['id']
|
|
||||||
else:
|
|
||||||
# auto-generate a context to keep things private
|
|
||||||
self._add_context_for_target(target)
|
|
||||||
return self._get_context_id_for_target(target)
|
|
||||||
except mdb.Error as e:
|
|
||||||
self.log.error("database error in _get_context_id_for_target")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
def _add_context_for_target(self, target):
|
|
||||||
"""Create a new context for the desired/input target."""
|
|
||||||
|
|
||||||
db = self.get_db()
|
|
||||||
try:
|
|
||||||
statement = 'INSERT INTO markov_context (context) VALUES (%s)'
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(statement, (target,))
|
|
||||||
statement = '''
|
|
||||||
INSERT INTO markov_target_to_context_map (target, context_id)
|
|
||||||
VALUES (%s, (SELECT id FROM markov_context WHERE context = %s))
|
|
||||||
'''
|
|
||||||
cur.execute(statement, (target, target))
|
|
||||||
db.commit()
|
|
||||||
except mdb.Error as e:
|
|
||||||
db.rollback()
|
|
||||||
self.log.error("database error in _add_context_for_target")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
try:
|
|
||||||
query = '''
|
|
||||||
SELECT mc.id FROM markov_context mc
|
|
||||||
INNER JOIN markov_target_to_context_map mt
|
|
||||||
ON mt.context_id = mc.id
|
|
||||||
WHERE mt.target = %s
|
|
||||||
'''
|
|
||||||
cur = db.cursor(mdb.cursors.DictCursor)
|
|
||||||
cur.execute(query, (target,))
|
|
||||||
result = cur.fetchone()
|
|
||||||
if result:
|
|
||||||
return result['id']
|
|
||||||
else:
|
|
||||||
# auto-generate a context to keep things private
|
|
||||||
self._add_context_for_target(target)
|
|
||||||
return self._get_context_id_for_target(target)
|
|
||||||
except mdb.Error as e:
|
|
||||||
self.log.error("database error in _get_context_id_for_target")
|
|
||||||
self.log.exception(e)
|
|
||||||
raise
|
|
||||||
finally: cur.close()
|
|
||||||
|
|
||||||
# vi:tabstop=4:expandtab:autoindent
|
# vi:tabstop=4:expandtab:autoindent
|
||||||
# kate: indent-mode python;indent-width 4;replace-tabs on;
|
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
from django.contrib import admin
|
||||||
|
|
||||||
|
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
||||||
|
|
||||||
|
admin.site.register(MarkovContext)
|
||||||
|
admin.site.register(MarkovTarget)
|
||||||
|
admin.site.register(MarkovState)
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,32 @@
|
||||||
|
"""
|
||||||
|
markov/forms.py --- forms for manipulating markov data
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.forms import Form, CharField, FileField, ModelChoiceField
|
||||||
|
|
||||||
|
from markov.models import MarkovContext
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.markov')
|
||||||
|
|
||||||
|
|
||||||
|
class LogUploadForm(Form):
|
||||||
|
|
||||||
|
"""Accept a file upload that will be imported into Markov stuff."""
|
||||||
|
|
||||||
|
log_file = FileField(help_text="Weechat log format.")
|
||||||
|
context = ModelChoiceField(queryset=MarkovContext.objects.all())
|
||||||
|
ignore = CharField(help_text="Comma-separated list of nicks to ignore.",
|
||||||
|
required=False)
|
||||||
|
|
||||||
|
|
||||||
|
class TeachLineForm(Form):
|
||||||
|
|
||||||
|
"""Accept a line that will be imported into Markov stuff."""
|
||||||
|
|
||||||
|
context = ModelChoiceField(queryset=MarkovContext.objects.all())
|
||||||
|
line = CharField()
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,80 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from south.utils import datetime_utils as datetime
|
||||||
|
from south.db import db
|
||||||
|
from south.v2 import SchemaMigration
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(SchemaMigration):

    """Initial schema for the markov app: contexts, targets and states."""

    def forwards(self, orm):
        """Create the three markov tables and the MarkovState unique constraint."""

        gf = self.gf

        # Adding model 'MarkovContext'
        context_columns = (
            (u'id', gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', gf('django.db.models.fields.CharField')(max_length=32)),
        )
        db.create_table(u'markov_markovcontext', context_columns)
        db.send_create_signal(u'markov', ['MarkovContext'])

        # Adding model 'MarkovTarget'
        target_columns = (
            (u'id', gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('target', gf('django.db.models.fields.CharField')(max_length=64)),
            ('context', gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])),
            ('chatter_chance', gf('django.db.models.fields.IntegerField')(default=0)),
        )
        db.create_table(u'markov_markovtarget', target_columns)
        db.send_create_signal(u'markov', ['MarkovTarget'])

        # Adding model 'MarkovState'
        state_columns = (
            (u'id', gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('k1', gf('django.db.models.fields.CharField')(max_length=128)),
            ('k2', gf('django.db.models.fields.CharField')(max_length=128)),
            ('v', gf('django.db.models.fields.CharField')(max_length=128)),
            ('count', gf('django.db.models.fields.IntegerField')(default=0)),
            ('context', gf('django.db.models.fields.related.ForeignKey')(to=orm['markov.MarkovContext'])),
        )
        db.create_table(u'markov_markovstate', state_columns)
        db.send_create_signal(u'markov', ['MarkovState'])

        # Adding unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v']
        db.create_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v'])

    def backwards(self, orm):
        """Drop the unique constraint, then the three markov tables."""

        # Removing unique constraint on 'MarkovState', fields ['context', 'k1', 'k2', 'v']
        db.delete_unique(u'markov_markovstate', ['context_id', 'k1', 'k2', 'v'])

        # Deleting models 'MarkovContext', 'MarkovTarget' and 'MarkovState', in that order
        for table_name in (u'markov_markovcontext',
                           u'markov_markovtarget',
                           u'markov_markovstate'):
            db.delete_table(table_name)

    # frozen model definitions as of this migration
    models = {
        u'markov.markovcontext': {
            'Meta': {'object_name': 'MarkovContext'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '32'})
        },
        u'markov.markovstate': {
            'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'},
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'v': ('django.db.models.fields.CharField', [], {'max_length': '128'})
        },
        u'markov.markovtarget': {
            'Meta': {'object_name': 'MarkovTarget'},
            'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'target': ('django.db.models.fields.CharField', [], {'max_length': '64'})
        }
    }

    complete_apps = ['markov']
|
|
@ -0,0 +1,60 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from south.utils import datetime_utils as datetime
|
||||||
|
from south.db import db
|
||||||
|
from south.v2 import SchemaMigration
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(SchemaMigration):

    """Rename MarkovTarget.target to MarkovTarget.name and make names unique.

    The column is renamed in place rather than dropped and re-added, so
    existing target names survive the migration. Dropping and re-adding would
    also fill every row with the same default, which cannot satisfy the new
    unique constraint once more than one target exists.
    """

    def forwards(self, orm):
        """Rename 'target' to 'name' and add the two unique constraints."""

        # Renaming field 'MarkovTarget.target' to 'MarkovTarget.name'
        db.rename_column(u'markov_markovtarget', 'target', 'name')

        # Adding unique constraint on 'MarkovTarget', fields ['name']
        db.create_unique(u'markov_markovtarget', ['name'])

        # Adding unique constraint on 'MarkovContext', fields ['name']
        db.create_unique(u'markov_markovcontext', ['name'])

    def backwards(self, orm):
        """Remove the unique constraints and rename 'name' back to 'target'."""

        # Removing unique constraint on 'MarkovContext', fields ['name']
        db.delete_unique(u'markov_markovcontext', ['name'])

        # Removing unique constraint on 'MarkovTarget', fields ['name']
        db.delete_unique(u'markov_markovtarget', ['name'])

        # Renaming field 'MarkovTarget.name' back to 'MarkovTarget.target'
        db.rename_column(u'markov_markovtarget', 'name', 'target')

    # frozen model definitions as of this migration
    models = {
        u'markov.markovcontext': {
            'Meta': {'object_name': 'MarkovContext'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '32'})
        },
        u'markov.markovstate': {
            'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'},
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'v': ('django.db.models.fields.CharField', [], {'max_length': '128'})
        },
        u'markov.markovtarget': {
            'Meta': {'object_name': 'MarkovTarget'},
            'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'})
        }
    }

    complete_apps = ['markov']
|
|
@ -0,0 +1,44 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from south.utils import datetime_utils as datetime
|
||||||
|
from south.db import db
|
||||||
|
from south.v2 import SchemaMigration
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(SchemaMigration):

    """Widen MarkovContext.name from 32 to 64 characters."""

    def forwards(self, orm):
        """Alter 'name' to a unique CharField of length 64."""

        char_field = self.gf('django.db.models.fields.CharField')

        # Changing field 'MarkovContext.name'
        widened = char_field(unique=True, max_length=64)
        db.alter_column(u'markov_markovcontext', 'name', widened)

    def backwards(self, orm):
        """Alter 'name' back to a unique CharField of length 32."""

        char_field = self.gf('django.db.models.fields.CharField')

        # Changing field 'MarkovContext.name'
        narrowed = char_field(max_length=32, unique=True)
        db.alter_column(u'markov_markovcontext', 'name', narrowed)

    # frozen model definitions as of this migration
    models = {
        u'markov.markovcontext': {
            'Meta': {'object_name': 'MarkovContext'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'})
        },
        u'markov.markovstate': {
            'Meta': {'unique_together': "(('context', 'k1', 'k2', 'v'),)", 'object_name': 'MarkovState'},
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            'count': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'k1': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'k2': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'v': ('django.db.models.fields.CharField', [], {'max_length': '128'})
        },
        u'markov.markovtarget': {
            'Meta': {'object_name': 'MarkovTarget'},
            'chatter_chance': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'context': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['markov.MarkovContext']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'})
        }
    }

    complete_apps = ['markov']
|
|
@ -0,0 +1,68 @@
|
||||||
|
"""
|
||||||
|
markov/models.py --- save brain pieces for chaining
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.markov')
|
||||||
|
|
||||||
|
|
||||||
|
class MarkovContext(models.Model):

    """A uniquely named context that Markov states and targets belong to."""

    # unique label for the context, at most 64 characters
    name = models.CharField(max_length=64, unique=True)

    def __unicode__(self):
        """Return the context name as the unicode representation."""

        label = u"{0:s}".format(self.name)
        return label
|
||||||
|
|
||||||
|
|
||||||
|
class MarkovTarget(models.Model):

    """An IRC target tied to a context, which can occasionally be talked to."""

    # unique name of the target, at most 64 characters
    name = models.CharField(max_length=64, unique=True)
    # the context this target belongs to
    context = models.ForeignKey(MarkovContext)

    # integer chatter setting, 0 unless set otherwise
    chatter_chance = models.IntegerField(default=0)

    def __unicode__(self):
        """Return the target name as the unicode representation."""

        label = u"{0:s}".format(self.name)
        return label
|
||||||
|
|
||||||
|
|
||||||
|
class MarkovState(models.Model):

    """One element in a Markov chain: the pair (k1, k2) was followed by v.

    `count` records how many times that transition has been learned within
    `context`; each (context, k1, k2, v) combination is stored once.
    """

    # sentinel words padded around every learned line
    _start1 = '__start1'
    _start2 = '__start2'
    _stop = '__stop'

    # the two preceding words (the key) and the word that followed them
    k1 = models.CharField(max_length=128)
    k2 = models.CharField(max_length=128)
    v = models.CharField(max_length=128)

    # number of times this transition has been learned
    count = models.IntegerField(default=0)
    context = models.ForeignKey(MarkovContext)

    class Meta:
        # a tuple of 2-tuples, as Django documents, so the permissions are
        # declared in a fixed order (a set literal iterates in arbitrary order)
        permissions = (
            ('import_log_file', "Can import states from a log file"),
            ('teach_line', "Can teach lines"),
        )
        unique_together = ('context', 'k1', 'k2', 'v')

    def __unicode__(self):
        """Return 'k1,k2 -> v (count: n)' as the unicode representation."""

        return u"{0:s},{1:s} -> {2:s} (count: {3:d})".format(self.k1, self.k2, self.v, self.count)
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,15 @@
|
||||||
|
"""
|
||||||
|
markov/urls.py --- url patterns for markov stuff
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from django.conf.urls import patterns, url
|
||||||
|
|
||||||
|
# routes for the markov app; view names are resolved against 'markov.views'
urlpatterns = patterns(
    'markov.views',
    *[
        url(r'^$', 'index', name='markov_index'),
        url(r'^context/(?P<context_id>\d+)/$', 'context_index', name='markov_context_index'),
        url(r'^import/$', 'import_file', name='markov_import_file'),
        url(r'^teach/$', 'teach_line', name='markov_teach_line'),
    ]
)
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -0,0 +1,215 @@
|
||||||
|
"""
|
||||||
|
markov/views.py --- manipulate markov data
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
|
||||||
|
from django.contrib.auth.decorators import permission_required
|
||||||
|
from django.http import HttpResponse
|
||||||
|
from django.shortcuts import get_object_or_404, render
|
||||||
|
|
||||||
|
from markov.forms import LogUploadForm, TeachLineForm
|
||||||
|
from markov.models import MarkovContext, MarkovTarget, MarkovState
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.markov')
|
||||||
|
|
||||||
|
|
||||||
|
def index(request):
    """Answer with an empty response; there is no markov index page yet."""

    empty_response = HttpResponse()
    return empty_response
|
||||||
|
|
||||||
|
|
||||||
|
def context_index(request, context_id):
    """Render a freshly generated sentence for the context with the given id.

    Responds with a 404 when no MarkovContext has that primary key. The
    template also receives the time spent on lookup and generation, in seconds.
    """

    started = time.time()
    context = get_object_or_404(MarkovContext, pk=context_id)
    generated_words = _generate_sentence(context)
    chain = ' '.join(generated_words)
    elapsed = time.time() - started

    template_vars = {
        'chain': chain,
        'context': context,
        'elapsed': elapsed,
    }
    return render(request, 'markov/context.html', template_vars)
|
||||||
|
|
||||||
|
|
||||||
|
# permission names are '<app label>.<codename>'; without the 'markov.' prefix
# has_perm() can never match, so only superusers would pass the check
@permission_required('markov.import_log_file', raise_exception=True)
def import_file(request):
    """Accept a file upload and turn it into markov stuff.

    Current file formats supported:
    * weechat

    On a valid POST, every tab-separated "timestamp, who, what" line of the
    uploaded log is learned into the chosen context, except lines from
    weechat's join/part/notice/action prefixes and from the nicks listed in
    the form's comma-separated 'ignore' field. Lines that cannot be decoded
    as UTF-8 or that lack the three fields are skipped rather than aborting
    the import. The upload form is rendered in every case.

    """

    if request.method == 'POST':
        form = LogUploadForm(request.POST, request.FILES)
        if form.is_valid():
            log_file = request.FILES['log_file']
            context = form.cleaned_data['context']
            ignores = form.cleaned_data['ignore'].split(',')

            # distinct speakers we learned from, for the debug log
            whos = set()
            for line in log_file:
                try:
                    # UnicodeDecodeError is a ValueError as well, so this one
                    # handler covers undecodable and short lines alike
                    (timestamp, who, what) = line.decode('utf-8').split('\t', 2)
                except ValueError:
                    log.debug(u"skipping unparseable line {0!r}".format(line))
                    continue

                if who in ('-->', '<--', '--', ' *'):
                    continue

                if who in ignores:
                    continue

                whos.add(who)

                # this is a line we probably care about now
                _learn_line(what.rstrip(), context)

            log.debug(whos)
    else:
        form = LogUploadForm()

    return render(request, 'markov/import_file.html', {'form': form})
|
||||||
|
|
||||||
|
|
||||||
|
# permission names are '<app label>.<codename>'; without the 'markov.' prefix
# has_perm() can never match, so only superusers would pass the check
@permission_required('markov.teach_line', raise_exception=True)
def teach_line(request):
    """Teach one line directly.

    On a valid POST the submitted line, with trailing whitespace removed, is
    learned into the chosen context. The teach form is rendered in every case.
    """

    if request.method == 'POST':
        form = TeachLineForm(request.POST)
        if form.is_valid():
            line = form.cleaned_data['line']
            context = form.cleaned_data['context']
            _learn_line(line.rstrip(), context)
    else:
        form = TeachLineForm()

    return render(request, 'markov/teach_line.html', {'form': form})
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_line(context, topics=None, max_words=30):
    """Build one chain of words from the states stored for a context.

    When topics are given, one is picked at random and removed from the
    caller's list. If the context knows that word, the chain is first grown
    backwards from it to the start marker and then forwards to the stop
    marker. Otherwise the chain simply runs forwards from the start markers.
    Growth in either direction stops once the chain exceeds max_words.

    Returns the list of words with the start/stop markers filtered out.
    """

    start1 = MarkovState._start1
    start2 = MarkovState._start2
    stop = MarkovState._stop

    chain = []

    # with topics available, try to seed the chain with one and walk backwards
    if topics:
        seed = random.choice(topics)
        topics.remove(seed)
        log.debug(u"looking for topic '{0:s}'".format(seed))
        seed_states = MarkovState.objects.filter(context=context, v=seed)

        if len(seed_states) > 0:
            log.debug(u"found '{0:s}', starting backwards".format(seed))
            chain = [seed]
            while len(chain) <= max_words and chain[0] != start2:
                head = chain[0]
                log.debug(u"looking backwards for '{0:s}'".format(head))
                candidates = MarkovState.objects.filter(context=context, v=head)
                chain.insert(0, _get_word_out_of_states(candidates, backwards=True))

    # no topic material, so begin (forwards) from the start markers
    if not chain:
        chain = [start1, start2]

    while len(chain) <= max_words and chain[-1] != stop:
        size = len(chain)
        k1, k2 = chain[size - 2], chain[size - 1]
        log.debug(u"looking for '{0:s}','{1:s}'".format(k1, k2))
        candidates = MarkovState.objects.filter(context=context, k1=k1, k2=k2)
        chain.append(_get_word_out_of_states(candidates))

    markers = (start1, start2, stop)
    return [word for word in chain if word not in markers]
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_longish_line(context, topics=None, min_words=4, max_words=30):
    """Generate a Markov chain, but throw away the short ones unless we get desperate.

    Up to five lines are generated and the first with at least min_words
    words is returned. If none qualifies, one last line is generated without
    topics and returned whatever its length. Every attempt, including that
    fallback, is capped by max_words.
    """

    for _ in range(5):
        line = _generate_line(context, topics=topics, max_words=max_words)
        if len(line) >= min_words:
            return line

    # if we got here, we need to just give up; topics are dropped on purpose,
    # but the caller's word cap still has to be honored
    return _generate_line(context, max_words=max_words)
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_sentence(context, topics=None, min_words=15, max_words=30):
    """String multiple lines together into a coherent sentence.

    Lines are appended until the sentence has at least min_words words, for
    at most five attempts. Between lines the last word so far gets a random
    ',', '.' or '!' appended. Returns the list of words, which may be shorter
    than min_words (or empty) if the context could not produce enough.
    """

    sentence = []
    for _ in range(5):
        sentence += _generate_longish_line(context, topics=topics, max_words=max_words)
        if len(sentence) >= min_words:
            return sentence

        # punctuate before the next line is appended; nothing to punctuate
        # if no words have been generated yet
        if sentence:
            sentence[-1] += random.choice([',', '.', '!'])

    # if we got here, we need to give up
    return sentence
|
||||||
|
|
||||||
|
|
||||||
|
def _get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states, weighted by their counts.

    Each state contributes its `v` (or its `k2` when backwards is true) with
    a weight of `count`. Returns the chosen word, or '' when there are no
    states to choose from.
    """

    running = 0
    weighted_words = []
    for state in states:
        running += state.count
        word = state.k2 if backwards else state.v
        # store the cumulative weight, so a word owns the range of hits
        # between the previous total (exclusive) and its own (inclusive)
        weighted_words.append((running, word))

    log.debug(u"{0:s}".format(weighted_words))

    # randint() includes both ends, so draw from 1..running: starting at 0
    # would give the first word one extra winning value. With no weight at
    # all, fall back to 0, which selects the first word (if any).
    hit = random.randint(1, running) if running > 0 else 0
    log.debug(u"hit: {0:d}".format(hit))

    new_word = ''
    for weight, word in weighted_words:
        new_word = word
        if weight >= hit:
            break

    log.debug(u"found '{0:s}'".format(new_word))
    return new_word
|
||||||
|
|
||||||
|
|
||||||
|
def _learn_line(line, context):
    """Create a bunch of MarkovStates for a given line of text.

    The line is split on whitespace and padded with the start/stop markers.
    Every consecutive (k1, k2) -> v triple then has its count in the given
    context incremented, creating the state if needed. Nothing is learned
    from a blank line, nor from a line containing a word too long to store.
    """

    log.debug(u"learning {0:.40s}...".format(line))

    words = line.split()
    if not words:
        # a blank line would only teach "start, start -> stop", which makes
        # the generator produce empty lines
        return

    words = [MarkovState._start1, MarkovState._start2] + words + [MarkovState._stop]

    # k1, k2 and v share the same length limit; drop the whole line if any
    # word would not fit
    max_length = MarkovState._meta.get_field('k1').max_length
    if any(len(word) > max_length for word in words):
        return

    # slide a three-word window over the padded line
    for i in range(len(words) - 2):
        k1, k2, v = words[i], words[i+1], words[i+2]
        log.debug(u"'{0:s}','{1:s}' -> '{2:s}'".format(k1, k2, v))
        state, created = MarkovState.objects.get_or_create(context=context,
                                                           k1=k1,
                                                           k2=k2,
                                                           v=v)
        state.count += 1
        state.save()
|
||||||
|
|
||||||
|
# vi:tabstop=4:expandtab:autoindent
|
|
@ -1,7 +1,17 @@
|
||||||
|
"""
|
||||||
|
races/models.py --- models for managing competitive races
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.races')
|
||||||
|
|
||||||
|
|
||||||
class Race(models.Model):
|
class Race(models.Model):
|
||||||
|
|
||||||
"""Track a race."""
|
"""Track a race."""
|
||||||
|
|
|
@ -1,8 +1,18 @@
|
||||||
|
"""
|
||||||
|
races/views.py --- display race statuses and whatnot
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
from django.shortcuts import get_object_or_404, render
|
from django.shortcuts import get_object_or_404, render
|
||||||
|
|
||||||
from races.models import Race, Racer, RaceUpdate
|
from races.models import Race, Racer, RaceUpdate
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger('dr_botzo.races')
|
||||||
|
|
||||||
|
|
||||||
def index(request):
|
def index(request):
|
||||||
"""Display a list of races."""
|
"""Display a list of races."""
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block title %}context: {{ context.name }}{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<p>{{ chain }}</p>
|
||||||
|
<p>in: {{ elapsed }}s</p>
|
||||||
|
{% endblock %}
|
||||||
|
<!--
|
||||||
|
vi:tabstop=4:expandtab:autoindent
|
||||||
|
-->
|
|
@ -0,0 +1,16 @@
|
||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block title %}markov import{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<form id="markov_import_file_form" enctype="multipart/form-data" action="{% url 'markov_import_file' %}" method="post">
|
||||||
|
{% csrf_token %}
|
||||||
|
<table>
|
||||||
|
{{ form }}
|
||||||
|
</table>
|
||||||
|
<input class="submit-button" type="submit" value="Import"/>
|
||||||
|
</form>
|
||||||
|
{% endblock %}
|
||||||
|
<!--
|
||||||
|
vi:tabstop=4:expandtab:autoindent
|
||||||
|
-->
|
|
@ -0,0 +1,16 @@
|
||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block title %}markov teach{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<form id="markov_teach_line_form" action="{% url 'markov_teach_line' %}" method="post">
|
||||||
|
{% csrf_token %}
|
||||||
|
<table>
|
||||||
|
{{ form }}
|
||||||
|
</table>
|
||||||
|
<input class="submit-button" type="submit" value="Teach"/>
|
||||||
|
</form>
|
||||||
|
{% endblock %}
|
||||||
|
<!--
|
||||||
|
vi:tabstop=4:expandtab:autoindent
|
||||||
|
-->
|
Loading…
Reference in New Issue