Compare commits

...

12 Commits

12 changed files with 321 additions and 61 deletions

View File

@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-02-16 22:38
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``discord_bridge`` character field to the ``ircchannel`` model."""

    # must run after the previous ircbot migration
    dependencies = [
        ('ircbot', '0018_ircserver_replace_irc_control_with_markdown'),
    ]

    operations = [
        # optional (blank allowed), defaults to the empty string, at most 32 characters
        migrations.AddField(
            model_name='ircchannel',
            name='discord_bridge',
            field=models.CharField(blank=True, default='', max_length=32),
        ),
    ]

View File

@ -104,6 +104,8 @@ class IrcChannel(models.Model):
markov_learn_from_channel = models.BooleanField(default=True)
discord_bridge = models.CharField(default='', max_length=32, blank=True)
class Meta:
"""Settings for the model."""

View File

@ -7,6 +7,7 @@ import irc.client
import markov.lib as markovlib
from ircbot.lib import Plugin, reply_destination_for_event
from ircbot.models import IrcChannel
from markov.models import MarkovContext, MarkovTarget
log = logging.getLogger('markov.ircplugin')
@ -40,7 +41,7 @@ class Markov(Plugin):
min_size = 15
max_size = 30
context = markovlib.get_or_create_target_context(target)
context = self.get_or_create_target_context(target)
if match.group(2):
min_size = int(match.group(2))
@ -60,15 +61,10 @@ class Markov(Plugin):
def handle_chatter(self, connection, event):
"""Learn from IRC chatter."""
what = event.arguments[0]
if connection.server_config.additional_addressed_nicks:
all_nicks = '|'.join(connection.server_config.additional_addressed_nicks.split('\n') +
[connection.get_nickname()])
else:
all_nicks = connection.get_nickname()
trimmed_what = re.sub(r'^(({nicks})[:,]|@({nicks}))\s+'.format(nicks=all_nicks), '', what)
nick = irc.client.NickMask(event.source).nick
who = irc.client.NickMask(event.source).nick
target = reply_destination_for_event(event)
log.debug("what: '%s', who: '%s', target: '%s'", what, who, target)
# check to see whether or not we should learn from this channel
channel = None
if irc.client.is_channel(target):
@ -78,15 +74,29 @@ class Markov(Plugin):
log.debug("not learning from %s as i've been told to ignore it", channel)
else:
# learn the line
learning_what = what
# don't learn the speaker's nick if this came over a bridge
if channel and who == channel.discord_bridge:
learning_what = ' '.join(learning_what.split(' ')[1:])
# remove our own nick and aliases from what we learn
if connection.server_config.additional_addressed_nicks:
all_nicks = '|'.join(connection.server_config.additional_addressed_nicks.split('\n') +
[connection.get_nickname()])
else:
all_nicks = connection.get_nickname()
learning_what = re.sub(r'^(({nicks})[:,]|@({nicks}))\s+'.format(nicks=all_nicks), '', learning_what)
recursing = getattr(event, 'recursing', False)
if not recursing:
log.debug("learning %s", trimmed_what)
context = markovlib.get_or_create_target_context(target)
markovlib.learn_line(trimmed_what, context)
log.debug("learning %s", learning_what)
context = self.get_or_create_target_context(target)
markovlib.learn_line(learning_what, context)
log.debug("searching '%s' for '%s'", what, all_nicks)
if re.search(all_nicks, what, re.IGNORECASE) is not None:
context = markovlib.get_or_create_target_context(target)
context = self.get_or_create_target_context(target)
addressed_pattern = r'^(({nicks})[:,]|@({nicks}))\s+(?P<addressed_msg>.*)'.format(nicks=all_nicks)
match = re.match(addressed_pattern, what, re.IGNORECASE)
@ -96,7 +106,7 @@ class Markov(Plugin):
topics = [x for x in match.group('addressed_msg').split(' ') if len(x) >= 3]
return self.bot.reply(event, "{0:s}: {1:s}"
"".format(nick, " ".join(markovlib.generate_line(context, topics=topics))))
"".format(who, " ".join(markovlib.generate_line(context, topics=topics))))
else:
# i wasn't addressed directly, so just respond
topics = [x for x in what.split(' ') if len(x) >= 3]
@ -104,5 +114,31 @@ class Markov(Plugin):
return self.bot.reply(event, "{0:s}"
"".format(" ".join(markovlib.generate_line(context, topics=topics))))
def get_or_create_target_context(self, target_name):
    """Fetch the markov context for a nick/channel, building any missing records.

    The name is lowercased before any lookup. When no target exists yet, an
    IrcChannel (on this connection's server), a MarkovContext and a MarkovTarget
    are all fetched-or-created under that name. When the target exists but its
    context lookup fails, a context with the same name is attached and saved.
    """
    key = target_name.lower()

    try:
        known_target = MarkovTarget.objects.get(name=key)
    except MarkovTarget.DoesNotExist:
        known_target = None

    if known_target is None:
        # nothing on record: the context has to exist before the target can point at it,
        # and lacking a better idea everything is simply named after the target
        irc_channel, _ = IrcChannel.objects.get_or_create(name=key, server=self.connection.server_config)
        fresh_context, _ = MarkovContext.objects.get_or_create(name=key)
        new_target, _ = MarkovTarget.objects.get_or_create(name=key, context=fresh_context, channel=irc_channel)
        return new_target.context

    try:
        return known_target.context
    except MarkovContext.DoesNotExist:
        # the target is there but its context is not; attach one named after the target
        fallback_context, _ = MarkovContext.objects.get_or_create(name=key)
        known_target.context = fallback_context
        known_target.save()
        return known_target.context
plugin = Markov

View File

@ -1,17 +1,16 @@
"""Provide methods for manipulating markov chain processing."""
import logging
import random
from django.db.models import Sum
from markov.models import MarkovContext, MarkovState, MarkovTarget
from markov.models import MarkovState
log = logging.getLogger('markov.lib')
log = logging.getLogger(__name__)
def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bias=2, max_tries=5):
"""String multiple sentences together into a coherent sentence."""
"""Combine multiple sentences together into a coherent sentence."""
tries = 0
line = []
min_words_per_sentence = min_words / sentence_bias
@ -23,7 +22,7 @@ def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bia
else:
if len(line) > 0:
if line[-1][-1] not in [',', '.', '!', '?', ':']:
line[-1] += random.choice(['?', '.', '!'])
line[-1] += random.SystemRandom().choice(['?', '.', '!'])
tries += 1
@ -33,7 +32,6 @@ def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bia
def generate_longish_sentence(context, topics=None, min_words=15, max_words=30, max_tries=100):
"""Generate a Markov chain, but throw away the short ones unless we get desperate."""
sent = ""
tries = 0
while tries < max_tries:
@ -52,20 +50,19 @@ def generate_longish_sentence(context, topics=None, min_words=15, max_words=30,
def generate_sentence(context, topics=None, min_words=15, max_words=30):
"""Generate a Markov chain."""
words = []
# if we have topics, try to work from it and work backwards
if topics:
topic_word = random.choice(topics)
topic_word = random.SystemRandom().choice(topics)
topics.remove(topic_word)
log.debug("looking for topic '{0:s}'".format(topic_word))
log.debug("looking for topic '%s'", topic_word)
new_states = MarkovState.objects.filter(context=context, v=topic_word)
if len(new_states) > 0:
log.debug("found '{0:s}', starting backwards".format(topic_word))
log.debug("found '%s', starting backwards", topic_word)
words.insert(0, topic_word)
while len(words) <= max_words and words[0] != MarkovState._start2:
log.debug("looking backwards for '{0:s}'".format(words[0]))
log.debug("looking backwards for '%s'", words[0])
new_states = MarkovState.objects.filter(context=context, v=words[0])
# if we find a start, use it
if MarkovState._start2 in new_states:
@ -87,7 +84,7 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):
i = len(words)
while words[-1] != MarkovState._stop:
log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
log.debug("looking for '%s','%s'", words[i-2], words[i-1])
new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
log.debug("states retrieved")
@ -103,7 +100,7 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):
words.append(MarkovState._stop)
elif len(target_hits) > 0:
# if there's a target word in the states, pick it
target_hit = random.choice(target_hits)
target_hit = random.SystemRandom().choice(target_hits)
log.debug("found a topic hit %s, using it", target_hit)
topics.remove(target_hit)
words.append(target_hit)
@ -127,36 +124,8 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):
return words
def get_or_create_target_context(target_name):
    """Fetch the markov context for a nick/channel, building any missing records.

    The name is lowercased before any lookup. A missing target gets a context and
    a target created under that name; an existing target whose context lookup
    fails gets a same-named context attached and saved.
    """
    key = target_name.lower()

    try:
        found = MarkovTarget.objects.get(name=key)
    except MarkovTarget.DoesNotExist:
        found = None

    if found is None:
        # the context must exist first so the new target can reference it
        made_context, _ = MarkovContext.objects.get_or_create(name=key)
        made_target, _ = MarkovTarget.objects.get_or_create(name=key, context=made_context)
        return made_target.context

    try:
        return found.context
    except MarkovContext.DoesNotExist:
        # lacking a better idea, name the replacement context after the target
        replacement, _ = MarkovContext.objects.get_or_create(name=key)
        found.context = replacement
        found.save()
        return found.context
def get_word_out_of_states(states, backwards=False):
"""Pick one random word out of the given states."""
# work around possible broken data, where a k1,k2 should have a value but doesn't
if len(states) == 0:
states = MarkovState.objects.filter(v=MarkovState._stop)
@ -168,9 +137,9 @@ def get_word_out_of_states(states, backwards=False):
# this being None probably means there's no data for this context
raise ValueError("no markov states to generate from")
hit = random.randint(0, count_sum)
hit = random.SystemRandom().randint(0, count_sum)
log.debug("sum: {0:d} hit: {1:d}".format(count_sum, hit))
log.debug("sum: %s hit: %s", count_sum, hit)
states_itr = states.iterator()
for state in states_itr:
@ -183,13 +152,12 @@ def get_word_out_of_states(states, backwards=False):
break
log.debug("found '{0:s}'".format(new_word))
log.debug("found '%s'", new_word)
return new_word
def learn_line(line, context):
"""Create a bunch of MarkovStates for a given line of text."""
log.debug("learning %s...", line[:40])
words = line.split()
@ -200,7 +168,7 @@ def learn_line(line, context):
return
for i, word in enumerate(words):
log.debug("'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2]))
log.debug("'%s','%s' -> '%s'", words[i], words[i+1], words[i+2])
state, created = MarkovState.objects.get_or_create(context=context,
k1=words[i],
k2=words[i+1],

View File

@ -0,0 +1 @@
"""Management operations for the markov plugin and models."""

View File

@ -0,0 +1 @@
"""Management commands for the markov plugin and models."""

View File

@ -0,0 +1,77 @@
"""Clean up learned chains with speaker nicks (from the bridge) or self (because the bridge broke the regex)."""
from django.core.management import BaseCommand
from ircbot.models import IrcChannel
from markov.models import MarkovContext, MarkovState
class Command(BaseCommand):
    """Find markov chains that erroneously have speaker/self nicks and remove them."""

    def handle(self, *args, **kwargs):
        """Scan the DB, looking for bad chains, and repair them.

        Only contexts that have a target named after a channel with a non-empty
        ``discord_bridge`` are scanned.
        """
        # compare against the channel names explicitly; handing IrcChannel instances to a
        # CharField lookup only works if the model's string form happens to be its name
        bridged_names = list(IrcChannel.objects.exclude(discord_bridge='').values_list('name', flat=True))
        # distinct() keeps a context with several matching targets from being scanned repeatedly
        markov_contexts = MarkovContext.objects.filter(markovtarget__name__in=bridged_names).distinct()
        for context in markov_contexts:
            self.stdout.write(self.style.NOTICE(f"scanning context {context}..."))
            # get starting states that look like they came over the bridge
            bridge_states = context.states.filter(k1=MarkovState._start1, k2=MarkovState._start2,
                                                  v__regex=r'<.*>')
            self._chain_remover(context, bridge_states)

    def _chain_remover(self, context, start_states):
        """Remove a given k from markov states, deleting the found states after rebuilding subsequent states.

        As in, if trying to remove A,B -> X, then B,X -> C and X,C -> D must be rebuilt (A,B -> C / B,C -> D)
        then the three states with X deleted.

        Args:
            context: the MarkovContext whose states are being repaired
            start_states: iterable of states (A,B -> X) whose value X should be cut out of the chain
        """
        for start_state in start_states:
            self.stdout.write(self.style.NOTICE(f" diving into {start_state}..."))
            # find the states that build off of the start
            second_states = context.states.filter(k1=start_state.k2, k2=start_state.v)
            for second_state in second_states:
                self.stdout.write(self.style.NOTICE(f" diving into {second_state}..."))
                # find the third states
                leaf_states = context.states.filter(k1=second_state.k2, k2=second_state.v)
                for leaf_state in leaf_states:
                    self.stdout.write(self.style.NOTICE(f" upserting state based on {leaf_state}"))
                    # get/update state without the nick from the bridge
                    self._upsert_state(context, second_state.k1, leaf_state.k2, leaf_state.v, leaf_state.count)

                    # remove the migrated leaf state
                    self.stdout.write(self.style.SUCCESS(f" deleting {leaf_state}"))
                    leaf_state.delete()

                # take care of the new middle state
                self.stdout.write(self.style.NOTICE(f" upserting state based on {second_state}"))
                self._upsert_state(context, start_state.k1, start_state.k2, second_state.v, second_state.count)

                # remove the migrated second state
                self.stdout.write(self.style.SUCCESS(f" deleting {second_state}"))
                second_state.delete()

            # remove the dead end original start
            self.stdout.write(self.style.SUCCESS(f" deleting {start_state}"))
            start_state.delete()

    def _upsert_state(self, context, k1, k2, v, count):
        """Fold ``count`` into the k1,k2 -> v state of ``context``, creating that state if missing.

        The lookup is restricted to ``context`` so that an identical chain in a different
        context is neither modified nor able to make the lookup ambiguous.

        Returns:
            the updated or newly created MarkovState
        """
        try:
            state = context.states.get(k1=k1, k2=k2, v=v)
        except MarkovState.DoesNotExist:
            state = MarkovState.objects.create(k1=k1, k2=k2, v=v, context=context, count=count)
            self.stdout.write(self.style.SUCCESS(f" created {state}"))
        else:
            state.count += count
            state.save()
            self.stdout.write(self.style.SUCCESS(f" updated count for {state}"))
        return state

View File

@ -0,0 +1,19 @@
# Generated by Django 3.2.18 on 2023-02-19 19:00
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Redefine the ``context`` foreign key on ``markovstate`` with ``related_name='states'``."""

    # must run after the previous markov migration
    dependencies = [
        ('markov', '0003_auto_20161112_2348'),
    ]

    operations = [
        # the key still points at markov.markovcontext and cascades on delete
        migrations.AlterField(
            model_name='markovstate',
            name='context',
            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='states', to='markov.markovcontext'),
        ),
    ]

View File

@ -0,0 +1,20 @@
# Generated by Django 3.2.18 on 2023-02-19 23:14
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Add a nullable ``channel`` foreign key from ``markovtarget`` to ``ircbot.ircchannel``."""

    # depends on both apps, since the new key crosses from markov into ircbot
    dependencies = [
        ('ircbot', '0019_ircchannel_discord_bridge'),
        ('markov', '0004_alter_markovstate_context'),
    ]

    operations = [
        # null is allowed; deleting the referenced channel cascades to the target
        migrations.AddField(
            model_name='markovtarget',
            name='channel',
            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='ircbot.ircchannel'),
        ),
    ]

View File

@ -3,6 +3,8 @@ import logging
from django.db import models
from ircbot.models import IrcChannel
log = logging.getLogger(__name__)
@ -21,6 +23,7 @@ class MarkovTarget(models.Model):
name = models.CharField(max_length=200, unique=True)
context = models.ForeignKey(MarkovContext, on_delete=models.CASCADE)
channel = models.ForeignKey(IrcChannel, null=True, on_delete=models.CASCADE)
chatter_chance = models.IntegerField(default=0)
@ -41,7 +44,7 @@ class MarkovState(models.Model):
v = models.CharField(max_length=128)
count = models.IntegerField(default=0)
context = models.ForeignKey(MarkovContext, on_delete=models.CASCADE)
context = models.ForeignKey(MarkovContext, on_delete=models.CASCADE, related_name='states')
class Meta:
"""Options for the model itself."""

View File

@ -0,0 +1,114 @@
"""Test IRC behavior of the markov plugin."""
from unittest import mock
from django.test import TestCase
from ircbot.models import IrcChannel, IrcServer
from markov.ircplugin import Markov
class MarkovTestCase(TestCase):
    """Exercise the markov plugin's chatter learning and target bookkeeping."""

    fixtures = ['tests/fixtures/irc_server_fixture.json']

    def setUp(self):
        """Build a plugin wired to a mocked bot and a mocked connection named 'test_bot'."""
        self.mock_bot = mock.MagicMock()
        self.mock_connection = mock.MagicMock()
        self.mock_connection.get_nickname.return_value = 'test_bot'
        self.mock_connection.server_config = IrcServer.objects.get(pk=1)

        self.plugin = Markov(self.mock_bot, self.mock_connection, mock.MagicMock())

    def _line_learned_from(self, message, source=None, patch_generate=False):
        """Feed one '#test' chatter event to the plugin and return the text handed to learn_line.

        ``source`` is only set on the event when given; ``patch_generate`` additionally
        patches out markov.lib.generate_line for the duration of the call.
        """
        chatter = mock.MagicMock()
        chatter.arguments = [message]
        chatter.target = '#test'
        chatter.recursing = False
        if source is not None:
            chatter.source = source

        with mock.patch('markov.lib.learn_line') as mock_learn_line:
            if patch_generate:
                with mock.patch('markov.lib.generate_line'):
                    self.plugin.handle_chatter(self.mock_connection, chatter)
            else:
                self.plugin.handle_chatter(self.mock_connection, chatter)

        return mock_learn_line.call_args.args[0]

    def test_learn(self):
        """A plain message is learned verbatim."""
        learned = self._line_learned_from('hello this is a test message')

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_self_edit(self):
        """A leading 'test_bot:' address is dropped from what is learned."""
        learned = self._line_learned_from('test_bot: hello this is a test message', patch_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_variant_self_edit(self):
        """A leading '@test_bot' address is dropped from what is learned."""
        learned = self._line_learned_from('@test_bot hello this is a test message', patch_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_bridge_edit(self):
        """The '<tester>' speaker prefix on a message sourced from 'bridge' is not learned."""
        learned = self._line_learned_from('<tester> hello this is a test message',
                                          source='bridge!bridge@localhost')

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_bridge_and_self_edit(self):
        """Both the '<tester>' prefix and a following 'test_bot:' address are dropped."""
        learned = self._line_learned_from('<tester> test_bot: hello this is a test message',
                                          source='bridge!bridge@localhost', patch_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_bridge_and_variant_self_edit(self):
        """Both the '<tester>' prefix and a following '@test_bot' address are dropped."""
        learned = self._line_learned_from('<tester> @test_bot hello this is a test message',
                                          source='bridge!bridge@localhost', patch_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_autocreate_ircchannel(self):
        """Asking for an unseen target's context creates the channel and target records."""
        fake_channels = IrcChannel.objects.filter(name='#fakechannel')
        self.assertEqual(fake_channels.count(), 0)

        context = self.plugin.get_or_create_target_context('#fakechannel')

        self.assertEqual(fake_channels.count(), 1)
        self.assertIsNotNone(context)
        self.assertIsNotNone(context.markovtarget_set)
        first_target = context.markovtarget_set.all()[0]
        self.assertIsNotNone(first_target.channel)
        self.assertEqual(first_target.channel.name, '#fakechannel')
        self.assertEqual(first_target.name, '#fakechannel')

View File

@ -194,5 +194,6 @@ python_files =
*_tests.py
tests.py
test_*.py
log_level=DEBUG
DJANGO_SETTINGS_MODULE = dr_botzo.settings
django_find_project = false