Compare commits

...

9 Commits

15 changed files with 397 additions and 38 deletions

1
.gitignore vendored
View File

@ -15,7 +15,6 @@ megahal.*
dr.botzo.log
dr.botzo.markov
*.facts
*.json
*.log
*.pyc
*.sqlite3

View File

@ -7,6 +7,7 @@ import irc.client
import markov.lib as markovlib
from ircbot.lib import Plugin, reply_destination_for_event
from ircbot.models import IrcChannel
from markov.models import MarkovContext, MarkovTarget
log = logging.getLogger('markov.ircplugin')
@ -40,7 +41,7 @@ class Markov(Plugin):
min_size = 15
max_size = 30
context = markovlib.get_or_create_target_context(target)
context = self.get_or_create_target_context(target)
if match.group(2):
min_size = int(match.group(2))
@ -60,10 +61,10 @@ class Markov(Plugin):
def handle_chatter(self, connection, event):
"""Learn from IRC chatter."""
what = event.arguments[0]
nick = irc.client.NickMask(event.source).nick
who = irc.client.NickMask(event.source).nick
target = reply_destination_for_event(event)
log.debug("what: '%s', nick: '%s', target: '%s'", what, nick, target)
log.debug("what: '%s', who: '%s', target: '%s'", what, who, target)
# check to see whether or not we should learn from this channel
channel = None
if irc.client.is_channel(target):
@ -73,21 +74,29 @@ class Markov(Plugin):
log.debug("not learning from %s as i've been told to ignore it", channel)
else:
# learn the line
learning_what = what
# don't learn the speaker's nick if this came over a bridge
if channel and who == channel.discord_bridge:
learning_what = ' '.join(learning_what.split(' ')[1:])
# remove our own nick and aliases from what we learn
if connection.server_config.additional_addressed_nicks:
all_nicks = '|'.join(connection.server_config.additional_addressed_nicks.split('\n') +
[connection.get_nickname()])
else:
all_nicks = connection.get_nickname()
trimmed_what = re.sub(r'^(({nicks})[:,]|@({nicks}))\s+'.format(nicks=all_nicks), '', what)
learning_what = re.sub(r'^(({nicks})[:,]|@({nicks}))\s+'.format(nicks=all_nicks), '', learning_what)
recursing = getattr(event, 'recursing', False)
if not recursing:
log.debug("learning %s", trimmed_what)
context = markovlib.get_or_create_target_context(target)
markovlib.learn_line(trimmed_what, context)
log.debug("learning %s", learning_what)
context = self.get_or_create_target_context(target)
markovlib.learn_line(learning_what, context)
log.debug("searching '%s' for '%s'", what, all_nicks)
if re.search(all_nicks, what, re.IGNORECASE) is not None:
context = markovlib.get_or_create_target_context(target)
context = self.get_or_create_target_context(target)
addressed_pattern = r'^(({nicks})[:,]|@({nicks}))\s+(?P<addressed_msg>.*)'.format(nicks=all_nicks)
match = re.match(addressed_pattern, what, re.IGNORECASE)
@ -97,7 +106,7 @@ class Markov(Plugin):
topics = [x for x in match.group('addressed_msg').split(' ') if len(x) >= 3]
return self.bot.reply(event, "{0:s}: {1:s}"
"".format(nick, " ".join(markovlib.generate_line(context, topics=topics))))
"".format(who, " ".join(markovlib.generate_line(context, topics=topics))))
else:
# i wasn't addressed directly, so just respond
topics = [x for x in what.split(' ') if len(x) >= 3]
@ -105,5 +114,31 @@ class Markov(Plugin):
return self.bot.reply(event, "{0:s}"
"".format(" ".join(markovlib.generate_line(context, topics=topics))))
def get_or_create_target_context(self, target_name):
    """Look up the markov context for a nick/channel, building any missing records.

    The name is lowercased before any lookup. When no MarkovTarget with that name
    exists, an IrcChannel (on this connection's server), a MarkovContext, and a
    MarkovTarget are fetched-or-created, all sharing the lowercased name. When the
    target exists but reading its context raises MarkovContext.DoesNotExist, a
    context named after the target is fetched-or-created and attached to it.

    Args:
        target_name: the nick or channel name to find the context for
    Returns:
        the MarkovContext associated with the target
    """
    name = target_name.lower()

    try:
        target = MarkovTarget.objects.get(name=name)
    except MarkovTarget.DoesNotExist:
        # first time seeing this target: the target row needs both a channel and a context,
        # so those are made first --- lacking a better idea, everything is named after the target
        channel, _ = IrcChannel.objects.get_or_create(name=name, server=self.connection.server_config)
        context, _ = MarkovContext.objects.get_or_create(name=name)
        target, _ = MarkovTarget.objects.get_or_create(name=name, context=context, channel=channel)
        return target.context

    try:
        return target.context
    except MarkovContext.DoesNotExist:
        # the target is there but its context is not; give it one named after the target
        context, _ = MarkovContext.objects.get_or_create(name=name)
        target.context = context
        target.save()
        return target.context
plugin = Markov

View File

@ -4,7 +4,7 @@ import random
from django.db.models import Sum
from markov.models import MarkovContext, MarkovState, MarkovTarget
from markov.models import MarkovState
log = logging.getLogger(__name__)
@ -124,32 +124,6 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):
return words
def get_or_create_target_context(target_name):
    """Look up the markov context for a nick/channel, building any missing records.

    The name is lowercased before any lookup. When no MarkovTarget with that name
    exists, a MarkovContext and then a MarkovTarget are fetched-or-created with the
    lowercased name. When the target exists but reading its context raises
    MarkovContext.DoesNotExist, a context named after the target is attached to it.

    Args:
        target_name: the nick or channel name to find the context for
    Returns:
        the MarkovContext associated with the target
    """
    name = target_name.lower()

    try:
        target = MarkovTarget.objects.get(name=name)
    except MarkovTarget.DoesNotExist:
        # the target needs a context to point at, so the context has to be made first
        # --- lacking a better idea, it is simply named after the target
        context, _ = MarkovContext.objects.get_or_create(name=name)
        target, _ = MarkovTarget.objects.get_or_create(name=name, context=context)
        return target.context

    try:
        return target.context
    except MarkovContext.DoesNotExist:
        # the target is there but its context is not; give it one named after the target
        context, _ = MarkovContext.objects.get_or_create(name=name)
        target.context = context
        target.save()
        return target.context
def get_word_out_of_states(states, backwards=False):
"""Pick one random word out of the given states."""
# work around possible broken data, where a k1,k2 should have a value but doesn't

View File

@ -0,0 +1 @@
"""Management operations for the markov plugin and models."""

View File

@ -0,0 +1 @@
"""Management commands for the markov plugin and models."""

View File

@ -0,0 +1,77 @@
"""Clean up learned chains with speaker nicks (from the bridge) or self (because the bridge broke the regex)."""
from django.core.management import BaseCommand
from ircbot.models import IrcChannel
from markov.models import MarkovContext, MarkovState
class Command(BaseCommand):
    """Find markov chains that erroneously have speaker/self nicks and remove them."""

    def handle(self, *args, **kwargs):
        """Scan the DB, looking for bad chains, and repair them.

        Only contexts that have a markov target attached to a channel with a non-empty
        discord_bridge are scanned. In each, the sentence-starting states whose value
        matches the regex '<.*>' are handed to the chain remover.
        """
        candidate_channels = IrcChannel.objects.exclude(discord_bridge='')
        # follow the target -> channel relation rather than comparing target names against
        # channel objects, which would depend on how a channel happens to render as text;
        # distinct() keeps a context shared by several targets from being scanned twice
        markov_contexts = MarkovContext.objects.filter(markovtarget__channel__in=candidate_channels).distinct()
        for context in markov_contexts:
            self.stdout.write(self.style.NOTICE(f"scanning context {context}..."))
            # get starting states that look like they came over the bridge
            bridge_states = context.states.filter(k1=MarkovState._start1, k2=MarkovState._start2,
                                                  v__regex=r'<.*>')
            self._chain_remover(context, bridge_states)

    def _chain_remover(self, context, start_states):
        """Remove a given k from markov states, deleting the found states after rebuilding subsequent states.

        As in, if trying to remove A,B -> X, then B,X -> C and X,C -> D must be rebuilt (A,B -> C / B,C -> D)
        then the three states with X deleted.

        Args:
            context: the MarkovContext whose states are being repaired
            start_states: the states (A,B -> X) whose value X should be cut out of the chains
        """
        for start_state in start_states:
            self.stdout.write(self.style.NOTICE(f" diving into {start_state}..."))
            # find the states that build off of the start
            second_states = context.states.filter(k1=start_state.k2, k2=start_state.v)
            for second_state in second_states:
                self.stdout.write(self.style.NOTICE(f" diving into {second_state}..."))
                # find the third states
                leaf_states = context.states.filter(k1=second_state.k2, k2=second_state.v)
                for leaf_state in leaf_states:
                    self.stdout.write(self.style.NOTICE(f" upserting state based on {leaf_state}"))
                    # get/update state without the nick from the bridge
                    self._merge_state(context, second_state.k1, leaf_state.k2, leaf_state.v, leaf_state.count)

                    # remove the migrated leaf state
                    self.stdout.write(self.style.SUCCESS(f" deleting {leaf_state}"))
                    leaf_state.delete()

                # take care of the new middle state
                self.stdout.write(self.style.NOTICE(f" upserting state based on {second_state}"))
                self._merge_state(context, start_state.k1, start_state.k2, second_state.v, second_state.count)

                # remove the migrated second state
                self.stdout.write(self.style.SUCCESS(f" deleting {second_state}"))
                second_state.delete()

            # remove the dead end original start
            self.stdout.write(self.style.SUCCESS(f" deleting {start_state}"))
            start_state.delete()

    def _merge_state(self, context, k1, k2, v, count):
        """Fold count into the k1,k2 -> v state of the context, creating that state if it is missing.

        The lookup is restricted to the given context so that an identical chain learned
        in a different context is never modified (or found more than once).
        """
        try:
            state = context.states.get(k1=k1, k2=k2, v=v)
        except MarkovState.DoesNotExist:
            state = MarkovState.objects.create(k1=k1, k2=k2, v=v, count=count, context=context)
            self.stdout.write(self.style.SUCCESS(f" created {state}"))
        else:
            state.count += count
            state.save()
            self.stdout.write(self.style.SUCCESS(f" updated count for {state}"))

View File

@ -0,0 +1,19 @@
# Generated by Django 3.2.18 on 2023-02-19 19:00
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Redefine MarkovState.context with a related_name of 'states'."""

    dependencies = [('markov', '0003_auto_20161112_2348')]

    operations = [
        migrations.AlterField(
            model_name='markovstate',
            name='context',
            field=models.ForeignKey(
                to='markov.markovcontext',
                on_delete=django.db.models.deletion.CASCADE,
                related_name='states',
            ),
        ),
    ]

View File

@ -0,0 +1,20 @@
# Generated by Django 3.2.18 on 2023-02-20 00:09
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Add a nullable channel foreign key (to ircbot.IrcChannel) on MarkovTarget."""

    dependencies = [
        ('ircbot', '0019_ircchannel_discord_bridge'),
        ('markov', '0004_alter_markovstate_context'),
    ]

    operations = [
        migrations.AddField(
            model_name='markovtarget',
            name='channel',
            field=models.ForeignKey(
                to='ircbot.ircchannel',
                on_delete=django.db.models.deletion.CASCADE,
                null=True,
            ),
        ),
    ]

View File

@ -0,0 +1,24 @@
"""Generated by Django 3.2.18 on 2023-02-19 23:15."""
from django.db import migrations
def link_markovcontext_to_ircchannel(apps, schema_editor):
    """Point every markov target at the IRC channel that shares its name.

    The models are taken from the migration app registry. Each target's channel is
    found with a single-object lookup by name, so a target with no same-named channel
    (or with more than one) makes that lookup raise and stops the migration.
    """
    channel_model = apps.get_model('ircbot', 'IrcChannel')
    target_model = apps.get_model('markov', 'MarkovTarget')

    for markov_target in target_model.objects.all():
        markov_target.channel = channel_model.objects.get(name=markov_target.name)
        markov_target.save()
class Migration(migrations.Migration):
    """Populate the markov target to IRC channel link."""

    dependencies = [
        ('markov', '0005_markovtarget_channel'),
    ]

    operations = [
        # the forward step only fills in the channel column added by 0005, so there is nothing
        # to undo when unapplying; the no-op reverse keeps this migration reversible instead of
        # blocking any rollback past it
        migrations.RunPython(link_markovcontext_to_ircchannel, migrations.RunPython.noop),
    ]

View File

@ -0,0 +1,20 @@
# Generated by Django 3.2.18 on 2023-02-20 00:11
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Redefine MarkovTarget.channel without null=True, once the links have been populated."""

    dependencies = [
        ('ircbot', '0019_ircchannel_discord_bridge'),
        ('markov', '0006_link_markovtarget_to_ircchannel'),
    ]

    operations = [
        migrations.AlterField(
            model_name='markovtarget',
            name='channel',
            field=models.ForeignKey(
                to='ircbot.ircchannel',
                on_delete=django.db.models.deletion.CASCADE,
            ),
        ),
    ]

View File

@ -3,6 +3,8 @@ import logging
from django.db import models
from ircbot.models import IrcChannel
log = logging.getLogger(__name__)
@ -21,6 +23,7 @@ class MarkovTarget(models.Model):
name = models.CharField(max_length=200, unique=True)
context = models.ForeignKey(MarkovContext, on_delete=models.CASCADE)
channel = models.ForeignKey(IrcChannel, on_delete=models.CASCADE)
chatter_chance = models.IntegerField(default=0)
@ -41,7 +44,7 @@ class MarkovState(models.Model):
v = models.CharField(max_length=128)
count = models.IntegerField(default=0)
context = models.ForeignKey(MarkovContext, on_delete=models.CASCADE)
context = models.ForeignKey(MarkovContext, on_delete=models.CASCADE, related_name='states')
class Meta:
"""Options for the model itself."""

36
tests/fixtures/irc_server_fixture.json vendored Normal file
View File

@ -0,0 +1,36 @@
[
{
"model": "ircbot.ircserver",
"pk": 1,
"fields": {
"name": "Localhost",
"hostname": "localhost",
"port": 6697,
"password": null,
"nickname": "test_bot",
"realname": "test_bot",
"additional_addressed_nicks": "",
"use_ssl": true,
"use_ipv6": true,
"post_connect": "",
"delay_before_joins": 5,
"xmlrpc_host": "localhost",
"xmlrpc_port": 13132,
"replace_irc_control_with_markdown": false
}
},
{
"model": "ircbot.ircchannel",
"pk": 1,
"fields": {
"name": "#test",
"server": 1,
"autojoin": true,
"topic_msg": "",
"topic_time": "2022-09-09T18:28:29Z",
"topic_by": "",
"markov_learn_from_channel": true,
"discord_bridge": "bridge"
}
}
]

19
tests/fixtures/markov_fixture.json vendored Normal file
View File

@ -0,0 +1,19 @@
[
{
"model": "markov.markovcontext",
"pk": 1,
"fields": {
"name": "#factory"
}
},
{
"model": "markov.markovtarget",
"pk": 1,
"fields": {
"name": "#factory",
"context": 1,
"channel": 71,
"chatter_chance": 0
}
}
]

View File

@ -0,0 +1,114 @@
"""Test IRC behavior of the markov plugin."""
from unittest import mock
from django.test import TestCase
from ircbot.models import IrcChannel, IrcServer
from markov.ircplugin import Markov
class MarkovTestCase(TestCase):
    """Exercise the markov IRC plugin's learning and target bookkeeping."""

    fixtures = ['tests/fixtures/irc_server_fixture.json']

    def setUp(self):
        """Build a plugin wired to a mocked bot and a mocked connection for the fixture server."""
        self.mock_bot = mock.MagicMock()
        self.mock_connection = mock.MagicMock()
        self.mock_connection.get_nickname.return_value = 'test_bot'
        self.mock_connection.server_config = IrcServer.objects.get(pk=1)

        self.plugin = Markov(self.mock_bot, self.mock_connection, mock.MagicMock())

    def _learned_text(self, message, source=None, stub_generate=False):
        """Feed one #test chatter event to the plugin and return the text handed to learn_line.

        The event's source is only set when one is given; generate_line is only patched
        out when stub_generate is true.
        """
        event = mock.MagicMock()
        event.arguments = [message]
        event.target = '#test'
        event.recursing = False
        if source is not None:
            event.source = source

        with mock.patch('markov.lib.learn_line') as learn_line:
            if stub_generate:
                with mock.patch('markov.lib.generate_line'):
                    self.plugin.handle_chatter(self.mock_connection, event)
            else:
                self.plugin.handle_chatter(self.mock_connection, event)

        return learn_line.call_args.args[0]

    def test_learn(self):
        """A plain channel message is learned verbatim."""
        learned = self._learned_text('hello this is a test message')

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_self_edit(self):
        """A 'nick:' prefix addressing the bot is stripped before learning."""
        learned = self._learned_text('test_bot: hello this is a test message', stub_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_variant_self_edit(self):
        """An '@nick' prefix addressing the bot (discord style) is stripped before learning."""
        learned = self._learned_text('@test_bot hello this is a test message', stub_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_bridge_edit(self):
        """The '<speaker>' prefix on a message relayed by the bridge is not learned."""
        learned = self._learned_text('<tester> hello this is a test message',
                                     source='bridge!bridge@localhost')

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_bridge_and_self_edit(self):
        """Both the bridge's '<speaker>' prefix and a 'nick:' address are stripped before learning."""
        learned = self._learned_text('<tester> test_bot: hello this is a test message',
                                     source='bridge!bridge@localhost', stub_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_learn_bridge_and_variant_self_edit(self):
        """Both the bridge's '<speaker>' prefix and an '@nick' address are stripped before learning."""
        learned = self._learned_text('<tester> @test_bot hello this is a test message',
                                     source='bridge!bridge@localhost', stub_generate=True)

        self.assertEqual(learned, 'hello this is a test message')

    def test_autocreate_ircchannel(self):
        """Asking for the context of an unseen target creates the channel, context, and target."""
        fake_channels = IrcChannel.objects.filter(name='#fakechannel')
        self.assertEqual(fake_channels.count(), 0)

        context = self.plugin.get_or_create_target_context('#fakechannel')

        self.assertEqual(IrcChannel.objects.filter(name='#fakechannel').count(), 1)
        self.assertIsNotNone(context)
        self.assertIsNotNone(context.markovtarget_set)
        first_target = context.markovtarget_set.all()[0]
        self.assertIsNotNone(first_target.channel)
        self.assertEqual(first_target.channel.name, '#fakechannel')
        self.assertEqual(first_target.name, '#fakechannel')

17
tests/test_markov_lib.py Normal file
View File

@ -0,0 +1,17 @@
"""Test markov utility methods."""
from django.test import TestCase
from markov.lib import get_word_out_of_states, learn_line
from markov.models import MarkovContext, MarkovState
class MarkovLibTestCase(TestCase):
    """Exercise the library helpers behind the Markov plugin."""

    fixtures = [
        'tests/fixtures/irc_server_fixture.json',
        'tests/fixtures/markov_fixture.json',
    ]

    def test_learn_and_get(self):
        """After learning one line, a word picked from the stored states is one of that line's tokens."""
        context = MarkovContext.objects.get(pk=1)
        learn_line("the elephant goes ERRRRRRRRRRRRR", context)

        candidates = ['the', 'elephant', 'goes', 'ERRRRRRRRRRRRR', '__stop']
        picked = get_word_out_of_states(MarkovState.objects.all())

        self.assertIn(picked, candidates)