test the ability to not learn our nick when addressed

remove the speaker from messages coming over the bridge when learning
variable tweak to match other plugins (nick -> who)
2023-02-18 20:01:58 -06:00 · 2023-02-18 20:01:58 -06:00 · 2023-02-18 20:01:58 -06:00 · 2023-02-18 20:01:58 -06:00 · 2023-02-18 20:01:58 -06:00 · 2023-02-18 20:01:58 -06:00
6 changed files with 124 additions and 30 deletions
--- a/ircbot/migrations/0019_ircchannel_discord_bridge.py
+++ b/ircbot/migrations/0019_ircchannel_discord_bridge.py
@ -0,0 +1,18 @@
+# Generated by Django 3.2.18 on 2023-02-16 22:38
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('ircbot', '0018_ircserver_replace_irc_control_with_markdown'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='ircchannel',
+            name='discord_bridge',
+            field=models.CharField(blank=True, default='', max_length=32),
+        ),
+    ]
--- a/ircbot/models.py
+++ b/ircbot/models.py
@ -104,6 +104,8 @@ class IrcChannel(models.Model):

    markov_learn_from_channel = models.BooleanField(default=True)

+    discord_bridge = models.CharField(default='', max_length=32, blank=True)
+
    class Meta:
        """Settings for the model."""

--- a/markov/ircplugin.py
+++ b/markov/ircplugin.py
@ -60,15 +60,10 @@ class Markov(Plugin):
    def handle_chatter(self, connection, event):
        """Learn from IRC chatter."""
        what = event.arguments[0]
-        if connection.server_config.additional_addressed_nicks:
-            all_nicks = '|'.join(connection.server_config.additional_addressed_nicks.split('\n') +
-                                 [connection.get_nickname()])
-        else:
-            all_nicks = connection.get_nickname()
-        trimmed_what = re.sub(r'^(({nicks})[:,]|@({nicks}))\s+'.format(nicks=all_nicks), '', what)
-        nick = irc.client.NickMask(event.source).nick
+        who = irc.client.NickMask(event.source).nick
        target = reply_destination_for_event(event)

+        log.debug("what: '%s', who: '%s', target: '%s'", what, who, target)
        # check to see whether or not we should learn from this channel
        channel = None
        if irc.client.is_channel(target):
@ -78,11 +73,23 @@ class Markov(Plugin):
            log.debug("not learning from %s as i've been told to ignore it", channel)
        else:
            # learn the line
+            # remove our own nick and aliases from what we learn
+            if connection.server_config.additional_addressed_nicks:
+                all_nicks = '|'.join(connection.server_config.additional_addressed_nicks.split('\n') +
+                                     [connection.get_nickname()])
+            else:
+                all_nicks = connection.get_nickname()
+            what = re.sub(r'^(({nicks})[:,]|@({nicks}))\s+'.format(nicks=all_nicks), '', what)
+
+            # don't learn the speaker's nick if this came over a bridge
+            if channel and who == channel.discord_bridge:
+                what = ' '.join(what.split(' ')[1:])
+
            recursing = getattr(event, 'recursing', False)
            if not recursing:
-                log.debug("learning %s", trimmed_what)
+                log.debug("learning %s", what)
                context = markovlib.get_or_create_target_context(target)
-                markovlib.learn_line(trimmed_what, context)
+                markovlib.learn_line(what, context)

        log.debug("searching '%s' for '%s'", what, all_nicks)
        if re.search(all_nicks, what, re.IGNORECASE) is not None:
@ -96,7 +103,7 @@ class Markov(Plugin):
                topics = [x for x in match.group('addressed_msg').split(' ') if len(x) >= 3]

                return self.bot.reply(event, "{0:s}: {1:s}"
-                                      "".format(nick, " ".join(markovlib.generate_line(context, topics=topics))))
+                                      "".format(who, " ".join(markovlib.generate_line(context, topics=topics))))
            else:
                # i wasn't addressed directly, so just respond
                topics = [x for x in what.split(' ') if len(x) >= 3]
--- a/markov/lib.py
+++ b/markov/lib.py
@ -1,3 +1,4 @@
+"""Provide methods for manipulating markov chain processing."""
 import logging
 import random

@ -5,13 +6,11 @@ from django.db.models import Sum

 from markov.models import MarkovContext, MarkovState, MarkovTarget

-
-log = logging.getLogger('markov.lib')
+log = logging.getLogger(__name__)


 def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bias=2, max_tries=5):
-    """String multiple sentences together into a coherent sentence."""
-
+    """Combine multiple sentences together into a coherent sentence."""
    tries = 0
    line = []
    min_words_per_sentence = min_words / sentence_bias
@ -23,7 +22,7 @@ def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bia
        else:
            if len(line) > 0:
                if line[-1][-1] not in [',', '.', '!', '?', ':']:
-                    line[-1] += random.choice(['?', '.', '!'])
+                    line[-1] += random.SystemRandom().choice(['?', '.', '!'])

        tries += 1

@ -33,7 +32,6 @@ def generate_line(context, topics=None, min_words=15, max_words=30, sentence_bia

 def generate_longish_sentence(context, topics=None, min_words=15, max_words=30, max_tries=100):
    """Generate a Markov chain, but throw away the short ones unless we get desperate."""
-
    sent = ""
    tries = 0
    while tries < max_tries:
@ -52,20 +50,19 @@ def generate_longish_sentence(context, topics=None, min_words=15, max_words=30,

 def generate_sentence(context, topics=None, min_words=15, max_words=30):
    """Generate a Markov chain."""
-
    words = []
    # if we have topics, try to work from it and work backwards
    if topics:
-        topic_word = random.choice(topics)
+        topic_word = random.SystemRandom().choice(topics)
        topics.remove(topic_word)
-        log.debug("looking for topic '{0:s}'".format(topic_word))
+        log.debug("looking for topic '%s'", topic_word)
        new_states = MarkovState.objects.filter(context=context, v=topic_word)

        if len(new_states) > 0:
-            log.debug("found '{0:s}', starting backwards".format(topic_word))
+            log.debug("found '%s', starting backwards", topic_word)
            words.insert(0, topic_word)
            while len(words) <= max_words and words[0] != MarkovState._start2:
-                log.debug("looking backwards for '{0:s}'".format(words[0]))
+                log.debug("looking backwards for '%s'", words[0])
                new_states = MarkovState.objects.filter(context=context, v=words[0])
                # if we find a start, use it
                if MarkovState._start2 in new_states:
@ -87,7 +84,7 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):

    i = len(words)
    while words[-1] != MarkovState._stop:
-        log.debug("looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
+        log.debug("looking for '%s','%s'", words[i-2], words[i-1])
        new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
        log.debug("states retrieved")

@ -103,7 +100,7 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):
            words.append(MarkovState._stop)
        elif len(target_hits) > 0:
            # if there's a target word in the states, pick it
-            target_hit = random.choice(target_hits)
+            target_hit = random.SystemRandom().choice(target_hits)
            log.debug("found a topic hit %s, using it", target_hit)
            topics.remove(target_hit)
            words.append(target_hit)
@ -129,7 +126,6 @@ def generate_sentence(context, topics=None, min_words=15, max_words=30):

 def get_or_create_target_context(target_name):
    """Return the context for a provided nick/channel, creating missing ones."""
-
    target_name = target_name.lower()

    # find the stuff, or create it
@ -156,7 +152,6 @@ def get_or_create_target_context(target_name):

 def get_word_out_of_states(states, backwards=False):
    """Pick one random word out of the given states."""
-
    # work around possible broken data, where a k1,k2 should have a value but doesn't
    if len(states) == 0:
        states = MarkovState.objects.filter(v=MarkovState._stop)
@ -168,9 +163,9 @@ def get_word_out_of_states(states, backwards=False):
        # this being None probably means there's no data for this context
        raise ValueError("no markov states to generate from")

-    hit = random.randint(0, count_sum)
+    hit = sysrand.randint(0, count_sum)

-    log.debug("sum: {0:d} hit: {1:d}".format(count_sum, hit))
+    log.debug("sum: %s hit: %s", count_sum, hit)

    states_itr = states.iterator()
    for state in states_itr:
@ -183,13 +178,12 @@ def get_word_out_of_states(states, backwards=False):

            break

-    log.debug("found '{0:s}'".format(new_word))
+    log.debug("found '%s'", new_word)
    return new_word


 def learn_line(line, context):
    """Create a bunch of MarkovStates for a given line of text."""
-
    log.debug("learning %s...", line[:40])

    words = line.split()
@ -200,7 +194,7 @@ def learn_line(line, context):
            return

    for i, word in enumerate(words):
-        log.debug("'{0:s}','{1:s}' -> '{2:s}'".format(words[i], words[i+1], words[i+2]))
+        log.debug("'%s','%s' -> '%s'", words[i], words[i+1], words[i+2])
        state, created = MarkovState.objects.get_or_create(context=context,
                                                           k1=words[i],
                                                           k2=words[i+1],
--- a/tests/test_markov_ircplugin.py
+++ b/tests/test_markov_ircplugin.py
@ -0,0 +1,72 @@
+"""Test IRC behavior of the markov plugin."""
+from unittest import mock
+
+from django.test import TestCase
+
+from ircbot.models import IrcServer
+from markov.ircplugin import Markov
+
+
+class MarkovTestCase(TestCase):
+    """Test the markov plugin."""
+
+    fixtures = ['tests/fixtures/irc_server_fixture.json']
+
+    def setUp(self):
+        """Create common objects."""
+        self.mock_bot = mock.MagicMock()
+        self.mock_connection = mock.MagicMock()
+
+        self.mock_connection.get_nickname.return_value = 'test_bot'
+        self.mock_connection.server_config = IrcServer.objects.get(pk=1)
+
+        self.plugin = Markov(self.mock_bot, self.mock_connection, mock.MagicMock())
+
+    def test_learn(self):
+        """Test that an IRC event triggers learning as expected."""
+        mock_event = mock.MagicMock()
+        mock_event.arguments = ['hello this is a test message']
+        mock_event.target = '#test'
+        mock_event.recursing = False
+
+        with mock.patch('markov.lib.learn_line') as mock_learn_line:
+            self.plugin.handle_chatter(self.mock_connection, mock_event)
+
+        self.assertEqual(mock_learn_line.call_args.args[0], 'hello this is a test message')
+
+    def test_learn_self_edit(self):
+        """Test that we don't learn our own name when learning something addressed to us."""
+        mock_event = mock.MagicMock()
+        mock_event.arguments = ['test_bot: hello this is a test message']
+        mock_event.target = '#test'
+        mock_event.recursing = False
+
+        with mock.patch('markov.lib.learn_line') as mock_learn_line:
+            self.plugin.handle_chatter(self.mock_connection, mock_event)
+
+        self.assertEqual(mock_learn_line.call_args.args[0], 'hello this is a test message')
+
+    def test_learn_variant_self_edit(self):
+        """Test that we don't learn our own name when learning something addressed to us, discord style."""
+        mock_event = mock.MagicMock()
+        mock_event.arguments = ['@test_bot hello this is a test message']
+        mock_event.target = '#test'
+        mock_event.recursing = False
+
+        with mock.patch('markov.lib.learn_line') as mock_learn_line:
+            self.plugin.handle_chatter(self.mock_connection, mock_event)
+
+        self.assertEqual(mock_learn_line.call_args.args[0], 'hello this is a test message')
+
+    def test_learn_bridge_edit(self):
+        """Test that we don't learn the speaker's nick when learning a message from the bridge."""
+        mock_event = mock.MagicMock()
+        mock_event.arguments = ['<tester> hello this is a test message']
+        mock_event.target = '#test'
+        mock_event.recursing = False
+        mock_event.source = 'bridge!bridge@localhost'
+
+        with mock.patch('markov.lib.learn_line') as mock_learn_line:
+            self.plugin.handle_chatter(self.mock_connection, mock_event)
+
+        self.assertEqual(mock_learn_line.call_args.args[0], 'hello this is a test message')
--- a/tox.ini
+++ b/tox.ini
@ -194,5 +194,6 @@ python_files =
    *_tests.py
    tests.py
    test_*.py
+log_level=DEBUG
 DJANGO_SETTINGS_MODULE = dr_botzo.settings
 django_find_project = false
Author	SHA1	Message	Date
Brian S. Stephan	cf648cd555	test the ability to not learn our nick when addressed	2023-02-18 20:01:58 -06:00
Brian S. Stephan	c67b56ee5e	remove the speaker from messages coming over the bridge when learning	2023-02-18 20:01:58 -06:00
Brian S. Stephan	b9c2a96231	variable tweak to match other plugins (nick -> who)	2023-02-18 20:01:58 -06:00
Brian S. Stephan	ff5e95a53e	add test to confirm markov irc plugin behavior	2023-02-18 20:01:58 -06:00
Brian S. Stephan	3468622aa5	set pytest settings to aid testing	2023-02-18 20:01:58 -06:00
Brian S. Stephan	e05d0a1a1c	don't build trimmed_what until we know not to ignore chatter	2023-02-18 20:01:58 -06:00
Brian S. Stephan	dfda9d8c71	add discord bridge field to the channel model; it will be used in a future change to clean up markov chains	2023-02-18 20:01:58 -06:00
Brian S. Stephan	4c89989b8e	linter fixes for markov library methods	2023-02-18 20:01:48 -06:00