Markov: cache the first word in markov chains
This eliminates the expensive database hit on every request for a line. The cache is loaded when the module loads, and learning new lines should add the appropriate first word to the cached list. This seemed like a pretty good compromise.
This commit is contained in:
		
							parent
							
								
									3d6ede5155
								
							
						
					
					
						commit
						87073d7fd3
					
				@ -59,6 +59,9 @@ class Markov(Module):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        Module.__init__(self, irc, config, server)
 | 
					        Module.__init__(self, irc, config, server)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # load the existing chain starts from the database
 | 
				
			||||||
 | 
					        self.starts = self._get_chain_beginnings()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def db_init(self):
 | 
					    def db_init(self):
 | 
				
			||||||
        """Create the markov chain table."""
 | 
					        """Create the markov chain table."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -183,12 +186,15 @@ class Markov(Module):
 | 
				
			|||||||
        k1 = self.start1
 | 
					        k1 = self.start1
 | 
				
			||||||
        k2 = self.start2
 | 
					        k2 = self.start2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        words = line.split()
 | 
				
			||||||
 | 
					        self.starts.append(words[0])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            db = self.get_db()
 | 
					            db = self.get_db()
 | 
				
			||||||
            cur = db.cursor()
 | 
					            cur = db.cursor()
 | 
				
			||||||
            statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
 | 
					            statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for word in line.split():
 | 
					            for word in words:
 | 
				
			||||||
                cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
 | 
					                cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower()))
 | 
				
			||||||
                k1, k2 = k2, word
 | 
					                k1, k2 = k2, word
 | 
				
			||||||
            cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
 | 
					            cur.execute(statement, (k1.decode('utf-8', 'replace').lower(), k2.decode('utf-8', 'replace').lower(), self.stop))
 | 
				
			||||||
@ -211,7 +217,7 @@ class Markov(Module):
 | 
				
			|||||||
            raise Exception("min_size is too large: %d" % min_size)
 | 
					            raise Exception("min_size is too large: %d" % min_size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # start with an empty chain, and work from there
 | 
					        # start with an empty chain, and work from there
 | 
				
			||||||
        gen_words = [self.start1, self.start2]
 | 
					        gen_words = [self.start1, self.start2, random.choice(self.starts)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # set up the number of times we've tried to hit the specified minimum
 | 
					        # set up the number of times we've tried to hit the specified minimum
 | 
				
			||||||
        min_search_tries = 0
 | 
					        min_search_tries = 0
 | 
				
			||||||
@ -262,7 +268,7 @@ class Markov(Module):
 | 
				
			|||||||
        target_word = words[random.randint(0, len(words)-1)]
 | 
					        target_word = words[random.randint(0, len(words)-1)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # start with an empty chain, and work from there
 | 
					        # start with an empty chain, and work from there
 | 
				
			||||||
        gen_words = [self.start1, self.start2]
 | 
					        gen_words = [self.start1, self.start2, random.choice(self.starts)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # walk a chain, randomly, building the list of words
 | 
					        # walk a chain, randomly, building the list of words
 | 
				
			||||||
        while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
 | 
					        while len(gen_words) < max_size + 2 and gen_words[-1] != self.stop:
 | 
				
			||||||
@ -305,5 +311,23 @@ class Markov(Module):
 | 
				
			|||||||
            print('sqlite error: ' + str(e))
 | 
					            print('sqlite error: ' + str(e))
 | 
				
			||||||
            raise
 | 
					            raise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _get_chain_beginnings(self):
 | 
				
			||||||
 | 
					        """Get all of the first (real) words in the brain."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        values = []
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            db = self.get_db()
 | 
				
			||||||
 | 
					            query = 'SELECT v FROM markov_chain WHERE k1 = "__start1" AND k2 = "__start2"'
 | 
				
			||||||
 | 
					            cursor = db.execute(query)
 | 
				
			||||||
 | 
					            results = cursor.fetchall()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for result in results:
 | 
				
			||||||
 | 
					                values.append(result['v'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            return values
 | 
				
			||||||
 | 
					        except sqlite3.Error as e:
 | 
				
			||||||
 | 
					            print('sqlite error: ' + str(e))
 | 
				
			||||||
 | 
					            raise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# vi:tabstop=4:expandtab:autoindent
 | 
					# vi:tabstop=4:expandtab:autoindent
 | 
				
			||||||
# kate: indent-mode python;indent-width 4;replace-tabs on;
 | 
					# kate: indent-mode python;indent-width 4;replace-tabs on;
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user