From bf6a43ec9e201c5df6838904819bb578d6bfaa9b Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 5 Apr 2014 22:54:39 -0500 Subject: [PATCH] Markov: improve performance of state transitions --- markov/views.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/markov/views.py b/markov/views.py index da83050..cc2f1a0 100644 --- a/markov/views.py +++ b/markov/views.py @@ -8,6 +8,7 @@ import random import time from django.contrib.auth.decorators import permission_required +from django.db.models import Sum from django.http import HttpResponse from django.shortcuts import get_object_or_404, render @@ -122,6 +123,7 @@ def _generate_line(context, topics=None, max_words=30): while len(words) <= max_words and words[-1] != MarkovState._stop: log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1])) new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1]) + log.debug(u"states retrieved") words.append(_get_word_out_of_states(new_states)) i += 1 @@ -167,25 +169,22 @@ def _generate_sentence(context, topics=None, min_words=15, max_words=30): def _get_word_out_of_states(states, backwards=False): """Pick one random word out of the given states.""" - running = 0 - weighted_words = [] - for state in states: - running += state.count - if backwards: - weighted_words.append((running, state.k2)) - else: - weighted_words.append((running, state.v)) - - log.debug(u"{0:s}".format(weighted_words)) - - hit = random.randint(0, running) - log.debug(u"hit: {0:d}".format(hit)) - new_word = '' - for weight, word in weighted_words: - new_word = word + running = 0 + count_sum = states.aggregate(Sum('count'))['count__sum'] + hit = random.randint(0, count_sum) + + log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit)) + + states_itr = states.iterator() + for state in states_itr: + running += state.count + if running >= hit: + if backwards: + new_word = state.k2 + else: + new_word = state.v - if weight >= hit: break log.debug(u"found '{0:s}'".format(new_word))