Markov: improve performance of state transitions

This commit is contained in:
Brian S. Stephan 2014-04-05 22:54:39 -05:00
parent c193b7f4be
commit bf6a43ec9e
1 changed file with 16 additions and 17 deletions

View File

@ -8,6 +8,7 @@ import random
import time
from django.contrib.auth.decorators import permission_required
from django.db.models import Sum
from django.http import HttpResponse
from django.shortcuts import get_object_or_404, render
@ -122,6 +123,7 @@ def _generate_line(context, topics=None, max_words=30):
while len(words) <= max_words and words[-1] != MarkovState._stop:
log.debug(u"looking for '{0:s}','{1:s}'".format(words[i-2], words[i-1]))
new_states = MarkovState.objects.filter(context=context, k1=words[i-2], k2=words[i-1])
log.debug(u"states retrieved")
words.append(_get_word_out_of_states(new_states))
i += 1
@ -167,25 +169,22 @@ def _generate_sentence(context, topics=None, min_words=15, max_words=30):
def _get_word_out_of_states(states, backwards=False):
"""Pick one random word out of the given states."""
running = 0
weighted_words = []
for state in states:
running += state.count
if backwards:
weighted_words.append((running, state.k2))
else:
weighted_words.append((running, state.v))
log.debug(u"{0:s}".format(weighted_words))
hit = random.randint(0, running)
log.debug(u"hit: {0:d}".format(hit))
new_word = ''
for weight, word in weighted_words:
new_word = word
running = 0
count_sum = states.aggregate(Sum('count'))['count__sum']
hit = random.randint(0, count_sum)
log.debug(u"sum: {0:d} hit: {1:d}".format(count_sum, hit))
states_itr = states.iterator()
for state in states_itr:
running += state.count
if running >= hit:
if backwards:
new_word = state.k2
else:
new_word = state.v
if weight >= hit:
break
log.debug(u"found '{0:s}'".format(new_word))