Markov: rebuild the tables, use the context stuff in a better fashion this time

the module will drop your old tables if you have them, so if there's data in
them, be sure to back it up and figure out some migration strategy (probably
annoying, and you will probably have to script it).

the big change is that each line is now associated with a context, and channels
are also associated with contexts. this should allow for better partitioning
of multiple brains, and for changing which channels point to which brain.

also caught in the wake are some additional logging verbosity and a change to
no longer lower() everything learned.

the script to dump a file into the database has also been updated to match the
above changes.
This commit is contained in:
2012-02-28 23:23:14 -06:00
parent 79ddce0bcb
commit 26bc8bec34
2 changed files with 102 additions and 145 deletions

View File

@@ -21,16 +21,16 @@ import os
import sqlite3
import sys
parser = argparse.ArgumentParser(description='Import lines into the specified context.')
parser.add_argument('context', metavar='CONTEXT', type=str, nargs=1)
parser = argparse.ArgumentParser(description='Import lines into the specified context_id.')
parser.add_argument('context_id', metavar='CONTEXT', type=int, nargs=1)
args = parser.parse_args()
print(args.context[0])
print(args.context_id[0])
db = sqlite3.connect('dr.botzo.data')
db.row_factory = sqlite3.Row
cur = db.cursor()
statement = 'INSERT INTO markov_chain (k1, k2, v, context) VALUES (?, ?, ?, ?)'
statement = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (?, ?, ?, ?)'
for line in sys.stdin:
# set up the head of the chain
w1 = '__start1'
@@ -39,7 +39,7 @@ for line in sys.stdin:
# for each word pair, add the next word to the dictionary
for word in line.split():
try:
cur.execute(statement, (w1.decode('utf-8', 'replace').lower(), w2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower(), args.context[0]))
cur.execute(statement, (w1.decode('utf-8', 'replace'), w2.decode('utf-8', 'replace'), word.decode('utf-8', 'replace'), args.context_id[0]))
except sqlite3.Error as e:
db.rollback()
print("sqlite error: " + str(e))
@@ -48,7 +48,7 @@ for line in sys.stdin:
w1, w2 = w2, word
try:
cur.execute(statement, (w1.decode('utf-8', 'replace').lower(), w2.decode('utf-8', 'replace').lower(), '__stop', args.context[0]))
cur.execute(statement, (w1.decode('utf-8', 'replace'), w2.decode('utf-8', 'replace'), '__stop', args.context_id[0]))
db.commit()
except sqlite3.Error as e:
db.rollback()