The module will drop your old tables if you have them, so if there's data there, be sure to back it up and figure out a migration strategy (probably annoying, and probably requiring a script). The big change is that each line is now associated with a context, and channels are also associated with contexts. This should allow for better partitioning of multiple brains, and for changing which channels point to which brain. Also caught in the wake are some additional logging verbosity and a change to no longer lower() everything learned. The script to dump a file into the database has also been updated with the above changes.
59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
"""
import-file-into-markov_chain.py
Copyright (C) 2011 Brian S. Stephan

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
|
|
# Sentinel words marking the head and tail of every chain.
START1 = '__start1'
START2 = '__start2'
STOP = '__stop'

# Database file the script writes to, and the insert used for every link.
DATABASE = 'dr.botzo.data'
STATEMENT = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (?, ?, ?, ?)'


def to_text(value):
    """Return value as text, decoding byte strings as UTF-8.

    Byte sequences that are not valid UTF-8 are replaced instead of raising.
    Values that are already text are returned unchanged, which is what keeps
    the script working on Python 3, where str has no decode() method.
    """
    if isinstance(value, bytes):
        return value.decode('utf-8', 'replace')
    return value


def chain_rows(line, context_id):
    """Build the markov_chain rows for a single line of input.

    The line is split on whitespace. Each word is stored as the value that
    follows the two words before it, starting from the START1/START2
    sentinels; a final row maps the last two words to STOP. A line without
    any words therefore still yields one (START1, START2, STOP) row.

    Returns a list of (k1, k2, v, context_id) tuples in insertion order.
    """
    rows = []
    w1, w2 = START1, START2
    for word in line.split():
        rows.append((to_text(w1), to_text(w2), to_text(word), context_id))
        w1, w2 = w2, word
    rows.append((to_text(w1), to_text(w2), STOP, context_id))
    return rows


def import_lines(db, lines, context_id):
    """Insert the chain for every line into db, committing once per line.

    On an sqlite error the pending (uncommitted) inserts are rolled back,
    the error is printed, and the exception is re-raised; lines that were
    already committed stay in the database.
    """
    cur = db.cursor()
    for line in lines:
        try:
            cur.executemany(STATEMENT, chain_rows(line, context_id))
            db.commit()
        except sqlite3.Error as e:
            db.rollback()
            print("sqlite error: " + str(e))
            raise


def main():
    """Parse the context id from the command line and import stdin."""
    parser = argparse.ArgumentParser(description='Import lines into the specified context_id.')
    parser.add_argument('context_id', metavar='CONTEXT', type=int, nargs=1)
    args = parser.parse_args()
    context_id = args.context_id[0]
    print(context_id)

    # Prefer the raw byte stream when there is one (Python 3) so malformed
    # UTF-8 is replaced by to_text() instead of failing inside stdin's own
    # decoder; on Python 2 sys.stdin already yields byte strings.
    stream = getattr(sys.stdin, 'buffer', sys.stdin)

    db = sqlite3.connect(DATABASE)
    db.row_factory = sqlite3.Row
    try:
        import_lines(db, stream, context_id)
    finally:
        # Release the database file even when an insert fails.
        db.close()


if __name__ == '__main__':
    main()
|
|
|
|
# vi:tabstop=4:expandtab:autoindent
|