Update the markov chain import script to always use a context, specified on the command line

Also read input lines from stdin rather than from a file.
This commit is contained in:
Brian S. Stephan 2011-06-15 20:40:24 -05:00
parent a8031909b4
commit 74c03cff88
1 changed file with 10 additions and 5 deletions

View File

@@ -16,17 +16,22 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
""" """
import fileinput import argparse
import os import os
import sqlite3 import sqlite3
import sys import sys
parser = argparse.ArgumentParser(description='Import lines into the specified context.')
parser.add_argument('context', metavar='CONTEXT', type=str, nargs=1)
args = parser.parse_args()
print(args.context[0])
db = sqlite3.connect('dr.botzo.data') db = sqlite3.connect('dr.botzo.data')
db.row_factory = sqlite3.Row db.row_factory = sqlite3.Row
cur = db.cursor() cur = db.cursor()
statement = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)' statement = 'INSERT INTO markov_chain (k1, k2, v, context) VALUES (?, ?, ?, ?)'
for line in fileinput.input(): for line in sys.stdin:
# set up the head of the chain # set up the head of the chain
w1 = '__start1' w1 = '__start1'
w2 = '__start2' w2 = '__start2'
@@ -34,7 +39,7 @@ for line in fileinput.input():
# for each word pair, add the next word to the dictionary # for each word pair, add the next word to the dictionary
for word in line.split(): for word in line.split():
try: try:
cur.execute(statement, (w1.decode('utf-8', 'replace').lower(), w2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower())) cur.execute(statement, (w1.decode('utf-8', 'replace').lower(), w2.decode('utf-8', 'replace').lower(), word.decode('utf-8', 'replace').lower(), args.context[0]))
except sqlite3.Error as e: except sqlite3.Error as e:
db.rollback() db.rollback()
print("sqlite error: " + str(e)) print("sqlite error: " + str(e))
@@ -43,7 +48,7 @@ for line in fileinput.input():
w1, w2 = w2, word w1, w2 = w2, word
try: try:
cur.execute(statement, (w1.decode('utf-8', 'replace').lower(), w2.decode('utf-8', 'replace').lower(), '__stop')) cur.execute(statement, (w1.decode('utf-8', 'replace').lower(), w2.decode('utf-8', 'replace').lower(), '__stop', args.context[0]))
db.commit() db.commit()
except sqlite3.Error as e: except sqlite3.Error as e:
db.rollback() db.rollback()