59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
"""
|
|
import-file-into-markov_chain.py
|
|
Copyright (C) 2011 Brian S. Stephan
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
|
|
# Parameterized insert used for every (k1, k2) -> v link of the chain.
INSERT_STATEMENT = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (?, ?, ?, ?)'

# Sentinel words marking the head and the tail of every imported line.
START1 = '__start1'
START2 = '__start2'
STOP = '__stop'


def _to_text(value):
    """Return value as text, decoding byte strings as UTF-8 with replacement.

    On Python 2 every plain ``str`` is a byte string and gets decoded, which
    is what the script always did. On Python 3 ``str`` has no ``decode``
    method, so text is passed through untouched and only real ``bytes``
    are decoded.
    """
    if isinstance(value, bytes):
        return value.decode('utf-8', 'replace')
    return value


def markov_rows(line, context_id):
    """Build the markov_chain rows for one input line.

    The line is split on whitespace *before* decoding, so an invalid byte
    sequence only damages the word it occurs in. Each returned tuple is
    ``(k1, k2, v, context_id)``: ``v`` is the word following the pair
    ``(k1, k2)``. The chain starts with the ``__start1``/``__start2`` pair
    and ends with a ``__stop`` value, so a line without any words still
    yields the single row ``(__start1, __start2, __stop, context_id)``.

    line       -- one line of input, as text or as UTF-8 encoded bytes
    context_id -- integer stored in the context_id column of every row
    """
    words = [_to_text(word) for word in line.split()]
    keys = [_to_text(START1), _to_text(START2)] + words
    values = words + [_to_text(STOP)]
    # zip() stops at the shortest sequence (values), which pairs every
    # consecutive (k1, k2) with the word that follows it.
    return [(k1, k2, v, context_id) for k1, k2, v in zip(keys, keys[1:], values)]


def import_lines(db, lines, context_id):
    """Insert the chain for every line into markov_chain, committing per line.

    db         -- open sqlite3 connection whose database has a markov_chain table
    lines      -- iterable of lines (text or UTF-8 bytes)
    context_id -- integer stored with every inserted row

    If sqlite reports an error, the rows of the current line are rolled
    back, the error is printed and the exception is re-raised; lines that
    were committed earlier stay in the database.
    """
    cur = db.cursor()
    for line in lines:
        try:
            cur.executemany(INSERT_STATEMENT, markov_rows(line, context_id))
            db.commit()
        except sqlite3.Error as e:
            db.rollback()
            print("sqlite error: " + str(e))
            raise


def main(argv=None):
    """Import the lines read from stdin into the context given on the command line.

    argv -- argument list to parse; None means sys.argv[1:] (argparse default)
    """
    parser = argparse.ArgumentParser(description='Import lines into the specified context_id.')
    parser.add_argument('context_id', metavar='CONTEXT', type=int, nargs=1)
    args = parser.parse_args(argv)
    context_id = args.context_id[0]
    print(context_id)

    db = sqlite3.connect('dr.botzo.data')
    db.row_factory = sqlite3.Row
    try:
        # Python 3 exposes the undecoded byte stream as sys.stdin.buffer;
        # reading it keeps the "UTF-8 with replacement" decoding in our
        # hands. Python 2's sys.stdin already yields byte strings.
        import_lines(db, getattr(sys.stdin, 'buffer', sys.stdin), context_id)
    finally:
        db.close()


if __name__ == '__main__':
    main()
|
|
|
|
# vi:tabstop=4:expandtab:autoindent
|