"""
import-file-into-markov_chain.py
Copyright (C) 2011 Brian S. Stephan

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see http://www.gnu.org/licenses/.
"""

# Reads text from the files named on the command line (or stdin when none
# are given) and inserts every (word, word) -> next-word transition into the
# ``markov_chain`` table of the ``dr.botzo.data`` sqlite database.  The table
# is only inserted into here, so it must already exist.

import fileinput
import os
import sqlite3
import sys

DB_PATH = 'dr.botzo.data'
INSERT_STATEMENT = 'INSERT INTO markov_chain (k1, k2, v) VALUES (?, ?, ?)'

# Sentinel keys marking the head and the tail of every chain.
START_1 = u'__start1'
START_2 = u'__start2'
STOP = u'__stop'


def _normalize(token):
    """Return ``token`` as lowercased text.

    Byte strings are decoded as UTF-8, with undecodable bytes replaced
    rather than raising; text strings are used as-is.  Calling ``.decode``
    unconditionally fails on Python 3, where ``str`` has no such method.
    """
    if isinstance(token, bytes):
        token = token.decode('utf-8', 'replace')
    return token.lower()


def chain_rows(line):
    """Build the ``(k1, k2, v)`` rows describing one line of input.

    The chain starts at the two start sentinels, maps every consecutive
    word pair to the word following it, and ends by mapping the final pair
    to the stop sentinel.  A line without words therefore still yields the
    single row ``(start1, start2, stop)``.

    ``line`` may be ``bytes`` or ``str``; words are split on whitespace.
    """
    k1, k2 = START_1, START_2
    rows = []
    for raw_word in line.split():
        word = _normalize(raw_word)
        rows.append((k1, k2, word))
        k1, k2 = k2, word
    rows.append((k1, k2, STOP))
    return rows


def import_lines(db, lines):
    """Insert the chain rows for each of ``lines`` into ``db``.

    Each line is committed as its own transaction.  If sqlite reports an
    error, the rows of the failing line are rolled back, the error is
    printed, and the exception is re-raised; lines committed earlier stay
    in the database.
    """
    cur = db.cursor()
    for line in lines:
        try:
            cur.executemany(INSERT_STATEMENT, chain_rows(line))
            db.commit()
        except sqlite3.Error as e:
            db.rollback()
            print("sqlite error: " + str(e))
            raise


def main():
    """Import the input named on the command line (or stdin) into the DB."""
    db = sqlite3.connect(DB_PATH)
    db.row_factory = sqlite3.Row
    try:
        # Binary mode yields raw bytes on both Python 2 and 3, so malformed
        # UTF-8 is replaced during normalization instead of aborting the read.
        import_lines(db, fileinput.input(mode='rb'))
    finally:
        db.close()


if __name__ == '__main__':
    main()

# vi:tabstop=4:expandtab:autoindent