# dr.botzo/scripts/import-file-into-markov_chain.py

"""
import-file-into-markov_chain.py
Copyright (C) 2011  Brian S. Stephan

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import argparse
import os
import sqlite3
import sys

INSERT_STATEMENT = 'INSERT INTO markov_chain (k1, k2, v, context_id) VALUES (?, ?, ?, ?)'


def to_text(value):
    """Return value as text, decoding byte strings as UTF-8.

    Invalid byte sequences are replaced rather than raising, so a single
    badly-encoded word cannot abort the import.  Values that are not byte
    strings (e.g. Python 3 str) are returned unchanged.
    """

    if isinstance(value, bytes):
        return value.decode('utf-8', 'replace')
    return value


def chain_rows(line, context_id):
    """Build the markov_chain rows for one line of input.

    The line is split on whitespace; each word becomes the value for the
    pair of words that precede it.  The chain is headed by the markers
    '__start1' and '__start2' and terminated with '__stop', so an empty
    line still produces one row ('__start1', '__start2', '__stop').

    Arguments:
    line        one input line, as a byte string or as text
    context_id  the context_id stored with every row

    Returns a list of (k1, k2, v, context_id) tuples, in chain order.
    """

    # split before decoding so tokenization does not depend on how
    # undecodable bytes end up being replaced
    words = [to_text(word) for word in line.split()]

    # keys is the chain with its two start markers; values is the same
    # chain shifted by two, ending in the stop marker
    keys = [to_text('__start1'), to_text('__start2')] + words
    values = words + [to_text('__stop')]

    return [(k1, k2, v, context_id) for k1, k2, v in zip(keys, keys[1:], values)]


def import_lines(db, lines, context_id):
    """Insert the chain for every line in lines into markov_chain.

    Each line is committed on its own.  If sqlite reports an error, the
    rows of the line being processed are rolled back, the error is printed,
    and the exception is re-raised; lines committed earlier are kept.

    Arguments:
    db          an open sqlite3 connection
    lines       an iterable of input lines (byte strings or text)
    context_id  the context_id stored with every row
    """

    cur = db.cursor()
    for line in lines:
        try:
            cur.executemany(INSERT_STATEMENT, chain_rows(line, context_id))
            db.commit()
        except sqlite3.Error as e:
            db.rollback()
            print("sqlite error: " + str(e))
            raise


def main():
    """Parse the command line and import stdin into dr.botzo.data."""

    parser = argparse.ArgumentParser(description='Import lines into the specified context_id.')
    parser.add_argument('context_id', metavar='CONTEXT', type=int, nargs=1)
    args = parser.parse_args()
    context_id = args.context_id[0]
    print(context_id)

    # read raw bytes where the interpreter offers them (Python 3), so that
    # invalid UTF-8 is replaced by to_text() instead of failing on read
    stream = getattr(sys.stdin, 'buffer', sys.stdin)

    db = sqlite3.connect('dr.botzo.data')
    db.row_factory = sqlite3.Row
    try:
        import_lines(db, stream, context_id)
    finally:
        db.close()


if __name__ == '__main__':
    main()

# vi:tabstop=4:expandtab:autoindent
# No results found.