Obviously this means all of the modules changed to accommodate. This is one of many steps to reduce the number of times we pass connections, servers, and other such info around, which is mostly unnecessary because modules have a reference to DrBotIRC.
81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
"""
|
|
Babelfish - go out to babelfish and translate sentences
|
|
Copyright (C) 2012 Brian S. Stephan
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
"""
|
|
|
|
import re
|
|
import urllib2
|
|
from urllib import urlencode
|
|
|
|
from Module import Module
|
|
|
|
class Babelfish(Module):

    """Class that translates text via Babelfish.

    http://babelfish.yahoo.com/

    Recognizes ``!translate <fromlang> <tolang> <text>``, posts the text to
    the Babelfish web form and scrapes the translation out of the HTML that
    comes back.
    """

    # Start of the markup that wraps the translated text in the result page.
    _RESULT_MARKER = '<div id="result"><div style="padding:'

    # Fixed distance from the start of _RESULT_MARKER to the translated text.
    # This is the original hard-coded value; presumably it skips the marker
    # plus the remainder of the inner opening <div ...> tag -- TODO confirm
    # against a live response.
    _RESULT_OFFSET = 45

    # (entity, replacement) pairs applied to the scraped translation.
    # '&amp;' must come last so that text such as '&amp;lt;' becomes the
    # literal '&lt;' rather than being unescaped twice into '<'.
    # NOTE(review): the entity spellings were reconstructed -- the copy of the
    # file under review had them already HTML-decoded (e.g. replace('"', '"')),
    # which is a no-op at best and a syntax error for the apostrophe cases.
    # Confirm the exact spellings against version control.
    _ENTITIES = (
        ('&quot;', '"'),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&#039;', '\''),
        ('&#39;', '\''),
        ('&apos;', '\''),
        ('&amp;', '&'),
    )

    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle IRC input.

        If ``what`` looks like ``!translate <fromlang> <tolang> <text>``, the
        text is translated and the result is handed to ``self.irc.reply``,
        whose return value is returned. Any other input falls through and
        returns None.
        """

        match = re.search(r'^!translate\s+(\S+)\s+(\S+)\s+(.*)$', what)
        if match:
            fromlang, tolang, text = match.groups()

            return self.irc.reply(event, self.translate(fromlang, tolang, text))

    def translate(self, fromlang, tolang, text):

        """Translate text from fromlang to tolang, assuming it's a valid pair.

        Returns the translated string, or None when the response does not
        contain the expected result markup (for example, an unsupported
        language pair or a changed page layout). Errors raised by urllib2
        while talking to the service are not caught here.
        """

        langpair = '%s_%s' % (fromlang, tolang)

        # do some text conversion
        text = text.replace('<', '< ') # babelfish blows up on e.g. <text> but < text> is fine

        url = 'http://babelfish.yahoo.com/translate_txt'
        params = urlencode({'ei':'UTF-8', 'doit':'done', 'fr':'bf-home', 'intl':'1', 'tt':'urltext',
                            'trtext':text, 'lp':langpair})
        req = urllib2.Request(url, params)
        req.add_header('Accept-Charset', 'UTF-8,*;q=0.5')
        res = urllib2.urlopen(req)
        try:
            content = res.read()
        finally:
            # always release the connection, even if the read fails
            res.close()

        # str.find() returns -1 when the marker is absent; test that *before*
        # adding the offset, otherwise the "not found" case (-1 + 45 == 44)
        # is indistinguishable from a real hit.
        marker_idx = content.find(self._RESULT_MARKER)
        if marker_idx < 0:
            return None

        start_idx = marker_idx + self._RESULT_OFFSET
        end_idx = content.find('</div>', start_idx)
        if end_idx < 0:
            # no closing tag: slicing with -1 would silently drop the last
            # character of the page instead of delimiting the translation
            return None

        translation = content[start_idx:end_idx]

        # do some text conversion
        for entity, replacement in self._ENTITIES:
            translation = translation.replace(entity, replacement)
        translation = translation.replace('< ', '<') # crappy attempt at undoing the safety above

        return translation
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: translate one English sentence to Japanese and print
    # it wrapped in single quotes so leading/trailing whitespace is visible.
    babelfish = Babelfish(None, None, None)
    sample = "i can't read it, there aren't any words there"
    result = babelfish.translate('en', 'ja', sample)
    print("'" + result + "'")
|
|
|
|
# vi:tabstop=4:expandtab:autoindent
|
|
# kate: indent-mode python;indent-width 4;replace-tabs on;
|