81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
"""
Babelfish - go out to babelfish and translate sentences
Copyright (C) 2012 Brian S. Stephan

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
|
|
|
|
import re
|
|
import urllib2
|
|
from urllib import urlencode
|
|
|
|
from Module import Module
|
|
|
|
class Babelfish(Module):

    """Class that translates text via Babelfish.

    http://babelfish.yahoo.com/

    Handles input of the form ``!translate <from> <to> <text>`` by submitting
    the text to the Babelfish translation form and scraping the translated
    string out of the HTML that comes back.
    """

    # Start of the markup that wraps the translated text in the result page.
    # The inner <div> tag continues past this marker with an inline style
    # value, so the tag's closing '>' is searched for rather than assumed to
    # sit at a fixed offset.
    _RESULT_MARKER = '<div id="result"><div style="padding:'

    # Entities that are turned back into literal characters in the result.
    # NOTE(review): this list is reconstructed (double quote, ampersand,
    # angle brackets and two spellings of the apostrophe) -- confirm the
    # exact entity spellings against the upstream file.
    _ENTITIES = {
        '&quot;': '"',
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&#039;': '\'',
        '&#39;': '\'',
    }
    _ENTITY_RE = re.compile(r'&(?:quot|amp|lt|gt|#0?39);')

    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle IRC input.

        connection, event -- passed through unchanged to self.reply()
        nick, userhost, admin_unlocked -- not used by this module
        what -- the text to inspect for a !translate command

        Returns whatever self.reply() returns when 'what' is a !translate
        command, and None otherwise.
        """

        match = re.search(r'^!translate\s+(\S+)\s+(\S+)\s+(.*)$', what)
        if match:
            fromlang, tolang, text = match.groups()

            return self.reply(connection, event, self.translate(fromlang, tolang, text))

    def translate(self, fromlang, tolang, text):

        """Translate text from fromlang to tolang, assuming it's a valid pair.

        Returns the translated text, or None when the response does not
        contain the expected result markup. Errors raised by urllib2 while
        talking to the service are not caught here.
        """

        langpair = '%s_%s' % (fromlang, tolang)

        # babelfish blows up on e.g. <text> but < text> is fine
        text = text.replace('<', '< ')

        url = 'http://babelfish.yahoo.com/translate_txt'
        params = urlencode({'ei': 'UTF-8', 'doit': 'done', 'fr': 'bf-home', 'intl': '1',
                            'tt': 'urltext', 'trtext': text, 'lp': langpair})
        req = urllib2.Request(url, params)
        req.add_header('Accept-Charset', 'UTF-8,*;q=0.5')

        res = urllib2.urlopen(req)
        try:
            content = res.read()
        finally:
            # always release the connection, even if the read fails
            res.close()

        return self._extract_translation(content)

    def _extract_translation(self, content):
        """Pull the translated text out of a Babelfish result page.

        Returns None if the result marker, the end of its opening tag, or
        the closing </div> cannot be found in content.
        """

        marker_idx = content.find(self._RESULT_MARKER)
        if marker_idx == -1:
            return None

        # the translation starts right after the '>' that closes the inner div
        tag_end = content.find('>', marker_idx + len(self._RESULT_MARKER))
        if tag_end == -1:
            return None
        start_idx = tag_end + 1

        end_idx = content.find('</div>', start_idx)
        if end_idx == -1:
            return None

        translation = content[start_idx:end_idx]

        # decode entities in a single pass so that text such as '&amp;lt;'
        # becomes '&lt;' and is not unescaped a second time
        translation = self._ENTITY_RE.sub(lambda m: self._ENTITIES[m.group(0)], translation)

        # undo the '< ' padding that translate() adds before sending the text
        return translation.replace('< ', '<')
|
|
|
|
if __name__ == '__main__':
    # manual smoke test: translate one English sentence to Japanese and
    # print the result wrapped in single quotes
    babelfish = Babelfish(None, None, None)
    translated = babelfish.translate('en', 'ja', 'i can\'t read it, there aren\'t any words there')
    print('\'' + translated + '\'')
|
|
|
|
# vi:tabstop=4:expandtab:autoindent
|
|
# kate: indent-mode python;indent-width 4;replace-tabs on;
|