dr.botzo/modules/Babelfish.py
Brian S. Stephan 3e76f75bba Module: remove reply(), use DrBotIRC's
obviously this means all of the modules changed to accommodate. this is
one of many steps to reduce the number of times we pass connections and
servers and other such info around, when it's mostly unnecessary because
modules have a reference to DrBotIRC
2012-12-19 20:51:35 -06:00

81 lines
3.0 KiB
Python

"""
Babelfish - go out to babelfish and translate sentences
Copyright (C) 2012 Brian S. Stephan
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import re
import urllib2
from urllib import urlencode
from Module import Module
class Babelfish(Module):

    """Class that translates text via Babelfish.

    http://babelfish.yahoo.com/
    """

    # Text that precedes the translated sentence in the result page.
    _RESULT_MARKER = '<div id="result"><div style="padding:'

    # Distance from the start of the marker to the start of the translation.
    # The marker itself is 37 characters; the remaining 8 presumably skip the
    # rest of the style attribute and the closing '">' -- TODO confirm against
    # a live result page.
    _RESULT_OFFSET = 45

    # HTML entities to decode, applied in order. '&amp;' must come last so
    # that an escaped entity such as '&amp;lt;' decodes to the literal text
    # '&lt;' instead of being unescaped twice into '<'.
    _ENTITIES = (
        ('&quot;', '"'),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&#039;', '\''),
        ('&#039', '\''),
        ('&amp;', '&'),
    )

    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle IRC input.

        Recognizes '!translate <fromlang> <tolang> <text>' in `what`. On a
        match, translates the text and returns whatever self.irc.reply()
        returns. Returns None when the input is not a translate command or
        when no translation could be extracted from the response.
        """

        match = re.search(r'^!translate\s+(\S+)\s+(\S+)\s+(.*)$', what)
        if not match:
            return None

        fromlang, tolang, text = match.groups()
        translation = self.translate(fromlang, tolang, text)
        if translation is None:
            # nothing usable came back; don't send page fragments to the channel
            return None
        return self.irc.reply(event, translation)

    def translate(self, fromlang, tolang, text):
        """Translate text from fromlang to tolang, assuming it's a valid pair.

        Posts the text to the Babelfish translate form and returns the
        translated string, or None if the response does not contain a
        recognizable result. Errors raised by urllib2.urlopen() propagate
        to the caller.
        """

        langpair = '%s_%s' % (fromlang, tolang)

        # do some text conversion
        text = text.replace('<', '< ') # babelfish blows up on e.g. <text> but < text> is fine

        url = 'http://babelfish.yahoo.com/translate_txt'
        params = urlencode({'ei':'UTF-8', 'doit':'done', 'fr':'bf-home', 'intl':'1', 'tt':'urltext',
                            'trtext':text, 'lp':langpair})
        req = urllib2.Request(url, params)
        req.add_header('Accept-Charset', 'UTF-8,*;q=0.5')

        res = urllib2.urlopen(req)
        try:
            content = res.read()
        finally:
            # always release the connection, even if the read fails
            res.close()

        return self._extract_translation(content)

    def _extract_translation(self, content):
        """Return the translated text found in the result page, or None."""

        # Test the find() result itself: adding the offset first would turn
        # the -1 "not found" value into 44 and hide the failure.
        marker_idx = content.find(self._RESULT_MARKER)
        if marker_idx < 0:
            return None

        start_idx = marker_idx + self._RESULT_OFFSET
        end_idx = content.find('</div>', start_idx)
        if end_idx < 0:
            # without a closing tag the slice would swallow the rest of the page
            return None

        translation = content[start_idx:end_idx]

        # do some text conversion
        for entity, char in self._ENTITIES:
            translation = translation.replace(entity, char)

        # crappy attempt at undoing the '<' -> '< ' safety applied before sending
        return translation.replace('< ', '<')
if __name__ == '__main__':
    # Manual smoke test: translate one English sentence to Japanese and print
    # it wrapped in single quotes. The three None arguments stand in for the
    # constructor parameters, which are presumably not needed by translate()
    # -- confirm against Module.__init__.
    translator = Babelfish(None, None, None)
    sample = 'i can\'t read it, there aren\'t any words there'
    result = translator.translate('en', 'ja', sample)
    print('\'' + result + '\'')
# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;