dr.botzo, the IRC bot with Django integration.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

80 lines
3.0 KiB

"""
Babelfish - go out to babelfish and translate sentences
Copyright (C) 2012 Brian S. Stephan
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import re
import urllib2
from urllib import urlencode
from Module import Module
class Babelfish(Module):

    """Class that translates text via Babelfish.

    http://babelfish.yahoo.com/
    """

    # Start of the markup that wraps the translated text in the result page.
    _RESULT_MARKER = '<div id="result"><div style="padding:'

    # Distance from the start of _RESULT_MARKER to the first character of the
    # translation. The marker itself is 37 characters long; the remaining 8
    # are presumably the padding value plus the closing '">' of the tag
    # (NOTE(review): confirm against a live response if the service changes).
    _RESULT_OFFSET = 45

    # HTML entities decoded in the response, applied in order. '&amp;' must
    # come last: decoding it first would turn e.g. '&amp;lt;' into '&lt;' and
    # then wrongly into '<' (double decoding).
    _ENTITIES = (
        ('&quot;', '"'),
        ('&lt;', '<'),
        ('&gt;', '>'),
        ('&#039;', '\''),
        ('&#039', '\''),    # same entity with the trailing ';' missing
        ('&amp;', '&'),
    )

    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle IRC input.

        Recognizes commands of the form
        ``!translate <fromlang> <tolang> <text>`` in ``what``.

        Only ``event`` and ``what`` are used here; the other arguments are
        accepted but not read by this method.

        Returns the result of ``self.irc.reply()`` when the command matched
        and a translation was extracted, otherwise None.
        """

        match = re.search(r'^!translate\s+(\S+)\s+(\S+)\s+(.*)$', what)
        if not match:
            return None

        fromlang, tolang, text = match.groups()
        translation = self.translate(fromlang, tolang, text)
        if translation is None:
            # nothing usable came back from the service; stay silent rather
            # than handing None to the reply machinery
            return None

        return self.irc.reply(event, translation)

    def translate(self, fromlang, tolang, text):
        """Translate text from fromlang to tolang, assuming it's a valid pair.

        The text is POSTed to the Babelfish web form and the translation is
        scraped out of the returned HTML.

        Returns the translated text, or None when the result block cannot be
        located in the response. Errors raised by urllib2 while performing
        the request are not caught here.
        """

        langpair = '%s_%s' % (fromlang, tolang)

        # do some text conversion
        text = text.replace('<', '< ')  # babelfish blows up on e.g. <text> but < text> is fine

        url = 'http://babelfish.yahoo.com/translate_txt'
        params = urlencode({'ei':'UTF-8', 'doit':'done', 'fr':'bf-home', 'intl':'1', 'tt':'urltext',
                            'trtext':text, 'lp':langpair})
        req = urllib2.Request(url, params)
        req.add_header('Accept-Charset', 'UTF-8,*;q=0.5')

        res = urllib2.urlopen(req)
        try:
            content = res.read()
        finally:
            # always release the connection, even if the read fails
            res.close()

        # check the find() result *before* adding the offset; -1 + 45 would
        # otherwise look like a valid (positive) index
        marker_idx = content.find(self._RESULT_MARKER)
        if marker_idx == -1:
            return None

        start_idx = marker_idx + self._RESULT_OFFSET
        end_idx = content.find('</div>', start_idx)
        if end_idx == -1:
            # truncated/unexpected page; slicing with -1 would silently
            # return everything up to the second-to-last character
            return None

        translation = content[start_idx:end_idx]

        # do some text conversion
        for entity, char in self._ENTITIES:
            translation = translation.replace(entity, char)
        translation = translation.replace('< ', '<')  # crappy attempt at undoing the safety above

        return translation
if __name__ == '__main__':
    # Manual smoke test: translate a fixed English sentence to Japanese and
    # print the result wrapped in single quotes so leading/trailing
    # whitespace is visible.
    translator = Babelfish(None, None, None)
    sample = 'i can\'t read it, there aren\'t any words there'
    translated = translator.translate('en', 'ja', sample)
    print('\'' + translated + '\'')
# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;