dr.botzo/modules/GoogleTranslate.py

84 lines
3.4 KiB
Python
Raw Normal View History

2010-07-28 23:47:29 -05:00
# GoogleTranslate - go out to google and translate sentences
# Copyright (C) 2010 Brian S. Stephan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2010-07-27 20:35:01 -05:00
import json
import re
from htmlentitydefs import name2codepoint
from urllib import urlencode
from urllib2 import urlopen
2010-07-27 20:35:01 -05:00
2010-07-30 18:34:10 -05:00
from extlib import irclib
from Module import Module
2010-07-29 00:04:01 -05:00
# Class that translates text via Google Translate.
#
# http://code.google.com/apis/ajaxlanguage/documentation/
2010-07-27 20:35:01 -05:00
class GoogleTranslate(Module):
    """Translate text through the Google AJAX Language API.

    Responds to commands of the form ``translate <from> <to> <text>``; any
    other input is ignored.
    """

    # Service endpoint; the urlencoded query string is appended directly.
    _GT_URL = 'http://ajax.googleapis.com/ajax/services/language/translate?'

    # A single HTML character reference: decimal (&#39;), hexadecimal
    # (&#x27;) or named (&amp;).
    _ENTITY_RE = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|\w+);')

    def do(self, connection, event, nick, userhost, replypath, what, admin_unlocked):
        """Handle a ``translate`` command and reply with the translated text.

        connection and replypath are passed through unchanged to
        self.reply(). what is the command text, expected to look like
        ``translate <from> <to> <text>``. event, nick, userhost and
        admin_unlocked are not used by this module.

        Returns the result of self.reply() when the command was handled, or
        None when what is not a translate command with at least three
        arguments. If the service response holds no translation, a
        ``translation failed: ...`` message is sent instead. Errors raised
        by urlopen() are not caught here.
        """
        # maxsplit keeps the text to translate intact, embedded spaces included
        whats = what.split(' ', 3)
        if len(whats) < 4 or whats[0] != 'translate':
            return None
        fromlang, tolang, text = whats[1:]

        # urlencode() needs byte strings for anything outside ASCII
        params = urlencode((
            ('v', '1.0'),
            ('q', text.encode('utf-8')),
            ('langpair', ('%s|%s' % (fromlang, tolang)).encode('utf-8')),
        ))

        response = urlopen(self._GT_URL + params)
        try:
            content = response.read()
        finally:
            # always release the connection, even if read() fails
            response.close()

        translation, error = self._parse_response(content)
        if translation is None:
            return self.reply(connection, replypath,
                              'translation failed: %s' % error)

        # the JSON layer is decoded already; what remains are HTML character
        # references embedded in the translated text
        translation = self._ENTITY_RE.sub(self._decode_entity, translation)
        return self.reply(connection, replypath, translation)

    @staticmethod
    def _parse_response(content):
        """Split the raw JSON response body into ``(translation, error)``.

        Exactly one element of the pair is None. json.loads() takes care of
        the \\uXXXX and backslash escapes, so the translation comes back as
        decoded text.
        """
        try:
            payload = json.loads(content)
        except ValueError:
            # also covers bodies that are not valid UTF-8
            return None, 'unreadable response from server'
        if not isinstance(payload, dict):
            return None, 'unexpected response from server'

        data = payload.get('responseData')
        if isinstance(data, dict):
            translated = data.get('translatedText')
            if isinstance(translated, basestring):
                return translated, None

        # presumably responseDetails carries the service's error text when
        # responseData is null -- verify against the API documentation
        return None, payload.get('responseDetails') or 'no translation returned'

    @staticmethod
    def _decode_entity(match):
        """Return the character for one _ENTITY_RE match.

        The matched text is returned unchanged when the entity name is
        unknown or the code point cannot be turned into a character.
        """
        ref = match.group(1)
        try:
            if ref[:2] in ('#x', '#X'):
                return unichr(int(ref[2:], 16))
            if ref.startswith('#'):
                return unichr(int(ref[1:]))
            return unichr(name2codepoint[ref])
        except (KeyError, ValueError, OverflowError):
            # unknown name, or code point out of range for unichr()
            return match.group(0)
2010-07-27 20:35:01 -05:00
2010-07-28 23:48:47 -05:00
# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;