2010-07-28 23:47:29 -05:00
|
|
|
# GoogleTranslate - go out to google and translate sentences
|
|
|
|
# Copyright (C) 2010 Brian S. Stephan
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2010-07-27 20:35:01 -05:00
|
|
|
from urllib2 import urlopen
|
|
|
|
from urllib import urlencode
|
2010-07-30 22:33:57 -05:00
|
|
|
from htmlentitydefs import name2codepoint
|
|
|
|
import re
|
2010-07-27 20:35:01 -05:00
|
|
|
|
2010-07-30 18:34:10 -05:00
|
|
|
from extlib import irclib
|
2010-07-29 00:18:20 -05:00
|
|
|
|
|
|
|
from Module import Module
|
|
|
|
|
2010-07-29 00:04:01 -05:00
|
|
|
# Class that translates text via Google Translate.
|
|
|
|
#
|
|
|
|
# http://code.google.com/apis/ajaxlanguage/documentation/
|
|
|
|
|
2010-07-27 20:35:01 -05:00
|
|
|
class GoogleTranslate(Module):
    """Translate text by querying the Google AJAX Language API.

    Handles commands of the form ``translate <from> <to> <text ...>``, where
    ``<from>`` and ``<to>`` are the language codes that make up the
    ``langpair`` request parameter.
    """

    # Service endpoint; the url-encoded query string is appended directly.
    _GT_URL = 'http://ajax.googleapis.com/ajax/services/language/translate?'

    # One HTML character reference: hex (&#x27;), decimal (&#39;) or named
    # (&amp;).  The alternatives are tried in that order so a named entity
    # that merely starts with "x" (e.g. &xi;) is not mistaken for a hex one.
    _ENTITY_RE = re.compile(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|\w+);')

    def do(self, connection, event, nick, userhost, replypath, what, admin_unlocked):
        """Translate the text in ``what`` and send the result to ``replypath``.

        ``what`` must start with the word ``translate`` followed by a source
        language, a target language and at least one word of text; any other
        input is ignored and None is returned.

        Returns whatever ``self.reply`` returns.  Errors raised by
        ``urlopen`` (network failures, HTTP errors) are not caught here.
        """

        whats = what.split(' ')
        if whats[0] != 'translate' or len(whats) < 4:
            return None

        fromlang = whats[1]
        tolang = whats[2]
        # urlencode() needs bytes, so encode the text before building the query
        text = ' '.join(whats[3:]).encode('utf-8')

        langpair = '%s|%s' % (fromlang, tolang)
        params = urlencode((('v', 1.0), ('q', text), ('langpair', langpair)))

        # close the connection even if reading the body fails
        response = urlopen(self._GT_URL + params)
        try:
            content = response.read()
        finally:
            response.close()

        translation = self._extract_translation(content)
        if translation is None:
            # the service returned no usable translation (e.g. an error
            # payload); say so instead of replying with a slice of raw JSON
            return self.reply(connection, replypath, 'translation failed')

        return self.reply(connection, replypath, self._decode_entities(translation))

    @staticmethod
    def _extract_translation(content):
        """Return the ``translatedText`` value of a raw response, or None.

        ``content`` is the response body as returned by the service.  It is
        parsed as JSON, which also takes care of ``\\uXXXX`` and backslash
        escapes inside the translated string.  None is returned when the body
        is not valid JSON or lacks ``responseData.translatedText``.
        """

        # imported here so the module-level imports stay as they are;
        # json is in the standard library from Python 2.6 on
        import json

        try:
            return json.loads(content)['responseData']['translatedText']
        except (ValueError, KeyError, TypeError):
            # ValueError: not JSON; KeyError: field missing;
            # TypeError: responseData is null or the payload is not an object
            return None

    @classmethod
    def _decode_entities(cls, text):
        """Replace HTML character references in ``text`` with their characters.

        Every reference is decoded exactly once, in a single pass.  References
        that cannot be resolved (unknown name, code point out of range) are
        left in the text as written.
        """

        def expand(match):
            ref = match.group(1)
            try:
                if ref[0] != '#':
                    return unichr(name2codepoint[ref])
                if ref[1] in 'xX':
                    return unichr(int(ref[2:], 16))
                return unichr(int(ref[1:]))
            except (KeyError, ValueError, OverflowError):
                return match.group(0)

        return cls._ENTITY_RE.sub(expand, text)
|
2010-07-27 20:35:01 -05:00
|
|
|
|
2010-07-28 23:48:47 -05:00
|
|
|
# vi:tabstop=4:expandtab:autoindent
|
2010-07-28 00:11:58 -05:00
|
|
|
# kate: indent-mode python;indent-width 4;replace-tabs on;
|