# GoogleTranslate - go out to google and translate sentences
# Copyright (C) 2010 Brian S. Stephan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from urllib2 import urlopen
from urllib import urlencode
from htmlentitydefs import name2codepoint
import json
import re

from extlib import irclib

from Module import Module

# Endpoint of the translate service; the urlencoded query string is appended.
_GT_URL = 'http://ajax.googleapis.com/ajax/services/language/translate?'

# One HTML character reference: "&#39;", "&#x27;" or a named one like "&amp;".
# Groups: (hex marker, digits, entity name).
_ENTITY_RE = re.compile(r'&(?:#([xX])?([0-9a-fA-F]+)|(\w+));')


def _decode_entity(match):
    """Return the character for one _ENTITY_RE match.

    References that cannot be resolved (unknown name, malformed number, or a
    code point unichr() rejects) are returned unchanged instead of raising.
    """
    hex_marker, digits, name = match.groups()
    try:
        if digits is not None:
            return unichr(int(digits, 16 if hex_marker else 10))
        return unichr(name2codepoint[name])
    except (KeyError, ValueError, OverflowError):
        return match.group(0)


def _parse_response(content):
    """Split a raw response body into (translation, error_details).

    The body is expected to be a JSON object carrying the result under
    responseData.translatedText and, on failure, a message under
    responseDetails.
    NOTE(review): shape taken from the AJAX Language API docs -- confirm.

    Exactly one element of the returned pair is a string when the body could
    be understood; both are None when it could not.
    """
    try:
        # UnicodeDecodeError is a ValueError, so bad bytes land here as well.
        payload = json.loads(content.decode('utf-8'))
    except ValueError:
        return None, None

    if not isinstance(payload, dict):
        return None, None

    data = payload.get('responseData')
    if isinstance(data, dict):
        translated = data.get('translatedText')
        if isinstance(translated, basestring):
            return translated, None

    details = payload.get('responseDetails')
    if isinstance(details, basestring):
        return None, details
    return None, None


# Class that translates text via Google Translate.
#
# http://code.google.com/apis/ajaxlanguage/documentation/

class GoogleTranslate(Module):

    def do(self, connection, event, nick, userhost, replypath, what, admin_unlocked):
        """Handle "translate <from> <to> <text...>" and reply with the result.

        Any other input (different command word, or fewer than four
        space-separated words) is ignored and None is returned.  Otherwise
        the text is sent to the translate service and the outcome of
        self.reply() is returned.  If the service reports an error, its
        responseDetails message is replied instead; if the response cannot
        be understood at all, nothing is replied and None is returned.

        Errors raised by urlopen() are not caught here.
        """
        whats = what.split(' ')
        if whats[0] != 'translate' or len(whats) < 4:
            return None

        fromlang = whats[1]
        tolang = whats[2]
        text = ' '.join(whats[3:])
        # urlencode() needs bytes.  Only encode real unicode: calling
        # .encode() on a byte string makes Python 2 decode it as ASCII
        # first, which raises on any non-ASCII input.
        if isinstance(text, unicode):
            text = text.encode('utf-8')
        langpair = '%s|%s' % (fromlang, tolang)

        params = urlencode((
            ('v', 1.0),
            ('q', text),
            ('langpair', langpair),
        ))
        url = _GT_URL + params

        response = urlopen(url)
        try:
            content = response.read()
        finally:
            response.close()

        translation, details = _parse_response(content)
        if translation is None:
            if details is None:
                return None
            return self.reply(connection, replypath, details)

        # json has already undone the \uXXXX and backslash escapes; what is
        # left are HTML character references such as &quot; or &#39;.
        translation = _ENTITY_RE.sub(_decode_entity, translation)
        return self.reply(connection, replypath, translation)

# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;