dr.botzo/modules/GoogleTranslate.py

# GoogleTranslate - go out to google and translate sentences
# Copyright (C) 2010  Brian S. Stephan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from urllib2 import urlopen
from urllib import urlencode
from htmlentitydefs import name2codepoint
import json
import re

from extlib import irclib

from Module import Module

# Class that translates text via Google Translate.
#
# http://code.google.com/apis/ajaxlanguage/documentation/

class GoogleTranslate(Module):

    """Translate text via the Google AJAX Language API.

    Handles commands of the form 'translate <from> <to> <text ...>' and
    replies with the translated text.
    """

    # HTML character references: &#xHHH;, &#DDD; and &name;. The numeric
    # forms are listed first so '&#x41;' is never mistaken for a name, and
    # names starting with 'x' (e.g. '&xi;') are not mistaken for hex.
    _entity_re = re.compile(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);')

    @staticmethod
    def _decode_entity(match):
        """Return the character for one matched HTML character reference.

        References that cannot be resolved (unknown entity name, code point
        that unichr() rejects) are returned unchanged instead of raising.
        """
        ref = match.group(1)
        try:
            if ref[0] != '#':
                return unichr(name2codepoint[ref])
            if ref[1] in 'xX':
                return unichr(int(ref[2:], 16))
            return unichr(int(ref[1:]))
        except (KeyError, ValueError, OverflowError):
            return match.group(0)

    def do(self, connection, event, nick, userhost, replypath, what, admin_unlocked):
        """Translate the text in 'what' and send the result via self.reply().

        connection, replypath -- passed through to self.reply()
        what -- command text; acted on only when it has the form
                'translate <fromlang> <tolang> <text ...>'
        event, nick, userhost, admin_unlocked -- not used by this module

        Returns whatever self.reply() returns, or None when 'what' is not a
        translate command. Errors raised by urlopen() are not caught here.
        """
        whats = what.split(' ')
        if whats[0] != 'translate' or len(whats) < 4:
            return None

        fromlang, tolang = whats[1], whats[2]
        text = ' '.join(whats[3:]).encode('utf-8')

        langpair = '%s|%s' % (fromlang, tolang)
        gt_url = 'http://ajax.googleapis.com/ajax/services/language/translate?'
        params = urlencode((('v', 1.0), ('q', text), ('langpair', langpair)))

        # always release the HTTP connection, even if read() fails
        response = urlopen(gt_url + params)
        try:
            content = response.read()
        finally:
            response.close()

        # Let the json module undo the JSON escaping (\uXXXX, \\, \") rather
        # than slicing the raw text, which misfired on ordinary words and on
        # responses that carry no translation.
        # NOTE(review): assumes the documented response layout
        # {"responseData": {"translatedText": ...}, ...}, with responseData
        # null on errors -- confirm against the API documentation.
        try:
            payload = json.loads(content.decode('utf-8'))
            translation = payload['responseData']['translatedText']
        except (ValueError, KeyError, TypeError):
            return self.reply(connection, replypath, 'unable to translate that')

        # the service HTML-escapes its output (e.g. &quot;, &#39;); decode every
        # reference in a single pass so nothing gets unescaped twice
        translation = self._entity_re.sub(self._decode_entity, translation)

        return self.reply(connection, replypath, translation)

# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;
GPLv3 headers on the split out files 2010-07-28 23:47:29 -05:00			`# GoogleTranslate - go out to google and translate sentences`
			`# Copyright (C) 2010 Brian S. Stephan`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`

Forgot to add files :( 2010-07-27 20:35:01 -05:00			`from urllib2 import urlopen`
			`from urllib import urlencode`
Generalize translation code in GoogleTranslate.py 2010-07-30 22:33:57 -05:00			`from htmlentitydefs import name2codepoint`
			`import re`
Forgot to add files :( 2010-07-27 20:35:01 -05:00
renaming 'irclib' dir to 'extlib' 2010-07-30 18:34:10 -05:00			`from extlib import irclib`
making imports fit my style convention, actually importing os in the module that needs it 2010-07-29 00:18:20 -05:00
			`from Module import Module`

comment nitpicking and restyling 2010-07-29 00:04:01 -05:00			`# Class that translates text via Google Translate.`
			`#`
			`# http://code.google.com/apis/ajaxlanguage/documentation/`

Forgot to add files :( 2010-07-27 20:35:01 -05:00			`class GoogleTranslate(Module):`

			`def do(self, connection, event, nick, userhost, replypath, what, admin_unlocked):`
			`whats = what.split(' ')`
			`if whats[0] == 'translate' and len(whats) >= 4:`
			`fromlang = whats[1]`
			`tolang = whats[2]`
			`text = ' '.join(whats[3:])`
Fix utf-8 error in GoogleTranslate 2010-07-30 21:22:16 -05:00			`text = text.encode('utf-8')`
Forgot to add files :( 2010-07-27 20:35:01 -05:00
			`langpair = '%s\|%s' % (fromlang, tolang)`
			`gt_url = 'http://ajax.googleapis.com/ajax/services/language/translate?'`
			`params = urlencode( (('v', 1.0), ('q', text), ('langpair', langpair),) )`
			`url = gt_url + params`
			`content = urlopen(url).read()`
			`start_idx = content.find('"translatedText":"')+18`
			`translation = content[start_idx:]`
			`end_idx = translation.find('"}, "')`
			`translation = translation[:end_idx]`

unicode fixes, in part to support what is going on in an in-dev weather module 2010-07-30 19:57:10 -05:00			`# decode the string, since it may include unicode. it will be encoded later.`
			`translation = translation.decode('utf-8')`

Forgot to add files :( 2010-07-27 20:35:01 -05:00			`# do some text conversion`
Generalize translation code in GoogleTranslate.py 2010-07-30 22:33:57 -05:00			`for i in re.findall('\u(\w{4})', translation):`
			`replacement = unichr(int(i, 16))`
			`translation = translation.replace('\u' + i, replacement)`

			`for i in re.findall('&(#)?(x)?(\w+?);', translation):`
			`replacement = i[2]`
			`if i[0] == "#":`
			`if i[1] == 'x':`
			`replacement = unichr(int(i[2], 16))`
			`else:`
			`replacement = unichr(int(i[2]))`
			`else:`
			`cp = name2codepoint.get(i[2])`
			`replacement = unichr(cp)`
			`translation = translation.replace('&' + i[0] + i[1] + i[2] + ';', replacement)`

			`translation = translation.replace('\\\\', '\\')`

			`#translation = translation.replace('\\u0026quot;', '"')`
			`#translation = translation.replace('\\u0026amp;', '&')`
			`#translation = translation.replace('\\u003c', '<')`
			`#translation = translation.replace('\\u0026lt;', '<')`
			`#translation = translation.replace('\\u003e', '>')`
			`#translation = translation.replace('\\u0026gt;', '>')`
			`#translation = translation.replace('\\u0026#39;', '\'')`
Forgot to add files :( 2010-07-27 20:35:01 -05:00
move common reply functionality into a method in Module 2010-07-30 00:34:57 -05:00			`return self.reply(connection, replypath, translation)`
Forgot to add files :( 2010-07-27 20:35:01 -05:00
vi modelines for split out files 2010-07-28 23:48:47 -05:00			`# vi:tabstop=4:expandtab:autoindent`
Add some modelines and fix indentation, I hope. 2010-07-28 00:11:58 -05:00			`# kate: indent-mode python;indent-width 4;replace-tabs on;`