# GoogleTranslate - go out to google and translate sentences
# Copyright (C) 2010  Brian S. Stephan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from urllib2 import urlopen
from urllib import urlencode
from htmlentitydefs import name2codepoint
import re

from extlib import irclib

from Module import Module

# Class that translates text via Google Translate.
#
# http://code.google.com/apis/ajaxlanguage/documentation/

class GoogleTranslate(Module):
    """Translate text through the Google AJAX Language API on request.

    Responds to commands of the form ``translate <from> <to> <text...>``.
    """

    # Endpoint of the translation service; the query string is appended.
    _API_URL = 'http://ajax.googleapis.com/ajax/services/language/translate?'

    # Text that immediately precedes / follows the translated string in the
    # raw response body.
    _TEXT_START = '"translatedText":"'
    _TEXT_END = '"}, "'

    # A \uXXXX escape: a literal backslash, 'u', then exactly four hex digits.
    # The backslash must be escaped in the pattern; a bare "\u" would match a
    # plain letter 'u' and blow up on ordinary words such as "beautiful".
    _UNICODE_ESCAPE = re.compile(r'\\u([0-9a-fA-F]{4})')

    # An HTML character reference: &#xHH; (hex), &#DD; (decimal) or &name;.
    _HTML_ENTITY = re.compile(r'&(#[xX][0-9a-fA-F]+|#[0-9]+|\w+);')

    def do(self, connection, event, nick, userhost, replypath, what, admin_unlocked):
        """Handle a ``translate <from> <to> <text...>`` command.

        ``what`` is the command text. ``connection`` and ``replypath`` are
        handed to ``self.reply`` untouched; ``event``, ``nick``, ``userhost``
        and ``admin_unlocked`` are not used by this module.

        Returns the result of ``self.reply`` when a translation was obtained.
        Returns None when ``what`` is not a translate command with at least
        one word of text, or when the response body does not contain a
        translated string.
        """
        whats = what.split(' ')
        if whats[0] != 'translate' or len(whats) < 4:
            return None

        fromlang, tolang = whats[1], whats[2]
        text = ' '.join(whats[3:]).encode('utf-8')

        params = urlencode((
            ('v', 1.0),
            ('q', text),
            ('langpair', '%s|%s' % (fromlang, tolang)),
        ))

        # Always release the HTTP connection, even if reading fails.
        response = urlopen(self._API_URL + params)
        try:
            content = response.read()
        finally:
            response.close()

        raw = self._extract_translation(content)
        if raw is None:
            # No translated text in the response; stay silent rather than
            # echoing a fragment of the raw response to the channel.
            return None

        # Decode the bytes, since they may include non-ASCII text, then undo
        # the escaping applied by the service.
        translation = self._unescape(raw.decode('utf-8'))
        return self.reply(connection, replypath, translation)

    @classmethod
    def _extract_translation(cls, content):
        """Return the raw translated string from ``content``, or None.

        None is returned when either the leading marker or the terminator
        is absent from the response body.
        """
        start = content.find(cls._TEXT_START)
        if start == -1:
            return None
        start += len(cls._TEXT_START)

        end = content.find(cls._TEXT_END, start)
        if end == -1:
            return None
        return content[start:end]

    @classmethod
    def _unescape(cls, text):
        """Decode unicode escapes, HTML entities and doubled backslashes.

        The order matters: an ampersand can itself arrive as a unicode
        escape, so escapes are decoded before entities are resolved.
        """
        text = cls._UNICODE_ESCAPE.sub(cls._escape_to_char, text)
        text = cls._HTML_ENTITY.sub(cls._entity_to_char, text)
        return text.replace('\\\\', '\\')

    @staticmethod
    def _escape_to_char(match):
        """Convert one matched four-hex-digit escape to its character."""
        return unichr(int(match.group(1), 16))

    @staticmethod
    def _entity_to_char(match):
        """Convert one matched HTML entity to its character.

        Unknown entity names and out-of-range code points are left exactly
        as they appeared in the text.
        """
        body = match.group(1)
        try:
            if body[:2] in ('#x', '#X'):
                return unichr(int(body[2:], 16))
            if body[0] == '#':
                return unichr(int(body[1:]))
            return unichr(name2codepoint[body])
        except (KeyError, ValueError, OverflowError):
            return match.group(0)

# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;