# dr.botzo/modules/Babelfish.py

"""
Babelfish - go out to babelfish and translate sentences
Copyright (C) 2012  Brian S. Stephan

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import re
import urllib2
from urllib import urlencode

from Module import Module

class Babelfish(Module):

    """Class that translates text via Babelfish.

    http://babelfish.yahoo.com/

    Recognizes IRC input of the form ``!translate <from> <to> <text>``,
    posts the text to the Babelfish web form and scrapes the translation
    out of the HTML that comes back.
    """

    def do(self, connection, event, nick, userhost, what, admin_unlocked):
        """Handle IRC input.

        ``what`` is matched against ``!translate <from> <to> <text>``; the
        other arguments are only passed through to ``self.reply``.

        Returns whatever ``self.reply`` returns when a translation was
        obtained, or None when ``what`` is not a translate command or no
        translation could be extracted from the response.
        """

        match = re.search(r'^!translate\s+(\S+)\s+(\S+)\s+(.*)$', what)
        if not match:
            return None

        fromlang, tolang, text = match.groups()
        translation = self.translate(fromlang, tolang, text)
        if translation is None:
            # nothing usable came back; don't hand None to reply()
            return None

        return self.reply(connection, event, translation)

    def translate(self, fromlang, tolang, text):
        """Translate text from fromlang to tolang, assuming it's a valid pair.

        The language pair is sent to the service as ``<fromlang>_<tolang>``.

        Returns the translated string, or None if the result could not be
        located in the response. Errors raised by ``urllib2.urlopen`` (e.g.
        ``urllib2.URLError``) are not caught and propagate to the caller.
        """

        langpair = '%s_%s' % (fromlang, tolang)

        # do some text conversion
        text = text.replace('<', '< ')      # babelfish blows up on e.g. <text> but < text> is fine

        url = 'http://babelfish.yahoo.com/translate_txt'
        params = urlencode({'ei':'UTF-8', 'doit':'done', 'fr':'bf-home', 'intl':'1', 'tt':'urltext',
                            'trtext':text, 'lp':langpair})
        req = urllib2.Request(url, params)
        req.add_header('Accept-Charset', 'UTF-8,*;q=0.5')

        res = urllib2.urlopen(req)
        try:
            content = res.read()
        finally:
            # always release the connection, even if the read fails
            res.close()

        return self._extract_translation(content)

    @staticmethod
    def _extract_translation(content):
        """Pull the translated text out of a Babelfish result page.

        Returns the decoded translation, or None when either the result
        marker or the closing ``</div>`` after it is missing from content.
        """

        marker_idx = content.find('<div id="result"><div style="padding:')
        if marker_idx < 0:
            # test the raw find() result: adding the offset first would turn
            # the -1 "not found" value into a valid-looking positive index
            return None

        # 45 = the 37-character marker plus 8 more characters; presumably the
        # remainder of the opening tag's style attribute -- TODO confirm
        # against a live response
        start_idx = marker_idx + 45
        end_idx = content.find('</div>', start_idx)
        if end_idx < 0:
            # no closing tag; slicing with -1 would silently drop a character
            return None

        translation = content[start_idx:end_idx]

        # do some text conversion; '&amp;' is decoded last so that escaped
        # text such as '&amp;lt;' ends up as '&lt;' rather than '<'
        replacements = (
            ('&quot;', '"'),
            ('&lt;', '<'),
            ('&gt;', '>'),
            ('&#039;', '\''),
            ('&#039', '\''),
            ('&amp;', '&'),
        )
        for entity, char in replacements:
            translation = translation.replace(entity, char)

        # crappy attempt at undoing the '<' -> '< ' safety applied in translate()
        translation = translation.replace('< ', '<')

        return translation

if __name__ == '__main__':
    # Manual smoke test: translate a fixed English sentence to Japanese and
    # print the result wrapped in single quotes. This performs a live HTTP
    # request. The three None arguments are whatever the Module base class
    # constructor expects -- not visible from this file.
    babelfish = Babelfish(None, None, None)
    result = babelfish.translate('en', 'ja', 'i can\'t read it, there aren\'t any words there')
    if result is None:
        # translate() can return None; concatenating that would raise TypeError
        print('no translation found')
    else:
        print('\'' + result + '\'')

# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;