From 1920a127599721dd927da984f6d2b157ef1dcbab Mon Sep 17 00:00:00 2001 From: kad Date: Fri, 30 Jul 2010 21:33:57 -0600 Subject: [PATCH] Generalize translation code in GoogleTranslate.py --- modules/GoogleTranslate.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/modules/GoogleTranslate.py b/modules/GoogleTranslate.py index bcb35d7..c57aa38 100644 --- a/modules/GoogleTranslate.py +++ b/modules/GoogleTranslate.py @@ -16,6 +16,8 @@ from urllib2 import urlopen from urllib import urlencode +from htmlentitydefs import name2codepoint +import re from extlib import irclib @@ -49,13 +51,31 @@ class GoogleTranslate(Module): translation = translation.decode('utf-8') # do some text conversion - translation = translation.replace('\\u0026quot;', '"') - translation = translation.replace('\\u0026amp;', '&') - translation = translation.replace('\\u003c', '<') - translation = translation.replace('\\u0026lt;', '<') - translation = translation.replace('\\u003e', '>') - translation = translation.replace('\\u0026gt;', '>') - translation = translation.replace('\\u0026#39;', '\'') + for i in re.findall('\u(\w{4})', translation): + replacement = unichr(int(i, 16)) + translation = translation.replace('\u' + i, replacement) + + for i in re.findall('&(#)?(x)?(\w+?);', translation): + replacement = i[2] + if i[0] == "#": + if i[1] == 'x': + replacement = unichr(int(i[2], 16)) + else: + replacement = unichr(int(i[2])) + else: + cp = name2codepoint.get(i[2]) + replacement = unichr(cp) + translation = translation.replace('&' + i[0] + i[1] + i[2] + ';', replacement) + + translation = translation.replace('\\\\', '\\') + + #translation = translation.replace('\\u0026quot;', '"') + #translation = translation.replace('\\u0026amp;', '&') + #translation = translation.replace('\\u003c', '<') + #translation = translation.replace('\\u0026lt;', '<') + #translation = translation.replace('\\u003e', '>') + #translation = translation.replace('\\u0026gt;', '>') + #translation = translation.replace('\\u0026#39;', '\'') return self.reply(connection, replypath, translation)