Generalize translation code in GoogleTranslate.py

This commit is contained in:
kad 2010-07-30 22:33:57 -05:00
parent 92d5b5d94e
commit 1920a12759
1 changed file with 27 additions and 7 deletions

View File

@@ -16,6 +16,8 @@
from urllib2 import urlopen
from urllib import urlencode
from htmlentitydefs import name2codepoint
import re
from extlib import irclib
@@ -49,13 +51,31 @@ class GoogleTranslate(Module):
translation = translation.decode('utf-8')
# do some text conversion
translation = translation.replace('\\u0026quot;', '"')
translation = translation.replace('\\u0026amp;', '&')
translation = translation.replace('\\u003c', '<')
translation = translation.replace('\\u0026lt;', '<')
translation = translation.replace('\\u003e', '>')
translation = translation.replace('\\u0026gt;', '>')
translation = translation.replace('\\u0026#39;', '\'')
for i in re.findall('\u(\w{4})', translation):
replacement = unichr(int(i, 16))
translation = translation.replace('\u' + i, replacement)
for i in re.findall('&(#)?(x)?(\w+?);', translation):
replacement = i[2]
if i[0] == "#":
if i[1] == 'x':
replacement = unichr(int(i[2], 16))
else:
replacement = unichr(int(i[2]))
else:
cp = name2codepoint.get(i[2])
replacement = unichr(cp)
translation = translation.replace('&' + i[0] + i[1] + i[2] + ';', replacement)
translation = translation.replace('\\\\', '\\')
#translation = translation.replace('\\u0026quot;', '"')
#translation = translation.replace('\\u0026amp;', '&')
#translation = translation.replace('\\u003c', '<')
#translation = translation.replace('\\u0026lt;', '<')
#translation = translation.replace('\\u003e', '>')
#translation = translation.replace('\\u0026gt;', '>')
#translation = translation.replace('\\u0026#39;', '\'')
return self.reply(connection, replypath, translation)