Twitter: unescape some XML entities that may show up in the input.

This should probably be moved into DrBotIRC or somewhere similar, since
it will likely be handy elsewhere.
This commit is contained in:
Brian S. Stephan 2011-01-19 23:12:01 -06:00
parent a961180065
commit 5ec7ac7177
1 changed files with 12 additions and 4 deletions

View File

@ -246,14 +246,14 @@ class Twitter(Module):
if tweet.retweeted_status:
retweet = tweet.retweeted_status
if print_source:
return '%s (RT %s): %s [%s]' % (tweet.user.name.encode('utf-8', 'ignore'), retweet.user.name.encode('utf-8', 'ignore'), retweet.text.encode('utf-8', 'ignore'), tweet.id)
return '%s (RT %s): %s [%s]' % (tweet.user.name.encode('utf-8', 'ignore'), retweet.user.name.encode('utf-8', 'ignore'), self._unescape(retweet.text.encode('utf-8', 'ignore')), tweet.id)
else:
return '(RT %s): %s [%s]' % (retweet.user.name.encode('utf-8', 'ignore'), retweet.text.encode('utf-8', 'ignore'), tweet.id)
return '(RT %s): %s [%s]' % (retweet.user.name.encode('utf-8', 'ignore'), self._unescape(retweet.text.encode('utf-8', 'ignore')), tweet.id)
else:
if print_source:
return '%s: %s [%s]' % (tweet.user.name.encode('utf-8', 'ignore'), tweet.text.encode('utf-8', 'ignore'), tweet.id)
return '%s: %s [%s]' % (tweet.user.name.encode('utf-8', 'ignore'), self._unescape(tweet.text.encode('utf-8', 'ignore')), tweet.id)
else:
return '%s [%s]' % (tweet.text.encode('utf-8', 'ignore'), tweet.id)
return '%s [%s]' % (self._unescape(tweet.text.encode('utf-8', 'ignore')), tweet.id)
def _get_last_since_id(self):
"""Get the since_id out of the database."""
@ -321,5 +321,13 @@ class Twitter(Module):
return latest
def _unescape(self, text):
"""Convert <, >, & to their real entities."""
text = text.replace('&lt;', '<')
text = text.replace('&gt;', '>')
text = text.replace('&amp;', '&')
return text
# vi:tabstop=4:expandtab:autoindent
# kate: indent-mode python;indent-width 4;replace-tabs on;