From b26817ba77dd05632ed9fa015c135e509710535a Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Sat, 2 Jul 2022 13:09:37 -0400 Subject: [PATCH] Refactoring xml serialization. --- utils/twitter_fetcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/twitter_fetcher.py b/utils/twitter_fetcher.py index d3758a496..945220507 100644 --- a/utils/twitter_fetcher.py +++ b/utils/twitter_fetcher.py @@ -382,9 +382,6 @@ class TwitterFetcher: tweet_title = user_tweet['full_text'] tweet_text = linebreaks(content_tweet['full_text']) - - # Remove unserializable control characters - tweet_text = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', '', tweet_text) replaced = {} entities_media = content_tweet['entities'].get('media', []) @@ -472,6 +469,9 @@ class TwitterFetcher: ("%s %s" % (content_tweet['retweet_count'], "retweet" if content_tweet['retweet_count'] == 1 else "retweets")) if content_tweet['retweet_count'] else "", ) + # Remove unserializable control characters + content = re.sub(r'[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x84\x86-\x9F\uFDD0-\uFDEF\uFFFE\uFFFF]', '', content) + story = { 'title': tweet_title, 'link': "https://twitter.com/%s/status/%s" % (original_author_screen_name, user_tweet['id']),