NewsBlur-viq/utils/twitter_fetcher.py

598 lines
25 KiB
Python
Raw Normal View History

import re
import datetime
2022-06-21 16:07:12 -04:00
from jmespath import search
from urllib.parse import urlparse, parse_qs
import tweepy
import dateutil.parser
from qurl import qurl
from django.conf import settings
from django.utils import feedgenerator
from django.utils.html import linebreaks
from django.utils.dateformat import DateFormat
from apps.social.models import MSocialServices
from apps.reader.models import UserSubscription
from utils import log as logging
2024-04-24 09:43:56 -04:00
class TwitterFetcher:
    """Build an Atom feed from a Twitter user, list, or search address.

    The fetcher borrows Twitter API credentials from a NewsBlur user (the
    requesting user when ``options["requesting_user_id"]`` is set, otherwise
    the first subscriber of the feed with a usable Twitter connection),
    downloads the tweets and renders each one as an HTML feed item.
    """

    # Characters removed from generated markup and author names before the
    # story is handed to the feed generator.
    _CONTROL_CHARS_RE = re.compile(
        r"[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x84\x86-\x9F\uFDD0-\uFDEF\uFFFE\uFFFF]"
    )
    # An "@" followed by word characters, i.e. a Twitter mention.
    _MENTION_RE = re.compile(r"@\w+")

    def __init__(self, feed, options=None):
        """Remember the feed being fetched and any fetch options.

        Args:
            feed: Feed object; ``feed_address``, ``log_title`` and
                ``save_feed_history`` are used by this class.
            options: Optional dict; only ``requesting_user_id`` is read here.
        """
        self.feed = feed
        self.address = self.feed.feed_address
        self.options = options or {}

    # ------------------------------------------------------------------ #
    # Small private helpers
    # ------------------------------------------------------------------ #

    def _log(self, template, *args):
        """Emit a debug line whose first ``%`` argument is the feed title."""
        logging.debug(template % ((self.feed.log_title[:30],) + args))

    @classmethod
    def _link_mentions(cls, text):
        """Wrap every ``@username`` in *text* in a link to that profile.

        Done in a single pass so a mention that occurs twice, or that is a
        prefix of another mention, is never re-replaced inside an anchor that
        was already generated.
        """

        def to_link(match):
            word = match.group(0)
            return '<a href="https://twitter.com/%s">%s</a>' % (word[1:], word)

        return cls._MENTION_RE.sub(to_link, text)

    @staticmethod
    def _author_fullname(name, screen_name):
        """Return ``"name (screen_name)"``, or just the screen name if equal."""
        if screen_name != name:
            return "%s (%s)" % (name, screen_name)
        return screen_name

    @staticmethod
    def _as_dict(author):
        """Return *author* as a dict (API model objects expose ``__dict__``)."""
        if isinstance(author, dict):
            return author
        return author.__dict__

    @staticmethod
    def _format_date(value):
        """Format a datetime (or a date string) for display under a tweet."""
        if isinstance(value, str):
            value = dateutil.parser.parse(value)
        return DateFormat(value).format("l, F jS, Y g:ia").replace(".", "")

    @classmethod
    def _strip_control_chars(cls, text):
        """Remove the control characters matched by ``_CONTROL_CHARS_RE``."""
        return cls._CONTROL_CHARS_RE.sub("", text)

    # ------------------------------------------------------------------ #
    # Entry point
    # ------------------------------------------------------------------ #

    def fetch(self, address=None):
        """Fetch tweets for *address* and return them as an Atom document.

        The address is dispatched on its shape: ``/lists/`` addresses are
        fetched as list timelines, ``/search`` addresses as searches, and
        anything else as a single user's timeline.

        Args:
            address: Address to fetch; defaults to ``self.feed.feed_address``.

        Returns:
            The result of ``Atom1Feed.writeString("utf-8")``, or ``None`` when
            the address cannot be parsed or nothing could be fetched.
        """
        data = {}
        if not address:
            address = self.feed.feed_address
        self.address = address

        if "/lists/" in address:
            list_id = self.extract_list_id()
            if not list_id:
                return None

            tweets, list_info = self.fetch_list_timeline(list_id)
            if not tweets:
                return None

            data["title"] = "%s on Twitter" % list_info.full_name
            data["link"] = "https://twitter.com%s" % list_info.uri
            data["description"] = "%s on Twitter" % list_info.full_name
        elif "/search" in address:
            search_query = self.extract_search_query()
            if not search_query:
                return None

            tweets = self.fetch_search_query(search_query)
            if not tweets:
                return None

            data["title"] = '"%s" on Twitter' % search_query
            data["link"] = "%s" % address
            data["description"] = 'Searching "%s" on Twitter' % search_query
        else:
            username = self.extract_username()
            if not username:
                self._log(
                    " ***> [%-30s] ~FRTwitter fetch failed: %s: No active user API access",
                    self.address,
                )
                return None

            twitter_user = self.fetch_user(username)
            if not twitter_user:
                return None
            tweets = self.user_timeline(twitter_user)

            data["title"] = "%s on Twitter" % username
            data["link"] = "https://twitter.com/%s" % username
            data["description"] = "%s on Twitter" % username

        data["lastBuildDate"] = datetime.datetime.utcnow()
        data["generator"] = "NewsBlur Twitter API Decrapifier - %s" % settings.NEWSBLUR_URL
        data["docs"] = None
        data["feed_url"] = address
        rss = feedgenerator.Atom1Feed(**data)

        for tweet in tweets:
            rss.add_item(**self.tweet_story(tweet.__dict__))

        return rss.writeString("utf-8")

    # ------------------------------------------------------------------ #
    # Address parsing
    # ------------------------------------------------------------------ #

    def extract_username(self):
        """Return the username at the end of ``self.address``, or ``None``."""
        try:
            # Drop the "_" query parameter before matching the trailing name.
            address = qurl(self.address, remove=["_"])
            match = re.search(r"twitter\.com/(\w+)/?$", address)
        except IndexError:
            return None
        return match.group(1) if match else None

    def extract_list_id(self):
        """Return the list id from a ``twitter.com/i/lists/<id>`` address."""
        match = re.search(r"twitter\.com/i/lists/(\w+)/?", self.address)
        return match.group(1) if match else None

    def extract_search_query(self):
        """Return the first ``q`` query-string value of ``self.address``."""
        address = qurl(self.address, remove=["_"])
        query_dict = parse_qs(urlparse(address).query)
        if "q" in query_dict:
            return query_dict["q"][0]
        return None

    # ------------------------------------------------------------------ #
    # API access
    # ------------------------------------------------------------------ #

    def twitter_api(self, include_social_services=False):
        """Find a working Twitter API handle for this feed.

        Uses the requesting user's connection when
        ``options["requesting_user_id"]`` is set; otherwise walks the feed's
        subscribers and takes the first one whose Twitter connection works.

        Args:
            include_social_services: When true, a successful result is the
                tuple ``(api, social_services)`` instead of just ``api``.

        Returns:
            The API handle (or tuple, see above), or ``None`` on any failure.
            Note that failure is a bare ``None`` even when
            ``include_social_services`` is true.
        """
        api = None
        social_services = None
        usersubs = None
        requesting_user_id = self.options.get("requesting_user_id", None)

        if requesting_user_id:
            social_services = MSocialServices.get_user(requesting_user_id)
            try:
                api = social_services.twitter_api()
            except tweepy.error.TweepError as e:
                self._log(" ***> [%-30s] ~FRTwitter fetch failed: %s: %s", self.address, e)
                return None
        else:
            usersubs = UserSubscription.objects.filter(feed=self.feed)
            if not usersubs:
                self._log(
                    " ***> [%-30s] ~FRTwitter fetch failed: %s: No subscriptions", self.address
                )
                return None
            for sub in usersubs:
                social_services = MSocialServices.get_user(sub.user_id)
                if not social_services.twitter_uid:
                    continue
                try:
                    api = social_services.twitter_api()
                except tweepy.error.TweepError as e:
                    self._log(" ***> [%-30s] ~FRTwitter fetch failed: %s: %s", self.address, e)
                    continue
                if api:
                    break

        if not api:
            # `usersubs` only exists on the subscriber-scan path; fall back to
            # the requesting user's id so this check is safe on both paths.
            if usersubs is not None:
                who = usersubs[0].user.username
            else:
                who = requesting_user_id
            self._log(
                " ***> [%-30s] ~FRTwitter fetch failed: %s: No twitter API for %s",
                self.address,
                who,
            )
            return None

        if include_social_services:
            return api, social_services
        return api

    def disconnect_twitter(self):
        """Disconnect the Twitter account whose credentials are being used.

        Does nothing when no API handle can be obtained (``twitter_api``
        returns a bare ``None`` in that case, which cannot be unpacked).
        """
        result = self.twitter_api(include_social_services=True)
        if not result:
            return
        _, social_services = result
        social_services.disconnect_twitter()

    # ------------------------------------------------------------------ #
    # Fetching
    # ------------------------------------------------------------------ #

    def fetch_user(self, username):
        """Look up *username* through the API.

        Known API failures are logged, recorded with
        ``feed.save_feed_history`` and turned into a ``None`` return; an
        expired token additionally disconnects the Twitter account.

        Returns:
            The user object from ``get_user``, or ``None`` on failure.

        Raises:
            tweepy.error.TweepError: For API errors not recognised below.
        """
        twitter_api = self.twitter_api()
        if not twitter_api:
            return None

        try:
            twitter_user = twitter_api.get_user(username)
        except TypeError as e:
            self._log(
                " ***> [%-30s] ~FRTwitter fetch failed, disconnecting twitter: %s: %s",
                self.address,
                e,
            )
            self.feed.save_feed_history(560, "Twitter Error: %s" % (e,))
            return None
        except tweepy.error.TweepError as e:
            # Lower-cased once; every needle below must be lower case too.
            message = str(e).lower()
            if "suspended" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(562, "Twitter Error: User suspended")
                return None
            elif "expired token" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user expired, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(563, "Twitter Error: Expired token")
                self.disconnect_twitter()
                return None
            elif "not found" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(564, "Twitter Error: User not found")
                return None
            elif "not authenticate you" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, (not) disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(565, "Twitter Error: API not authorized")
                return None
            elif "over capacity" in message or "max retries" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(460, "Twitter Error: Over capacity")
                return None
            elif "503" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter throwing a 503, ignoring... %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(463, "Twitter Error: Twitter's down")
                return None
            else:
                raise

        return twitter_user

    def user_timeline(self, twitter_user):
        """Return *twitter_user*'s timeline, or ``[]`` on a known failure.

        Raises:
            tweepy.error.TweepError: For API errors not recognised below.
        """
        try:
            tweets = twitter_user.timeline(tweet_mode="extended")
        except tweepy.error.TweepError as e:
            message = str(e).lower()
            if "not authorized" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter timeline failed, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(565, "Twitter Error: Not authorized")
                return []
            elif "user not found" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(566, "Twitter Error: User not found")
                return []
            elif "429" in message:
                self._log(" ***> [%-30s] ~FRTwitter rate limited: %s: %s", self.address, e)
                self.feed.save_feed_history(567, "Twitter Error: Rate limited")
                return []
            elif "blocked from viewing" in message:
                self._log(" ***> [%-30s] ~FRTwitter user blocked, ignoring: %s", e)
                self.feed.save_feed_history(568, "Twitter Error: Blocked from viewing")
                return []
            elif "over capacity" in message:
                self._log(" ***> [%-30s] ~FRTwitter over capacity, ignoring: %s", e)
                self.feed.save_feed_history(569, "Twitter Error: Over capacity")
                return []
            else:
                raise

        return tweets or []

    def fetch_list_timeline(self, list_id):
        """Fetch the tweets and metadata of the list *list_id*.

        Returns:
            ``(tweets, list_info)``; ``(None, None)`` when no API handle is
            available or a known API failure occurred.

        Raises:
            tweepy.error.TweepError: For API errors not recognised below.
        """
        twitter_api = self.twitter_api()
        if not twitter_api:
            return None, None

        try:
            list_timeline = twitter_api.list_timeline(list_id=list_id, tweet_mode="extended")
        except TypeError as e:
            self._log(
                " ***> [%-30s] ~FRTwitter list fetch failed, disconnecting twitter: %s: %s",
                self.address,
                e,
            )
            self.feed.save_feed_history(570, "Twitter Error: %s" % (e,))
            return None, None
        except tweepy.error.TweepError as e:
            # Lower-cased once; every needle below must be lower case too.
            message = str(e).lower()
            if "suspended" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(572, "Twitter Error: User suspended")
                return None, None
            elif "expired token" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user expired, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(573, "Twitter Error: Expired token")
                self.disconnect_twitter()
                return None, None
            elif "not found" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(574, "Twitter Error: User not found")
                return None, None
            elif "not authenticate you" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, (not) disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(565, "Twitter Error: API not authorized")
                return None, None
            elif "over capacity" in message or "max retries" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(470, "Twitter Error: Over capacity")
                return None, None
            else:
                raise

        list_info = twitter_api.get_list(list_id=list_id)

        return (list_timeline or []), list_info

    def fetch_search_query(self, search_query):
        """Run *search_query* against the API.

        Returns:
            The search results (``[]`` when empty), or ``None`` when no API
            handle is available or a known API failure occurred.

        Raises:
            tweepy.error.TweepError: For API errors not recognised below.
        """
        twitter_api = self.twitter_api()
        if not twitter_api:
            return None

        try:
            results = twitter_api.search(search_query, tweet_mode="extended")
        except TypeError as e:
            self._log(
                " ***> [%-30s] ~FRTwitter list fetch failed, disconnecting twitter: %s: %s",
                self.address,
                e,
            )
            self.feed.save_feed_history(570, "Twitter Error: %s" % (e,))
            return None
        except tweepy.error.TweepError as e:
            # Lower-cased once; every needle below must be lower case too.
            message = str(e).lower()
            if "suspended" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(572, "Twitter Error: User suspended")
                return None
            elif "expired token" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user expired, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(573, "Twitter Error: Expired token")
                self.disconnect_twitter()
                return None
            elif "not found" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(574, "Twitter Error: User not found")
                return None
            elif "not authenticate you" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter user not found, (not) disconnecting twitter: %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(565, "Twitter Error: API not authorized")
                return None
            elif "over capacity" in message or "max retries" in message:
                self._log(
                    " ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s",
                    self.address,
                    e,
                )
                self.feed.save_feed_history(470, "Twitter Error: Over capacity")
                return None
            else:
                raise

        return results or []

    # ------------------------------------------------------------------ #
    # Rendering
    # ------------------------------------------------------------------ #

    def tweet_story(self, user_tweet):
        """Render one tweet (as a dict) into feed-item keyword arguments.

        For a retweet the rendered body, author and "Posted by" link come from
        the retweeted status, while title, link, id and publication date come
        from *user_tweet* itself. A quoted tweet is rendered recursively and
        embedded as a ``<blockquote>``.

        Args:
            user_tweet: The tweet's attribute dict (``tweet.__dict__``).

        Returns:
            Dict with ``title``, ``link``, ``description``, ``author_name``,
            ``categories``, ``unique_id`` and ``pubdate``.
        """
        categories = set()
        full_text = user_tweet["full_text"]

        if full_text.startswith("RT @"):
            categories.add("retweet")
        elif user_tweet["in_reply_to_status_id"] or full_text.startswith("@"):
            categories.add("reply")
        else:
            categories.add("tweet")
        if user_tweet["favorite_count"]:
            categories.add("liked")
        if user_tweet["retweet_count"]:
            categories.add("retweeted")
        if "http" in full_text:
            categories.add("link")

        content_tweet = user_tweet
        entities = ""

        author = self._as_dict(user_tweet.get("author") or user_tweet.get("user"))
        author_screen_name = author["screen_name"]
        author_fullname = self._author_fullname(author["name"], author_screen_name)
        original_author_screen_name = author_screen_name

        if user_tweet["in_reply_to_user_id"] == author["id"]:
            categories.add("reply-to-self")

        retweet_author = ""
        tweet_link = "https://twitter.com/%s/status/%s" % (
            original_author_screen_name,
            user_tweet["id"],
        )
        if "retweeted_status" in user_tweet:
            retweet_author = """Retweeted by
<a href="https://twitter.com/%s"><img src="%s" style="height: 20px" /></a>
<a href="https://twitter.com/%s">%s</a>
on %s""" % (
                author_screen_name,
                author["profile_image_url_https"],
                author_screen_name,
                author_fullname,
                self._format_date(user_tweet["created_at"]),
            )
            # From here on, render the original tweet and its author.
            content_tweet = user_tweet["retweeted_status"].__dict__
            author = self._as_dict(content_tweet["author"])
            author_screen_name = author["screen_name"]
            author_fullname = self._author_fullname(author["name"], author_screen_name)
            tweet_link = "https://twitter.com/%s/status/%s" % (
                author_screen_name,
                user_tweet["retweeted_status"].id,
            )

        tweet_title = user_tweet["full_text"]
        tweet_text = linebreaks(content_tweet["full_text"])

        # Short URLs already expanded in tweet_text, so none is expanded twice.
        replaced = set()

        entities_media = content_tweet["entities"].get("media", [])
        if "extended_entities" in content_tweet:
            entities_media = content_tweet["extended_entities"].get("media", [])

        for media in entities_media:
            if "media_url_https" not in media:
                continue
            media_type = media["type"]
            if media_type not in ("photo", "video", "animated_gif"):
                continue

            # Swap the shortened media URL for a readable link. Skipped when
            # the URL is missing or empty: replacing "" would splice the link
            # between every character of the text.
            short_url = media.get("url")
            if short_url:
                if short_url in tweet_text:
                    tweet_title = tweet_title.replace(short_url, media["display_url"])
                if short_url not in replaced:
                    replacement = '<a href="%s">%s</a>' % (
                        media["expanded_url"],
                        media["display_url"],
                    )
                    tweet_text = tweet_text.replace(short_url, replacement)
                    replaced.add(short_url)

            if media_type == "photo":
                entities += '<img src="%s"> <hr>' % media["media_url_https"]
                categories.add("photo")
            else:
                # Take the first variant with the highest bitrate; fall back to
                # the first variant when none declares a positive bitrate.
                bitrate = 0
                chosen_variant = None
                for variant in media["video_info"]["variants"]:
                    if not chosen_variant:
                        chosen_variant = variant
                    if variant.get("bitrate", 0) > bitrate:
                        bitrate = variant["bitrate"]
                        chosen_variant = variant
                if chosen_variant:
                    entities += (
                        '<video src="%s" autoplay loop muted playsinline controls> <hr>'
                        % chosen_variant["url"]
                    )
                categories.add(media_type)

        # Replace all shortened urls with their full urls
        for url in content_tweet["entities"].get("urls", []):
            short_url = url["url"]
            if short_url in tweet_text:
                if short_url not in replaced:
                    replacement = '<a href="%s">%s</a>' % (
                        url["expanded_url"],
                        url["display_url"],
                    )
                    tweet_text = tweet_text.replace(short_url, replacement)
                    replaced.add(short_url)
                tweet_title = tweet_title.replace(short_url, url["display_url"])

        # Replace @username's with an <a> link
        tweet_text = self._link_mentions(tweet_text)

        quote_tweet_content = ""
        if "quoted_status" in content_tweet:
            quoted_story = self.tweet_story(content_tweet["quoted_status"].__dict__)
            quote_tweet_content = "<blockquote>" + quoted_story["description"] + "</blockquote>"

        favorite_count = content_tweet["favorite_count"]
        retweet_count = content_tweet["retweet_count"]
        stats_break = "<br /><br />" if favorite_count or retweet_count else ""
        likes = ""
        if favorite_count:
            likes = "<b>%s</b> %s" % (
                favorite_count,
                "like" if favorite_count == 1 else "likes",
            )
        stats_separator = ", " if favorite_count and retweet_count else ""
        retweets = ""
        if retweet_count:
            retweets = "<b>%s</b> %s" % (
                retweet_count,
                "retweet" if retweet_count == 1 else "retweets",
            )

        content = """<div class="NB-twitter-rss">
<div class="NB-twitter-rss-tweet">%s</div>
<div class="NB-twitter-rss-quote-tweet">%s</div>
<hr />
<div class="NB-twitter-rss-entities">%s</div>
<div class="NB-twitter-rss-author">
Posted by
<a href="https://twitter.com/%s"><img src="%s" style="height: 32px" /></a>
<a href="https://twitter.com/%s">%s</a>
on <a href="%s">%s</a></div>
<div class="NB-twitter-rss-retweet">%s</div>
<div class="NB-twitter-rss-stats">%s %s%s %s</div>
</div>""" % (
            tweet_text,
            quote_tweet_content,
            entities,
            author_screen_name,
            author["profile_image_url_https"],
            author_screen_name,
            author_fullname,
            tweet_link,
            self._format_date(content_tweet["created_at"]),
            retweet_author,
            stats_break,
            likes,
            stats_separator,
            retweets,
        )

        # Remove unserializable control characters
        content = self._strip_control_chars(content)
        author_fullname = self._strip_control_chars(author_fullname)

        return {
            "title": tweet_title,
            "link": "https://twitter.com/%s/status/%s"
            % (original_author_screen_name, user_tweet["id"]),
            "description": content,
            "author_name": author_fullname,
            "categories": list(categories),
            "unique_id": "tweet:%s" % user_tweet["id"],
            "pubdate": user_tweet["created_at"],
        }