diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index 95a7a1c96..e584cbc48 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -52,6 +52,7 @@ from utils import json_functions as json
 from celery.exceptions import SoftTimeLimitExceeded
 from utils.twitter_fetcher import TwitterFetcher
 from utils.facebook_fetcher import FacebookFetcher
+from utils.reddit_fetcher import RedditFetcher
 from utils.json_fetcher import JSONFetcher
 
 # from utils.feed_functions import mail_feed_error_to_admin
@@ -152,7 +153,7 @@ class FetchFeed:
                 )
                 return FEED_ERRHTTP, None
             self.fpf = feedparser.parse(facebook_feed)
-        elif re.match(r'(.*?)reddit.com/\w+/?$', qurl(address, remove=['_'])):
+        elif re.match(r'(.*?)reddit.com/(.*?)$', qurl(address, remove=['_'])):
             reddit_feed = self.fetch_reddit()
             if not reddit_feed:
                 logging.debug(
diff --git a/utils/reddit_fetcher.py b/utils/reddit_fetcher.py
index 590dbcd97..b0881dcb8 100644
--- a/utils/reddit_fetcher.py
+++ b/utils/reddit_fetcher.py
@@ -25,11 +25,19 @@ class RedditFetcher:
         return self._api
 
     def fetch(self):
-        subreddit_name = self.extract_subreddit_name()
-        if not subreddit_name:
-            return
-
-        subreddit = self.fetch_subreddit(subreddit_name)
+        # Common subreddits handled differently
+        # Home page
+        if self.feed.feed_address == "https://reddit.com/.rss":
+            subreddit = self.fetch_subreddit("popular")
+        elif self.feed.feed_address == "https://reddit.com/r/all.rss":
+            subreddit = self.fetch_subreddit("all")
+        elif self.feed.feed_address == "https://reddit.com/r/popular.rss":
+            subreddit = self.fetch_subreddit("popular")
+        else:
+            subreddit_name = self.extract_subreddit_name()
+            if not subreddit_name:
+                return
+            subreddit = self.fetch_subreddit(subreddit_name)
 
         data = {}
         data['title'] = subreddit.title
@@ -91,7 +99,8 @@ class RedditFetcher:
         story_data = {}
         story_data['title'] = submission.title
         story_data['link'] = submission.url
-        story_data['description'] = submission.selftext
+        story_data['description'] = self.process_story_text(submission)
+        story_data['author_name'] = submission.author.name
         story_data['categories'] = []
         story_data['unique_id'] = "reddit_post:%s" % submission.id
         story_data['pubdate'] = datetime.datetime.fromtimestamp(submission.created_utc)
@@ -101,12 +110,40 @@ class RedditFetcher:
         story_data = {}
         story_data['title'] = submission.title
         story_data['link'] = submission.url
-        story_data['description'] = submission.selftext
+        story_data['description'] = self.process_story_text(submission)
+        story_data["author_name"] = submission.author.name
         story_data['categories'] = []
         story_data['unique_id'] = "reddit_post:%s" % submission.id
         story_data['pubdate'] = datetime.datetime.fromtimestamp(submission.created_utc)
         return story_data
 
+    def process_story_text(self, submission):
+        text = submission.selftext
+
+        # Wrap blocks indented with four spaces in <pre> tags
+        text = re.sub(r'(^\s{4})(.*\n)', r'<pre>\2</pre>', text, flags=re.M)
+        # Wrap links in <a> tags
+        text = re.sub(r'(https?://[^\s]+)', r'<a href="\1">\1</a>', text, flags=re.M)
+        # Wrap image links in <img> tags
+        text = re.sub(r'(https?://[^\s]+\.(jpg|jpeg|gif|png))', r'<img src="\1">', text, flags=re.M)
+        # Wrap bold text in <b> tags
+        text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
+        # Wrap italics text in <i> tags
+        text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
+        # Replace newlines with <br> tags
+
+        text = text.replace('\r\n', '\n')
+        text = text.replace('\r', '\n')
+        text = linebreaks(text)
+
+        # Add author and [link] [comments] footer
+        permalink = submission.permalink
+        if submission.is_self:
+            permalink = submission.url
+        text = f'{text}\n\nPosted by {submission.author.name} <a href="{submission.url}">[link]</a> <a href="{permalink}">[comments]</a>'
+
+        return text
+
     def favicon_url(self, subreddit=None):
         if not subreddit:
             subreddit_name = self.extract_subreddit_name()
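
Note on the feed_fetcher.py change: the old pattern reddit.com/\w+/?$ only matched a single word-character path segment, so addresses like reddit.com/r/all.rss never reached the Reddit fetcher; the loosened pattern routes any reddit.com address to it. A minimal sketch of the matching behavior, using made-up sample addresses and omitting the qurl() call that strips the "_" parameter:

import re

# Same pattern FetchFeed now uses to decide whether a feed address
# is handed to RedditFetcher.
REDDIT_RE = re.compile(r'(.*?)reddit.com/(.*?)$')

for address in [
    "https://reddit.com/.rss",                    # home page, mapped to r/popular in fetch()
    "https://reddit.com/r/all.rss",
    "https://www.reddit.com/r/programming/.rss",
    "https://example.com/not-reddit",             # no "reddit.com/", falls through to the normal fetch path
]:
    print(address, "->", bool(REDDIT_RE.match(address)))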
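
For reference, a standalone sketch of the markdown-style substitutions process_story_text() applies to submission.selftext, run on a made-up sample string. The <pre> and <img> passes, the linebreaks() call (presumably Django's linebreaks helper), and the author/[link]/[comments] footer are left out here:

import re

text = "Check **this** out: https://example.com/page and *maybe* more"

# Same relative order as in the patch: links, then bold, then italics.
text = re.sub(r'(https?://[^\s]+)', r'<a href="\1">\1</a>', text, flags=re.M)
text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)

print(text)
# Check <b>this</b> out: <a href="https://example.com/page">https://example.com/page</a> and <i>maybe</i> more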
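
Two assumptions worth calling out, since the top of utils/reddit_fetcher.py is not in this diff: process_story_text() relies on re and a linebreaks helper already being imported in that module, and PRAW returns submission.author as None for deleted accounts, so submission.author.name can raise AttributeError. A defensive variant, not part of the patch ("[deleted]" is just a placeholder label):

# Hypothetical guard for submissions whose author account was deleted.
author_name = submission.author.name if submission.author else "[deleted]"
story_data['author_name'] = author_name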