# NewsBlur-viq/utils/facebook_fetcher.py (238 lines, 8.8 KiB, Python)

import datetime
2024-04-24 09:50:42 -04:00
import re
import dateutil.parser
from django.conf import settings
from django.utils import feedgenerator
from django.utils.html import linebreaks
2024-04-24 09:50:42 -04:00
from apps.reader.models import UserSubscription
2024-04-24 09:50:42 -04:00
from apps.social.models import MSocialServices
from utils import log as logging
2018-03-26 19:13:30 -07:00
from vendor.facebook import GraphAPIError
2024-04-24 09:43:56 -04:00
class FacebookFetcher:
    """Turn a Facebook page into an Atom feed via the Graph API.

    The page name is taken from ``feed.feed_address``; Graph API access is
    borrowed from a user who has connected Facebook (either the requesting
    user named in ``options`` or any subscriber of the feed).
    """

    # Raw string: "\w" in a plain string literal is an invalid escape sequence.
    _PAGE_NAME_RE = re.compile(r"facebook\.com/(\w+)/?")

    # HTML wrappers for story bodies; filled with (message, attachments/embed).
    _POST_TEMPLATE = (
        '<div class="NB-facebook-rss">\n'
        '<div class="NB-facebook-rss-message">%s</div>\n'
        '<div class="NB-facebook-rss-picture">%s</div>\n'
        "</div>"
    )
    _VIDEO_TEMPLATE = (
        '<div class="NB-facebook-rss">\n'
        '<div class="NB-facebook-rss-message">%s</div>\n'
        '<div class="NB-facebook-rss-embed">%s</div>\n'
        "</div>"
    )

    def __init__(self, feed, options=None):
        """Store the feed being fetched and the optional fetch options.

        Args:
            feed: Feed object; ``feed_address`` and ``log_title`` are read here,
                and ``save_feed_history`` is called on expired tokens.
            options: Optional dict; only ``requesting_user_id`` is consulted.
        """
        self.feed = feed
        self.options = options or {}

    def fetch(self):
        """Fetch the page's posts and videos and serialize them as Atom.

        Returns:
            The feed rendered by ``Atom1Feed.writeString("utf-8")``, or ``None``
            when the address holds no page name or no Facebook API is available.
        """
        page_name = self.extract_page_name()
        if not page_name:
            return None

        facebook_user = self.facebook_user()
        if not facebook_user:
            return None

        # If 'video', use video API to get embed:
        #   f.get_object('tastyvegetarian', fields='posts')
        #   f.get_object('1992797300790726', fields='embed_html')
        feed = self.fetch_page_feed(facebook_user, page_name, "name,about,posts,videos,photos")

        default_title = "%s on Facebook" % page_name
        data = {
            "title": feed.get("name", default_title),
            "link": feed.get("link", "https://facebook.com/%s" % page_name),
            "description": feed.get("about", default_title),
            "lastBuildDate": datetime.datetime.utcnow(),
            "generator": "NewsBlur Facebook API Decrapifier - %s" % settings.NEWSBLUR_URL,
            "docs": None,
            "feed_url": self.feed.feed_address,
        }
        rss = feedgenerator.Atom1Feed(**data)

        merged_data = []
        for post in feed.get("posts", {}).get("data") or []:
            story_data = self.page_posts_story(facebook_user, post)
            if story_data:
                merged_data.append(story_data)

        # Videos never add stories of their own; they only enrich a post that
        # shares the same link.
        for video in feed.get("videos", {}).get("data") or []:
            story_data = self.page_video_story(facebook_user, video)
            if not story_data:
                continue
            for seen_data in merged_data:
                if story_data["link"] == seen_data["link"]:
                    # Video wins over posts (and attachments)
                    seen_data["description"] = story_data["description"]
                    seen_data["title"] = story_data["title"]
                    break

        for story_data in merged_data:
            rss.add_item(**story_data)

        return rss.writeString("utf-8")

    def extract_page_name(self):
        """Return the first path segment after ``facebook.com/`` in the feed address.

        Returns:
            The page name, or ``None`` when the address does not match.
        """
        match = self._PAGE_NAME_RE.search(self.feed.feed_address)
        if not match:
            return None
        return match.group(1)

    def facebook_user(self):
        """Find a Graph API client to fetch with.

        When ``options["requesting_user_id"]`` is set, only that user's client is
        tried. Otherwise the feed's subscribers are scanned and the first one
        with a Facebook uid and a usable client is used.

        Returns:
            The object returned by ``MSocialServices.facebook_api()``, or ``None``
            (after logging) when no client could be found.
        """
        facebook_api = None
        requesting_user_id = self.options.get("requesting_user_id", None)

        if requesting_user_id:
            social_services = MSocialServices.get_user(requesting_user_id)
            facebook_api = social_services.facebook_api()
            if not facebook_api:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, self.options)
                )
                return None
            return facebook_api

        usersubs = UserSubscription.objects.filter(feed=self.feed)
        if not usersubs:
            logging.debug(
                " ***> [%-30s] ~FRFacebook fetch failed: %s: No subscriptions"
                % (self.feed.log_title[:30], self.feed.feed_address)
            )
            return None

        for sub in usersubs:
            social_services = MSocialServices.get_user(sub.user_id)
            if not social_services.facebook_uid:
                continue
            facebook_api = social_services.facebook_api()
            if facebook_api:
                break

        if not facebook_api:
            logging.debug(
                " ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s"
                % (self.feed.log_title[:30], self.feed.feed_address, usersubs[0].user.username)
            )
            return None

        return facebook_api

    def fetch_page_feed(self, facebook_user, page, fields):
        """Request ``fields`` of ``page`` from the Graph API.

        Args:
            facebook_user: Graph API client exposing ``get_object``.
            page: Page name or id to look up.
            fields: Comma-separated field list passed through to the API.

        Returns:
            The API response, or ``{}`` when the response is empty or the call
            raised ``GraphAPIError``.
        """
        try:
            stories = facebook_user.get_object(page, fields=fields)
        except GraphAPIError as e:
            message = str(e).lower()
            if "session has expired" in message:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook page failed/expired, disconnecting facebook: %s: %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, e)
                )
                self.feed.save_feed_history(560, "Facebook Error: Expired token")
            # Always bail out here: `stories` is unbound after any API error.
            return {}

        return stories or {}

    @staticmethod
    def _image_tag(attachment):
        """Return an ``<img>`` tag for the attachment's image, or ``""`` if it has none."""
        media = attachment.get("media") or {}
        src = (media.get("image") or {}).get("src")
        if not src:
            return ""
        return '<img src="%s" />' % src

    def page_posts_story(self, facebook_user, page_story):
        """Build the feed item for one page post.

        Args:
            facebook_user: Graph API client exposing ``get_object``.
            page_story: Post dict; reads ``message``, ``created_time`` and ``id``.

        Returns:
            A dict of ``add_item`` keyword arguments, or ``None`` when the post
            has no ``message``.
        """
        if "message" not in page_story:
            # Probably a story shared on the page's timeline, not a published story
            return None

        message = linebreaks(page_story["message"])
        created_date = page_story["created_time"]
        if isinstance(created_date, str):
            created_date = dateutil.parser.parse(created_date)

        fields = facebook_user.get_object(page_story["id"], fields="permalink_url,link,attachments")
        # Only require permalink_url when the post carries no link of its own.
        permalink = fields["link"] if "link" in fields else fields["permalink_url"]

        image_tags = []
        for attachment in (fields.get("attachments") or {}).get("data") or []:
            image_tags.append(self._image_tag(attachment))
            for subattachment in (attachment.get("subattachments") or {}).get("data") or []:
                image_tags.append(self._image_tag(subattachment))
        attachments_html = "".join(image_tags)

        return {
            "title": message,
            "link": permalink,
            "description": self._POST_TEMPLATE % (message, attachments_html),
            "categories": [],
            "unique_id": "fb_post:%s" % page_story["id"],
            "pubdate": created_date,
        }

    def page_video_story(self, facebook_user, page_story):
        """Build the feed item for one page video.

        Args:
            facebook_user: Graph API client exposing ``get_object``.
            page_story: Video dict; reads ``description``, ``updated_time``,
                ``id`` and optionally ``story``.

        Returns:
            A dict of ``add_item`` keyword arguments, or ``None`` when the video
            has no ``description``.
        """
        if "description" not in page_story:
            return None

        message = linebreaks(page_story["description"])
        created_date = page_story["updated_time"]
        if isinstance(created_date, str):
            created_date = dateutil.parser.parse(created_date)

        # One request for both fields instead of two round-trips for the same object.
        video = facebook_user.get_object(page_story["id"], fields="permalink_url,embed_html")
        permalink = video["permalink_url"]
        if permalink.startswith("/"):
            permalink = "https://www.facebook.com%s" % permalink

        return {
            "title": page_story.get("story", message),
            "link": permalink,
            "description": self._VIDEO_TEMPLATE % (message, video.get("embed_html", "")),
            "categories": [],
            "unique_id": "fb_post:%s" % page_story["id"],
            "pubdate": created_date,
        }

    def favicon_url(self):
        """Return the URL of the page's profile picture.

        Returns:
            The picture URL, or ``None`` when the address holds no page name, no
            Facebook API is available, the API call fails, or the response has
            no ``picture``.
        """
        page_name = self.extract_page_name()
        if not page_name:
            # Same guard as fetch(): nothing to look up without a page name.
            return None

        facebook_user = self.facebook_user()
        if not facebook_user:
            logging.debug(
                " ***> [%-30s] ~FRFacebook icon failed, disconnecting facebook: %s"
                % (self.feed.log_title[:30], self.feed.feed_address)
            )
            return None

        try:
            picture_data = facebook_user.get_object(page_name, fields="picture")
        except GraphAPIError as e:
            message = str(e).lower()
            if "session has expired" in message:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook icon failed/expired, disconnecting facebook: %s: %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, e)
                )
            # Always bail out here: `picture_data` is unbound after any API error.
            return None

        if picture_data and "picture" in picture_data:
            return picture_data["picture"]["data"]["url"]
        return None