# Source: NewsBlur-viq/utils/facebook_fetcher.py (web view header: 236 lines, 8.8 KiB, Python)

import re
import datetime
import dateutil.parser
from django.conf import settings
from django.utils import feedgenerator
from django.utils.html import linebreaks
from apps.social.models import MSocialServices
from apps.reader.models import UserSubscription
from utils import log as logging
# (blame-view residue) 2018-03-26 19:13:30 -07:00
from vendor.facebook import GraphAPIError
# (blame-view residue) 2024-04-24 09:43:56 -04:00
class FacebookFetcher:
    """Turn a Facebook page into an Atom feed using a subscriber's Graph API client.

    ``feed`` is the feed record being fetched; this class reads its
    ``feed_address`` and ``log_title`` and calls ``save_feed_history`` on it.
    ``options`` is an optional dict; the only key read here is
    ``requesting_user_id``.
    """

    def __init__(self, feed, options=None):
        self.feed = feed
        self.options = options or {}

    def fetch(self):
        """Return the page rendered as an Atom XML string.

        Returns ``None`` when the page name cannot be parsed from the feed
        address or when no Facebook API client is available.
        """
        page_name = self.extract_page_name()
        if not page_name:
            return

        facebook_user = self.facebook_user()
        if not facebook_user:
            return

        # If 'video', use video API to get embed:
        # f.get_object('tastyvegetarian', fields='posts')
        # f.get_object('1992797300790726', fields='embed_html')

        feed = self.fetch_page_feed(facebook_user, page_name, "name,about,posts,videos,photos")

        data = {}
        data["title"] = feed.get("name", "%s on Facebook" % page_name)
        data["link"] = feed.get("link", "https://facebook.com/%s" % page_name)
        data["description"] = feed.get("about", "%s on Facebook" % page_name)
        data["lastBuildDate"] = datetime.datetime.utcnow()
        data["generator"] = "NewsBlur Facebook API Decrapifier - %s" % settings.NEWSBLUR_URL
        data["docs"] = None
        data["feed_url"] = self.feed.feed_address
        rss = feedgenerator.Atom1Feed(**data)
        merged_data = []

        posts = feed.get("posts", {}).get("data", None)
        if posts:
            for post in posts:
                story_data = self.page_posts_story(facebook_user, post)
                if not story_data:
                    continue
                merged_data.append(story_data)

        videos = feed.get("videos", {}).get("data", None)
        if videos:
            for video in videos:
                story_data = self.page_video_story(facebook_user, video)
                if not story_data:
                    continue
                # NOTE(review): a video whose link matches no post is not added
                # to the feed; it only ever overwrites an existing post entry.
                for seen_data in merged_data:
                    if story_data["link"] == seen_data["link"]:
                        # Video wins over posts (and attachments)
                        seen_data["description"] = story_data["description"]
                        seen_data["title"] = story_data["title"]
                        break

        for story_data in merged_data:
            rss.add_item(**story_data)

        return rss.writeString("utf-8")

    def extract_page_name(self):
        """Return the first path segment after ``facebook.com/``, or ``None``."""
        # Raw string keeps ``\w`` a regex escape rather than an invalid string
        # escape; the dot is escaped so only a literal "facebook.com" matches.
        page_groups = re.search(r"facebook\.com/(\w+)/?", self.feed.feed_address or "")
        if not page_groups:
            return
        return page_groups.group(1)

    def facebook_user(self):
        """Return a Facebook API client for this feed, or ``None`` if there is none.

        When ``requesting_user_id`` is set in the options, only that user's
        client is tried. Otherwise the feed's subscribers are scanned and the
        first one with a ``facebook_uid`` and a usable client wins.
        """
        facebook_api = None
        social_services = None

        if self.options.get("requesting_user_id", None):
            social_services = MSocialServices.get_user(self.options.get("requesting_user_id"))
            facebook_api = social_services.facebook_api()
            if not facebook_api:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, self.options)
                )
                return
        else:
            usersubs = UserSubscription.objects.filter(feed=self.feed)
            if not usersubs:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook fetch failed: %s: No subscriptions"
                    % (self.feed.log_title[:30], self.feed.feed_address)
                )
                return

            for sub in usersubs:
                social_services = MSocialServices.get_user(sub.user_id)
                if not social_services.facebook_uid:
                    continue

                facebook_api = social_services.facebook_api()
                if facebook_api:
                    break

            if not facebook_api:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, usersubs[0].user.username)
                )
                return

        return facebook_api

    def fetch_page_feed(self, facebook_user, page, fields):
        """Fetch ``fields`` of ``page`` through ``facebook_user.get_object``.

        Returns the API response, or ``{}`` when the response is empty or the
        call raises ``GraphAPIError``. An expired session is additionally
        recorded in the feed history with status code 560.
        """
        try:
            stories = facebook_user.get_object(page, fields=fields)
        except GraphAPIError as e:
            message = str(e).lower()
            if "session has expired" in message:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook page failed/expired, disconnecting facebook: %s: %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, e)
                )
                self.feed.save_feed_history(560, "Facebook Error: Expired token")
            else:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook page failed: %s: %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, e)
                )
            # Every API error yields an empty result, so the code below never
            # sees an unbound ``stories``.
            return {}

        if not stories:
            return {}

        return stories

    @staticmethod
    def _parse_created_date(value):
        """Parse ``value`` with dateutil when it is a string; otherwise return it as-is."""
        if isinstance(value, str):
            return dateutil.parser.parse(value)
        return value

    @staticmethod
    def _image_tag(attachment):
        """Return an ``<img>`` tag for ``attachment``'s ``media.image.src``, or ``""``."""
        src = ((attachment.get("media") or {}).get("image") or {}).get("src")
        if not src:
            return ""
        return '<img src="%s" />' % src

    @staticmethod
    def _attachments_html(attachments):
        """Concatenate ``<img>`` tags for each attachment and its subattachments.

        ``attachments`` is the ``attachments`` value of a post response (a dict
        with a ``data`` list) or a falsy value.
        """
        image_tag = FacebookFetcher._image_tag
        tags = []
        for attachment in (attachments or {}).get("data") or []:
            tags.append(image_tag(attachment))
            subattachments = attachment.get("subattachments") or {}
            for subattachment in subattachments.get("data") or []:
                tags.append(image_tag(subattachment))
        return "".join(tags)

    def page_posts_story(self, facebook_user, page_story):
        """Build ``add_item`` keyword arguments for one page post.

        Returns ``None`` for posts without a ``message``. One extra
        ``get_object`` call is made for the post's permalink, link and
        attachments.
        """
        categories = set()
        if "message" not in page_story:
            # Probably a story shared on the page's timeline, not a published story
            return

        message = linebreaks(page_story["message"])
        created_date = self._parse_created_date(page_story["created_time"])

        fields = facebook_user.get_object(page_story["id"], fields="permalink_url,link,attachments")
        # Only fall back to permalink_url when there is no link, so a response
        # without permalink_url does not raise while a link is available.
        permalink = fields["link"] if "link" in fields else fields["permalink_url"]
        attachments_html = self._attachments_html(fields.get("attachments"))

        content = (
            '<div class="NB-facebook-rss">\n'
            '<div class="NB-facebook-rss-message">%s</div>\n'
            '<div class="NB-facebook-rss-picture">%s</div>\n'
            "</div>"
        ) % (
            message,
            attachments_html,
        )

        story = {
            "title": message,
            "link": permalink,
            "description": content,
            "categories": list(categories),
            "unique_id": "fb_post:%s" % page_story["id"],
            "pubdate": created_date,
        }

        return story

    def page_video_story(self, facebook_user, page_story):
        """Build ``add_item`` keyword arguments for one page video.

        Returns ``None`` for videos without a ``description``. The permalink
        and embed HTML are fetched in a single ``get_object`` call.
        """
        categories = set()
        if "description" not in page_story:
            return

        message = linebreaks(page_story["description"])
        created_date = self._parse_created_date(page_story["updated_time"])

        # One round-trip for both fields instead of one request per field.
        video = facebook_user.get_object(page_story["id"], fields="permalink_url,embed_html")
        permalink = video["permalink_url"]
        if permalink.startswith("/"):
            permalink = "https://www.facebook.com%s" % permalink

        content = (
            '<div class="NB-facebook-rss">\n'
            '<div class="NB-facebook-rss-message">%s</div>\n'
            '<div class="NB-facebook-rss-embed">%s</div>\n'
            "</div>"
        ) % (
            message,
            video.get("embed_html", ""),
        )

        story = {
            "title": page_story.get("story", message),
            "link": permalink,
            "description": content,
            "categories": list(categories),
            "unique_id": "fb_post:%s" % page_story["id"],
            "pubdate": created_date,
        }

        return story

    def favicon_url(self):
        """Return the URL of the page's picture, or ``None`` when unavailable."""
        page_name = self.extract_page_name()
        if not page_name:
            # Nothing to look up; avoids calling get_object(None, ...).
            return

        facebook_user = self.facebook_user()
        if not facebook_user:
            logging.debug(
                " ***> [%-30s] ~FRFacebook icon failed, disconnecting facebook: %s"
                % (self.feed.log_title[:30], self.feed.feed_address)
            )
            return

        try:
            picture_data = facebook_user.get_object(page_name, fields="picture")
        except GraphAPIError as e:
            message = str(e).lower()
            if "session has expired" in message:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook icon failed/expired, disconnecting facebook: %s: %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, e)
                )
            else:
                logging.debug(
                    " ***> [%-30s] ~FRFacebook icon failed: %s: %s"
                    % (self.feed.log_title[:30], self.feed.feed_address, e)
                )
            # Every API error means no icon, so ``picture_data`` is never
            # read unbound below.
            return

        if picture_data and "picture" in picture_data:
            return picture_data["picture"]["data"]["url"]