import re import datetime import dateutil.parser from django.conf import settings from django.utils import feedgenerator from django.utils.html import linebreaks from apps.social.models import MSocialServices from apps.reader.models import UserSubscription from utils import log as logging from vendor.facebook import GraphAPIError class FacebookFetcher: def __init__(self, feed, options=None): self.feed = feed self.options = options or {} def fetch(self): page_name = self.extract_page_name() if not page_name: return facebook_user = self.facebook_user() if not facebook_user: return # If 'video', use video API to get embed: # f.get_object('tastyvegetarian', fields='posts') # f.get_object('1992797300790726', fields='embed_html') feed = self.fetch_page_feed(facebook_user, page_name, 'name,about,posts,videos,photos') data = {} data['title'] = feed.get('name', "%s on Facebook" % page_name) data['link'] = feed.get('link', "https://facebook.com/%s" % page_name) data['description'] = feed.get('about', "%s on Facebook" % page_name) data['lastBuildDate'] = datetime.datetime.utcnow() data['generator'] = 'NewsBlur Facebook API Decrapifier - %s' % settings.NEWSBLUR_URL data['docs'] = None data['feed_url'] = self.feed.feed_address rss = feedgenerator.Atom1Feed(**data) merged_data = [] posts = feed.get('posts', {}).get('data', None) if posts: for post in posts: story_data = self.page_posts_story(facebook_user, post) if not story_data: continue merged_data.append(story_data) videos = feed.get('videos', {}).get('data', None) if videos: for video in videos: story_data = self.page_video_story(facebook_user, video) if not story_data: continue for seen_data in merged_data: if story_data['link'] == seen_data['link']: # Video wins over posts (and attachments) seen_data['description'] = story_data['description'] seen_data['title'] = story_data['title'] break for story_data in merged_data: rss.add_item(**story_data) return rss.writeString('utf-8') def extract_page_name(self): page = None try: page_groups = re.search('facebook.com/(\w+)/?', self.feed.feed_address) if not page_groups: return page = page_groups.group(1) except IndexError: return return page def facebook_user(self): facebook_api = None social_services = None if self.options.get('requesting_user_id', None): social_services = MSocialServices.get_user(self.options.get('requesting_user_id')) facebook_api = social_services.facebook_api() if not facebook_api: logging.debug(' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' % (self.feed.log_title[:30], self.feed.feed_address, self.options)) return else: usersubs = UserSubscription.objects.filter(feed=self.feed) if not usersubs: logging.debug(' ***> [%-30s] ~FRFacebook fetch failed: %s: No subscriptions' % (self.feed.log_title[:30], self.feed.feed_address)) return for sub in usersubs: social_services = MSocialServices.get_user(sub.user_id) if not social_services.facebook_uid: continue facebook_api = social_services.facebook_api() if not facebook_api: continue else: break if not facebook_api: logging.debug(' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' % (self.feed.log_title[:30], self.feed.feed_address, usersubs[0].user.username)) return return facebook_api def fetch_page_feed(self, facebook_user, page, fields): try: stories = facebook_user.get_object(page, fields=fields) except GraphAPIError as e: message = str(e).lower() if 'session has expired' in message: logging.debug(' ***> [%-30s] ~FRFacebook page failed/expired, disconnecting facebook: %s: %s' % (self.feed.log_title[:30], self.feed.feed_address, e)) self.feed.save_feed_history(560, "Facebook Error: Expired token") return {} if not stories: return {} return stories def page_posts_story(self, facebook_user, page_story): categories = set() if 'message' not in page_story: # Probably a story shared on the page's timeline, not a published story return message = linebreaks(page_story['message']) created_date = page_story['created_time'] if isinstance(created_date, str): created_date = dateutil.parser.parse(created_date) fields = facebook_user.get_object(page_story['id'], fields='permalink_url,link,attachments') permalink = fields.get('link', fields['permalink_url']) attachments_html = "" if fields.get('attachments', None) and fields['attachments']['data']: for attachment in fields['attachments']['data']: if 'media' in attachment: attachments_html += "" % attachment['media']['image']['src'] if attachment.get('subattachments', None): for subattachment in attachment['subattachments']['data']: attachments_html += "" % subattachment['media']['image']['src'] content = """
%s
%s
""" % ( message, attachments_html ) story = { 'title': message, 'link': permalink, 'description': content, 'categories': list(categories), 'unique_id': "fb_post:%s" % page_story['id'], 'pubdate': created_date, } return story def page_video_story(self, facebook_user, page_story): categories = set() if 'description' not in page_story: return message = linebreaks(page_story['description']) created_date = page_story['updated_time'] if isinstance(created_date, str): created_date = dateutil.parser.parse(created_date) permalink = facebook_user.get_object(page_story['id'], fields='permalink_url')['permalink_url'] embed_html = facebook_user.get_object(page_story['id'], fields='embed_html') if permalink.startswith('/'): permalink = "https://www.facebook.com%s" % permalink content = """
%s
%s
""" % ( message, embed_html.get('embed_html', '') ) story = { 'title': page_story.get('story', message), 'link': permalink, 'description': content, 'categories': list(categories), 'unique_id': "fb_post:%s" % page_story['id'], 'pubdate': created_date, } return story def favicon_url(self): page_name = self.extract_page_name() facebook_user = self.facebook_user() if not facebook_user: logging.debug(' ***> [%-30s] ~FRFacebook icon failed, disconnecting facebook: %s' % (self.feed.log_title[:30], self.feed.feed_address)) return try: picture_data = facebook_user.get_object(page_name, fields='picture') except GraphAPIError as e: message = str(e).lower() if 'session has expired' in message: logging.debug(' ***> [%-30s] ~FRFacebook icon failed/expired, disconnecting facebook: %s: %s' % (self.feed.log_title[:30], self.feed.feed_address, e)) return if 'picture' in picture_data: return picture_data['picture']['data']['url']