diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index f6586bbf4..a4e478ca7 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -410,6 +410,8 @@ class Feed(models.Model):
return cls.objects.get(feed_address=url)
if url and re.match('(https?://)?twitter.com/\w+/?$', url):
without_rss = True
+ if url and re.match(r'(https?://)?(www\.)?facebook.com/\w+/?$', url):
+ without_rss = True
if url and 'youtube.com/user/' in url:
username = re.search('youtube.com/user/(\w+)', url).group(1)
url = "http://gdata.youtube.com/feeds/base/users/%s/uploads" % username
diff --git a/apps/social/models.py b/apps/social/models.py
index 72be26611..174744aea 100644
--- a/apps/social/models.py
+++ b/apps/social/models.py
@@ -2581,10 +2581,10 @@ class MSocialServices(mongo.Document):
self.syncing_facebook = False
self.save()
- facebook_user = graph.request('me', args={'fields':'website,bio,location'})
+ facebook_user = graph.request('me', args={'fields':'website,about,location'})
profile = MSocialProfile.get_user(self.user_id)
profile.location = profile.location or (facebook_user.get('location') and facebook_user['location']['name'])
- profile.bio = profile.bio or facebook_user.get('bio')
+ profile.bio = profile.bio or facebook_user.get('about')
if not profile.website and facebook_user.get('website'):
profile.website = facebook_user.get('website').split()[0]
profile.save()
diff --git a/utils/facebook_fetcher.py b/utils/facebook_fetcher.py
index b4c5a4e6a..550ae881d 100644
--- a/utils/facebook_fetcher.py
+++ b/utils/facebook_fetcher.py
@@ -1,11 +1,9 @@
import re
import datetime
-import tweepy
import dateutil.parser
from django.conf import settings
from django.utils import feedgenerator
from django.utils.html import linebreaks
-from django.utils.dateformat import DateFormat
from apps.social.models import MSocialServices
from apps.reader.models import UserSubscription
from utils import log as logging
@@ -21,57 +19,77 @@ class FacebookFetcher:
address = self.feed.feed_address
self.address = address
- page = self.extract_page()
- if not page:
+ page_name = self.extract_page_name()
+ if not page_name:
return
- facebook_user = self.fetch_user(username)
+ facebook_user = self.facebook_user()
if not facebook_user:
return
# If 'video', use video API to get embed:
# f.get_object('tastyvegetarian', fields='posts')
# f.get_object('1992797300790726', fields='embed_html')
- stories = self.page_feed(page, facebook_user)
+ feed = self.fetch_page_feed(facebook_user, page_name, 'name,about,posts,videos,photos')
data = {}
- data['title'] = "%s on Facebook" % page
- data['link'] = "https://facebook.com/%s" % page
- data['description'] = "%s on Facebook" % page
+ data['title'] = feed.get('name', "%s on Facebook" % page_name)
+ data['link'] = feed.get('link', "https://facebook.com/%s" % page_name)
+ data['description'] = feed.get('about', "%s on Facebook" % page_name)
data['lastBuildDate'] = datetime.datetime.utcnow()
data['generator'] = 'NewsBlur Facebook API Decrapifier - %s' % settings.NEWSBLUR_URL
data['docs'] = None
data['feed_url'] = address
rss = feedgenerator.Atom1Feed(**data)
+ merged_data = []
- for story in stories:
- story_data = self.page_feed_story(story.__dict__)
+ posts = feed.get('posts', {}).get('data', None)
+ if posts:
+ for post in posts:
+ story_data = self.page_posts_story(facebook_user, post)
+ if not story_data:
+ continue
+ merged_data.append(story_data)
+
+ videos = feed.get('videos', {}).get('data', None)
+ for video in videos:
+ story_data = self.page_video_story(facebook_user, video)
+ if not story_data:
+ continue
+ for seen_data in merged_data:
+ if story_data['link'] == seen_data['link']:
+ if len(story_data['description']) > len(seen_data['description']):
+ seen_data['description'] = story_data['description']
+ seen_data['title'] = story_data['title']
+ break
+
+ for story_data in merged_data:
rss.add_item(**story_data)
return rss.writeString('utf-8')
- def extract_page(self):
+ def extract_page_name(self):
page = None
try:
- username_groups = re.search('twitter.com/(\w+)/?', self.address)
- if not username_groups:
+ page_groups = re.search('facebook.com/(\w+)/?', self.address)
+ if not page_groups:
return
- username = username_groups.group(1)
+ page = page_groups.group(1)
except IndexError:
return
- return username
+ return page
- def fetch_user(self, username):
- twitter_api = None
+ def facebook_user(self):
+ facebook_api = None
social_services = None
+
if self.options.get('requesting_user_id', None):
social_services = MSocialServices.get_user(self.options.get('requesting_user_id'))
- try:
- twitter_api = social_services.twitter_api()
- except tweepy.error.TweepError, e:
- logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
+ facebook_api = social_services.facebook_api()
+ if not facebook_api:
+ logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' %
+ (self.feed.log_title[:30], self.address, self.options))
return
else:
usersubs = UserSubscription.objects.filter(feed=self.feed)
@@ -79,210 +97,91 @@ class FacebookFetcher:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No subscriptions' %
(self.feed.log_title[:30], self.address))
return
+
for sub in usersubs:
social_services = MSocialServices.get_user(sub.user_id)
- if not social_services.twitter_uid: continue
- try:
- twitter_api = social_services.twitter_api()
- if not twitter_api:
- continue
- else:
- break
- except tweepy.error.TweepError, e:
- logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
+ if not social_services.facebook_uid:
continue
+
+ facebook_api = social_services.facebook_api()
+ if not facebook_api:
+ continue
+ else:
+ break
- if not twitter_api:
- logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No twitter API for %s' %
- (self.feed.log_title[:30], self.address, usersubs[0].user.username))
- return
+ if not facebook_api:
+ logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' %
+ (self.feed.log_title[:30], self.address, usersubs[0].user.username))
+ return
- try:
- twitter_user = twitter_api.get_user(username)
- except TypeError, e:
- logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: %s" % (e))
- return
- except tweepy.error.TweepError, e:
- message = str(e).lower()
- if ((len(e.args) >= 2 and e.args[2] == 63) or
- ('temporarily locked' in message)):
- # Suspended
- logging.debug(u' ***> [%-30s] ~FRTwitter failed, user suspended, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: User suspended")
- return
- elif 'suspended' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: User suspended")
- return
- elif 'expired token' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter user expired, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: Expired token")
- social_services.disconnect_twitter()
- return
- elif 'not found' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: User not found")
- return
- elif 'over capacity' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(460, "Twitter Error: Over capacity")
- return
- else:
- raise e
-
- return twitter_user
+ return facebook_api
- def page_feed(self, facebook_user, page):
- try:
- stories = facebook_user.get_object(page, fields='feed')
- except Exception, e:
- message = str(e).lower()
- if 'not authorized' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter timeline failed, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: Not authorized")
- return []
- elif 'user not found' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
- (self.feed.log_title[:30], self.address, e))
- self.feed.save_feed_history(560, "Twitter Error: User not found")
- return []
- elif 'blocked from viewing' in message:
- logging.debug(u' ***> [%-30s] ~FRTwitter user blocked, ignoring: %s' %
- (self.feed.log_title[:30], e))
- self.feed.save_feed_history(560, "Twitter Error: Blocked from viewing")
- return []
- else:
- raise e
+ def fetch_page_feed(self, facebook_user, page, fields):
+ stories = facebook_user.get_object(page, fields=fields)
if not stories:
return []
- return stories
-
- def page_feed_story(self, story):
- categories = set()
-
- if user_tweet['full_text'].startswith('RT @'):
- categories.add('retweet')
- elif user_tweet['in_reply_to_status_id'] or user_tweet['full_text'].startswith('@'):
- categories.add('reply')
- else:
- categories.add('tweet')
- if user_tweet['full_text'].startswith('RT @'):
- categories.add('retweet')
- if user_tweet['favorite_count']:
- categories.add('liked')
- if user_tweet['retweet_count']:
- categories.add('retweeted')
- if 'http' in user_tweet['full_text']:
- categories.add('link')
-
- story = {}
- content_tweet = user_tweet
- entities = ""
- author = user_tweet.get('author') or user_tweet.get('user')
- if not isinstance(author, dict): author = author.__dict__
- author_name = author['screen_name']
- original_author_name = author_name
- if user_tweet['in_reply_to_user_id'] == author['id']:
- categories.add('reply-to-self')
- retweet_author = ""
- if 'retweeted_status' in user_tweet:
- retweet_author = """Retweeted by
-
- %s
- on %s""" % (
- author_name,
- author['profile_image_url_https'],
- author_name,
- author_name,
- DateFormat(user_tweet['created_at']).format('l, F jS, Y g:ia').replace('.',''),
- )
- content_tweet = user_tweet['retweeted_status'].__dict__
- author = content_tweet['author']
- if not isinstance(author, dict): author = author.__dict__
- author_name = author['screen_name']
-
- tweet_title = user_tweet['full_text']
- tweet_text = linebreaks(content_tweet['full_text'])
- replaced = {}
- entities_media = content_tweet['entities'].get('media', [])
- if 'extended_entities' in content_tweet:
- entities_media = content_tweet['extended_entities'].get('media', [])
- for media in entities_media:
- if 'media_url_https' not in media: continue
- if media['type'] == 'photo':
- if media.get('url') and media['url'] in tweet_text:
- tweet_title = tweet_title.replace(media['url'], media['display_url'])
- replacement = "%s" % (media['expanded_url'], media['display_url'])
- if not replaced.get(media['url']):
- tweet_text = tweet_text.replace(media['url'], replacement)
- replaced[media['url']] = True
- entities += "
"+self.tweet_story(content_tweet['quoted_status'])['description']+"" - - - created_date = content_tweet['created_at'] + return stories + + def page_posts_story(self, facebook_user, page_story): + categories = set() + if 'message' not in page_story: + # Probably a story shared on the page's timeline, not a published story + return + message = linebreaks(page_story['message']) + created_date = page_story['created_time'] if isinstance(created_date, unicode): created_date = dateutil.parser.parse(created_date) - - content = """