mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Adding videos to facebook posts. Seems to work pretty well.
This commit is contained in:
parent
fdce59bc2c
commit
da4741a3fa
5 changed files with 128 additions and 212 deletions
|
@ -410,6 +410,8 @@ class Feed(models.Model):
|
|||
return cls.objects.get(feed_address=url)
|
||||
if url and re.match('(https?://)?twitter.com/\w+/?$', url):
|
||||
without_rss = True
|
||||
if url and re.match(r'(https?://)?(www\.)?facebook.com/\w+/?$', url):
|
||||
without_rss = True
|
||||
if url and 'youtube.com/user/' in url:
|
||||
username = re.search('youtube.com/user/(\w+)', url).group(1)
|
||||
url = "http://gdata.youtube.com/feeds/base/users/%s/uploads" % username
|
||||
|
|
|
@ -2581,10 +2581,10 @@ class MSocialServices(mongo.Document):
|
|||
self.syncing_facebook = False
|
||||
self.save()
|
||||
|
||||
facebook_user = graph.request('me', args={'fields':'website,bio,location'})
|
||||
facebook_user = graph.request('me', args={'fields':'website,about,location'})
|
||||
profile = MSocialProfile.get_user(self.user_id)
|
||||
profile.location = profile.location or (facebook_user.get('location') and facebook_user['location']['name'])
|
||||
profile.bio = profile.bio or facebook_user.get('bio')
|
||||
profile.bio = profile.bio or facebook_user.get('about')
|
||||
if not profile.website and facebook_user.get('website'):
|
||||
profile.website = facebook_user.get('website').split()[0]
|
||||
profile.save()
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
import re
|
||||
import datetime
|
||||
import tweepy
|
||||
import dateutil.parser
|
||||
from django.conf import settings
|
||||
from django.utils import feedgenerator
|
||||
from django.utils.html import linebreaks
|
||||
from django.utils.dateformat import DateFormat
|
||||
from apps.social.models import MSocialServices
|
||||
from apps.reader.models import UserSubscription
|
||||
from utils import log as logging
|
||||
|
@ -21,57 +19,77 @@ class FacebookFetcher:
|
|||
address = self.feed.feed_address
|
||||
self.address = address
|
||||
|
||||
page = self.extract_page()
|
||||
if not page:
|
||||
page_name = self.extract_page_name()
|
||||
if not page_name:
|
||||
return
|
||||
|
||||
facebook_user = self.fetch_user(username)
|
||||
facebook_user = self.facebook_user()
|
||||
if not facebook_user:
|
||||
return
|
||||
|
||||
# If 'video', use video API to get embed:
|
||||
# f.get_object('tastyvegetarian', fields='posts')
|
||||
# f.get_object('1992797300790726', fields='embed_html')
|
||||
stories = self.page_feed(page, facebook_user)
|
||||
feed = self.fetch_page_feed(facebook_user, page_name, 'name,about,posts,videos,photos')
|
||||
|
||||
data = {}
|
||||
data['title'] = "%s on Facebook" % page
|
||||
data['link'] = "https://facebook.com/%s" % page
|
||||
data['description'] = "%s on Facebook" % page
|
||||
data['title'] = feed.get('name', "%s on Facebook" % page_name)
|
||||
data['link'] = feed.get('link', "https://facebook.com/%s" % page_name)
|
||||
data['description'] = feed.get('about', "%s on Facebook" % page_name)
|
||||
data['lastBuildDate'] = datetime.datetime.utcnow()
|
||||
data['generator'] = 'NewsBlur Facebook API Decrapifier - %s' % settings.NEWSBLUR_URL
|
||||
data['docs'] = None
|
||||
data['feed_url'] = address
|
||||
rss = feedgenerator.Atom1Feed(**data)
|
||||
merged_data = []
|
||||
|
||||
for story in stories:
|
||||
story_data = self.page_feed_story(story.__dict__)
|
||||
posts = feed.get('posts', {}).get('data', None)
|
||||
if posts:
|
||||
for post in posts:
|
||||
story_data = self.page_posts_story(facebook_user, post)
|
||||
if not story_data:
|
||||
continue
|
||||
merged_data.append(story_data)
|
||||
|
||||
videos = feed.get('videos', {}).get('data', None)
|
||||
for video in videos:
|
||||
story_data = self.page_video_story(facebook_user, video)
|
||||
if not story_data:
|
||||
continue
|
||||
for seen_data in merged_data:
|
||||
if story_data['link'] == seen_data['link']:
|
||||
if len(story_data['description']) > len(seen_data['description']):
|
||||
seen_data['description'] = story_data['description']
|
||||
seen_data['title'] = story_data['title']
|
||||
break
|
||||
|
||||
for story_data in merged_data:
|
||||
rss.add_item(**story_data)
|
||||
|
||||
return rss.writeString('utf-8')
|
||||
|
||||
def extract_page(self):
|
||||
def extract_page_name(self):
|
||||
page = None
|
||||
try:
|
||||
username_groups = re.search('twitter.com/(\w+)/?', self.address)
|
||||
if not username_groups:
|
||||
page_groups = re.search('facebook.com/(\w+)/?', self.address)
|
||||
if not page_groups:
|
||||
return
|
||||
username = username_groups.group(1)
|
||||
page = page_groups.group(1)
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
return username
|
||||
return page
|
||||
|
||||
def fetch_user(self, username):
|
||||
twitter_api = None
|
||||
def facebook_user(self):
|
||||
facebook_api = None
|
||||
social_services = None
|
||||
|
||||
if self.options.get('requesting_user_id', None):
|
||||
social_services = MSocialServices.get_user(self.options.get('requesting_user_id'))
|
||||
try:
|
||||
twitter_api = social_services.twitter_api()
|
||||
except tweepy.error.TweepError, e:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
facebook_api = social_services.facebook_api()
|
||||
if not facebook_api:
|
||||
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' %
|
||||
(self.feed.log_title[:30], self.address, self.options))
|
||||
return
|
||||
else:
|
||||
usersubs = UserSubscription.objects.filter(feed=self.feed)
|
||||
|
@ -79,210 +97,91 @@ class FacebookFetcher:
|
|||
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No subscriptions' %
|
||||
(self.feed.log_title[:30], self.address))
|
||||
return
|
||||
|
||||
for sub in usersubs:
|
||||
social_services = MSocialServices.get_user(sub.user_id)
|
||||
if not social_services.twitter_uid: continue
|
||||
try:
|
||||
twitter_api = social_services.twitter_api()
|
||||
if not twitter_api:
|
||||
continue
|
||||
else:
|
||||
break
|
||||
except tweepy.error.TweepError, e:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
if not social_services.facebook_uid:
|
||||
continue
|
||||
|
||||
facebook_api = social_services.facebook_api()
|
||||
if not facebook_api:
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
||||
if not twitter_api:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No twitter API for %s' %
|
||||
(self.feed.log_title[:30], self.address, usersubs[0].user.username))
|
||||
return
|
||||
if not facebook_api:
|
||||
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' %
|
||||
(self.feed.log_title[:30], self.address, usersubs[0].user.username))
|
||||
return
|
||||
|
||||
try:
|
||||
twitter_user = twitter_api.get_user(username)
|
||||
except TypeError, e:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: %s" % (e))
|
||||
return
|
||||
except tweepy.error.TweepError, e:
|
||||
message = str(e).lower()
|
||||
if ((len(e.args) >= 2 and e.args[2] == 63) or
|
||||
('temporarily locked' in message)):
|
||||
# Suspended
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter failed, user suspended, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: User suspended")
|
||||
return
|
||||
elif 'suspended' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: User suspended")
|
||||
return
|
||||
elif 'expired token' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter user expired, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: Expired token")
|
||||
social_services.disconnect_twitter()
|
||||
return
|
||||
elif 'not found' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: User not found")
|
||||
return
|
||||
elif 'over capacity' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(460, "Twitter Error: Over capacity")
|
||||
return
|
||||
else:
|
||||
raise e
|
||||
|
||||
return twitter_user
|
||||
return facebook_api
|
||||
|
||||
def page_feed(self, facebook_user, page):
|
||||
try:
|
||||
stories = facebook_user.get_object(page, fields='feed')
|
||||
except Exception, e:
|
||||
message = str(e).lower()
|
||||
if 'not authorized' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter timeline failed, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: Not authorized")
|
||||
return []
|
||||
elif 'user not found' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
|
||||
(self.feed.log_title[:30], self.address, e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: User not found")
|
||||
return []
|
||||
elif 'blocked from viewing' in message:
|
||||
logging.debug(u' ***> [%-30s] ~FRTwitter user blocked, ignoring: %s' %
|
||||
(self.feed.log_title[:30], e))
|
||||
self.feed.save_feed_history(560, "Twitter Error: Blocked from viewing")
|
||||
return []
|
||||
else:
|
||||
raise e
|
||||
def fetch_page_feed(self, facebook_user, page, fields):
|
||||
stories = facebook_user.get_object(page, fields=fields)
|
||||
|
||||
if not stories:
|
||||
return []
|
||||
return stories
|
||||
|
||||
def page_feed_story(self, story):
|
||||
categories = set()
|
||||
|
||||
if user_tweet['full_text'].startswith('RT @'):
|
||||
categories.add('retweet')
|
||||
elif user_tweet['in_reply_to_status_id'] or user_tweet['full_text'].startswith('@'):
|
||||
categories.add('reply')
|
||||
else:
|
||||
categories.add('tweet')
|
||||
if user_tweet['full_text'].startswith('RT @'):
|
||||
categories.add('retweet')
|
||||
if user_tweet['favorite_count']:
|
||||
categories.add('liked')
|
||||
if user_tweet['retweet_count']:
|
||||
categories.add('retweeted')
|
||||
if 'http' in user_tweet['full_text']:
|
||||
categories.add('link')
|
||||
|
||||
story = {}
|
||||
content_tweet = user_tweet
|
||||
entities = ""
|
||||
author = user_tweet.get('author') or user_tweet.get('user')
|
||||
if not isinstance(author, dict): author = author.__dict__
|
||||
author_name = author['screen_name']
|
||||
original_author_name = author_name
|
||||
if user_tweet['in_reply_to_user_id'] == author['id']:
|
||||
categories.add('reply-to-self')
|
||||
retweet_author = ""
|
||||
if 'retweeted_status' in user_tweet:
|
||||
retweet_author = """Retweeted by
|
||||
<a href="https://twitter.com/%s"><img src="%s" style="height: 20px" /></a>
|
||||
<a href="https://twitter.com/%s">%s</a>
|
||||
on %s""" % (
|
||||
author_name,
|
||||
author['profile_image_url_https'],
|
||||
author_name,
|
||||
author_name,
|
||||
DateFormat(user_tweet['created_at']).format('l, F jS, Y g:ia').replace('.',''),
|
||||
)
|
||||
content_tweet = user_tweet['retweeted_status'].__dict__
|
||||
author = content_tweet['author']
|
||||
if not isinstance(author, dict): author = author.__dict__
|
||||
author_name = author['screen_name']
|
||||
|
||||
tweet_title = user_tweet['full_text']
|
||||
tweet_text = linebreaks(content_tweet['full_text'])
|
||||
replaced = {}
|
||||
entities_media = content_tweet['entities'].get('media', [])
|
||||
if 'extended_entities' in content_tweet:
|
||||
entities_media = content_tweet['extended_entities'].get('media', [])
|
||||
for media in entities_media:
|
||||
if 'media_url_https' not in media: continue
|
||||
if media['type'] == 'photo':
|
||||
if media.get('url') and media['url'] in tweet_text:
|
||||
tweet_title = tweet_title.replace(media['url'], media['display_url'])
|
||||
replacement = "<a href=\"%s\">%s</a>" % (media['expanded_url'], media['display_url'])
|
||||
if not replaced.get(media['url']):
|
||||
tweet_text = tweet_text.replace(media['url'], replacement)
|
||||
replaced[media['url']] = True
|
||||
entities += "<img src=\"%s\"> <hr>" % media['media_url_https']
|
||||
if 'photo' not in categories:
|
||||
categories.add('photo')
|
||||
|
||||
for url in content_tweet['entities'].get('urls', []):
|
||||
if url['url'] in tweet_text:
|
||||
replacement = "<a href=\"%s\">%s</a>" % (url['expanded_url'], url['display_url'])
|
||||
if not replaced.get(url['url']):
|
||||
tweet_text = tweet_text.replace(url['url'], replacement)
|
||||
replaced[url['url']] = True
|
||||
tweet_title = tweet_title.replace(url['url'], url['display_url'])
|
||||
|
||||
quote_tweet_content = ""
|
||||
if 'quoted_status' in content_tweet:
|
||||
quote_tweet_content = "<blockquote>"+self.tweet_story(content_tweet['quoted_status'])['description']+"</blockquote>"
|
||||
|
||||
|
||||
created_date = content_tweet['created_at']
|
||||
return stories
|
||||
|
||||
def page_posts_story(self, facebook_user, page_story):
|
||||
categories = set()
|
||||
if 'message' not in page_story:
|
||||
# Probably a story shared on the page's timeline, not a published story
|
||||
return
|
||||
message = linebreaks(page_story['message'])
|
||||
created_date = page_story['created_time']
|
||||
if isinstance(created_date, unicode):
|
||||
created_date = dateutil.parser.parse(created_date)
|
||||
|
||||
content = """<div class="NB-twitter-rss">
|
||||
<div class="NB-twitter-rss-tweet">%s</div>
|
||||
<div class="NB-twitter-rss-quote-tweet">%s</div>
|
||||
<hr />
|
||||
<div class="NB-twitter-rss-entities">%s</div>
|
||||
<div class="NB-twitter-rss-author">
|
||||
Posted by
|
||||
<a href="https://twitter.com/%s"><img src="%s" style="height: 32px" /></a>
|
||||
<a href="https://twitter.com/%s">%s</a>
|
||||
on %s</div>
|
||||
<div class="NB-twitter-rss-retweet">%s</div>
|
||||
<div class="NB-twitter-rss-stats">%s %s%s %s</div>
|
||||
permalink = facebook_user.get_object(page_story['id'], fields='permalink_url')['permalink_url']
|
||||
content = """<div class="NB-facebook-rss">
|
||||
<div class="NB-facebook-rss-message">%s</div>
|
||||
</div>""" % (
|
||||
tweet_text,
|
||||
quote_tweet_content,
|
||||
entities,
|
||||
author_name,
|
||||
author['profile_image_url_https'],
|
||||
author_name,
|
||||
author_name,
|
||||
DateFormat(created_date).format('l, F jS, Y g:ia').replace('.',''),
|
||||
retweet_author,
|
||||
("<br /><br />" if content_tweet['favorite_count'] or content_tweet['retweet_count'] else ""),
|
||||
("<b>%s</b> %s" % (content_tweet['favorite_count'], "like" if content_tweet['favorite_count'] == 1 else "likes")) if content_tweet['favorite_count'] else "",
|
||||
(", " if content_tweet['favorite_count'] and content_tweet['retweet_count'] else ""),
|
||||
("<b>%s</b> %s" % (content_tweet['retweet_count'], "retweet" if content_tweet['retweet_count'] == 1 else "retweets")) if content_tweet['retweet_count'] else "",
|
||||
message
|
||||
)
|
||||
|
||||
story = {
|
||||
'title': tweet_title,
|
||||
'link': "https://twitter.com/%s/status/%s" % (original_author_name, user_tweet['id']),
|
||||
'title': message,
|
||||
'link': permalink,
|
||||
'description': content,
|
||||
'author_name': author_name,
|
||||
'categories': list(categories),
|
||||
'unique_id': "tweet:%s" % user_tweet['id'],
|
||||
'pubdate': user_tweet['created_at'],
|
||||
'unique_id': "fb_post:%s" % page_story['id'],
|
||||
'pubdate': created_date,
|
||||
}
|
||||
|
||||
return story
|
||||
|
||||
def page_video_story(self, facebook_user, page_story):
|
||||
categories = set()
|
||||
if 'description' not in page_story:
|
||||
return
|
||||
message = linebreaks(page_story['description'])
|
||||
created_date = page_story['updated_time']
|
||||
if isinstance(created_date, unicode):
|
||||
created_date = dateutil.parser.parse(created_date)
|
||||
permalink = facebook_user.get_object(page_story['id'], fields='permalink_url')['permalink_url']
|
||||
embed_html = facebook_user.get_object(page_story['id'], fields='embed_html')
|
||||
|
||||
if permalink.startswith('/'):
|
||||
permalink = "https://www.facebook.com%s" % permalink
|
||||
|
||||
content = """<div class="NB-facebook-rss">
|
||||
<div class="NB-facebook-rss-message">%s</div>
|
||||
<div class="NB-facebook-rss-embed">%s</div>
|
||||
</div>""" % (
|
||||
message,
|
||||
embed_html.get('embed_html', '')
|
||||
)
|
||||
|
||||
story = {
|
||||
'title': page_story.get('story', message),
|
||||
'link': permalink,
|
||||
'description': content,
|
||||
'categories': list(categories),
|
||||
'unique_id': "fb_post:%s" % page_story['id'],
|
||||
'pubdate': created_date,
|
||||
}
|
||||
|
||||
return story
|
||||
|
||||
|
|
@ -34,6 +34,7 @@ from django.utils.encoding import smart_unicode
|
|||
from utils import json_functions as json
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
from utils.twitter_fetcher import TwitterFetcher
|
||||
from utils.facebook_fetcher import FacebookFetcher
|
||||
from utils.json_fetcher import JSONFetcher
|
||||
# from utils.feed_functions import mail_feed_error_to_admin
|
||||
|
||||
|
@ -106,6 +107,13 @@ class FetchFeed:
|
|||
(self.feed.log_title[:30], address))
|
||||
return FEED_ERRHTTP, None
|
||||
self.fpf = feedparser.parse(twitter_feed)
|
||||
elif re.match(r'(https?)?://(www\.)?facebook.com/\w+/?$', qurl(address, remove=['_'])):
|
||||
facebook_feed = self.fetch_facebook(address)
|
||||
if not facebook_feed:
|
||||
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s' %
|
||||
(self.feed.log_title[:30], address))
|
||||
return FEED_ERRHTTP, None
|
||||
self.fpf = feedparser.parse(facebook_feed)
|
||||
|
||||
if not self.fpf:
|
||||
try:
|
||||
|
@ -186,6 +194,10 @@ class FetchFeed:
|
|||
twitter_fetcher = TwitterFetcher(self.feed, self.options)
|
||||
return twitter_fetcher.fetch(address)
|
||||
|
||||
def fetch_facebook(self, address=None):
|
||||
facebook_fetcher = FacebookFetcher(self.feed, self.options)
|
||||
return facebook_fetcher.fetch(address)
|
||||
|
||||
def fetch_json_feed(self, address, headers):
|
||||
json_fetcher = JSONFetcher(self.feed, self.options)
|
||||
return json_fetcher.fetch(address, headers)
|
||||
|
|
|
@ -85,7 +85,10 @@ def find_feeds(url, check_all=False, user_agent=None):
|
|||
|
||||
# Look for <link> tags.
|
||||
logging.info("Looking for <link> tags.")
|
||||
tree = BeautifulSoup(feed_text)
|
||||
try:
|
||||
tree = BeautifulSoup(feed_text)
|
||||
except ValueError:
|
||||
return []
|
||||
links = []
|
||||
for link in tree.findAll("link"):
|
||||
if link.get("type") in ["application/rss+xml",
|
||||
|
|
Loading…
Add table
Reference in a new issue