Adding videos to facebook posts. Seems to work pretty well.

This commit is contained in:
Samuel Clay 2018-03-26 17:31:12 -07:00
parent fdce59bc2c
commit da4741a3fa
5 changed files with 128 additions and 212 deletions

View file

@ -410,6 +410,8 @@ class Feed(models.Model):
return cls.objects.get(feed_address=url)
if url and re.match('(https?://)?twitter.com/\w+/?$', url):
without_rss = True
if url and re.match(r'(https?://)?(www\.)?facebook.com/\w+/?$', url):
without_rss = True
if url and 'youtube.com/user/' in url:
username = re.search('youtube.com/user/(\w+)', url).group(1)
url = "http://gdata.youtube.com/feeds/base/users/%s/uploads" % username

View file

@ -2581,10 +2581,10 @@ class MSocialServices(mongo.Document):
self.syncing_facebook = False
self.save()
facebook_user = graph.request('me', args={'fields':'website,bio,location'})
facebook_user = graph.request('me', args={'fields':'website,about,location'})
profile = MSocialProfile.get_user(self.user_id)
profile.location = profile.location or (facebook_user.get('location') and facebook_user['location']['name'])
profile.bio = profile.bio or facebook_user.get('bio')
profile.bio = profile.bio or facebook_user.get('about')
if not profile.website and facebook_user.get('website'):
profile.website = facebook_user.get('website').split()[0]
profile.save()

View file

@ -1,11 +1,9 @@
import re
import datetime
import tweepy
import dateutil.parser
from django.conf import settings
from django.utils import feedgenerator
from django.utils.html import linebreaks
from django.utils.dateformat import DateFormat
from apps.social.models import MSocialServices
from apps.reader.models import UserSubscription
from utils import log as logging
@ -21,57 +19,77 @@ class FacebookFetcher:
address = self.feed.feed_address
self.address = address
page = self.extract_page()
if not page:
page_name = self.extract_page_name()
if not page_name:
return
facebook_user = self.fetch_user(username)
facebook_user = self.facebook_user()
if not facebook_user:
return
# If 'video', use video API to get embed:
# f.get_object('tastyvegetarian', fields='posts')
# f.get_object('1992797300790726', fields='embed_html')
stories = self.page_feed(page, facebook_user)
feed = self.fetch_page_feed(facebook_user, page_name, 'name,about,posts,videos,photos')
data = {}
data['title'] = "%s on Facebook" % page
data['link'] = "https://facebook.com/%s" % page
data['description'] = "%s on Facebook" % page
data['title'] = feed.get('name', "%s on Facebook" % page_name)
data['link'] = feed.get('link', "https://facebook.com/%s" % page_name)
data['description'] = feed.get('about', "%s on Facebook" % page_name)
data['lastBuildDate'] = datetime.datetime.utcnow()
data['generator'] = 'NewsBlur Facebook API Decrapifier - %s' % settings.NEWSBLUR_URL
data['docs'] = None
data['feed_url'] = address
rss = feedgenerator.Atom1Feed(**data)
merged_data = []
for story in stories:
story_data = self.page_feed_story(story.__dict__)
posts = feed.get('posts', {}).get('data', None)
if posts:
for post in posts:
story_data = self.page_posts_story(facebook_user, post)
if not story_data:
continue
merged_data.append(story_data)
videos = feed.get('videos', {}).get('data', None)
for video in videos:
story_data = self.page_video_story(facebook_user, video)
if not story_data:
continue
for seen_data in merged_data:
if story_data['link'] == seen_data['link']:
if len(story_data['description']) > len(seen_data['description']):
seen_data['description'] = story_data['description']
seen_data['title'] = story_data['title']
break
for story_data in merged_data:
rss.add_item(**story_data)
return rss.writeString('utf-8')
def extract_page(self):
def extract_page_name(self):
page = None
try:
username_groups = re.search('twitter.com/(\w+)/?', self.address)
if not username_groups:
page_groups = re.search('facebook.com/(\w+)/?', self.address)
if not page_groups:
return
username = username_groups.group(1)
page = page_groups.group(1)
except IndexError:
return
return username
return page
def fetch_user(self, username):
twitter_api = None
def facebook_user(self):
facebook_api = None
social_services = None
if self.options.get('requesting_user_id', None):
social_services = MSocialServices.get_user(self.options.get('requesting_user_id'))
try:
twitter_api = social_services.twitter_api()
except tweepy.error.TweepError, e:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
(self.feed.log_title[:30], self.address, e))
facebook_api = social_services.facebook_api()
if not facebook_api:
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' %
(self.feed.log_title[:30], self.address, self.options))
return
else:
usersubs = UserSubscription.objects.filter(feed=self.feed)
@ -79,210 +97,91 @@ class FacebookFetcher:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No subscriptions' %
(self.feed.log_title[:30], self.address))
return
for sub in usersubs:
social_services = MSocialServices.get_user(sub.user_id)
if not social_services.twitter_uid: continue
try:
twitter_api = social_services.twitter_api()
if not twitter_api:
continue
else:
break
except tweepy.error.TweepError, e:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %s' %
(self.feed.log_title[:30], self.address, e))
if not social_services.facebook_uid:
continue
facebook_api = social_services.facebook_api()
if not facebook_api:
continue
else:
break
if not twitter_api:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: No twitter API for %s' %
(self.feed.log_title[:30], self.address, usersubs[0].user.username))
return
if not facebook_api:
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s: No facebook API for %s' %
(self.feed.log_title[:30], self.address, usersubs[0].user.username))
return
try:
twitter_user = twitter_api.get_user(username)
except TypeError, e:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: %s" % (e))
return
except tweepy.error.TweepError, e:
message = str(e).lower()
if ((len(e.args) >= 2 and e.args[2] == 63) or
('temporarily locked' in message)):
# Suspended
logging.debug(u' ***> [%-30s] ~FRTwitter failed, user suspended, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: User suspended")
return
elif 'suspended' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user suspended, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: User suspended")
return
elif 'expired token' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user expired, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: Expired token")
social_services.disconnect_twitter()
return
elif 'not found' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: User not found")
return
elif 'over capacity' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter over capacity, ignoring... %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(460, "Twitter Error: Over capacity")
return
else:
raise e
return twitter_user
return facebook_api
def page_feed(self, facebook_user, page):
try:
stories = facebook_user.get_object(page, fields='feed')
except Exception, e:
message = str(e).lower()
if 'not authorized' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter timeline failed, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: Not authorized")
return []
elif 'user not found' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user not found, disconnecting twitter: %s: %s' %
(self.feed.log_title[:30], self.address, e))
self.feed.save_feed_history(560, "Twitter Error: User not found")
return []
elif 'blocked from viewing' in message:
logging.debug(u' ***> [%-30s] ~FRTwitter user blocked, ignoring: %s' %
(self.feed.log_title[:30], e))
self.feed.save_feed_history(560, "Twitter Error: Blocked from viewing")
return []
else:
raise e
def fetch_page_feed(self, facebook_user, page, fields):
stories = facebook_user.get_object(page, fields=fields)
if not stories:
return []
return stories
def page_feed_story(self, story):
categories = set()
if user_tweet['full_text'].startswith('RT @'):
categories.add('retweet')
elif user_tweet['in_reply_to_status_id'] or user_tweet['full_text'].startswith('@'):
categories.add('reply')
else:
categories.add('tweet')
if user_tweet['full_text'].startswith('RT @'):
categories.add('retweet')
if user_tweet['favorite_count']:
categories.add('liked')
if user_tweet['retweet_count']:
categories.add('retweeted')
if 'http' in user_tweet['full_text']:
categories.add('link')
story = {}
content_tweet = user_tweet
entities = ""
author = user_tweet.get('author') or user_tweet.get('user')
if not isinstance(author, dict): author = author.__dict__
author_name = author['screen_name']
original_author_name = author_name
if user_tweet['in_reply_to_user_id'] == author['id']:
categories.add('reply-to-self')
retweet_author = ""
if 'retweeted_status' in user_tweet:
retweet_author = """Retweeted by
<a href="https://twitter.com/%s"><img src="%s" style="height: 20px" /></a>
<a href="https://twitter.com/%s">%s</a>
on %s""" % (
author_name,
author['profile_image_url_https'],
author_name,
author_name,
DateFormat(user_tweet['created_at']).format('l, F jS, Y g:ia').replace('.',''),
)
content_tweet = user_tweet['retweeted_status'].__dict__
author = content_tweet['author']
if not isinstance(author, dict): author = author.__dict__
author_name = author['screen_name']
tweet_title = user_tweet['full_text']
tweet_text = linebreaks(content_tweet['full_text'])
replaced = {}
entities_media = content_tweet['entities'].get('media', [])
if 'extended_entities' in content_tweet:
entities_media = content_tweet['extended_entities'].get('media', [])
for media in entities_media:
if 'media_url_https' not in media: continue
if media['type'] == 'photo':
if media.get('url') and media['url'] in tweet_text:
tweet_title = tweet_title.replace(media['url'], media['display_url'])
replacement = "<a href=\"%s\">%s</a>" % (media['expanded_url'], media['display_url'])
if not replaced.get(media['url']):
tweet_text = tweet_text.replace(media['url'], replacement)
replaced[media['url']] = True
entities += "<img src=\"%s\"> <hr>" % media['media_url_https']
if 'photo' not in categories:
categories.add('photo')
for url in content_tweet['entities'].get('urls', []):
if url['url'] in tweet_text:
replacement = "<a href=\"%s\">%s</a>" % (url['expanded_url'], url['display_url'])
if not replaced.get(url['url']):
tweet_text = tweet_text.replace(url['url'], replacement)
replaced[url['url']] = True
tweet_title = tweet_title.replace(url['url'], url['display_url'])
quote_tweet_content = ""
if 'quoted_status' in content_tweet:
quote_tweet_content = "<blockquote>"+self.tweet_story(content_tweet['quoted_status'])['description']+"</blockquote>"
created_date = content_tweet['created_at']
return stories
def page_posts_story(self, facebook_user, page_story):
categories = set()
if 'message' not in page_story:
# Probably a story shared on the page's timeline, not a published story
return
message = linebreaks(page_story['message'])
created_date = page_story['created_time']
if isinstance(created_date, unicode):
created_date = dateutil.parser.parse(created_date)
content = """<div class="NB-twitter-rss">
<div class="NB-twitter-rss-tweet">%s</div>
<div class="NB-twitter-rss-quote-tweet">%s</div>
<hr />
<div class="NB-twitter-rss-entities">%s</div>
<div class="NB-twitter-rss-author">
Posted by
<a href="https://twitter.com/%s"><img src="%s" style="height: 32px" /></a>
<a href="https://twitter.com/%s">%s</a>
on %s</div>
<div class="NB-twitter-rss-retweet">%s</div>
<div class="NB-twitter-rss-stats">%s %s%s %s</div>
permalink = facebook_user.get_object(page_story['id'], fields='permalink_url')['permalink_url']
content = """<div class="NB-facebook-rss">
<div class="NB-facebook-rss-message">%s</div>
</div>""" % (
tweet_text,
quote_tweet_content,
entities,
author_name,
author['profile_image_url_https'],
author_name,
author_name,
DateFormat(created_date).format('l, F jS, Y g:ia').replace('.',''),
retweet_author,
("<br /><br />" if content_tweet['favorite_count'] or content_tweet['retweet_count'] else ""),
("<b>%s</b> %s" % (content_tweet['favorite_count'], "like" if content_tweet['favorite_count'] == 1 else "likes")) if content_tweet['favorite_count'] else "",
(", " if content_tweet['favorite_count'] and content_tweet['retweet_count'] else ""),
("<b>%s</b> %s" % (content_tweet['retweet_count'], "retweet" if content_tweet['retweet_count'] == 1 else "retweets")) if content_tweet['retweet_count'] else "",
message
)
story = {
'title': tweet_title,
'link': "https://twitter.com/%s/status/%s" % (original_author_name, user_tweet['id']),
'title': message,
'link': permalink,
'description': content,
'author_name': author_name,
'categories': list(categories),
'unique_id': "tweet:%s" % user_tweet['id'],
'pubdate': user_tweet['created_at'],
'unique_id': "fb_post:%s" % page_story['id'],
'pubdate': created_date,
}
return story
def page_video_story(self, facebook_user, page_story):
categories = set()
if 'description' not in page_story:
return
message = linebreaks(page_story['description'])
created_date = page_story['updated_time']
if isinstance(created_date, unicode):
created_date = dateutil.parser.parse(created_date)
permalink = facebook_user.get_object(page_story['id'], fields='permalink_url')['permalink_url']
embed_html = facebook_user.get_object(page_story['id'], fields='embed_html')
if permalink.startswith('/'):
permalink = "https://www.facebook.com%s" % permalink
content = """<div class="NB-facebook-rss">
<div class="NB-facebook-rss-message">%s</div>
<div class="NB-facebook-rss-embed">%s</div>
</div>""" % (
message,
embed_html.get('embed_html', '')
)
story = {
'title': page_story.get('story', message),
'link': permalink,
'description': content,
'categories': list(categories),
'unique_id': "fb_post:%s" % page_story['id'],
'pubdate': created_date,
}
return story

View file

@ -34,6 +34,7 @@ from django.utils.encoding import smart_unicode
from utils import json_functions as json
from celery.exceptions import SoftTimeLimitExceeded
from utils.twitter_fetcher import TwitterFetcher
from utils.facebook_fetcher import FacebookFetcher
from utils.json_fetcher import JSONFetcher
# from utils.feed_functions import mail_feed_error_to_admin
@ -106,6 +107,13 @@ class FetchFeed:
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(twitter_feed)
elif re.match(r'(https?)?://(www\.)?facebook.com/\w+/?$', qurl(address, remove=['_'])):
facebook_feed = self.fetch_facebook(address)
if not facebook_feed:
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s' %
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(facebook_feed)
if not self.fpf:
try:
@ -186,6 +194,10 @@ class FetchFeed:
twitter_fetcher = TwitterFetcher(self.feed, self.options)
return twitter_fetcher.fetch(address)
def fetch_facebook(self, address=None):
facebook_fetcher = FacebookFetcher(self.feed, self.options)
return facebook_fetcher.fetch(address)
def fetch_json_feed(self, address, headers):
json_fetcher = JSONFetcher(self.feed, self.options)
return json_fetcher.fetch(address, headers)

View file

@ -85,7 +85,10 @@ def find_feeds(url, check_all=False, user_agent=None):
# Look for <link> tags.
logging.info("Looking for <link> tags.")
tree = BeautifulSoup(feed_text)
try:
tree = BeautifulSoup(feed_text)
except ValueError:
return []
links = []
for link in tree.findAll("link"):
if link.get("type") in ["application/rss+xml",