import datetime
import oauth2 as oauth
import mongoengine as mongo
from collections import defaultdict
from StringIO import StringIO
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
from lxml import etree
from django.db import models
from django.contrib.auth.models import User
from django.conf import settings
from mongoengine.queryset import OperationError
import vendor.opml as opml
from apps.rss_feeds.models import Feed, DuplicateFeed, MStarredStory
from apps.reader.models import UserSubscription, UserSubscriptionFolders
from utils import json_functions as json, urlnorm
from utils import log as logging
from utils.feed_functions import timelimit


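# OAuth credentials for talking to Google Reader on a user's behalf.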
class OAuthToken(models.Model):
    user = models.OneToOneField(User, null=True, blank=True)
    session_id = models.CharField(max_length=50, null=True, blank=True)
    uuid = models.CharField(max_length=50, null=True, blank=True)
    remote_ip = models.CharField(max_length=50, null=True, blank=True)
    request_token = models.CharField(max_length=50)
    request_token_secret = models.CharField(max_length=50)
    access_token = models.CharField(max_length=50)
    access_token_secret = models.CharField(max_length=50)
    created_date = models.DateTimeField(default=datetime.datetime.now)


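# Walks a user's subscriptions and folder structure and renders them as an
# OPML document for export.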
class OPMLExporter:

    def __init__(self, user):
        self.user = user
        self.fetch_feeds()

    def process(self):
        now = str(datetime.datetime.now())

        root = Element('opml')
        root.set('version', '1.1')
        root.append(Comment('Generated by NewsBlur - www.newsblur.com'))

        head = SubElement(root, 'head')
        title = SubElement(head, 'title')
        title.text = 'NewsBlur Feeds'
        dc = SubElement(head, 'dateCreated')
        dc.text = now
        dm = SubElement(head, 'dateModified')
        dm.text = now

        folders = self.get_folders()
        body = SubElement(root, 'body')
        self.process_outline(body, folders)
        return tostring(root)

    def process_outline(self, body, folders):
        # Folders are stored as a list mixing feed ids (ints) and
        # {folder_title: [children]} dicts, so recurse on the dicts.
        for obj in folders:
            if isinstance(obj, int) and obj in self.feeds:
                feed = self.feeds[obj]
                feed_attrs = self.make_feed_row(feed)
                body.append(Element('outline', feed_attrs))
            elif isinstance(obj, dict):
                for folder_title, folder_objs in obj.items():
                    folder_element = Element('outline', {'text': folder_title, 'title': folder_title})
                    body.append(self.process_outline(folder_element, folder_objs))
        return body

    def make_feed_row(self, feed):
        feed_attrs = {
            'text': feed['feed_title'],
            'title': feed['feed_title'],
            'type': 'rss',
            'version': 'RSS',
            'htmlUrl': feed['feed_link'],
            'xmlUrl': feed['feed_address'],
        }
        return feed_attrs

    def get_folders(self):
        folders = UserSubscriptionFolders.objects.get(user=self.user)
        return json.decode(folders.folders)

    def fetch_feeds(self):
        subs = UserSubscription.objects.filter(user=self.user)
        self.feeds = dict((sub.feed_id, sub.canonical()) for sub in subs)


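# Base class shared by the OPML and Google Reader importers: both start by
# clearing out the user's existing subscriptions and folders.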
class Importer:

    def clear_feeds(self):
        UserSubscriptionFolders.objects.filter(user=self.user).delete()
        UserSubscription.objects.filter(user=self.user).delete()


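# Rebuilds a user's subscriptions and folders from an uploaded OPML file.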
class OPMLImporter(Importer):

    def __init__(self, opml_xml, user):
        self.user = user
        self.opml_xml = opml_xml

    def try_processing(self):
        # Cap the import at 20 seconds so a huge OPML file can't hang a worker.
        folders = timelimit(20)(self.process)()
        return folders

    def process(self):
        self.clear_feeds()
        outline = opml.from_string(self.opml_xml)
        folders = self.process_outline(outline)
        UserSubscriptionFolders.objects.create(user=self.user, folders=json.encode(folders))

        return folders

    def process_outline(self, outline):
        folders = []
        for item in outline:
            # An outline node with no xmlUrl but a text/title is a folder.
            if (not hasattr(item, 'xmlUrl') and
                (hasattr(item, 'text') or hasattr(item, 'title'))):
                folder = item
                title = getattr(item, 'text', None) or getattr(item, 'title', None)
                # if hasattr(folder, 'text'):
                #     logging.info(' ---> [%s] ~FRNew Folder: %s' % (self.user, folder.text))
                folders.append({title: self.process_outline(folder)})
            elif hasattr(item, 'xmlUrl'):
                feed = item
                if not hasattr(feed, 'htmlUrl'):
                    setattr(feed, 'htmlUrl', None)
                # If feed title matches what's in the DB, don't override it on subscription.
                feed_title = getattr(feed, 'title', None) or getattr(feed, 'text', None)
                if not feed_title:
                    setattr(feed, 'title', feed.htmlUrl or feed.xmlUrl)
                    user_feed_title = None
                else:
                    setattr(feed, 'title', feed_title)
                    user_feed_title = feed.title

                feed_address = urlnorm.normalize(feed.xmlUrl)
                feed_link = urlnorm.normalize(feed.htmlUrl)
                # Skip feeds whose URLs won't fit in the database columns.
                if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
                    continue
                if feed_link and len(feed_link) > Feed._meta.get_field('feed_link').max_length:
                    continue
                # logging.info(' ---> \t~FR%s - %s - %s' % (feed.title, feed_link, feed_address,))
                feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed.title)
                # feeds.append(feed_data)

                # See if it exists as a duplicate first
                duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
                if duplicate_feed:
                    feed_db = duplicate_feed[0].feed
                else:
                    feed_data['active_subscribers'] = 1
                    feed_data['num_subscribers'] = 1
                    feed_db, _ = Feed.find_or_create(feed_address=feed_address,
                                                     feed_link=feed_link,
                                                     defaults=dict(**feed_data))

                if user_feed_title == feed_db.feed_title:
                    user_feed_title = None

                us, _ = UserSubscription.objects.get_or_create(
                    feed=feed_db,
                    user=self.user,
                    defaults={
                        'needs_unread_recalc': True,
                        'mark_read_date': datetime.datetime.utcnow() - datetime.timedelta(days=1),
                        'active': self.user.profile.is_premium,
                        'user_title': user_feed_title
                    }
                )
                if self.user.profile.is_premium and not us.active:
                    us.active = True
                    us.save()
                if not us.needs_unread_recalc:
                    us.needs_unread_recalc = True
                    us.save()
                folders.append(feed_db.pk)
        return folders

    def count_feeds_in_opml(self):
        opml_count = len(opml.from_string(self.opml_xml))
        sub_count = UserSubscription.objects.filter(user=self.user).count()
        return opml_count + sub_count


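# Archival copy of the raw OPML file a user uploaded, stored in MongoDB.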
class UploadedOPML(mongo.Document):
    user_id = mongo.IntField()
    opml_file = mongo.StringField()
    upload_date = mongo.DateTimeField(default=datetime.datetime.now)

    def __unicode__(self):
        user = User.objects.get(pk=self.user_id)
        return "%s: %s characters" % (user.username, len(self.opml_file))

    meta = {
        'collection': 'uploaded_opml',
        'allow_inheritance': False,
        'order': '-upload_date',
        'indexes': ['user_id', '-upload_date'],
    }


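# Imports subscriptions (and optionally starred stories) from Google Reader,
# either via the OAuth-authenticated API or from an already-fetched XML string.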
class GoogleReaderImporter(Importer):

    def __init__(self, user, xml=None):
        self.user = user
        self.subscription_folders = []
        self.scope = "http://www.google.com/reader/api"
        self.xml = xml

    def import_feeds(self):
        sub_url = "%s/0/subscription/list" % self.scope
        if not self.xml:
            feeds_xml = self.send_request(sub_url)
        else:
            feeds_xml = self.xml
        self.process_feeds(feeds_xml)

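    # Signs a GET request with the user's stored OAuth token and returns the
    # response body, or None if the user has no token on file.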
    def send_request(self, url):
        user_tokens = OAuthToken.objects.filter(user=self.user)

        if user_tokens.count():
            user_token = user_tokens[0]
            consumer = oauth.Consumer(settings.OAUTH_KEY, settings.OAUTH_SECRET)
            token = oauth.Token(user_token.access_token, user_token.access_token_secret)
            client = oauth.Client(consumer, token)
            _, content = client.request(url, 'GET')
            return content

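    # Wipes the user's existing subscriptions, then rebuilds folders and
    # subscriptions from the Reader subscription-list XML.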
    def process_feeds(self, feeds_xml):
        self.clear_feeds()
        self.feeds = self.parse(feeds_xml)

        folders = defaultdict(list)
        for item in self.feeds:
            folders = self.process_item(item, folders)

        self.rearrange_folders(folders)
        logging.user(self.user, "~BB~FW~SBGoogle Reader import: ~BT~FW%s" % (self.subscription_folders))
        UserSubscriptionFolders.objects.get_or_create(user=self.user, defaults=dict(
            folders=json.encode(self.subscription_folders)))

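    # Parse in lxml's recover mode so malformed Reader XML doesn't abort the import.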
    def parse(self, feeds_xml):
        parser = etree.XMLParser(recover=True)
        tree = etree.parse(StringIO(feeds_xml), parser)
        feeds = tree.xpath('/object/list/object')
        return feeds

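    # Finds or creates the Feed and UserSubscription for a single Reader
    # subscription, filing the feed id under its category folder (or "Root").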
    def process_item(self, item, folders):
        feed_title = item.xpath('./string[@name="title"]') and \
                     item.xpath('./string[@name="title"]')[0].text
        feed_address = item.xpath('./string[@name="id"]') and \
                       item.xpath('./string[@name="id"]')[0].text.replace('feed/', '')
        feed_link = item.xpath('./string[@name="htmlUrl"]') and \
                    item.xpath('./string[@name="htmlUrl"]')[0].text
        category = item.xpath('./list[@name="categories"]/object/string[@name="label"]') and \
                   item.xpath('./list[@name="categories"]/object/string[@name="label"]')[0].text

        if not feed_address:
            feed_address = feed_link

        try:
            feed_link = urlnorm.normalize(feed_link)
            feed_address = urlnorm.normalize(feed_address)

            if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
                return folders

            # See if it exists as a duplicate first
            duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
            if duplicate_feed:
                feed_db = duplicate_feed[0].feed
            else:
                feed_data = dict(feed_title=feed_title)
                feed_data['active_subscribers'] = 1
                feed_data['num_subscribers'] = 1
                feed_db, _ = Feed.find_or_create(feed_address=feed_address, feed_link=feed_link,
                                                 defaults=dict(**feed_data))

            us, _ = UserSubscription.objects.get_or_create(
                feed=feed_db,
                user=self.user,
                defaults={
                    'needs_unread_recalc': True,
                    'mark_read_date': datetime.datetime.utcnow() - datetime.timedelta(days=1),
                    'active': self.user.profile.is_premium,
                }
            )
            if not us.needs_unread_recalc:
                us.needs_unread_recalc = True
                us.save()
            if not category: category = "Root"
            folders[category].append(feed_db.pk)
        except Exception, e:
            logging.info(' *** -> Exception: %s: %s' % (e, item))

        return folders

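    # Converts the {category: [feed ids]} dict into NewsBlur's folder format:
    # bare feed ids at the top level for "Root", {folder: [ids]} dicts otherwise.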
    def rearrange_folders(self, folders, depth=0):
        for folder, items in folders.items():
            if folder == 'Root':
                self.subscription_folders += items
            else:
                # folder_parents = folder.split(u' \u2014 ')
                self.subscription_folders.append({folder: items})

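    # Fetches up to `count` starred items from Reader's stream API as JSON.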
    def import_starred_items(self, count=10):
        sub_url = "%s/0/stream/contents/user/-/state/com.google/starred?n=%s" % (self.scope, count)
        stories_str = self.send_request(sub_url)
        try:
            stories = json.decode(stories_str)
        except:
            logging.user(self.user, "~BB~FW~SBGoogle Reader starred stories: ~BT~FWNo stories")
            stories = None
        if stories:
            logging.user(self.user, "~BB~FW~SBGoogle Reader starred stories: ~BT~FW%s stories" % (len(stories['items'])))
            self.process_starred_items(stories['items'])

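    # Saves each starred story as an MStarredStory, matching it back to an
    # existing feed by streamId (falling back to htmlUrl) without creating feeds.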
    def process_starred_items(self, stories):
        for story in stories:
            try:
                original_feed = Feed.get_feed_from_url(story['origin']['streamId'], create=False, fetch=False)
                if not original_feed:
                    original_feed = Feed.get_feed_from_url(story['origin']['htmlUrl'], create=False, fetch=False)
                content = story.get('content') or story.get('summary')
                story_db = {
                    "user_id": self.user.pk,
                    "starred_date": datetime.datetime.fromtimestamp(story['updated']),
                    "story_date": datetime.datetime.fromtimestamp(story['published']),
                    "story_title": story.get('title', story.get('origin', {}).get('title', '[Untitled]')),
                    "story_permalink": story['alternate'][0]['href'],
                    "story_guid": story['id'],
                    "story_content": content.get('content'),
                    "story_author_name": story.get('author'),
                    "story_feed_id": original_feed and original_feed.pk,
                    "story_tags": [tag for tag in story.get('categories', []) if 'user/' not in tag]
                }
                logging.user(self.user, "~FCStarring: ~SB%s~SN in ~SB%s" % (story_db['story_title'][:50], original_feed and original_feed.feed_title))
                MStarredStory.objects.create(**story_db)
            except OperationError:
                logging.user(self.user, "~FCAlready starred: ~SB%s" % (story_db['story_title'][:50]))
            except Exception, e:
                logging.user(self.user, "~FC~BRFailed to star: ~SB%s / %s" % (story, e))