import datetime
from collections import defaultdict

import lxml.etree

from django.db import models
from django.contrib.auth.models import User

from apps.rss_feeds.models import Feed, DuplicateFeed
from apps.reader.models import UserSubscription, UserSubscriptionFolders
from utils import json, urlnorm
from utils import log as logging
import utils.opml as opml


class OAuthToken(models.Model):
    user = models.OneToOneField(User, null=True, blank=True)
    session_id = models.CharField(max_length=50, null=True, blank=True)
    remote_ip = models.CharField(max_length=50, null=True, blank=True)
    request_token = models.CharField(max_length=50)
    request_token_secret = models.CharField(max_length=50)
    access_token = models.CharField(max_length=50)
    access_token_secret = models.CharField(max_length=50)
    created_date = models.DateTimeField(default=datetime.datetime.now)


class Importer:
    """Base class for feed importers.

    Subclasses set ``self.user`` in ``__init__`` and implement ``process()``.
    ``clear_feeds()`` removes the user's existing folders and subscriptions so
    an import starts from a clean slate.
    """

    def clear_feeds(self):
        UserSubscriptionFolders.objects.filter(user=self.user).delete()
        UserSubscription.objects.filter(user=self.user).delete()


class OPMLImporter(Importer):

    def __init__(self, opml_xml, user):
        self.user = user
        self.opml_xml = opml_xml

    def process(self):
        outline = opml.from_string(self.opml_xml)
        self.clear_feeds()
        folders = self.process_outline(outline)
        UserSubscriptionFolders.objects.create(user=self.user, folders=json.encode(folders))

        return folders

    def process_outline(self, outline):
        folders = []

        for item in outline:
            if not hasattr(item, 'xmlUrl'):
                # Outline nodes without an xmlUrl are folders: recurse into
                # their children and nest the result under the folder name.
                folder = item
                logging.info(' ---> [%s] New Folder: %s' % (self.user, folder.text))
                folders.append({folder.text: self.process_outline(folder)})
            elif hasattr(item, 'xmlUrl'):
                feed = item
                if not hasattr(feed, 'htmlUrl'):
                    setattr(feed, 'htmlUrl', None)
                if not hasattr(feed, 'title'):
                    setattr(feed, 'title', feed.htmlUrl)
                feed_address = urlnorm.normalize(feed.xmlUrl)
                feed_link = urlnorm.normalize(feed.htmlUrl)
                # Skip feeds whose URLs would overflow the Feed model's columns;
                # only the title is safe to truncate.
                if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
                    continue
                if len(feed_link) > Feed._meta.get_field('feed_link').max_length:
                    continue
                if len(feed.title) > Feed._meta.get_field('feed_title').max_length:
                    feed.title = feed.title[:255]
                logging.info(' ---> \t%s - %s - %s' % (feed.title, feed_link, feed_address,))
                feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed.title)
                # feeds.append(feed_data)

                # See if the feed already exists as a duplicate of another feed.
                duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
                if duplicate_feed:
                    feed_db = duplicate_feed[0].feed
                else:
                    feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address,
                                                            defaults=dict(**feed_data))

                us, _ = UserSubscription.objects.get_or_create(
                    feed=feed_db,
                    user=self.user,
                    defaults={
                        'needs_unread_recalc': True,
                        'mark_read_date': datetime.datetime.now() - datetime.timedelta(days=1)
                    }
                )
                folders.append(feed_db.pk)

        return folders
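
# Illustrative usage sketch, not part of this module: a view handling an OPML
# upload might run the import roughly like this (the `request.FILES['file']`
# name is an assumption for the example):
#
#     importer = OPMLImporter(request.FILES['file'].read(), request.user)
#     folders = importer.process()
#
# `folders` (and the JSON stored on UserSubscriptionFolders) is a list mixing
# feed ids with nested {folder_name: [...]} dicts, e.g. [1, 2, {"News": [3, 4]}].
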
class GoogleReaderImporter(Importer):

    def __init__(self, feeds_xml, user):
        self.user = user
        self.feeds_xml = feeds_xml
        self.subscription_folders = []

    def process(self):
        self.clear_feeds()
        self.parse()

        folders = defaultdict(list)
        for item in self.feeds:
            folders = self.process_item(item, folders)
        # print dict(folders)
        self.rearrange_folders(folders)
        logging.info(" ---> [%s] Google Reader import: %s" % (self.user, self.subscription_folders))
        UserSubscriptionFolders.objects.create(user=self.user,
                                               folders=json.encode(self.subscription_folders))

    def parse(self):
        self.feeds = lxml.etree.fromstring(self.feeds_xml).xpath('/object/list/object')
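
    # For reference (inferred from the xpath queries below, not from an API
    # spec): `feeds_xml` is expected to be the XML rendering of the Google
    # Reader subscription list, roughly of the form
    #
    #   <object>
    #     <list name="subscriptions">
    #       <object>
    #         <string name="id">feed/http://example.com/rss</string>
    #         <string name="title">Example Feed</string>
    #         <string name="htmlUrl">http://example.com/</string>
    #         <list name="categories">
    #           <object><string name="label">News</string></object>
    #         </list>
    #       </object>
    #     </list>
    #   </object>
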
    def process_item(self, item, folders):
        feed_title = item.xpath('./string[@name="title"]') and \
                        item.xpath('./string[@name="title"]')[0].text
        feed_address = item.xpath('./string[@name="id"]') and \
                        item.xpath('./string[@name="id"]')[0].text.replace('feed/', '')
        feed_link = item.xpath('./string[@name="htmlUrl"]') and \
                        item.xpath('./string[@name="htmlUrl"]')[0].text
        category = item.xpath('./list[@name="categories"]/object/string[@name="label"]') and \
                        item.xpath('./list[@name="categories"]/object/string[@name="label"]')[0].text

        if not feed_address:
            feed_address = feed_link

        try:
            feed_link = urlnorm.normalize(feed_link)
            feed_address = urlnorm.normalize(feed_address)

            if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
                return folders

            # See if the feed already exists as a duplicate of another feed.
            duplicate_feed = DuplicateFeed.objects.filter(duplicate_address=feed_address)
            if duplicate_feed:
                feed_db = duplicate_feed[0].feed
            else:
                feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed_title)
                feed_db, _ = Feed.objects.get_or_create(feed_address=feed_address,
                                                        defaults=dict(**feed_data))

            us, _ = UserSubscription.objects.get_or_create(
                feed=feed_db,
                user=self.user,
                defaults={
                    'needs_unread_recalc': True,
                    'mark_read_date': datetime.datetime.now() - datetime.timedelta(days=1)
                }
            )
            # Uncategorized feeds land in the top-level "Root" pseudo-folder.
            if not category:
                category = "Root"
            folders[category].append(feed_db.pk)
        except Exception, e:
            logging.info(' *** -> Exception: %s' % e)

        return folders

    def rearrange_folders(self, folders, depth=0):
        for folder, items in folders.items():
            if folder == 'Root':
                self.subscription_folders += items
            else:
                # folder_parents = folder.split(u' \u2014 ')
                self.subscription_folders.append({folder: items})
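
# Illustrative usage sketch, not part of this module: once the Google Reader
# subscription list XML has been fetched (e.g. with an OAuth-signed request
# using tokens like those stored on OAuthToken), the import might be run as:
#
#     importer = GoogleReaderImporter(feeds_xml, user)
#     importer.process()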