NewsBlur/apps/feed_import/models.py

236 lines
8.8 KiB
Python
Raw Normal View History

2024-04-24 09:50:42 -04:00
import base64
import datetime
import pickle
2024-04-24 09:50:42 -04:00
from xml.etree.ElementTree import Comment, Element, SubElement, tostring
import mongoengine as mongo
from django.contrib.auth.models import User
2024-04-24 09:50:42 -04:00
from django.db import models
from lxml import etree
from mongoengine.queryset import OperationError
2024-04-24 09:50:42 -04:00
from oauth2client.client import Error as OAuthError
import vendor.opml as opml
from apps.reader.models import UserSubscription, UserSubscriptionFolders
2024-04-24 09:50:42 -04:00
from apps.rss_feeds.models import DuplicateFeed, Feed
from utils import json_functions as json
2010-08-16 15:45:35 -04:00
from utils import log as logging
2024-04-24 09:50:42 -04:00
from utils import urlnorm
from utils.feed_functions import add_object_to_folder, timelimit
class OAuthToken(models.Model):
    """Persisted OAuth tokens and credential for a feed-import session.

    Every identifying column (``user``, ``session_id``, ``uuid``,
    ``remote_ip``) is nullable.
    NOTE(review): presumably this lets a token be stored before the
    visitor is tied to an account -- confirm against the views that
    create these rows.
    """

    # Owner of the token; optional, and removed together with the user.
    user = models.OneToOneField(
        User,
        null=True,
        blank=True,
        on_delete=models.CASCADE,
    )

    # Optional identifiers stored alongside the token.
    session_id = models.CharField(max_length=50, null=True, blank=True)
    uuid = models.CharField(max_length=50, null=True, blank=True)
    remote_ip = models.CharField(max_length=50, null=True, blank=True)

    # Request-token / access-token pairs (required, at most 50 characters each).
    request_token = models.CharField(max_length=50)
    request_token_secret = models.CharField(max_length=50)
    access_token = models.CharField(max_length=50)
    access_token_secret = models.CharField(max_length=50)

    # Free-form credential text; may be absent.
    credential = models.TextField(null=True, blank=True)

    # Set to the current time when the row is created (callable default).
    created_date = models.DateTimeField(default=datetime.datetime.now)
2024-04-24 09:43:56 -04:00
class Importer:
    """Base class with subscription/folder helpers shared by subclasses.

    This class defines no ``__init__``; every method reads ``self.user``,
    so a subclass must assign it before calling any of them.
    """

    def clear_feeds(self):
        """Delete all ``UserSubscription`` rows belonging to ``self.user``."""
        subscriptions = UserSubscription.objects.filter(user=self.user)
        subscriptions.delete()

    def clear_folders(self):
        """Delete all ``UserSubscriptionFolders`` rows belonging to ``self.user``."""
        folder_rows = UserSubscriptionFolders.objects.filter(user=self.user)
        folder_rows.delete()

    def get_folders(self):
        """Return the user's decoded folder structure, creating it if missing.

        The ``UserSubscriptionFolders`` row is fetched, or created with an
        empty JSON list (``"[]"``) as its ``folders`` value. The row is kept
        on ``self.usf`` so callers can update and save it later.

        Returns:
            The result of ``json.decode`` applied to the row's ``folders``
            string.
        """
        usf, _created = UserSubscriptionFolders.objects.get_or_create(
            user=self.user,
            defaults={"folders": "[]"},
        )
        self.usf = usf
        return json.decode(usf.folders)
2024-04-24 09:43:56 -04:00
class OPMLExporter(Importer):
    """Export a user's subscriptions and folder layout as an OPML document.

    On construction the user's subscriptions are loaded into ``self.feeds``
    (a dict mapping feed id to the value returned by
    ``UserSubscription.canonical()``). ``process()`` then renders them.
    """

    def __init__(self, user):
        """Remember ``user`` and eagerly load their feeds via ``fetch_feeds``."""
        self.user = user
        self.fetch_feeds()

    def process(self, verbose=False):
        """Build the OPML tree and return it serialized.

        Args:
            verbose: When true, print a line for every feed and folder added.

        Returns:
            The document produced by ``ElementTree.tostring`` with
            ``encoding="utf8"`` and ``method="xml"``.
        """
        now = str(datetime.datetime.now())

        root = Element("opml")
        root.set("version", "1.1")
        root.append(Comment("Generated by NewsBlur - newsblur.com"))

        head = SubElement(root, "head")
        title = SubElement(head, "title")
        title.text = "NewsBlur Feeds"
        # Created and modified share the same timestamp: the document is
        # generated fresh on every export.
        date_created = SubElement(head, "dateCreated")
        date_created.text = now
        date_modified = SubElement(head, "dateModified")
        date_modified.text = now

        folders = self.get_folders()
        body = SubElement(root, "body")
        self.process_outline(body, folders, verbose=verbose)

        return tostring(root, encoding="utf8", method="xml")

    def process_outline(self, body, folders, verbose=False):
        """Append ``<outline>`` elements for ``folders`` to ``body``, recursively.

        Args:
            body: Parent element that receives the new outlines.
            folders: Iterable whose items are either feed ids (``int``) or
                dicts mapping a folder title to a nested list of the same
                shape. Feed ids missing from ``self.feeds`` and items of any
                other type are skipped.
            verbose: When true, print a line for every feed and folder added.

        Returns:
            ``body`` itself, so a nested folder element can be appended by
            the caller in one expression.
        """
        for obj in folders:
            if isinstance(obj, int) and obj in self.feeds:
                feed = self.feeds[obj]
                if verbose:
                    # Guard against a missing title so the slice cannot fail.
                    short_title = (feed["feed_title"] or "")[:30]
                    print(" ---> Adding feed: %s - %s" % (feed["id"], short_title))
                body.append(Element("outline", self.make_feed_row(feed)))
            elif isinstance(obj, dict):
                for folder_title, folder_objs in obj.items():
                    if verbose:
                        print(" ---> Adding folder: %s" % folder_title)
                    folder_element = Element("outline", {"text": folder_title, "title": folder_title})
                    body.append(self.process_outline(folder_element, folder_objs, verbose=verbose))
        return body

    def make_feed_row(self, feed):
        """Return the attribute dict for one feed's ``<outline>`` element.

        ``feed`` must provide ``feed_title``, ``feed_link`` and
        ``feed_address``. Any of them may be ``None``/empty; each is replaced
        by an empty string, because ElementTree cannot serialize ``None``
        attribute values.
        """
        feed_title = feed["feed_title"] or ""
        return {
            "text": feed_title,
            "title": feed_title,
            "type": "rss",
            "version": "RSS",
            "htmlUrl": feed["feed_link"] or "",
            "xmlUrl": feed["feed_address"] or "",
        }

    def fetch_feeds(self):
        """Populate ``self.feeds`` with ``{feed_id: sub.canonical()}``.

        Subscriptions whose ``canonical()`` call raises ``Feed.DoesNotExist``
        are left out.
        """
        feeds = {}
        for sub in UserSubscription.objects.filter(user=self.user):
            try:
                feeds[sub.feed_id] = sub.canonical()
            except Feed.DoesNotExist:
                continue
        self.feeds = feeds

    @property
    def feed_count(self):
        """Number of feeds loaded by ``fetch_feeds``."""
        return len(self.feeds)
class OPMLImporter(Importer):
    """Import feeds and folders from an OPML document into a user's account.

    The import merges into the user's current folder structure: existing
    subscriptions and folders are not cleared first.
    """

    def __init__(self, opml_xml, user):
        """Store the raw OPML text and the user it is imported for."""
        self.user = user
        self.opml_xml = opml_xml

    @timelimit(10)
    def try_processing(self):
        """Run ``process()`` under the ``timelimit(10)`` decorator.

        NOTE(review): presumably this aborts after 10 seconds -- confirm
        against ``utils.feed_functions.timelimit``.
        """
        return self.process()

    def process(self):
        """Parse the OPML, subscribe to its feeds and persist the folders.

        Returns:
            The updated folder structure, or ``None`` when walking the
            outline raised ``AttributeError`` (nothing is saved in that case).
        """
        outline = opml.from_string(self.opml_xml)
        folders = self.get_folders()

        try:
            folders = self.process_outline(outline, folders)
        except AttributeError:
            return None

        # Only reached on success; errors from here on are not swallowed.
        self.usf.folders = json.encode(folders)
        self.usf.save()
        return folders

    def process_outline(self, outline, folders, in_folder=""):
        """Walk ``outline`` and merge each item into ``folders``.

        Args:
            outline: Iterable of parsed OPML outline items.
            folders: Current folder structure to add to.
            in_folder: Title of the folder the items belong to ("" for the
                top level).

        Returns:
            The folder structure after every item has been handled. Items
            with an ``xmlUrl`` are feeds; items without one that have a
            ``text`` or ``title`` are folders; anything else is ignored.
        """
        for item in outline:
            if hasattr(item, "xmlUrl"):
                folders = self._import_feed(item, folders, in_folder)
            elif hasattr(item, "text") or hasattr(item, "title"):
                folders = self._import_folder(item, folders, in_folder)
        return folders

    def _import_folder(self, folder, folders, in_folder):
        """Add ``folder`` to ``folders`` and recurse into its children."""
        title = getattr(folder, "text", None) or getattr(folder, "title", None)
        folders = add_object_to_folder({title: []}, in_folder, folders)
        return self.process_outline(folder, folders, title)

    def _import_feed(self, feed, folders, in_folder):
        """Subscribe the user to ``feed`` and file it under ``in_folder``.

        Returns ``folders`` unchanged when the normalized feed address or
        link exceeds the corresponding ``Feed`` column length.
        """
        if not hasattr(feed, "htmlUrl"):
            feed.htmlUrl = None

        # If feed title matches what's in the DB, don't override it on subscription.
        feed_title = getattr(feed, "title", None) or getattr(feed, "text", None)
        if feed_title:
            feed.title = feed_title
            user_feed_title = feed.title
        else:
            # No title in the OPML: fall back to a URL and record no custom title.
            feed.title = feed.htmlUrl or feed.xmlUrl
            user_feed_title = None

        feed_address = urlnorm.normalize(feed.xmlUrl)
        feed_link = urlnorm.normalize(feed.htmlUrl)

        address_limit = Feed._meta.get_field("feed_address").max_length
        if len(feed_address) > address_limit:
            return folders
        if feed_link and len(feed_link) > Feed._meta.get_field("feed_link").max_length:
            return folders

        # See if it exists as a duplicate first
        duplicates = DuplicateFeed.objects.filter(duplicate_address=feed_address)
        if duplicates:
            feed_db = duplicates[0].feed
        else:
            feed_defaults = {
                "feed_address": feed_address,
                "feed_link": feed_link,
                "feed_title": feed.title,
                "active_subscribers": 1,
                "num_subscribers": 1,
            }
            feed_db, _ = Feed.find_or_create(
                feed_address=feed_address,
                feed_link=feed_link,
                defaults=feed_defaults,
            )

        if user_feed_title == feed_db.feed_title:
            user_feed_title = None

        self._ensure_subscription(feed_db, user_feed_title)
        return add_object_to_folder(feed_db.pk, in_folder, folders)

    def _ensure_subscription(self, feed_db, user_feed_title):
        """Create or refresh the user's subscription to ``feed_db``.

        A new subscription is active only for premium users, is flagged for
        an unread recount, and has its read marker set to one day ago (UTC).
        An existing one is activated for premium users and flagged for an
        unread recount if it is not already.
        """
        try:
            us = UserSubscription.objects.get(feed=feed_db, user=self.user)
        except UserSubscription.DoesNotExist:
            us = None

        if us is None:
            one_day_ago = datetime.datetime.utcnow() - datetime.timedelta(days=1)
            us = UserSubscription(
                feed=feed_db,
                user=self.user,
                needs_unread_recalc=True,
                mark_read_date=one_day_ago,
                active=self.user.profile.is_premium,
                user_title=user_feed_title,
            )
            us.save()

        if self.user.profile.is_premium and not us.active:
            us.active = True
            us.save()

        if not us.needs_unread_recalc:
            us.needs_unread_recalc = True
            us.save()

    def count_feeds_in_opml(self):
        """Return the larger of the parsed-outline length and subscription count.

        The OPML figure is ``len()`` of the parsed document.
        NOTE(review): that likely counts only top-level outlines, not
        nested feeds -- confirm against ``vendor.opml``.
        """
        outline_count = len(opml.from_string(self.opml_xml))
        subscription_count = UserSubscription.objects.filter(user=self.user).count()
        return max(subscription_count, outline_count)
2024-04-24 09:43:56 -04:00
class UploadedOPML(mongo.Document):
    """MongoDB record of an OPML file uploaded by a user.

    Stored in the ``uploaded_opml`` collection, indexed on ``user_id`` and
    on ``upload_date`` descending.
    """

    # Primary key of the Django ``User`` who uploaded the file.
    user_id = mongo.IntField()
    # Raw OPML text as uploaded.
    opml_file = mongo.StringField()
    # Defaults to the current time when the document is created.
    upload_date = mongo.DateTimeField(default=datetime.datetime.now)

    def __str__(self):
        """Return ``"<username>: <n> characters"`` without ever raising.

        Falls back to a placeholder when the user row no longer exists and
        treats a missing ``opml_file`` as empty, so printing or logging a
        document cannot fail.
        """
        try:
            username = User.objects.get(pk=self.user_id).username
        except User.DoesNotExist:
            username = "[deleted user %s]" % self.user_id
        return "%s: %s characters" % (username, len(self.opml_file or ""))

    meta = {
        "collection": "uploaded_opml",
        "allow_inheritance": False,
        # NOTE(review): MongoEngine's default-ordering key is "ordering"
        # (a list, e.g. ["-upload_date"]); "order" is likely ignored.
        # Left unchanged because switching it would alter the result
        # order of existing queries -- confirm before renaming.
        "order": "-upload_date",
        "indexes": ["user_id", "-upload_date"],
    }