NewsBlur/apps/feed_import/models.py
Samuel Clay 0d6cb69548 Merge branch 'django1.11' into django2.0
* django1.11: (73 commits)
  Switching to new celery 4 standalone binary.
  Fixing various mongo data calls.
  Upgrading to latest celery 4 (holy moly), which required some big changes to project layout. Still needs supervisor scripts updated.
  Removing unused log on cookies.
  I believe this Context wrapping is still preserved. See this django ticket: https://code.djangoproject.com/ticket/28125. Reverting this fixes the error, so I'm assuming this is that type of render.
  Have to revert 3f122d5e03 because this broke existing sessions (logged me out) because the model has changed and the serialized model stored in redis no longer matches. Whew, this took a while to figure out.
  Upgrading redis cache.
  Adding cookies to path inspector.
  Removing dupe db log.
  Fixing missing DB logs (redis and mongo) due to this change in django 1.8: "connections.queries is now a read-only attribute."
  Removing migrations that set a default date of 2020-05-08. Not sure why this was committed. I thought we resolved the issue with default datetimes?
  Fixing CallableBool.
  Missing import
  Fixing runtime errors on django 1.10
  Fixing OAuth connect.
  Fixing various django1.9 issues, mainly around templates.
  BASE_DIR
  Not every story is from a feed.
  Styling background colors for newsletters.
  Styling more newsletter elements.
  ...
2020-06-30 12:34:59 -04:00

232 lines
9.1 KiB
Python

import datetime
import mongoengine as mongo
import pickle
import base64
from io import StringIO
from oauth2client.client import Error as OAuthError
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
from lxml import etree
from django.db import models
from django.contrib.auth.models import User
from mongoengine.queryset import OperationError
import vendor.opml as opml
from apps.rss_feeds.models import Feed, DuplicateFeed, MStarredStory
from apps.reader.models import UserSubscription, UserSubscriptionFolders
from utils import json_functions as json, urlnorm
from utils import log as logging
from utils.feed_functions import timelimit
from utils.feed_functions import add_object_to_folder
class OAuthToken(models.Model):
    """Django model storing OAuth request/access tokens and an optional credential.

    Every identifying column (``user``, ``session_id``, ``uuid``,
    ``remote_ip``) is nullable, so a row does not have to belong to a
    registered user.
    """
    # Optional owner; at most one token row per user, deleted along with the user.
    user = models.OneToOneField(User, null=True, blank=True, on_delete=models.CASCADE)
    # Optional identifiers, each capped at 50 characters.
    session_id = models.CharField(max_length=50, null=True, blank=True)
    uuid = models.CharField(max_length=50, null=True, blank=True)
    remote_ip = models.CharField(max_length=50, null=True, blank=True)
    # Token/secret pairs; these four columns are required (no null/blank).
    request_token = models.CharField(max_length=50)
    request_token_secret = models.CharField(max_length=50)
    access_token = models.CharField(max_length=50)
    access_token_secret = models.CharField(max_length=50)
    # Free-form credential text; format is not visible here — TODO confirm with callers.
    credential = models.TextField(null=True, blank=True)
    # Callable default, so the timestamp is taken when the row is created
    # (naive local time from datetime.now).
    created_date = models.DateTimeField(default=datetime.datetime.now)
class Importer:
    """Shared subscription/folder helpers for the OPML importer and exporter.

    The class defines no ``__init__``; every method reads ``self.user``, so
    subclasses must set that attribute before calling them.
    """

    def clear_feeds(self):
        """Delete every ``UserSubscription`` row belonging to ``self.user``."""
        user_subscriptions = UserSubscription.objects.filter(user=self.user)
        user_subscriptions.delete()

    def clear_folders(self):
        """Delete every ``UserSubscriptionFolders`` row belonging to ``self.user``."""
        user_folders = UserSubscriptionFolders.objects.filter(user=self.user)
        user_folders.delete()

    def get_folders(self):
        """Return the user's decoded folder structure.

        Fetches the user's ``UserSubscriptionFolders`` row, creating it with an
        empty JSON list (``'[]'``) when it does not exist yet. The row is kept
        on ``self.usf`` so callers can update and save it afterwards.
        """
        empty_layout = {'folders': '[]'}
        folder_row, _created = UserSubscriptionFolders.objects.get_or_create(
            user=self.user,
            defaults=empty_layout,
        )
        self.usf = folder_row
        return json.decode(folder_row.folders)
class OPMLExporter(Importer):
    """Serialize a user's subscriptions and folder layout as an OPML document.

    The user's feeds are loaded once at construction time (see
    :meth:`fetch_feeds`) and kept on ``self.feeds`` as ``{feed_id: feed_dict}``.
    """

    def __init__(self, user):
        self.user = user
        self.fetch_feeds()

    def process(self, verbose=False):
        """Build the OPML tree and return it serialized.

        Args:
            verbose: When true, print one line per exported feed and folder.

        Returns:
            The result of ``ElementTree.tostring`` on the ``<opml>`` root
            (bytes under the default encoding on Python 3).
        """
        now = str(datetime.datetime.now())

        root = Element('opml')
        root.set('version', '1.1')
        root.append(Comment('Generated by NewsBlur - www.newsblur.com'))

        head = SubElement(root, 'head')
        title = SubElement(head, 'title')
        title.text = 'NewsBlur Feeds'
        dc = SubElement(head, 'dateCreated')
        dc.text = now
        dm = SubElement(head, 'dateModified')
        dm.text = now

        folders = self.get_folders()
        body = SubElement(root, 'body')
        self.process_outline(body, folders, verbose=verbose)
        return tostring(root)

    def process_outline(self, body, folders, verbose=False):
        """Append ``<outline>`` children for ``folders`` to ``body``, recursively.

        Args:
            body: Parent XML element that receives the outlines.
            folders: Iterable whose entries are either an ``int`` feed id or a
                ``dict`` mapping a folder title to a nested list of the same
                shape. Feed ids missing from ``self.feeds`` and entries of any
                other type are skipped.
            verbose: When true, print progress lines.

        Returns:
            ``body``, so a folder element can be appended to its parent in the
            same expression that fills it.
        """
        for obj in folders:
            if isinstance(obj, int) and obj in self.feeds:
                feed = self.feeds[obj]
                if verbose:
                    # The title may be None; fall back to "" so slicing cannot fail.
                    print(" ---> Adding feed: %s - %s" % (feed['id'],
                                                          (feed['feed_title'] or "")[:30]))
                feed_attrs = self.make_feed_row(feed)
                body.append(Element('outline', feed_attrs))
            elif isinstance(obj, dict):
                for folder_title, folder_objs in obj.items():
                    if verbose:
                        print(" ---> Adding folder: %s" % folder_title)
                    folder_element = Element('outline', {'text': folder_title, 'title': folder_title})
                    body.append(self.process_outline(folder_element, folder_objs, verbose=verbose))
        return body

    def make_feed_row(self, feed):
        """Return the attribute dict for one feed's ``<outline>`` element.

        Every value must be a string: ElementTree refuses to serialize ``None``
        attribute values. Missing title, link and address therefore all fall
        back to the empty string.
        """
        feed_title = feed['feed_title'] or ""
        feed_attrs = {
            'text': feed_title,
            'title': feed_title,
            'type': 'rss',
            'version': 'RSS',
            'htmlUrl': feed['feed_link'] or "",
            'xmlUrl': feed['feed_address'] or "",
        }
        return feed_attrs

    def fetch_feeds(self):
        """Load the user's subscriptions into ``self.feeds`` keyed by feed id.

        Subscriptions whose feed no longer exists (``Feed.DoesNotExist`` raised
        by ``canonical()``) are left out of the export.
        """
        feeds = {}
        for sub in UserSubscription.objects.filter(user=self.user):
            try:
                feeds[sub.feed_id] = sub.canonical()
            except Feed.DoesNotExist:
                continue
        self.feeds = feeds
class OPMLImporter(Importer):
    """Import the feeds and folders of an OPML document into a user's account."""

    def __init__(self, opml_xml, user):
        self.opml_xml = opml_xml
        self.user = user

    @timelimit(10)
    def try_processing(self):
        """Run :meth:`process` wrapped by the ``timelimit(10)`` decorator."""
        return self.process()

    def process(self):
        """Parse the OPML and merge it into the user's existing folders.

        Existing subscriptions and folders are not cleared first; imported
        entries are added to whatever :meth:`get_folders` returns.

        Returns:
            The merged folder structure, or ``None`` when walking the outline
            raised ``AttributeError`` (in which case nothing is saved).
        """
        parsed_outline = opml.from_string(self.opml_xml)
        current_folders = self.get_folders()

        try:
            merged_folders = self.process_outline(parsed_outline, current_folders)
        except AttributeError:
            return None

        # Persist on the row that get_folders() cached on self.usf.
        self.usf.folders = json.encode(merged_folders)
        self.usf.save()
        return merged_folders

    def process_outline(self, outline, folders, in_folder=''):
        """Walk ``outline`` recursively, subscribing to feeds and adding folders.

        Args:
            outline: Iterable of OPML outline items.
            folders: Folder structure accumulated so far.
            in_folder: Title of the folder the current items belong to
                (``''`` for the top level).

        Returns:
            The updated folder structure.
        """
        for item in outline:
            if not hasattr(item, 'xmlUrl'):
                # No feed URL: treat the item as a folder if it is labelled,
                # otherwise ignore it.
                if hasattr(item, 'text') or hasattr(item, 'title'):
                    folder_title = getattr(item, 'text', None) or getattr(item, 'title', None)
                    folders = add_object_to_folder({folder_title: []}, in_folder, folders)
                    folders = self.process_outline(item, folders, folder_title)
                continue

            # From here on the item is a feed.
            if not hasattr(item, 'htmlUrl'):
                item.htmlUrl = None

            # If the feed title matches what's in the DB, don't override it on
            # subscription (handled after the feed row is resolved below).
            given_title = getattr(item, 'title', None) or getattr(item, 'text', None)
            if given_title:
                item.title = given_title
                user_feed_title = item.title
            else:
                item.title = item.htmlUrl or item.xmlUrl
                user_feed_title = None

            feed_address = urlnorm.normalize(item.xmlUrl)
            feed_link = urlnorm.normalize(item.htmlUrl)

            # Skip feeds whose URLs would not fit the Feed model's columns.
            if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
                continue
            if feed_link and len(feed_link) > Feed._meta.get_field('feed_link').max_length:
                continue

            # See if it exists as a duplicate first.
            known_duplicates = DuplicateFeed.objects.filter(duplicate_address=feed_address)
            if known_duplicates:
                feed_db = known_duplicates[0].feed
            else:
                feed_defaults = dict(
                    feed_address=feed_address,
                    feed_link=feed_link,
                    feed_title=item.title,
                    active_subscribers=1,
                    num_subscribers=1,
                )
                feed_db, _ = Feed.find_or_create(feed_address=feed_address,
                                                 feed_link=feed_link,
                                                 defaults=feed_defaults)

            if user_feed_title == feed_db.feed_title:
                user_feed_title = None

            self._ensure_subscription(feed_db, user_feed_title)
            folders = add_object_to_folder(feed_db.pk, in_folder, folders)

        return folders

    def _ensure_subscription(self, feed_db, user_feed_title):
        """Fetch or create the user's subscription to ``feed_db`` and refresh its flags."""
        try:
            subscription = UserSubscription.objects.get(feed=feed_db, user=self.user)
        except UserSubscription.DoesNotExist:
            subscription = UserSubscription(
                feed=feed_db,
                user=self.user,
                needs_unread_recalc=True,
                mark_read_date=datetime.datetime.utcnow() - datetime.timedelta(days=1),
                active=self.user.profile.is_premium,
                user_title=user_feed_title,
            )
            subscription.save()

        # Premium users get every imported subscription activated.
        if self.user.profile.is_premium and not subscription.active:
            subscription.active = True
            subscription.save()

        if not subscription.needs_unread_recalc:
            subscription.needs_unread_recalc = True
            subscription.save()

        return subscription

    def count_feeds_in_opml(self):
        """Return the larger of the OPML outline length and the user's subscription count.

        NOTE(review): ``len()`` of the parsed outline presumably counts only
        top-level entries, not nested feeds; verify against ``vendor.opml``.
        """
        outline_total = len(opml.from_string(self.opml_xml))
        subscription_total = UserSubscription.objects.filter(user=self.user).count()
        return subscription_total if subscription_total >= outline_total else outline_total
class UploadedOPML(mongo.Document):
    """MongoDB document keeping the raw text of an OPML file uploaded by a user."""
    # Primary key of the Django ``User`` the upload belongs to.
    user_id = mongo.IntField()
    # Raw OPML text; not required, so it may be None.
    opml_file = mongo.StringField()
    # Callable default: naive local time taken when the document is created.
    upload_date = mongo.DateTimeField(default=datetime.datetime.now)

    def __str__(self):
        """Return ``"<username>: <n> characters"`` for this upload.

        Raises:
            User.DoesNotExist: if no user with ``user_id`` exists.
        """
        user = User.objects.get(pk=self.user_id)
        # opml_file is optional; treat a missing file as zero characters
        # instead of failing on len(None).
        return "%s: %s characters" % (user.username, len(self.opml_file or ""))

    # Kept for code that still calls the Python 2-era hook explicitly.
    __unicode__ = __str__

    meta = {
        'collection': 'uploaded_opml',
        'allow_inheritance': False,
        # NOTE(review): mongoengine documents default ordering under the
        # 'ordering' key (a list); 'order' looks like it is ignored. Left
        # unchanged because fixing it alters default query order — confirm.
        'order': '-upload_date',
        'indexes': ['user_id', '-upload_date'],
    }