NewsBlur-viq/apps/rss_feeds/models.py

import base64
import datetime
import difflib
import hashlib
import html
import math
import random
import re
import time
import urllib.parse
import zlib
from collections import defaultdict
from operator import itemgetter

import bson
import mongoengine as mongo
import pymongo
import redis
import requests
from bs4 import BeautifulSoup
from bson.objectid import ObjectId
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.sites.models import Site

# from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures
from django.db import IntegrityError, models
from django.db.models.query import QuerySet
from django.db.utils import DatabaseError
from django.template.defaultfilters import slugify
from django.urls import reverse
from django.utils.encoding import DjangoUnicodeDecodeError, smart_bytes, smart_str
from mongoengine.errors import ValidationError
from mongoengine.queryset import NotUniqueError, OperationError, Q

from apps.rss_feeds.tasks import PushFeeds, ScheduleCountTagsForUser, UpdateFeeds
from apps.rss_feeds.text_importer import TextImporter
from apps.search.models import SearchFeed, SearchStory
from apps.statistics.rstats import RStats
from utils import feedfinder_forman, feedfinder_pilgrim
from utils import json_functions as json
from utils import log as logging
from utils import urlnorm
from utils.feed_functions import (
    TimeoutError,
    levenshtein_distance,
    relative_timesince,
    seconds_timesince,
    strip_underscore_from_feed_address,
    timelimit,
)
from utils.fields import AutoOneToOneField
from utils.story_functions import (
    create_imageproxy_signed_url,
    htmldiff,
    prep_for_search,
    strip_comments,
    strip_comments__lxml,
    strip_tags,
)
from vendor.timezones.utilities import localtime_for_timezone

ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = list(range(4))


class Feed(models.Model):
    feed_address = models.URLField(max_length=764, db_index=True)
    feed_address_locked = models.BooleanField(default=False, blank=True, null=True)
    feed_link = models.URLField(max_length=1000, default="", blank=True, null=True)
    feed_link_locked = models.BooleanField(default=False)
    hash_address_and_link = models.CharField(max_length=64, unique=True)
    feed_title = models.CharField(max_length=255, default="[Untitled]", blank=True, null=True)
    is_push = models.BooleanField(default=False, blank=True, null=True)
    active = models.BooleanField(default=True, db_index=True)
    num_subscribers = models.IntegerField(default=-1)
    active_subscribers = models.IntegerField(default=-1, db_index=True)
    premium_subscribers = models.IntegerField(default=-1)
    archive_subscribers = models.IntegerField(default=0, null=True, blank=True)
    pro_subscribers = models.IntegerField(default=0, null=True, blank=True)
    active_premium_subscribers = models.IntegerField(default=-1)
    branch_from_feed = models.ForeignKey(
        "Feed", blank=True, null=True, db_index=True, on_delete=models.CASCADE
    )
    last_update = models.DateTimeField(db_index=True)
    next_scheduled_update = models.DateTimeField()
    last_story_date = models.DateTimeField(null=True, blank=True)
    fetched_once = models.BooleanField(default=False)
    known_good = models.BooleanField(default=False)
    has_feed_exception = models.BooleanField(default=False, db_index=True)
    has_page_exception = models.BooleanField(default=False, db_index=True)
    has_page = models.BooleanField(default=True)
    exception_code = models.IntegerField(default=0)
    errors_since_good = models.IntegerField(default=0)
    min_to_decay = models.IntegerField(default=0)
    days_to_trim = models.IntegerField(default=90)
    creation = models.DateField(auto_now_add=True)
    etag = models.CharField(max_length=255, blank=True, null=True)
    last_modified = models.DateTimeField(null=True, blank=True)
    stories_last_month = models.IntegerField(default=0)
    average_stories_per_month = models.IntegerField(default=0)
    last_load_time = models.IntegerField(default=0)
    favicon_color = models.CharField(max_length=6, null=True, blank=True)
    favicon_not_found = models.BooleanField(default=False)
    s3_page = models.BooleanField(default=False, blank=True, null=True)
    s3_icon = models.BooleanField(default=False, blank=True, null=True)
    search_indexed = models.BooleanField(default=None, null=True, blank=True)
    fs_size_bytes = models.IntegerField(null=True, blank=True)
    archive_count = models.IntegerField(null=True, blank=True)

    class Meta:
        db_table = "feeds"
        ordering = ["feed_title"]
        # unique_together=[('feed_address', 'feed_link')]

    def __str__(self):
        if not self.feed_title:
            self.feed_title = "[Untitled]"
            self.save()
        return "%s%s: %s - %s/%s/%s/%s/%s %s stories (%s bytes)" % (
            self.pk,
            (" [B: %s]" % self.branch_from_feed.pk if self.branch_from_feed else ""),
            self.feed_title,
            self.num_subscribers,
            self.active_subscribers,
            self.active_premium_subscribers,
            self.archive_subscribers,
            self.pro_subscribers,
            self.archive_count,
            self.fs_size_bytes,
        )

    @property
    def title(self):
        title = self.feed_title or "[Untitled]"
        if self.active_premium_subscribers >= 1:
            title = "%s*" % title[:29]
        return title

    @property
    def log_title(self):
        return self.__str__()

    @property
    def permalink(self):
        return "%s/site/%s/%s" % (settings.NEWSBLUR_URL, self.pk, slugify(self.feed_title.lower()[:50]))

    @property
    def favicon_url(self):
        if settings.BACKED_BY_AWS["icons_on_s3"] and self.s3_icon:
            return "https://s3.amazonaws.com/%s/%s.png" % (settings.S3_ICONS_BUCKET_NAME, self.pk)
        return reverse("feed-favicon", kwargs={"feed_id": self.pk})

    @property
    def favicon_url_fqdn(self):
        if settings.BACKED_BY_AWS["icons_on_s3"] and self.s3_icon:
            return self.favicon_url
        return "https://%s%s" % (Site.objects.get_current().domain, self.favicon_url)

    @property
    def s3_pages_key(self):
        return "%s.gz.html" % self.pk

    @property
    def s3_icons_key(self):
        return "%s.png" % self.pk

    @property
    def unread_cutoff(self):
        if self.archive_subscribers and self.archive_subscribers > 0:
            return datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD_ARCHIVE)
        if self.premium_subscribers > 0:
            return datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD)

        return datetime.datetime.utcnow() - datetime.timedelta(days=settings.DAYS_OF_UNREAD_FREE)

    @classmethod
    def days_of_story_hashes_for_feed(cls, feed_id):
        try:
            feed = cls.objects.only("archive_subscribers").get(pk=feed_id)
            return feed.days_of_story_hashes
        except cls.DoesNotExist:
            return settings.DAYS_OF_STORY_HASHES

    @property
    def days_of_story_hashes(self):
        if self.archive_subscribers and self.archive_subscribers > 0:
            return settings.DAYS_OF_STORY_HASHES_ARCHIVE
        return settings.DAYS_OF_STORY_HASHES

    @property
    def story_hashes_in_unread_cutoff(self):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        current_time = int(time.time() + 60 * 60 * 24)
        unread_cutoff = self.unread_cutoff.strftime("%s")
        story_hashes = r.zrevrangebyscore("zF:%s" % self.pk, current_time, unread_cutoff)

        return story_hashes

    @classmethod
    def generate_hash_address_and_link(cls, feed_address, feed_link):
        if not feed_address:
            feed_address = ""
        if not feed_link:
            feed_link = ""
        return hashlib.sha1((feed_address + feed_link).encode(encoding="utf-8")).hexdigest()

    @property
    def is_newsletter(self):
        return self.feed_address.startswith("newsletter:") or self.feed_address.startswith(
            "http://newsletter:"
        )

    def canonical(self, full=False, include_favicon=True):
        feed = {
            "id": self.pk,
            "feed_title": self.feed_title,
            "feed_address": self.feed_address,
            "feed_link": self.feed_link,
            "num_subscribers": self.num_subscribers,
            "updated": relative_timesince(self.last_update),
            "updated_seconds_ago": seconds_timesince(self.last_update),
            "fs_size_bytes": self.fs_size_bytes,
            "archive_count": self.archive_count,
            "last_story_date": self.last_story_date,
            "last_story_seconds_ago": seconds_timesince(self.last_story_date),
            "stories_last_month": self.stories_last_month,
            "average_stories_per_month": self.average_stories_per_month,
            "min_to_decay": self.min_to_decay,
            "subs": self.num_subscribers,
            "is_push": self.is_push,
            "is_newsletter": self.is_newsletter,
            "fetched_once": self.fetched_once,
            "search_indexed": self.search_indexed,
            "not_yet_fetched": not self.fetched_once,  # Legacy. Doh.
            "favicon_color": self.favicon_color,
            "favicon_fade": self.favicon_fade(),
            "favicon_border": self.favicon_border(),
            "favicon_text_color": self.favicon_text_color(),
            "favicon_fetching": self.favicon_fetching,
            "favicon_url": self.favicon_url,
            "s3_page": self.s3_page,
            "s3_icon": self.s3_icon,
            "disabled_page": not self.has_page,
        }

        if include_favicon:
            try:
                feed_icon = MFeedIcon.objects.get(feed_id=self.pk)
                feed["favicon"] = feed_icon.data
            except MFeedIcon.DoesNotExist:
                pass
        if self.has_page_exception or self.has_feed_exception:
            feed["has_exception"] = True
            feed["exception_type"] = "feed" if self.has_feed_exception else "page"
            feed["exception_code"] = self.exception_code
        elif full:
            feed["has_exception"] = False
            feed["exception_type"] = None
            feed["exception_code"] = self.exception_code

        if full:
            feed["average_stories_per_month"] = self.average_stories_per_month
            feed["tagline"] = self.data.feed_tagline
            feed["feed_tags"] = json.decode(self.data.popular_tags) if self.data.popular_tags else []
            feed["feed_authors"] = json.decode(self.data.popular_authors) if self.data.popular_authors else []

        return feed

    def save(self, *args, **kwargs):
        if not self.last_update:
            self.last_update = datetime.datetime.utcnow()
        if not self.next_scheduled_update:
            self.next_scheduled_update = datetime.datetime.utcnow()
        self.fix_google_alerts_urls()

        feed_address = self.feed_address or ""
        feed_link = self.feed_link or ""
        self.hash_address_and_link = self.generate_hash_address_and_link(feed_address, feed_link)

        max_feed_title = Feed._meta.get_field("feed_title").max_length
        if len(self.feed_title) > max_feed_title:
            self.feed_title = self.feed_title[:max_feed_title]
        max_feed_address = Feed._meta.get_field("feed_address").max_length
        if len(feed_address) > max_feed_address:
            self.feed_address = feed_address[:max_feed_address]
        max_feed_link = Feed._meta.get_field("feed_link").max_length
        if len(feed_link) > max_feed_link:
            self.feed_link = feed_link[:max_feed_link]

        try:
            super(Feed, self).save(*args, **kwargs)
        except IntegrityError as e:
            logging.debug(" ---> ~FRFeed save collision (%s), checking dupe hash..." % e)
            feed_address = self.feed_address or ""
            feed_link = self.feed_link or ""
            hash_address_and_link = self.generate_hash_address_and_link(feed_address, feed_link)
            logging.debug(" ---> ~FRNo dupes, checking hash collision: %s" % hash_address_and_link)
            duplicate_feeds = Feed.objects.filter(hash_address_and_link=hash_address_and_link)

            if not duplicate_feeds:
                duplicate_feeds = Feed.objects.filter(
                    feed_address=self.feed_address, feed_link=self.feed_link
                )
            if not duplicate_feeds:
                # Feed has been deleted. Just ignore it.
                logging.debug(
                    " ***> Changed to: %s - %s: %s" % (self.feed_address, self.feed_link, duplicate_feeds)
                )
                logging.debug(" ***> [%-30s] Feed deleted (%s)." % (self.log_title[:30], self.pk))
                return

            for duplicate_feed in duplicate_feeds:
                if duplicate_feed.pk != self.pk:
                    logging.debug(
                        " ---> ~FRFound different feed (%s), merging %s in..." % (duplicate_feeds[0], self.pk)
                    )
                    feed = Feed.get_by_id(merge_feeds(duplicate_feeds[0].pk, self.pk))
                    return feed
            else:
                logging.debug(" ---> ~FRFeed is its own dupe? %s == %s" % (self, duplicate_feeds))
        except DatabaseError as e:
            logging.debug(
                " ---> ~FBFeed update failed, no change: %s / %s..." % (kwargs.get("update_fields", None), e)
            )
            pass

        return self

    @classmethod
    def index_all_for_search(cls, offset=0, subscribers=2):
        if not offset:
            SearchFeed.create_elasticsearch_mapping(delete=True)

        last_pk = cls.objects.latest("pk").pk
        for f in range(offset, last_pk, 1000):
            print(
                " ---> {f} / {last_pk} ({pct}%)".format(
                    f=f, last_pk=last_pk, pct=str(float(f) / last_pk * 100)[:2]
                )
            )
            feeds = Feed.objects.filter(
                pk__in=range(f, f + 1000), active=True, active_subscribers__gte=subscribers
            ).values_list("pk")
            for (feed_id,) in feeds:
                Feed.objects.get(pk=feed_id).index_feed_for_search()

    def index_feed_for_search(self):
        min_subscribers = 1
        if settings.DEBUG:
            min_subscribers = 0
        if self.num_subscribers > min_subscribers and not self.branch_from_feed and not self.is_newsletter:
            SearchFeed.index(
                feed_id=self.pk,
                title=self.feed_title,
                address=self.feed_address,
                link=self.feed_link,
                num_subscribers=self.num_subscribers,
            )

    def index_stories_for_search(self):
        if self.search_indexed:
            return

        stories = MStory.objects(story_feed_id=self.pk)
        for story in stories:
            story.index_story_for_search()

        self.search_indexed = True
        self.save()

    def sync_redis(self, allow_skip_resync=False):
        return MStory.sync_feed_redis(self.pk, allow_skip_resync=allow_skip_resync)

    def expire_redis(self, r=None):
        if not r:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)

        r.expire("F:%s" % self.pk, self.days_of_story_hashes * 24 * 60 * 60)
        r.expire("zF:%s" % self.pk, self.days_of_story_hashes * 24 * 60 * 60)

    @classmethod
    def low_volume_feeds(cls, feed_ids, stories_per_month=30):
        try:
            stories_per_month = int(stories_per_month)
        except ValueError:
            stories_per_month = 30
        feeds = Feed.objects.filter(pk__in=feed_ids, average_stories_per_month__lte=stories_per_month).only(
            "pk"
        )

        return [f.pk for f in feeds]

    @classmethod
    def autocomplete(self, prefix, limit=5):
        results = SearchFeed.query(prefix)
        feed_ids = [result["_source"]["feed_id"] for result in results[:5]]

        # results = SearchQuerySet().autocomplete(address=prefix).order_by('-num_subscribers')[:limit]
        #
        # if len(results) < limit:
        #     results += SearchQuerySet().autocomplete(title=prefix).order_by('-num_subscribers')[:limit-len(results)]
        #
        return feed_ids

    @classmethod
    def find_or_create(cls, feed_address, feed_link, defaults=None, **kwargs):
        feeds = cls.objects.filter(feed_address=feed_address, feed_link=feed_link)
        if feeds:
            return feeds[0], False

        if feed_link and feed_link.endswith("/"):
            feeds = cls.objects.filter(feed_address=feed_address, feed_link=feed_link[:-1])
            if feeds:
                return feeds[0], False

        try:
            feed = cls.objects.get(feed_address=feed_address, feed_link=feed_link)
            return feed, False
        except cls.DoesNotExist:
            feed = cls(**defaults)
            feed = feed.save()
            return feed, True

    @classmethod
    def merge_feeds(cls, *args, **kwargs):
        return merge_feeds(*args, **kwargs)

    def fix_google_alerts_urls(self):
        if self.feed_address.startswith("http://user/") and "/state/com.google/alerts/" in self.feed_address:
            match = re.match(r"http://user/(\d+)/state/com.google/alerts/(\d+)", self.feed_address)
            if match:
                user_id, alert_id = match.groups()
                self.feed_address = "http://www.google.com/alerts/feeds/%s/%s" % (user_id, alert_id)

    @classmethod
    def schedule_feed_fetches_immediately(cls, feed_ids, user_id=None):
        if settings.DEBUG:
            logging.info(
                " ---> ~SN~FMSkipping the scheduling immediate fetch of ~SB%s~SN feeds (in DEBUG)..."
                % len(feed_ids)
            )
            return

        if user_id:
            user = User.objects.get(pk=user_id)
            logging.user(user, "~SN~FMScheduling immediate fetch of ~SB%s~SN feeds..." % len(feed_ids))
        else:
            logging.debug(" ---> ~SN~FMScheduling immediate fetch of ~SB%s~SN feeds..." % len(feed_ids))

        if len(feed_ids) > 100:
            logging.debug(" ---> ~SN~FMFeeds scheduled: %s" % feed_ids)
        day_ago = datetime.datetime.now() - datetime.timedelta(days=1)
        feeds = Feed.objects.filter(pk__in=feed_ids)
        for feed in feeds:
            if feed.active_subscribers <= 0:
                feed.count_subscribers()
            if not feed.active or feed.next_scheduled_update < day_ago:
                feed.schedule_feed_fetch_immediately(verbose=False)

    @property
    def favicon_fetching(self):
        return bool(not (self.favicon_not_found or self.favicon_color))

    @classmethod
    def get_feed_by_url(self, *args, **kwargs):
        return self.get_feed_from_url(*args, **kwargs)

    @classmethod
    def get_feed_from_url(
        cls, url, create=True, aggressive=False, fetch=True, offset=0, user=None, interactive=False
    ):
        feed = None
        without_rss = False
        original_url = url

        if url and url.startswith("newsletter:"):
            try:
                return cls.objects.get(feed_address=url)
            except cls.MultipleObjectsReturned:
                return cls.objects.filter(feed_address=url)[0]
        if url and re.match("(https?://)?twitter.com/\w+/?", url):
            without_rss = True
        if url and re.match(r"(https?://)?(www\.)?facebook.com/\w+/?$", url):
            without_rss = True
        # Turn url @username@domain.com into domain.com/users/username.rss
        if url and url.startswith("@") and "@" in url[1:]:
            username, domain = url[1:].split("@")
            url = f"https://{domain}/users/{username}.rss"
        if url and "youtube.com/user/" in url:
            username = re.search("youtube.com/user/(\w+)", url).group(1)
            url = "http://gdata.youtube.com/feeds/base/users/%s/uploads" % username
            without_rss = True
        if url and "youtube.com/@" in url:
            username = url.split("youtube.com/@")[1]
            url = "http://gdata.youtube.com/feeds/base/users/%s/uploads" % username
            without_rss = True
        if url and "youtube.com/channel/" in url:
            channel_id = re.search("youtube.com/channel/([-_\w]+)", url).group(1)
            url = "https://www.youtube.com/feeds/videos.xml?channel_id=%s" % channel_id
            without_rss = True
        if url and "youtube.com/feeds" in url:
            without_rss = True
        if url and "youtube.com/playlist" in url:
            without_rss = True

        def criteria(key, value):
            if aggressive:
                return {"%s__icontains" % key: value}
            else:
                return {"%s" % key: value}

        def by_url(address):
            feed = (
                cls.objects.filter(branch_from_feed=None)
                .filter(**criteria("feed_address", address))
                .order_by("-num_subscribers")
            )
            if not feed:
                duplicate_feed = DuplicateFeed.objects.filter(**criteria("duplicate_address", address))
                if duplicate_feed and len(duplicate_feed) > offset:
                    feed = [duplicate_feed[offset].feed]
            if not feed and aggressive:
                feed = (
                    cls.objects.filter(branch_from_feed=None)
                    .filter(**criteria("feed_link", address))
                    .order_by("-num_subscribers")
                )

            return feed

        @timelimit(10)
        def _feedfinder_forman(url):
            found_feed_urls = feedfinder_forman.find_feeds(url)
            return found_feed_urls

        @timelimit(10)
        def _feedfinder_pilgrim(url):
            found_feed_urls = feedfinder_pilgrim.feeds(url)
            return found_feed_urls

        # Normalize and check for feed_address, dupes, and feed_link
        url = urlnorm.normalize(url)
        if not url:
            logging.debug(" ---> ~FRCouldn't normalize url: ~SB%s" % url)
            return

        feed = by_url(url)
        found_feed_urls = []

        if interactive:
            import pdb

            pdb.set_trace()

        # Create if it looks good
        if feed and len(feed) > offset:
            feed = feed[offset]
        else:
            try:
                found_feed_urls = _feedfinder_forman(url)
            except TimeoutError:
                logging.debug("   ---> Feed finder timed out...")
                found_feed_urls = []
            if not found_feed_urls:
                try:
                    found_feed_urls = _feedfinder_pilgrim(url)
                except TimeoutError:
                    logging.debug("   ---> Feed finder old timed out...")
                    found_feed_urls = []

            if len(found_feed_urls):
                feed_finder_url = found_feed_urls[0]
                logging.debug(" ---> Found feed URLs for %s: %s" % (url, found_feed_urls))
                feed = by_url(feed_finder_url)
                if feed and len(feed) > offset:
                    feed = feed[offset]
                    logging.debug(" ---> Feed exists (%s), updating..." % (feed))
                    feed = feed.update()
                elif create:
                    logging.debug(" ---> Feed doesn't exist, creating: %s" % (feed_finder_url))
                    feed = cls.objects.create(feed_address=feed_finder_url)
                    feed = feed.update()
            elif without_rss:
                logging.debug(" ---> Found without_rss feed: %s / %s" % (url, original_url))
                feed = cls.objects.create(feed_address=url, feed_link=original_url)
                feed = feed.update(requesting_user_id=user.pk if user else None)

        # Check for JSON feed
        if not feed and fetch and create:
            try:
                r = requests.get(url)
            except (requests.ConnectionError, requests.models.InvalidURL):
                r = None
            if r and "application/json" in r.headers.get("Content-Type"):
                feed = cls.objects.create(feed_address=url)
                feed = feed.update()

        # Still nothing? Maybe the URL has some clues.
        if not feed and fetch and len(found_feed_urls):
            feed_finder_url = found_feed_urls[0]
            feed = by_url(feed_finder_url)
            if not feed and create:
                feed = cls.objects.create(feed_address=feed_finder_url)
                feed = feed.update()
            elif feed and len(feed) > offset:
                feed = feed[offset]

        # Not created and not within bounds, so toss results.
        if isinstance(feed, QuerySet):
            logging.debug(" ---> ~FRNot created and not within bounds, tossing: ~SB%s" % feed)
            return

        return feed

    @classmethod
    def task_feeds(cls, feeds, queue_size=12, verbose=True):
        if not feeds:
            return
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)

        if isinstance(feeds, Feed):
            if verbose:
                logging.debug(" ---> ~SN~FBTasking feed: ~SB%s" % feeds)
            feeds = [feeds.pk]
        elif verbose:
            logging.debug(" ---> ~SN~FBTasking ~SB~FC%s~FB~SN feeds..." % len(feeds))

        if isinstance(feeds, QuerySet):
            feeds = [f.pk for f in feeds]

        r.srem("queued_feeds", *feeds)
        now = datetime.datetime.now().strftime("%s")
        p = r.pipeline()
        for feed_id in feeds:
            p.zadd("tasked_feeds", {feed_id: now})
        p.execute()

        # for feed_ids in (feeds[pos:pos + queue_size] for pos in xrange(0, len(feeds), queue_size)):
        for feed_id in feeds:
            UpdateFeeds.apply_async(args=(feed_id,), queue="update_feeds")

    @classmethod
    def drain_task_feeds(cls):
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)

        tasked_feeds = r.zrange("tasked_feeds", 0, -1)
        if tasked_feeds:
            logging.debug(" ---> ~FRDraining %s tasked feeds..." % len(tasked_feeds))
            r.sadd("queued_feeds", *tasked_feeds)
            r.zremrangebyrank("tasked_feeds", 0, -1)
        else:
            logging.debug(" ---> No tasked feeds to drain")

        errored_feeds = r.zrange("error_feeds", 0, -1)
        if errored_feeds:
            logging.debug(" ---> ~FRDraining %s errored feeds..." % len(errored_feeds))
            r.sadd("queued_feeds", *errored_feeds)
            r.zremrangebyrank("error_feeds", 0, -1)
        else:
            logging.debug(" ---> No errored feeds to drain")

    def update_all_statistics(self, has_new_stories=False, force=False):
        recount = not self.counts_converted_to_redis
        count_extra = False
        if random.random() < 0.01 or not self.data.popular_tags or not self.data.popular_authors:
            count_extra = True

        self.count_subscribers(recount=recount)
        self.calculate_last_story_date()

        if force or has_new_stories or count_extra:
            self.save_feed_stories_last_month()

        if not self.fs_size_bytes or not self.archive_count:
            self.count_fs_size_bytes()

        if force or (has_new_stories and count_extra):
            self.save_popular_authors()
            self.save_popular_tags()
            self.save_feed_story_history_statistics()

    def calculate_last_story_date(self):
        last_story_date = None

        try:
            latest_story = (
                MStory.objects(story_feed_id=self.pk)
                .limit(1)
                .order_by("-story_date")
                .only("story_date")
                .first()
            )
            if latest_story:
                last_story_date = latest_story.story_date
        except MStory.DoesNotExist:
            pass

        if not last_story_date or seconds_timesince(last_story_date) < 0:
            last_story_date = datetime.datetime.now()

        if last_story_date != self.last_story_date:
            self.last_story_date = last_story_date
            self.save(update_fields=["last_story_date"])

    @classmethod
    def setup_feeds_for_premium_subscribers(cls, feed_ids):
        logging.info(f" ---> ~SN~FMScheduling immediate premium setup of ~SB{len(feed_ids)}~SN feeds...")

        feeds = Feed.objects.filter(pk__in=feed_ids)
        for feed in feeds:
            feed.setup_feed_for_premium_subscribers()

    def setup_feed_for_premium_subscribers(self, allow_skip_resync=False):
        self.count_subscribers()
        self.set_next_scheduled_update(verbose=settings.DEBUG)
        self.sync_redis(allow_skip_resync=allow_skip_resync)

    def schedule_fetch_archive_feed(self):
        from apps.profile.tasks import FetchArchiveFeedsChunk

        logging.debug(f"~FC~SBScheduling fetch of archive feed ~SB{self.log_title}")
        FetchArchiveFeedsChunk.apply_async(
            kwargs=dict(feed_ids=[self.pk]),
            queue="search_indexer",
            time_limit=settings.MAX_SECONDS_ARCHIVE_FETCH_SINGLE_FEED,
        )

    def check_feed_link_for_feed_address(self):
        @timelimit(10)
        def _1():
            feed_address = None
            feed = self
            found_feed_urls = []
            try:
                logging.debug(" ---> Checking: %s" % self.feed_address)
                found_feed_urls = feedfinder_forman.find_feeds(self.feed_address)
                if found_feed_urls:
                    feed_address = found_feed_urls[0]
            except KeyError:
                pass
            if not len(found_feed_urls) and self.feed_link:
                found_feed_urls = feedfinder_forman.find_feeds(self.feed_link)
                if len(found_feed_urls) and found_feed_urls[0] != self.feed_address:
                    feed_address = found_feed_urls[0]

            if feed_address:
                if any(
                    ignored_domain in feed_address
                    for ignored_domain in [
                        "feedburner.com/atom.xml",
                        "feedburner.com/feed/",
                        "feedsportal.com",
                    ]
                ):
                    logging.debug("  ---> Feed points to 'Wierdo' or 'feedsportal', ignoring.")
                    return False, self
                try:
                    self.feed_address = strip_underscore_from_feed_address(feed_address)
                    feed = self.save()
                    feed.count_subscribers()
                    # feed.schedule_feed_fetch_immediately() # Don't fetch as it can get stuck in a loop
                    feed.has_feed_exception = False
                    feed.active = True
                    feed = feed.save()
                except IntegrityError:
                    original_feed = Feed.objects.get(feed_address=feed_address, feed_link=self.feed_link)
                    original_feed.has_feed_exception = False
                    original_feed.active = True
                    original_feed.save()
                    merge_feeds(original_feed.pk, self.pk)
            return feed_address, feed

        if self.feed_address_locked:
            return False, self

        try:
            feed_address, feed = _1()
        except TimeoutError as e:
            logging.debug("   ---> [%-30s] Feed address check timed out..." % (self.log_title[:30]))
            self.save_feed_history(505, "Timeout", e)
            feed = self
            feed_address = None

        return bool(feed_address), feed

    def save_feed_history(self, status_code, message, exception=None, date=None):
        fetch_history = MFetchHistory.add(
            feed_id=self.pk,
            fetch_type="feed",
            code=int(status_code),
            date=date,
            message=message,
            exception=exception,
        )

        if status_code not in (200, 304):
            self.errors_since_good += 1
            self.count_errors_in_history("feed", status_code, fetch_history=fetch_history)
            self.set_next_scheduled_update(verbose=settings.DEBUG)
        elif self.has_feed_exception or self.errors_since_good:
            self.errors_since_good = 0
            self.has_feed_exception = False
            self.active = True
            self.save()

    def save_page_history(self, status_code, message, exception=None, date=None):
        fetch_history = MFetchHistory.add(
            feed_id=self.pk,
            fetch_type="page",
            code=int(status_code),
            date=date,
            message=message,
            exception=exception,
        )

        if status_code not in (200, 304):
            self.count_errors_in_history("page", status_code, fetch_history=fetch_history)
        elif self.has_page_exception or not self.has_page:
            self.has_page_exception = False
            self.has_page = True
            self.active = True
            self.save()

    def save_raw_feed(self, raw_feed, fetch_date):
        MFetchHistory.add(feed_id=self.pk, fetch_type="raw_feed", code=200, message=raw_feed, date=fetch_date)

    def count_errors_in_history(self, exception_type="feed", status_code=None, fetch_history=None):
        if not fetch_history:
            fetch_history = MFetchHistory.feed(self.pk)
        fh = fetch_history[exception_type + "_fetch_history"]
        non_errors = [h for h in fh if h["status_code"] and int(h["status_code"]) in (200, 304)]
        errors = [h for h in fh if h["status_code"] and int(h["status_code"]) not in (200, 304)]

        if len(non_errors) == 0 and len(errors) > 1:
            self.active = True
            if exception_type == "feed":
                self.has_feed_exception = True
                # self.active = False # No longer, just geometrically fetch
            elif exception_type == "page":
                self.has_page_exception = True
            self.exception_code = status_code or int(errors[0])
            self.save()
        elif self.exception_code > 0:
            self.active = True
            self.exception_code = 0
            if exception_type == "feed":
                self.has_feed_exception = False
            elif exception_type == "page":
                self.has_page_exception = False
            self.save()

        logging.debug(
            "   ---> [%-30s] ~FBCounting any errors in history: %s (%s non errors)"
            % (self.log_title[:30], len(errors), len(non_errors))
        )

        return errors, non_errors

    def count_redirects_in_history(self, fetch_type="feed", fetch_history=None):
        logging.debug("   ---> [%-30s] Counting redirects in history..." % (self.log_title[:30]))
        if not fetch_history:
            fetch_history = MFetchHistory.feed(self.pk)
        fh = fetch_history[fetch_type + "_fetch_history"]
        redirects = [h for h in fh if h["status_code"] and int(h["status_code"]) in (301, 302)]
        non_redirects = [h for h in fh if h["status_code"] and int(h["status_code"]) not in (301, 302)]

        return redirects, non_redirects

    @property
    def original_feed_id(self):
        if self.branch_from_feed:
            return self.branch_from_feed.pk
        else:
            return self.pk

    @property
    def counts_converted_to_redis(self):
        SUBSCRIBER_EXPIRE_DATE = datetime.datetime.now() - datetime.timedelta(days=settings.SUBSCRIBER_EXPIRE)
        subscriber_expire = int(SUBSCRIBER_EXPIRE_DATE.strftime("%s"))
        r = redis.Redis(connection_pool=settings.REDIS_FEED_SUB_POOL)
        total_key = "s:%s" % self.original_feed_id
        premium_key = "sp:%s" % self.original_feed_id
        last_recount = r.zscore(total_key, -1)  # Need to subtract this extra when counting subs

        # Check for expired feeds with no active users who would have triggered a cleanup
        if last_recount and last_recount > subscriber_expire:
            return True
        elif last_recount:
            logging.info(
                "   ---> [%-30s] ~SN~FBFeed has expired redis subscriber counts (%s < %s), clearing..."
                % (self.log_title[:30], last_recount, subscriber_expire)
            )
            r.delete(total_key, -1)
            r.delete(premium_key, -1)

        return False

    def count_subscribers(self, recount=True, verbose=False):
        if recount or not self.counts_converted_to_redis:
            from apps.profile.models import Profile

            Profile.count_feed_subscribers(feed_id=self.pk)
        SUBSCRIBER_EXPIRE_DATE = datetime.datetime.now() - datetime.timedelta(days=settings.SUBSCRIBER_EXPIRE)
        subscriber_expire = int(SUBSCRIBER_EXPIRE_DATE.strftime("%s"))
        now = int(datetime.datetime.now().strftime("%s"))
        r = redis.Redis(connection_pool=settings.REDIS_FEED_SUB_POOL)
        total = 0
        active = 0
        premium = 0
        archive = 0
        pro = 0
        active_premium = 0

        # Include all branched feeds in counts
        feed_ids = [f["id"] for f in Feed.objects.filter(branch_from_feed=self.original_feed_id).values("id")]
        feed_ids.append(self.original_feed_id)
        feed_ids = list(set(feed_ids))

        if self.counts_converted_to_redis:
            # For each branched feed, count different subscribers
            for feed_id in feed_ids:
                pipeline = r.pipeline()

                # now+1 ensures `-1` flag will be corrected for later with - 1
                total_key = "s:%s" % feed_id
                premium_key = "sp:%s" % feed_id
                archive_key = "sarchive:%s" % feed_id
                pro_key = "spro:%s" % feed_id
                pipeline.zcard(total_key)
                pipeline.zcount(total_key, subscriber_expire, now + 1)
                pipeline.zcard(premium_key)
                pipeline.zcount(premium_key, subscriber_expire, now + 1)
                pipeline.zcard(archive_key)
                pipeline.zcard(pro_key)

                results = pipeline.execute()

                # -1 due to counts_converted_to_redis using key=-1 for last_recount date
                total += max(0, results[0] - 1)
                active += max(0, results[1] - 1)
                premium += max(0, results[2] - 1)
                active_premium += max(0, results[3] - 1)
                archive += max(0, results[4] - 1)
                pro += max(0, results[5] - 1)

            original_num_subscribers = self.num_subscribers
            original_active_subs = self.active_subscribers
            original_premium_subscribers = self.premium_subscribers
            original_active_premium_subscribers = self.active_premium_subscribers
            original_archive_subscribers = self.archive_subscribers
            original_pro_subscribers = self.pro_subscribers
            logging.info(
                "   ---> [%-30s] ~SN~FBCounting subscribers from ~FCredis~FB: ~FMt:~SB~FM%s~SN a:~SB%s~SN p:~SB%s~SN ap:~SB%s~SN archive:~SB%s~SN pro:~SB%s ~SN~FC%s"
                % (
                    self.log_title[:30],
                    total,
                    active,
                    premium,
                    active_premium,
                    archive,
                    pro,
                    "(%s branches)" % (len(feed_ids) - 1) if len(feed_ids) > 1 else "",
                )
            )
        else:
            from apps.reader.models import UserSubscription

            subs = UserSubscription.objects.filter(feed__in=feed_ids)
            original_num_subscribers = self.num_subscribers
            total = subs.count()

            active_subs = UserSubscription.objects.filter(
                feed__in=feed_ids, active=True, user__profile__last_seen_on__gte=SUBSCRIBER_EXPIRE_DATE
            )
            original_active_subs = self.active_subscribers
            active = active_subs.count()

            premium_subs = UserSubscription.objects.filter(
                feed__in=feed_ids, active=True, user__profile__is_premium=True
            )
            original_premium_subscribers = self.premium_subscribers
            premium = premium_subs.count()

            archive_subs = UserSubscription.objects.filter(
                feed__in=feed_ids, active=True, user__profile__is_archive=True
            )
            original_archive_subscribers = self.archive_subscribers
            archive = archive_subs.count()

            pro_subs = UserSubscription.objects.filter(
                feed__in=feed_ids, active=True, user__profile__is_pro=True
            )
            original_pro_subscribers = self.pro_subscribers
            pro = pro_subs.count()

            active_premium_subscribers = UserSubscription.objects.filter(
                feed__in=feed_ids,
                active=True,
                user__profile__is_premium=True,
                user__profile__last_seen_on__gte=SUBSCRIBER_EXPIRE_DATE,
            )
            original_active_premium_subscribers = self.active_premium_subscribers
            active_premium = active_premium_subscribers.count()
            logging.debug(
                "   ---> [%-30s] ~SN~FBCounting subscribers from ~FYpostgres~FB: ~FMt:~SB~FM%s~SN a:~SB%s~SN p:~SB%s~SN ap:~SB%s~SN archive:~SB%s~SN pro:~SB%s"
                % (self.log_title[:30], total, active, premium, active_premium, archive, pro)
            )

        if settings.DOCKERBUILD:
            # Local installs enjoy 100% active feeds
            active = total

        # If any counts have changed, save them
        self.num_subscribers = total
        self.active_subscribers = active
        self.premium_subscribers = premium
        self.active_premium_subscribers = active_premium
        self.archive_subscribers = archive
        self.pro_subscribers = pro
        if (
            self.num_subscribers != original_num_subscribers
            or self.active_subscribers != original_active_subs
            or self.premium_subscribers != original_premium_subscribers
            or self.active_premium_subscribers != original_active_premium_subscribers
            or self.archive_subscribers != original_archive_subscribers
            or self.pro_subscribers != original_pro_subscribers
        ):
            if original_premium_subscribers == -1 or original_active_premium_subscribers == -1:
                self.save()
            else:
                self.save(
                    update_fields=[
                        "num_subscribers",
                        "active_subscribers",
                        "premium_subscribers",
                        "active_premium_subscribers",
                        "archive_subscribers",
                        "pro_subscribers",
                    ]
                )

        if verbose:
            if self.num_subscribers <= 1:
                print(".", end=" ")
            else:
                print(
                    "\n %s> %s subscriber%s: %s"
                    % (
                        "-" * min(self.num_subscribers, 20),
                        self.num_subscribers,
                        "" if self.num_subscribers == 1 else "s",
                        self.feed_title,
                    ),
                    end=" ",
                )

    def _split_favicon_color(self, color=None):
        if not color:
            color = self.favicon_color
        if not color:
            return None, None, None
        splitter = lambda s, p: [s[i : i + p] for i in range(0, len(s), p)]
        red, green, blue = splitter(color[:6], 2)
        return red, green, blue

    def favicon_fade(self):
        return self.adjust_color(adjust=30)

    def adjust_color(self, color=None, adjust=0):
        red, green, blue = self._split_favicon_color(color=color)
        if red and green and blue:
            fade_red = hex(min(int(red, 16) + adjust, 255))[2:].zfill(2)
            fade_green = hex(min(int(green, 16) + adjust, 255))[2:].zfill(2)
            fade_blue = hex(min(int(blue, 16) + adjust, 255))[2:].zfill(2)
            return "%s%s%s" % (fade_red, fade_green, fade_blue)

    def favicon_border(self):
        red, green, blue = self._split_favicon_color()
        if red and green and blue:
            fade_red = hex(min(int(int(red, 16) * 0.75), 255))[2:].zfill(2)
            fade_green = hex(min(int(int(green, 16) * 0.75), 255))[2:].zfill(2)
            fade_blue = hex(min(int(int(blue, 16) * 0.75), 255))[2:].zfill(2)
            return "%s%s%s" % (fade_red, fade_green, fade_blue)

    def favicon_text_color(self):
        # Color format: {r: 1, g: .5, b: 0}
        def contrast(color1, color2):
            lum1 = luminosity(color1)
            lum2 = luminosity(color2)
            if lum1 > lum2:
                return (lum1 + 0.05) / (lum2 + 0.05)
            else:
                return (lum2 + 0.05) / (lum1 + 0.05)

        def luminosity(color):
            r = color["red"]
            g = color["green"]
            b = color["blue"]
            val = lambda c: c / 12.92 if c <= 0.02928 else math.pow(((c + 0.055) / 1.055), 2.4)
            red = val(r)
            green = val(g)
            blue = val(b)
            return 0.2126 * red + 0.7152 * green + 0.0722 * blue

        red, green, blue = self._split_favicon_color()
        if red and green and blue:
            color = {
                "red": int(red, 16) / 256.0,
                "green": int(green, 16) / 256.0,
                "blue": int(blue, 16) / 256.0,
            }
            white = {
                "red": 1,
                "green": 1,
                "blue": 1,
            }
            grey = {
                "red": 0.5,
                "green": 0.5,
                "blue": 0.5,
            }

            if contrast(color, white) > contrast(color, grey):
                return "white"
            else:
                return "black"

    def fill_out_archive_stories(self, force=False, starting_page=1):
        """
        Starting from page 1 and iterating through N pages, determine whether
        page(i) matches page(i-1) and if there are any new stories.
        """
        before_story_count = MStory.objects(story_feed_id=self.pk).count()

        if not force and not self.archive_subscribers:
            logging.debug(
                "   ---> [%-30s] ~FBNot filling out archive stories, no archive subscribers"
                % (self.log_title[:30])
            )
            return before_story_count, before_story_count

        self.update(archive_page=starting_page)

        after_story_count = MStory.objects(story_feed_id=self.pk).count()
        logging.debug(
            "   ---> [%-30s] ~FCFilled out archive, ~FM~SB%s~SN new stories~FC, total of ~SB%s~SN stories"
            % (self.log_title[:30], after_story_count - before_story_count, after_story_count)
        )

    def save_feed_stories_last_month(self, verbose=False):
        month_ago = datetime.datetime.utcnow() - datetime.timedelta(days=30)
        stories_last_month = MStory.objects(story_feed_id=self.pk, story_date__gte=month_ago).count()
        if self.stories_last_month != stories_last_month:
            self.stories_last_month = stories_last_month
            self.save(update_fields=["stories_last_month"])

        if verbose:
            print(f"  ---> {self.feed} [{self.pk}]: {self.stories_last_month} stories last month")

    def save_feed_story_history_statistics(self, current_counts=None):
        """
        Fills in missing months between earlier occurances and now.

        Save format: [('YYYY-MM, #), ...]
        Example output: [(2010-12, 123), (2011-01, 146)]
        """
        now = datetime.datetime.utcnow()
        min_year = now.year
        total = 0
        month_count = 0
        if not current_counts:
            current_counts = self.data.story_count_history and json.decode(self.data.story_count_history)

        if isinstance(current_counts, dict):
            current_counts = current_counts["months"]

        if not current_counts:
            current_counts = []

        # Count stories, aggregate by year and month. Map Reduce!
        map_f = """
            function() {
                var date = (this.story_date.getFullYear()) + "-" + (this.story_date.getMonth()+1);
                var hour = this.story_date.getUTCHours();
                var day = this.story_date.getDay();
                emit(this.story_hash, {'month': date, 'hour': hour, 'day': day});
            }
        """
        reduce_f = """
            function(key, values) {
                return values;
            }
        """
        dates = defaultdict(int)
        hours = defaultdict(int)
        days = defaultdict(int)
        results = MStory.objects(story_feed_id=self.pk).map_reduce(map_f, reduce_f, output="inline")
        for result in results:
            dates[result.value["month"]] += 1
            hours[int(result.value["hour"])] += 1
            days[int(result.value["day"])] += 1
            year = int(re.findall(r"(\d{4})-\d{1,2}", result.value["month"])[0])
            if year < min_year and year > 2000:
                min_year = year

        # Add on to existing months, always amending up, never down. (Current month
        # is guaranteed to be accurate, since trim_feeds won't delete it until after
        # a month. Hacker News can have 1,000+ and still be counted.)
        for current_month, current_count in current_counts:
            year = int(re.findall(r"(\d{4})-\d{1,2}", current_month)[0])
            if current_month not in dates or dates[current_month] < current_count:
                dates[current_month] = current_count
            if year < min_year and year > 2000:
                min_year = year

        # Assemble a list with 0's filled in for missing months,
        # trimming left and right 0's.
        months = []
        start = False
        for year in range(min_year, now.year + 1):
            for month in range(1, 12 + 1):
                if datetime.datetime(year, month, 1) < now:
                    key = "%s-%s" % (year, month)
                    if dates.get(key) or start:
                        start = True
                        months.append((key, dates.get(key, 0)))
                        total += dates.get(key, 0)
                        if dates.get(key, 0) > 0:
                            month_count += 1  # Only count months that have stories for the average
        original_story_count_history = self.data.story_count_history
        self.data.story_count_history = json.encode({"months": months, "hours": hours, "days": days})
        if self.data.story_count_history != original_story_count_history:
            self.data.save(update_fields=["story_count_history"])

        original_average_stories_per_month = self.average_stories_per_month
        if not total or not month_count:
            self.average_stories_per_month = 0
        else:
            self.average_stories_per_month = int(round(total / float(month_count)))
        if self.average_stories_per_month != original_average_stories_per_month:
            self.save(update_fields=["average_stories_per_month"])

    def save_classifier_counts(self):
        from apps.analyzer.models import (
            MClassifierAuthor,
            MClassifierFeed,
            MClassifierTag,
            MClassifierTitle,
        )

        def calculate_scores(cls, facet):
            map_f = """
                function() {
                    emit(this["%s"], {
                        pos: this.score>0 ? this.score : 0,
                        neg: this.score<0 ? Math.abs(this.score) : 0
                    });
                }
            """ % (
                facet
            )
            reduce_f = """
                function(key, values) {
                    var result = {pos: 0, neg: 0};
                    values.forEach(function(value) {
                        result.pos += value.pos;
                        result.neg += value.neg;
                    });
                    return result;
                }
            """
            scores = []
            res = cls.objects(feed_id=self.pk).map_reduce(map_f, reduce_f, output="inline")
            for r in res:
                facet_values = dict([(k, int(v)) for k, v in r.value.items()])
                facet_values[facet] = r.key
                if facet_values["pos"] + facet_values["neg"] >= 1:
                    scores.append(facet_values)
            scores = sorted(scores, key=lambda v: v["neg"] - v["pos"])

            return scores

        scores = {}
        for cls, facet in [
            (MClassifierTitle, "title"),
            (MClassifierAuthor, "author"),
            (MClassifierTag, "tag"),
            (MClassifierFeed, "feed_id"),
        ]:
            scores[facet] = calculate_scores(cls, facet)
            if facet == "feed_id" and scores[facet]:
                scores["feed"] = scores[facet]
                del scores["feed_id"]
            elif not scores[facet]:
                del scores[facet]

        if scores:
            self.data.feed_classifier_counts = json.encode(scores)
            self.data.save()

        return scores

    @property
    def user_agent(self):
        feed_parts = urllib.parse.urlparse(self.feed_address)
        if feed_parts.netloc.find(".tumblr.com") != -1:
            # Certain tumblr feeds will redirect to tumblr's login page when fetching.
            # A known workaround is using facebook's user agent.
            return "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)"

        ua = "NewsBlur Feed Fetcher - %s subscriber%s - %s %s" % (
            self.num_subscribers,
            "s" if self.num_subscribers != 1 else "",
            self.permalink,
            self.fake_user_agent,
        )

        return ua

    @property
    def fake_user_agent(self):
        ua = (
            '("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
            "AppleWebKit/605.1.15 (KHTML, like Gecko) "
            'Version/14.0.1 Safari/605.1.15")'
        )

        return ua

    def fetch_headers(self, fake=False):
        headers = {
            "User-Agent": self.user_agent if not fake else self.fake_user_agent,
            "Accept": "application/atom+xml, application/rss+xml, application/xml;q=0.8, text/xml;q=0.6, */*;q=0.2",
            "Accept-Encoding": "gzip, deflate",
        }

        return headers

    def update(self, **kwargs):
        try:
            from utils import feed_fetcher
        except ImportError as e:
            logging.info(" ***> ~BR~FRImportError: %s" % e)
            return
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
        original_feed_id = int(self.pk)

        options = {
            "verbose": kwargs.get("verbose"),
            "timeout": 10,
            "single_threaded": kwargs.get("single_threaded", True),
            "force": kwargs.get("force"),
            "force_fp": kwargs.get("force_fp"),
            "compute_scores": kwargs.get("compute_scores", True),
            "mongodb_replication_lag": kwargs.get("mongodb_replication_lag", None),
            "fake": kwargs.get("fake"),
            "quick": kwargs.get("quick"),
            "updates_off": kwargs.get("updates_off"),
            "debug": kwargs.get("debug"),
            "fpf": kwargs.get("fpf"),
            "feed_xml": kwargs.get("feed_xml"),
            "requesting_user_id": kwargs.get("requesting_user_id", None),
            "archive_page": kwargs.get("archive_page", None),
        }

        if getattr(settings, "TEST_DEBUG", False) and "NEWSBLUR_DIR" in self.feed_address:
            print(" ---> Testing feed fetch: %s" % self.log_title)
            # options['force_fp'] = True # No, why would this be needed?
            original_feed_address = self.feed_address
            original_feed_link = self.feed_link
            self.feed_address = self.feed_address.replace("%(NEWSBLUR_DIR)s", settings.NEWSBLUR_DIR)
            if self.feed_link:
                self.feed_link = self.feed_link.replace("%(NEWSBLUR_DIR)s", settings.NEWSBLUR_DIR)
            if self.feed_address != original_feed_address or self.feed_link != original_feed_link:
                self.save(update_fields=["feed_address", "feed_link"])

        if self.is_newsletter:
            feed = self.update_newsletter_icon()
        else:
            disp = feed_fetcher.Dispatcher(options, 1)
            disp.add_jobs([[self.pk]])
            feed = disp.run_jobs()

        if feed:
            feed = Feed.get_by_id(feed.pk)
        if feed:
            feed.last_update = datetime.datetime.utcnow()
            feed.set_next_scheduled_update(verbose=settings.DEBUG)
            r.zadd("fetched_feeds_last_hour", {feed.pk: int(datetime.datetime.now().strftime("%s"))})

        if not feed or original_feed_id != feed.pk:
            logging.info(
                " ---> ~FRFeed changed id, removing %s from tasked_feeds queue..." % original_feed_id
            )
            r.zrem("tasked_feeds", original_feed_id)
            r.zrem("error_feeds", original_feed_id)
        if feed:
            r.zrem("tasked_feeds", feed.pk)
            r.zrem("error_feeds", feed.pk)

        return feed

    def update_newsletter_icon(self):
        from apps.rss_feeds.icon_importer import IconImporter

        icon_importer = IconImporter(self)
        icon_importer.save()

        return self

    @classmethod
    def get_by_id(cls, feed_id, feed_address=None):
        try:
            feed = Feed.objects.get(pk=feed_id)
            return feed
        except Feed.DoesNotExist:
            # Feed has been merged after updating. Find the right feed.
            duplicate_feeds = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
            if duplicate_feeds:
                return duplicate_feeds[0].feed
            if feed_address:
                duplicate_feeds = DuplicateFeed.objects.filter(duplicate_address=feed_address)
                if duplicate_feeds:
                    return duplicate_feeds[0].feed

    @classmethod
    def get_by_name(cls, query, limit=1):
        results = SearchFeed.query(query)
        feed_ids = [result.feed_id for result in results]

        if limit == 1:
            return Feed.get_by_id(feed_ids[0])
        else:
            return [Feed.get_by_id(f) for f in feed_ids][:limit]

    def add_update_stories(self, stories, existing_stories, verbose=False, updates_off=False):
        ret_values = dict(new=0, updated=0, same=0, error=0)
        error_count = self.error_count
        new_story_hashes = [s.get("story_hash") for s in stories]

        if settings.DEBUG or verbose:
            logging.debug(
                "   ---> [%-30s] ~FBChecking ~SB%s~SN new/updated against ~SB%s~SN stories"
                % (self.log_title[:30], len(stories), len(list(existing_stories.keys())))
            )

        @timelimit(5)
        def _1(story, story_content, existing_stories, new_story_hashes):
            existing_story, story_has_changed = self._exists_story(
                story, story_content, existing_stories, new_story_hashes
            )
            return existing_story, story_has_changed

        for story in stories:
            if verbose:
                logging.debug(
                    "   ---> [%-30s] ~FBChecking ~SB%s~SN / ~SB%s"
                    % (self.log_title[:30], story.get("title"), story.get("guid"))
                )

            story_content = story.get("story_content")
            if error_count:
                story_content = strip_comments__lxml(story_content)
            else:
                story_content = strip_comments(story_content)
            story_tags = self.get_tags(story)
            story_link = self.get_permalink(story)
            replace_story_date = False

            try:
                existing_story, story_has_changed = _1(
                    story, story_content, existing_stories, new_story_hashes
                )
            except TimeoutError:
                logging.debug(
                    "   ---> [%-30s] ~SB~FRExisting story check timed out..." % (self.log_title[:30])
                )
                existing_story = None
                story_has_changed = False

            if existing_story is None:
                if settings.DEBUG and False:
                    logging.debug(
                        "   ---> New story in feed (%s - %s): %s"
                        % (self.feed_title, story.get("title"), len(story_content))
                    )

                s = MStory(
                    story_feed_id=self.pk,
                    story_date=story.get("published"),
                    story_title=story.get("title"),
                    story_content=story_content,
                    story_author_name=story.get("author"),
                    story_permalink=story_link,
                    story_guid=story.get("guid"),
                    story_tags=story_tags,
                )
                try:
                    s.save()
                    ret_values["new"] += 1
                    s.publish_to_subscribers()
                except (IntegrityError, OperationError) as e:
                    ret_values["error"] += 1
                    if settings.DEBUG:
                        logging.info(
                            "   ---> [%-30s] ~SN~FRIntegrityError on new story: %s - %s"
                            % (self.feed_title[:30], story.get("guid"), e)
                        )
                if self.search_indexed:
                    s.index_story_for_search()
            elif existing_story and story_has_changed and not updates_off and ret_values["updated"] < 3:
                # update story
                original_content = None
                try:
                    if existing_story and existing_story.id:
                        try:
                            existing_story = MStory.objects.get(id=existing_story.id)
                        except ValidationError:
                            existing_story, _ = MStory.find_story(
                                existing_story.story_feed_id, existing_story.id, original_only=True
                            )
                    elif existing_story and existing_story.story_hash:
                        existing_story, _ = MStory.find_story(
                            existing_story.story_feed_id, existing_story.story_hash, original_only=True
                        )
                    else:
                        raise MStory.DoesNotExist
                except (MStory.DoesNotExist, OperationError) as e:
                    ret_values["error"] += 1
                    if verbose:
                        logging.info(
                            "   ---> [%-30s] ~SN~FROperation on existing story: %s - %s"
                            % (self.feed_title[:30], story.get("guid"), e)
                        )
                    continue
                if existing_story.story_original_content_z:
                    original_content = zlib.decompress(existing_story.story_original_content_z)
                elif existing_story.story_content_z:
                    original_content = zlib.decompress(existing_story.story_content_z)
                if story_content and len(story_content) > 10:
                    if "<code" in story_content:
                        # Don't mangle stories with code, just use new
                        story_content_diff = story_content
                    else:
                        story_content_diff = htmldiff(smart_str(original_content), smart_str(story_content))
                else:
                    story_content_diff = original_content
                # logging.debug("\t\tDiff: %s %s %s" % diff.getStats())
                # logging.debug("\t\tDiff content: %s" % diff.getDiff())
                # if existing_story.story_title != story.get('title'):
                #    logging.debug('\tExisting title / New: : \n\t\t- %s\n\t\t- %s' % (existing_story.story_title, story.get('title')))
                if existing_story.story_hash != story.get("story_hash"):
                    self.update_story_with_new_guid(existing_story, story.get("guid"))

                if verbose:
                    logging.debug(
                        "- Updated story in feed (%s - %s): %s / %s"
                        % (self.feed_title, story.get("title"), len(story_content_diff), len(story_content))
                    )

                existing_story.story_feed = self.pk
                existing_story.story_title = story.get("title")
                existing_story.story_content = story_content_diff
                existing_story.story_latest_content = story_content
                existing_story.story_original_content = original_content
                existing_story.story_author_name = story.get("author")
                existing_story.story_permalink = story_link
                existing_story.story_guid = story.get("guid")
                existing_story.story_tags = story_tags
                existing_story.original_text_z = None  # Reset Text view cache
                # Do not allow publishers to change the story date once a story is published.
                # Leads to incorrect unread story counts.
                if replace_story_date:
                    existing_story.story_date = story.get("published")  # Really shouldn't do this.
                existing_story.extract_image_urls(force=True)
                try:
                    existing_story.save()
                    ret_values["updated"] += 1
                except (IntegrityError, OperationError):
                    ret_values["error"] += 1
                    if verbose:
                        logging.info(
                            "   ---> [%-30s] ~SN~FRIntegrityError on updated story: %s"
                            % (self.feed_title[:30], story.get("title")[:30])
                        )
                except ValidationError:
                    ret_values["error"] += 1
                    if verbose:
                        logging.info(
                            "   ---> [%-30s] ~SN~FRValidationError on updated story: %s"
                            % (self.feed_title[:30], story.get("title")[:30])
                        )
                if self.search_indexed:
                    existing_story.index_story_for_search()
            else:
                ret_values["same"] += 1
                if verbose:
                    logging.debug(
                        "Unchanged story (%s): %s / %s "
                        % (story.get("story_hash"), story.get("guid"), story.get("title"))
                    )

        return ret_values

    def update_story_with_new_guid(self, existing_story, new_story_guid):
        from apps.reader.models import RUserStory
        from apps.social.models import MSharedStory

        existing_story.remove_from_redis()
        existing_story.remove_from_search_index()

        old_hash = existing_story.story_hash
        new_hash = MStory.ensure_story_hash(new_story_guid, self.pk)
        RUserStory.switch_hash(feed=self, old_hash=old_hash, new_hash=new_hash)

        shared_stories = MSharedStory.objects.filter(story_feed_id=self.pk, story_hash=old_hash)
        for story in shared_stories:
            story.story_guid = new_story_guid
            story.story_hash = new_hash
            try:
                story.save()
            except NotUniqueError:
                # Story is already shared, skip.
                pass

    def save_popular_tags(self, feed_tags=None, verbose=False):
        if not feed_tags:
            all_tags = MStory.objects(story_feed_id=self.pk, story_tags__exists=True).item_frequencies(
                "story_tags"
            )
            feed_tags = sorted(
                [(k, v) for k, v in list(all_tags.items()) if int(v) > 0], key=itemgetter(1), reverse=True
            )[:25]
        popular_tags = json.encode(feed_tags)
        if verbose:
            print("Found %s tags: %s" % (len(feed_tags), popular_tags))

        # TODO: This len() bullshit will be gone when feeds move to mongo
        #       On second thought, it might stay, because we don't want
        #       popular tags the size of a small planet. I'm looking at you
        #       Tumblr writers.
        if len(popular_tags) < 1024:
            if self.data.popular_tags != popular_tags:
                self.data.popular_tags = popular_tags
                self.data.save(update_fields=["popular_tags"])
            return

        tags_list = []
        if feed_tags and isinstance(feed_tags, str):
            tags_list = json.decode(feed_tags)
        if len(tags_list) >= 1:
            self.save_popular_tags(tags_list[:-1])

    def save_popular_authors(self, feed_authors=None):
        if not feed_authors:
            authors = defaultdict(int)
            for story in MStory.objects(story_feed_id=self.pk).only("story_author_name"):
                authors[story.story_author_name] += 1
            feed_authors = sorted(
                [(k, v) for k, v in list(authors.items()) if k], key=itemgetter(1), reverse=True
            )[:20]

        popular_authors = json.encode(feed_authors)
        if len(popular_authors) < 1023:
            if self.data.popular_authors != popular_authors:
                self.data.popular_authors = popular_authors
                self.data.save(update_fields=["popular_authors"])
            return

        if len(feed_authors) > 1:
            self.save_popular_authors(feed_authors=feed_authors[:-1])

    @classmethod
    def trim_old_stories(cls, start=0, verbose=True, dryrun=False, total=0, end=None):
        now = datetime.datetime.now()
        month_ago = now - datetime.timedelta(days=settings.DAYS_OF_STORY_HASHES)
        feed_count = end or Feed.objects.latest("pk").pk

        for feed_id in range(start, feed_count):
            if feed_id % 1000 == 0:
                print(
                    "\n\n -------------------------- %s (%s deleted so far) --------------------------\n\n"
                    % (feed_id, total)
                )
            try:
                feed = Feed.objects.get(pk=feed_id)
            except Feed.DoesNotExist:
                continue
            # Ensure only feeds with no active subscribers are being trimmed
            if (
                feed.active_subscribers <= 0
                and (not feed.archive_subscribers or feed.archive_subscribers <= 0)
                and (not feed.last_story_date or feed.last_story_date < month_ago)
            ):
                # 1 month since last story = keep 5 stories, >6 months since, only keep 1 story
                months_ago = 6
                if feed.last_story_date:
                    months_ago = int((now - feed.last_story_date).days / 30.0)
                cutoff = max(1, 6 - months_ago)
                if dryrun:
                    print(" DRYRUN: %s cutoff - %s" % (cutoff, feed))
                else:
                    total += MStory.trim_feed(feed=feed, cutoff=cutoff, verbose=verbose)
            else:
                if dryrun:
                    print(" DRYRUN: %s/%s cutoff - %s" % (cutoff, feed.story_cutoff, feed))
                else:
                    total += feed.trim_feed(verbose=verbose)

        print(" ---> Deleted %s stories in total." % total)

    @property
    def story_cutoff(self):
        return self.number_of_stories_to_store()

    def number_of_stories_to_store(self, pre_archive=False):
        if self.archive_subscribers and self.archive_subscribers > 0 and not pre_archive:
            return 10000

        cutoff = 500
        if self.active_subscribers <= 0:
            cutoff = 25
        elif self.active_premium_subscribers < 1:
            cutoff = 100
        elif self.active_premium_subscribers <= 2:
            cutoff = 200
        elif self.active_premium_subscribers <= 5:
            cutoff = 300
        elif self.active_premium_subscribers <= 10:
            cutoff = 350
        elif self.active_premium_subscribers <= 15:
            cutoff = 400
        elif self.active_premium_subscribers <= 20:
            cutoff = 450

        if self.active_subscribers and self.average_stories_per_month < 5 and self.stories_last_month < 5:
            cutoff /= 2
        if (
            self.active_premium_subscribers <= 1
            and self.average_stories_per_month <= 1
            and self.stories_last_month <= 1
        ):
            cutoff /= 2

        r = redis.Redis(connection_pool=settings.REDIS_FEED_READ_POOL)
        pipeline = r.pipeline()
        read_stories_per_week = []
        now = datetime.datetime.now()

        # Check to see how many stories have been read each week since the feed's days of story hashes
        for weeks_back in range(2 * int(math.floor(settings.DAYS_OF_STORY_HASHES / 7))):
            weeks_ago = now - datetime.timedelta(days=7 * weeks_back)
            week_of_year = weeks_ago.strftime("%Y-%U")
            feed_read_key = "fR:%s:%s" % (self.pk, week_of_year)
            pipeline.get(feed_read_key)
        read_stories_per_week = pipeline.execute()
        read_stories_last_month = sum([int(rs) for rs in read_stories_per_week if rs])
        if not pre_archive and read_stories_last_month == 0:
            original_cutoff = cutoff
            cutoff = min(cutoff, 10)
            try:
                logging.debug(
                    "   ---> [%-30s] ~FBTrimming down to ~SB%s (instead of %s)~SN stories (~FM%s~FB)"
                    % (
                        self.log_title[:30],
                        cutoff,
                        original_cutoff,
                        (
                            self.last_story_date.strftime("%Y-%m-%d")
                            if self.last_story_date
                            else "No last story date"
                        ),
                    )
                )
            except ValueError as e:
                logging.debug("   ***> [%-30s] Error trimming: %s" % (self.log_title[:30], e))
                pass

        if getattr(settings, "OVERRIDE_STORY_COUNT_MAX", None):
            cutoff = settings.OVERRIDE_STORY_COUNT_MAX

        return int(cutoff)

    def trim_feed(self, verbose=False, cutoff=None):
        if not cutoff:
            cutoff = self.story_cutoff

        stories_removed = MStory.trim_feed(feed=self, cutoff=cutoff, verbose=verbose)

        if not self.fs_size_bytes:
            self.count_fs_size_bytes()

        return stories_removed

    def count_fs_size_bytes(self):
        stories = MStory.objects.filter(story_feed_id=self.pk)
        sum_bytes = 0
        count = 0

        for story in stories:
            count += 1
            story_with_content = story.to_mongo()
            if story_with_content.get("story_content_z", None):
                story_with_content["story_content"] = zlib.decompress(story_with_content["story_content_z"])
                del story_with_content["story_content_z"]
            if story_with_content.get("original_page_z", None):
                story_with_content["original_page"] = zlib.decompress(story_with_content["original_page_z"])
                del story_with_content["original_page_z"]
            if story_with_content.get("original_text_z", None):
                story_with_content["original_text"] = zlib.decompress(story_with_content["original_text_z"])
                del story_with_content["original_text_z"]
            if story_with_content.get("story_latest_content_z", None):
                story_with_content["story_latest_content"] = zlib.decompress(
                    story_with_content["story_latest_content_z"]
                )
                del story_with_content["story_latest_content_z"]
            if story_with_content.get("story_original_content_z", None):
                story_with_content["story_original_content"] = zlib.decompress(
                    story_with_content["story_original_content_z"]
                )
                del story_with_content["story_original_content_z"]
            sum_bytes += len(bson.BSON.encode(story_with_content))

        self.fs_size_bytes = sum_bytes
        self.archive_count = count
        self.save()

        return sum_bytes

    def purge_feed_stories(self, update=True):
        MStory.purge_feed_stories(feed=self, cutoff=self.story_cutoff)
        if update:
            self.update()

    def purge_author(self, author):
        all_stories = MStory.objects.filter(story_feed_id=self.pk)
        author_stories = MStory.objects.filter(story_feed_id=self.pk, story_author_name__iexact=author)
        logging.debug(
            " ---> Deleting %s of %s stories in %s by '%s'."
            % (author_stories.count(), all_stories.count(), self, author)
        )
        author_stories.delete()

    def purge_tag(self, tag):
        all_stories = MStory.objects.filter(story_feed_id=self.pk)
        tagged_stories = MStory.objects.filter(story_feed_id=self.pk, story_tags__icontains=tag)
        logging.debug(
            " ---> Deleting %s of %s stories in %s by '%s'."
            % (tagged_stories.count(), all_stories.count(), self, tag)
        )
        tagged_stories.delete()

    # @staticmethod
    # def clean_invalid_ids():
    #     history = MFeedFetchHistory.objects(status_code=500, exception__contains='InvalidId:')
    #     urls = set()
    #     for h in history:
    #         u = re.split('InvalidId: (.*?) is not a valid ObjectId\\n$', h.exception)[1]
    #         urls.add((h.feed_id, u))
    #
    #     for f, u in urls:
    #         print "db.stories.remove({\"story_feed_id\": %s, \"_id\": \"%s\"})" % (f, u)

    def get_stories(self, offset=0, limit=25, order="neweat", force=False):
        if order == "newest":
            stories_db = MStory.objects(story_feed_id=self.pk)[offset : offset + limit]
        elif order == "oldest":
            stories_db = MStory.objects(story_feed_id=self.pk).order_by("story_date")[offset : offset + limit]
        stories = self.format_stories(stories_db, self.pk)

        return stories

    @classmethod
    def find_feed_stories(cls, feed_ids, query, order="newest", offset=0, limit=25):
        story_ids = SearchStory.query(feed_ids=feed_ids, query=query, order=order, offset=offset, limit=limit)
        stories_db = MStory.objects(story_hash__in=story_ids).order_by(
            "-story_date" if order == "newest" else "story_date"
        )
        stories = cls.format_stories(stories_db)

        return stories

    @classmethod
    def query_popularity(cls, query, limit, order="newest"):
        popularity = {}
        seen_feeds = set()
        feed_title_to_id = dict()

        # Collect stories, sort by feed
        story_ids = SearchStory.global_query(query, order=order, offset=0, limit=limit)
        for story_hash in story_ids:
            feed_id, story_id = MStory.split_story_hash(story_hash)
            feed = Feed.get_by_id(feed_id)
            if not feed:
                continue
            if feed.feed_title in seen_feeds:
                feed_id = feed_title_to_id[feed.feed_title]
            else:
                feed_title_to_id[feed.feed_title] = feed_id
            seen_feeds.add(feed.feed_title)
            if feed_id not in popularity:
                # feed.update_all_statistics()
                # classifiers = feed.save_classifier_counts()
                well_read_score = feed.well_read_score()
                popularity[feed_id] = {
                    "feed_title": feed.feed_title,
                    "feed_url": feed.feed_link,
                    "num_subscribers": feed.num_subscribers,
                    "feed_id": feed.pk,
                    "story_ids": [],
                    "authors": {},
                    "read_pct": well_read_score["read_pct"],
                    "reader_count": well_read_score["reader_count"],
                    "story_count": well_read_score["story_count"],
                    "reach_score": well_read_score["reach_score"],
                    "share_count": well_read_score["share_count"],
                    "ps": 0,
                    "ng": 0,
                    "classifiers": json.decode(feed.data.feed_classifier_counts),
                }
                if popularity[feed_id]["classifiers"]:
                    for classifier in popularity[feed_id]["classifiers"].get("feed", []):
                        if int(classifier["feed_id"]) == int(feed_id):
                            popularity[feed_id]["ps"] = classifier["pos"]
                            popularity[feed_id]["ng"] = -1 * classifier["neg"]
            popularity[feed_id]["story_ids"].append(story_hash)

        sorted_popularity = sorted(list(popularity.values()), key=lambda x: x["reach_score"], reverse=True)

        # Extract story authors from feeds
        for feed in sorted_popularity:
            story_ids = feed["story_ids"]
            stories_db = MStory.objects(story_hash__in=story_ids)
            stories = cls.format_stories(stories_db)
            for story in stories:
                story["story_permalink"] = story["story_permalink"][:250]
                if story["story_authors"] not in feed["authors"]:
                    feed["authors"][story["story_authors"]] = {
                        "name": story["story_authors"],
                        "count": 0,
                        "ps": 0,
                        "ng": 0,
                        "tags": {},
                        "stories": [],
                    }
                author = feed["authors"][story["story_authors"]]
                seen = False
                for seen_story in author["stories"]:
                    if seen_story["url"] == story["story_permalink"]:
                        seen = True
                        break
                else:
                    author["stories"].append(
                        {
                            "title": story["story_title"],
                            "url": story["story_permalink"],
                            "date": story["story_date"],
                        }
                    )
                    author["count"] += 1
                if seen:
                    continue  # Don't recount tags

                if feed["classifiers"]:
                    for classifier in feed["classifiers"].get("author", []):
                        if classifier["author"] == author["name"]:
                            author["ps"] = classifier["pos"]
                            author["ng"] = -1 * classifier["neg"]

                for tag in story["story_tags"]:
                    if tag not in author["tags"]:
                        author["tags"][tag] = {"name": tag, "count": 0, "ps": 0, "ng": 0}
                    author["tags"][tag]["count"] += 1
                    if feed["classifiers"]:
                        for classifier in feed["classifiers"].get("tag", []):
                            if classifier["tag"] == tag:
                                author["tags"][tag]["ps"] = classifier["pos"]
                                author["tags"][tag]["ng"] = -1 * classifier["neg"]

            sorted_authors = sorted(list(feed["authors"].values()), key=lambda x: x["count"])
            feed["authors"] = sorted_authors

        # pprint(sorted_popularity)
        return sorted_popularity

    def well_read_score(self):
        """Average percentage of stories read vs published across recently active subscribers"""
        from apps.reader.models import UserSubscription
        from apps.social.models import MSharedStory

        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        p = r.pipeline()

        shared_stories = MSharedStory.objects(story_feed_id=self.pk).count()

        subscribing_users = UserSubscription.objects.filter(feed_id=self.pk).values("user_id")
        subscribing_user_ids = [sub["user_id"] for sub in subscribing_users]

        for user_id in subscribing_user_ids:
            user_rs = "RS:%s:%s" % (user_id, self.pk)
            p.scard(user_rs)

        counts = p.execute()
        counts = [c for c in counts if c > 0]
        reader_count = len(counts)

        now = datetime.datetime.now().strftime("%s")
        unread_cutoff = self.unread_cutoff.strftime("%s")
        story_count = len(r.zrangebyscore("zF:%s" % self.pk, max=now, min=unread_cutoff))
        if reader_count and story_count:
            average_pct = (sum(counts) / float(reader_count)) / float(story_count)
        else:
            average_pct = 0

        reach_score = average_pct * reader_count * story_count

        return {
            "read_pct": average_pct,
            "reader_count": reader_count,
            "reach_score": reach_score,
            "story_count": story_count,
            "share_count": shared_stories,
        }

    @classmethod
    def xls_query_popularity(cls, queries, limit):
        import xlsxwriter
        from xlsxwriter.utility import xl_rowcol_to_cell

        if isinstance(queries, str):
            queries = [q.strip() for q in queries.split(",")]

        title = "NewsBlur-%s.xlsx" % slugify("-".join(queries))
        workbook = xlsxwriter.Workbook(title)
        bold = workbook.add_format({"bold": 1})
        date_format = workbook.add_format({"num_format": "mmm d yyyy"})
        unread_format = workbook.add_format({"font_color": "#E0E0E0"})

        for query in queries:
            worksheet = workbook.add_worksheet(query)
            row = 1
            col = 0
            worksheet.write(0, col, "Publisher", bold)
            worksheet.set_column(col, col, 15)
            col += 1
            worksheet.write(0, col, "Feed URL", bold)
            worksheet.set_column(col, col, 20)
            col += 1
            worksheet.write(0, col, "Reach score", bold)
            worksheet.write_comment(
                0,
                col,
                "Feeds are sorted based on this score. It's simply the # of readers * # of stories in the past 30 days * the percentage of stories that are actually read.",
            )
            worksheet.set_column(col, col, 9)
            col += 1
            worksheet.write(0, col, "# subs", bold)
            worksheet.write_comment(0, col, "Total number of subscribers on NewsBlur, not necessarily active")
            worksheet.set_column(col, col, 5)
            col += 1
            worksheet.write(0, col, "# readers", bold)
            worksheet.write_comment(
                0,
                col,
                "Total number of active subscribers who have read a story from the feed in the past 30 days.",
            )
            worksheet.set_column(col, col, 8)
            col += 1
            worksheet.write(0, col, "read pct", bold)
            worksheet.write_comment(
                0,
                col,
                "Of the active subscribers reading this feed in the past 30 days, this is the percentage of stories the average subscriber reads. Values over 100 pct signify that the feed has many shared stories, which throws off the number slightly but not significantly.",
            )
            worksheet.set_column(col, col, 8)
            col += 1
            worksheet.write(0, col, "# stories 30d", bold)
            worksheet.write_comment(
                0,
                col,
                "It's important to ignore feeds that haven't published anything in the last 30 days, which is why this is part of the Reach Score.",
            )
            worksheet.set_column(col, col, 10)
            col += 1
            worksheet.write(0, col, "# shared", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of stories from this feed that were shared on NewsBlur. This is a strong signal of interest although it is not included in the Reach Score.",
            )
            worksheet.set_column(col, col, 7)
            col += 1
            worksheet.write(0, col, "# feed pos", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this feed was trained with a thumbs up. Users use training to hide stories they don't want to see while highlighting those that they do.",
            )
            worksheet.set_column(col, col, 8)
            col += 1
            worksheet.write(0, col, "# feed neg", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this feed was trained with a thumbs down. Users use training to hide stories they don't want to see while highlighting those that they do.",
            )
            worksheet.set_column(col, col, 8)
            col += 1
            worksheet.write(0, col, "Author", bold)
            worksheet.set_column(col, col, 15)
            col += 1
            worksheet.write(0, col, "# author pos", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this author was trained with a thumbs up. Users use training to hide stories they don't want to see while highlighting those that they do.",
            )
            worksheet.set_column(col, col, 10)
            col += 1
            worksheet.write(0, col, "# author neg", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this author was trained with a thumbs down. Users use training to hide stories they don't want to see while highlighting those that they do.",
            )
            worksheet.set_column(col, col, 10)
            col += 1
            worksheet.write(0, col, "Story title", bold)
            worksheet.set_column(col, col, 30)
            col += 1
            worksheet.write(0, col, "Story URL", bold)
            worksheet.set_column(col, col, 20)
            col += 1
            worksheet.write(0, col, "Story date", bold)
            worksheet.set_column(col, col, 10)
            col += 1
            worksheet.write(0, col, "Tag", bold)
            worksheet.set_column(col, col, 15)
            col += 1
            worksheet.write(0, col, "Tag count", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this tag is used in other stories that also contain the search query.",
            )
            worksheet.set_column(col, col, 8)
            col += 1
            worksheet.write(0, col, "# tag pos", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this tag was trained with a thumbs up. Users use training to hide stories they don't want to see while highlighting those that they do.",
            )
            worksheet.set_column(col, col, 7)
            col += 1
            worksheet.write(0, col, "# tag neg", bold)
            worksheet.write_comment(
                0,
                col,
                "Number of times this tag was trained with a thumbs down. Users use training to hide stories they don't want to see while highlighting those that they do.",
            )
            worksheet.set_column(col, col, 7)
            col += 1
            popularity = cls.query_popularity(query, limit=limit)

            for feed in popularity:
                col = 0
                worksheet.write(row, col, feed["feed_title"])
                col += 1
                worksheet.write_url(row, col, feed.get("feed_url") or "")
                col += 1
                worksheet.conditional_format(
                    row,
                    col,
                    row,
                    col + 8,
                    {"type": "cell", "criteria": "==", "value": 0, "format": unread_format},
                )
                worksheet.write(
                    row,
                    col,
                    "=%s*%s*%s"
                    % (
                        xl_rowcol_to_cell(row, col + 2),
                        xl_rowcol_to_cell(row, col + 3),
                        xl_rowcol_to_cell(row, col + 4),
                    ),
                )
                col += 1
                worksheet.write(row, col, feed["num_subscribers"])
                col += 1
                worksheet.write(row, col, feed["reader_count"])
                col += 1
                worksheet.write(row, col, feed["read_pct"])
                col += 1
                worksheet.write(row, col, feed["story_count"])
                col += 1
                worksheet.write(row, col, feed["share_count"])
                col += 1
                worksheet.write(row, col, feed["ps"])
                col += 1
                worksheet.write(row, col, feed["ng"])
                col += 1
                for author in feed["authors"]:
                    row += 1
                    worksheet.conditional_format(
                        row,
                        col,
                        row,
                        col + 2,
                        {"type": "cell", "criteria": "==", "value": 0, "format": unread_format},
                    )
                    worksheet.write(row, col, author["name"])
                    worksheet.write(row, col + 1, author["ps"])
                    worksheet.write(row, col + 2, author["ng"])
                    for story in author["stories"]:
                        worksheet.write(row, col + 3, story["title"])
                        worksheet.write_url(row, col + 4, story["url"])
                        worksheet.write_datetime(row, col + 5, story["date"], date_format)
                        row += 1
                    for tag in list(author["tags"].values()):
                        worksheet.conditional_format(
                            row,
                            col + 7,
                            row,
                            col + 9,
                            {"type": "cell", "criteria": "==", "value": 0, "format": unread_format},
                        )
                        worksheet.write(row, col + 6, tag["name"])
                        worksheet.write(row, col + 7, tag["count"])
                        worksheet.write(row, col + 8, tag["ps"])
                        worksheet.write(row, col + 9, tag["ng"])
                        row += 1
        workbook.close()
        return title

    def find_stories(self, query, order="newest", offset=0, limit=25):
        story_ids = SearchStory.query(
            feed_ids=[self.pk], query=query, order=order, offset=offset, limit=limit
        )
        stories_db = MStory.objects(story_hash__in=story_ids).order_by(
            "-story_date" if order == "newest" else "story_date"
        )

        stories = self.format_stories(stories_db, self.pk)

        return stories

    @classmethod
    def format_stories(cls, stories_db, feed_id=None, include_permalinks=False):
        stories = []

        for story_db in stories_db:
            story = cls.format_story(story_db, feed_id, include_permalinks=include_permalinks)
            stories.append(story)

        return stories

    @classmethod
    def format_story(cls, story_db, feed_id=None, text=False, include_permalinks=False, show_changes=False):
        if isinstance(story_db.story_content_z, str):
            story_db.story_content_z = base64.b64decode(story_db.story_content_z)

        story_content = ""
        latest_story_content = None
        has_changes = False
        if (
            not show_changes
            and hasattr(story_db, "story_latest_content_z")
            and story_db.story_latest_content_z
        ):
            try:
                latest_story_content = smart_str(zlib.decompress(story_db.story_latest_content_z))
            except DjangoUnicodeDecodeError:
                latest_story_content = zlib.decompress(story_db.story_latest_content_z)
        if story_db.story_content_z:
            story_content = smart_str(zlib.decompress(story_db.story_content_z))

        if "<ins" in story_content or "<del" in story_content:
            has_changes = True
        if not show_changes and latest_story_content:
            story_content = latest_story_content

        story_title = story_db.story_title
        blank_story_title = False
        if not story_title:
            blank_story_title = True
            if story_content:
                story_title = strip_tags(story_content)
            if not story_title and story_db.story_permalink:
                story_title = story_db.story_permalink
            if story_title and len(story_title) > 80:
                story_title = story_title[:80] + "..."

        story = {}
        story["story_hash"] = getattr(story_db, "story_hash", None)
        story["story_tags"] = story_db.story_tags or []
        story["story_date"] = story_db.story_date.replace(tzinfo=None)
        story["story_timestamp"] = story_db.story_date.strftime("%s")
        story["story_authors"] = story_db.story_author_name or ""
        story["story_title"] = story_title
        if blank_story_title:
            story["story_title_blank"] = True
        story["story_content"] = story_content
        story["story_permalink"] = story_db.story_permalink
        story["image_urls"] = story_db.image_urls
        story["secure_image_urls"] = cls.secure_image_urls(story_db.image_urls)
        story["secure_image_thumbnails"] = cls.secure_image_thumbnails(story_db.image_urls)
        story["story_feed_id"] = feed_id or story_db.story_feed_id
        story["has_modifications"] = has_changes
        story["comment_count"] = story_db.comment_count if hasattr(story_db, "comment_count") else 0
        story["comment_user_ids"] = story_db.comment_user_ids if hasattr(story_db, "comment_user_ids") else []
        story["share_count"] = story_db.share_count if hasattr(story_db, "share_count") else 0
        story["share_user_ids"] = story_db.share_user_ids if hasattr(story_db, "share_user_ids") else []
        story["guid_hash"] = story_db.guid_hash if hasattr(story_db, "guid_hash") else None
        if hasattr(story_db, "source_user_id"):
            story["source_user_id"] = story_db.source_user_id
        story["id"] = story_db.story_guid or story_db.story_date
        if hasattr(story_db, "starred_date"):
            story["starred_date"] = story_db.starred_date
        if hasattr(story_db, "user_tags"):
            story["user_tags"] = story_db.user_tags
        if hasattr(story_db, "user_notes"):
            story["user_notes"] = story_db.user_notes
        if hasattr(story_db, "highlights"):
            story["highlights"] = story_db.highlights
        if hasattr(story_db, "shared_date"):
            story["shared_date"] = story_db.shared_date
        if hasattr(story_db, "comments"):
            story["comments"] = story_db.comments
        if hasattr(story_db, "user_id"):
            story["user_id"] = story_db.user_id
        if include_permalinks and hasattr(story_db, "blurblog_permalink"):
            story["blurblog_permalink"] = story_db.blurblog_permalink()
        if text:
            soup = BeautifulSoup(story["story_content"], features="lxml")
            text = "".join(soup.findAll(text=True))
            text = re.sub(r"\n+", "\n\n", text)
            text = re.sub(r"\t+", "\t", text)
            story["text"] = text

        return story

    @classmethod
    def secure_image_urls(cls, urls):
        signed_urls = [
            create_imageproxy_signed_url(settings.IMAGES_URL, settings.IMAGES_SECRET_KEY, url) for url in urls
        ]
        return dict(zip(urls, signed_urls))

    @classmethod
    def secure_image_thumbnails(cls, urls, size=192):
        signed_urls = [
            create_imageproxy_signed_url(settings.IMAGES_URL, settings.IMAGES_SECRET_KEY, url, size)
            for url in urls
        ]
        return dict(zip(urls, signed_urls))

    def get_tags(self, entry):
        fcat = []
        if "tags" in entry:
            for tcat in entry.tags:
                term = None
                if hasattr(tcat, "label") and tcat.label:
                    term = tcat.label
                elif hasattr(tcat, "term") and tcat.term:
                    term = tcat.term
                if not term or "CDATA" in term:
                    continue
                qcat = term.strip()
                if "," in qcat or "/" in qcat:
                    qcat = qcat.replace(",", "/").split("/")
                else:
                    qcat = [qcat]
                for zcat in qcat:
                    tagname = zcat.lower()
                    while "  " in tagname:
                        tagname = tagname.replace("  ", " ")
                    tagname = tagname.strip()
                    if not tagname or tagname == " ":
                        continue
                    fcat.append(tagname)
        fcat = [strip_tags(t)[:250] for t in fcat[:12]]
        return fcat

    @classmethod
    def get_permalink(cls, entry):
        link = entry.get("link")
        if not link:
            links = entry.get("links")
            if links:
                link = links[0].get("href")
        if not link:
            link = entry.get("id")
        return link

    def _exists_story(self, story, story_content, existing_stories, new_story_hashes, lightweight=False):
        story_in_system = None
        story_has_changed = False
        story_link = self.get_permalink(story)
        existing_stories_hashes = list(existing_stories.keys())
        story_pub_date = story.get("published")
        # story_published_now = story.get('published_now', False)
        # start_date = story_pub_date - datetime.timedelta(hours=8)
        # end_date = story_pub_date + datetime.timedelta(hours=8)

        for existing_story in list(existing_stories.values()):
            content_ratio = 0
            # existing_story_pub_date = existing_story.story_date

            if isinstance(existing_story.id, str):
                # Correcting a MongoDB bug
                existing_story.story_guid = existing_story.id

            if story.get("story_hash") == existing_story.story_hash:
                story_in_system = existing_story
            elif (
                story.get("story_hash") in existing_stories_hashes
                and story.get("story_hash") != existing_story.story_hash
            ):
                # Story already exists but is not this one
                continue
            elif (
                existing_story.story_hash in new_story_hashes
                and story.get("story_hash") != existing_story.story_hash
            ):
                # Story coming up later
                continue

            if "story_latest_content_z" in existing_story:
                existing_story_content = smart_str(zlib.decompress(existing_story.story_latest_content_z))
            elif "story_latest_content" in existing_story:
                existing_story_content = existing_story.story_latest_content
            elif "story_content_z" in existing_story:
                existing_story_content = smart_str(zlib.decompress(existing_story.story_content_z))
            elif "story_content" in existing_story:
                existing_story_content = existing_story.story_content
            else:
                existing_story_content = ""

            # Title distance + content distance, checking if story changed
            story_title_difference = abs(levenshtein_distance(story.get("title"), existing_story.story_title))

            title_ratio = difflib.SequenceMatcher(
                None, story.get("title", ""), existing_story.story_title
            ).ratio()
            if title_ratio < 0.75:
                continue

            story_timedelta = existing_story.story_date - story_pub_date
            # logging.debug('Story pub date: %s %s (%s, %s)' % (existing_story.story_date, story_pub_date, title_ratio, story_timedelta))
            if abs(story_timedelta.days) >= 2:
                continue

            seq = difflib.SequenceMatcher(None, story_content, existing_story_content)

            similiar_length_min = 1000
            if existing_story.story_permalink == story_link and existing_story.story_title == story.get(
                "title"
            ):
                similiar_length_min = 20

            # Skip content check if already failed due to a timeout. This way we catch titles
            if lightweight:
                continue

            if (
                seq
                and story_content
                and len(story_content) > similiar_length_min
                and existing_story_content
                and seq.real_quick_ratio() > 0.9
                and seq.quick_ratio() > 0.95
            ):
                content_ratio = seq.ratio()

            if story_title_difference > 0 and content_ratio > 0.98:
                story_in_system = existing_story
                if story_title_difference > 0 or content_ratio < 1.0:
                    if settings.DEBUG:
                        logging.debug(
                            "   ---> Title difference - %s/%s (%s): %s"
                            % (
                                story.get("title"),
                                existing_story.story_title,
                                story_title_difference,
                                content_ratio,
                            )
                        )
                    story_has_changed = True
                    break

            # More restrictive content distance, still no story match
            if not story_in_system and content_ratio > 0.98:
                if settings.DEBUG:
                    logging.debug(
                        "   ---> Content difference - %s/%s (%s): %s"
                        % (
                            story.get("title"),
                            existing_story.story_title,
                            story_title_difference,
                            content_ratio,
                        )
                    )
                story_in_system = existing_story
                story_has_changed = True
                break

            if story_in_system and not story_has_changed:
                if story_content != existing_story_content:
                    if settings.DEBUG:
                        logging.debug(
                            "   ---> Content difference - %s (%s)/%s (%s)"
                            % (
                                story.get("title"),
                                len(story_content),
                                existing_story.story_title,
                                len(existing_story_content),
                            )
                        )
                    story_has_changed = True
                if story_link != existing_story.story_permalink:
                    if settings.DEBUG:
                        logging.debug(
                            "   ---> Permalink difference - %s/%s"
                            % (story_link, existing_story.story_permalink)
                        )
                    story_has_changed = True
                # if story_pub_date != existing_story.story_date:
                #     story_has_changed = True
                break

        # if story_has_changed or not story_in_system:
        #     print 'New/updated story: %s' % (story),
        return story_in_system, story_has_changed

    def get_next_scheduled_update(self, force=False, verbose=True, premium_speed=False, pro_speed=False):
        if self.min_to_decay and not force and not premium_speed:
            return self.min_to_decay

        from apps.notifications.models import MUserFeedNotification

        if premium_speed:
            self.active_premium_subscribers += 1
        if pro_speed:
            self.pro_subscribers += 1

        spd = self.stories_last_month / 30.0
        subs = self.active_premium_subscribers + (
            (self.active_subscribers - self.active_premium_subscribers) / 10.0
        )
        notification_count = MUserFeedNotification.objects.filter(feed_id=self.pk).count()
        # Calculate sub counts:
        #   SELECT COUNT(*) FROM feeds WHERE active_premium_subscribers > 10 AND stories_last_month >= 30;
        #   SELECT COUNT(*) FROM feeds WHERE active_premium_subscribers > 1 AND active_premium_subscribers < 10 AND stories_last_month >= 30;
        #   SELECT COUNT(*) FROM feeds WHERE active_premium_subscribers = 1 AND stories_last_month >= 30;
        # SpD > 1  Subs > 10: t = 6         # 4267   * 1440/6  =      1024080
        # SpD > 1  Subs > 1:  t = 15        # 18973  * 1440/15 =      1821408
        # SpD > 1  Subs = 1:  t = 60        # 65503  * 1440/60 =      1572072
        #   SELECT COUNT(*) FROM feeds WHERE active_premium_subscribers > 1 AND stories_last_month < 30 AND stories_last_month > 0;
        #   SELECT COUNT(*) FROM feeds WHERE active_premium_subscribers = 1 AND stories_last_month < 30 AND stories_last_month > 0;
        # SpD < 1  Subs > 1:  t = 60        # 77618  * 1440/60 =      1862832
        # SpD < 1  Subs = 1:  t = 60 * 12   # 282186 * 1440/(60*12) = 564372
        #   SELECT COUNT(*) FROM feeds WHERE active_premium_subscribers > 1 AND stories_last_month = 0;
        #   SELECT COUNT(*) FROM feeds WHERE active_subscribers > 0 AND active_premium_subscribers <= 1 AND stories_last_month = 0;
        # SpD = 0  Subs > 1:  t = 60 * 3    # 30158  * 1440/(60*3) =  241264
        # SpD = 0  Subs = 1:  t = 60 * 24   # 514131 * 1440/(60*24) = 514131
        if spd >= 1:
            if subs >= 10:
                total = 6
            elif subs > 1:
                total = 15
            else:
                total = 45
        elif spd > 0:
            if subs > 1:
                total = 60 - (spd * 60)
            else:
                total = 60 * 6 - (spd * 60 * 6)
        elif spd == 0:
            if subs > 1:
                total = 60 * 6
            elif subs == 1:
                total = 60 * 12
            else:
                total = 60 * 24
            months_since_last_story = seconds_timesince(self.last_story_date) / (60 * 60 * 24 * 30)
            total *= max(1, months_since_last_story)
        # updates_per_day_delay = 3 * 60 / max(.25, ((max(0, self.active_subscribers)**.2)
        #                                             * (self.stories_last_month**0.25)))
        # if self.active_premium_subscribers > 0:
        #     updates_per_day_delay /= min(self.active_subscribers+self.active_premium_subscribers, 4)
        # updates_per_day_delay = int(updates_per_day_delay)

        # Lots of subscribers = lots of updates
        # 24 hours for 0 subscribers.
        # 4 hours for 1 subscriber.
        # .5 hours for 2 subscribers.
        # .25 hours for 3 subscribers.
        # 1 min for 10 subscribers.
        # subscriber_bonus = 6 * 60 / max(.167, max(0, self.active_subscribers)**3)
        # if self.premium_subscribers > 0:
        #     subscriber_bonus /= min(self.active_subscribers+self.premium_subscribers, 5)
        # subscriber_bonus = int(subscriber_bonus)

        if self.is_push:
            fetch_history = MFetchHistory.feed(self.pk)
            if len(fetch_history["push_history"]):
                total = total * 12

        # Any notifications means a 30 min minumum
        if notification_count > 0:
            total = min(total, 30)

        # 4 hour max for premiums, 48 hour max for free
        if subs >= 1:
            total = min(total, 60 * 4 * 1)
        else:
            total = min(total, 60 * 24 * 2)

        # Craigslist feeds get 6 hours minimum
        if "craigslist" in self.feed_address:
            total = max(total, 60 * 6)

        # Twitter feeds get 2 hours minimum
        if "twitter" in self.feed_address:
            total = max(total, 60 * 2)

        # Pro subscribers get absolute minimum
        if self.pro_subscribers and self.pro_subscribers >= 1:
            if self.stories_last_month == 0:
                total = min(total, 60)
            else:
                total = min(total, settings.PRO_MINUTES_BETWEEN_FETCHES)

        if verbose:
            logging.debug(
                "   ---> [%-30s] Fetched every %s min - Subs: %s/%s/%s/%s/%s Stories/day: %s"
                % (
                    self.log_title[:30],
                    total,
                    self.num_subscribers,
                    self.active_subscribers,
                    self.active_premium_subscribers,
                    self.archive_subscribers,
                    self.pro_subscribers,
                    spd,
                )
            )
        return total

    def set_next_scheduled_update(self, verbose=False, skip_scheduling=False):
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
        total = self.get_next_scheduled_update(force=True, verbose=verbose)
        error_count = self.error_count

        if error_count:
            total = total * error_count
            total = min(total, 60 * 24 * 7)
            if verbose:
                logging.debug(
                    "   ---> [%-30s] ~FBScheduling feed fetch geometrically: "
                    "~SB%s errors. Time: %s min" % (self.log_title[:30], self.errors_since_good, total)
                )

        random_factor = random.randint(0, int(total)) / 4
        next_scheduled_update = datetime.datetime.utcnow() + datetime.timedelta(minutes=total + random_factor)
        original_min_to_decay = self.min_to_decay
        self.min_to_decay = total

        delta = self.next_scheduled_update - datetime.datetime.now()
        minutes_to_next_fetch = (delta.seconds + (delta.days * 24 * 3600)) / 60
        if minutes_to_next_fetch > self.min_to_decay or not skip_scheduling:
            self.next_scheduled_update = next_scheduled_update
            if self.active_subscribers >= 1:
                r.zadd("scheduled_updates", {self.pk: self.next_scheduled_update.strftime("%s")})
            r.zrem("tasked_feeds", self.pk)
            r.srem("queued_feeds", self.pk)

        updated_fields = ["last_update", "next_scheduled_update"]
        if self.min_to_decay != original_min_to_decay:
            updated_fields.append("min_to_decay")
        self.save(update_fields=updated_fields)

    @property
    def error_count(self):
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
        fetch_errors = int(r.zscore("error_feeds", self.pk) or 0)

        return fetch_errors + self.errors_since_good

    def schedule_feed_fetch_immediately(self, verbose=True):
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
        if not self.num_subscribers:
            logging.debug(
                "   ---> [%-30s] Not scheduling feed fetch immediately, no subs." % (self.log_title[:30])
            )
            return self

        if verbose:
            logging.debug("   ---> [%-30s] Scheduling feed fetch immediately..." % (self.log_title[:30]))

        self.next_scheduled_update = datetime.datetime.utcnow()
        r.zadd("scheduled_updates", {self.pk: self.next_scheduled_update.strftime("%s")})

        return self.save()

    def setup_push(self):
        from apps.push.models import PushSubscription

        try:
            push = self.push
        except PushSubscription.DoesNotExist:
            self.is_push = False
        else:
            self.is_push = push.verified
        self.save()

    def queue_pushed_feed_xml(self, xml, latest_push_date_delta=None):
        r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
        queue_size = r.llen("push_feeds")

        if latest_push_date_delta:
            latest_push_date_delta = "%s" % str(latest_push_date_delta).split(".", 2)[0]

        if queue_size > 1000:
            self.schedule_feed_fetch_immediately()
        else:
            logging.debug(
                "   ---> [%-30s] [%s] ~FB~SBQueuing pushed stories, last pushed %s..."
                % (self.log_title[:30], self.pk, latest_push_date_delta)
            )
            self.set_next_scheduled_update()
            PushFeeds.apply_async(args=(self.pk, xml), queue="push_feeds")

    # def calculate_collocations_story_content(self,
    #                                          collocation_measures=TrigramAssocMeasures,
    #                                          collocation_finder=TrigramCollocationFinder):
    #     stories = MStory.objects.filter(story_feed_id=self.pk)
    #     story_content = ' '.join([s.story_content for s in stories if s.story_content])
    #     return self.calculate_collocations(story_content, collocation_measures, collocation_finder)
    #
    # def calculate_collocations_story_title(self,
    #                                        collocation_measures=BigramAssocMeasures,
    #                                        collocation_finder=BigramCollocationFinder):
    #     stories = MStory.objects.filter(story_feed_id=self.pk)
    #     story_titles = ' '.join([s.story_title for s in stories if s.story_title])
    #     return self.calculate_collocations(story_titles, collocation_measures, collocation_finder)
    #
    # def calculate_collocations(self, content,
    #                            collocation_measures=TrigramAssocMeasures,
    #                            collocation_finder=TrigramCollocationFinder):
    #     content = re.sub(r'&#8217;', '\'', content)
    #     content = re.sub(r'&amp;', '&', content)
    #     try:
    #         content = unicode(BeautifulStoneSoup(content,
    #                           convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
    #     except ValueError, e:
    #         print "ValueError, ignoring: %s" % e
    #     content = re.sub(r'</?\w+\s+[^>]*>', '', content)
    #     content = re.split(r"[^A-Za-z-'&]+", content)
    #
    #     finder = collocation_finder.from_words(content)
    #     finder.apply_freq_filter(3)
    #     best = finder.nbest(collocation_measures.pmi, 10)
    #     phrases = [' '.join(phrase) for phrase in best]
    #
    #     return phrases


# class FeedCollocations(models.Model):
#     feed = models.ForeignKey(Feed)
#     phrase = models.CharField(max_length=500)


class FeedData(models.Model):
    feed = AutoOneToOneField(Feed, related_name="data", on_delete=models.CASCADE)
    feed_tagline = models.CharField(max_length=1024, blank=True, null=True)
    story_count_history = models.TextField(blank=True, null=True)
    feed_classifier_counts = models.TextField(blank=True, null=True)
    popular_tags = models.CharField(max_length=1024, blank=True, null=True)
    popular_authors = models.CharField(max_length=2048, blank=True, null=True)

    def save(self, *args, **kwargs):
        if self.feed_tagline and len(self.feed_tagline) >= 1000:
            self.feed_tagline = self.feed_tagline[:1000]

        try:
            super(FeedData, self).save(*args, **kwargs)
        except (IntegrityError, OperationError):
            if hasattr(self, "id") and self.id:
                self.delete()
        except DatabaseError as e:
            # Nothing updated
            logging.debug(" ---> ~FRNothing updated in FeedData (%s): %s" % (self.feed, e))
            pass


class MFeedIcon(mongo.Document):
    feed_id = mongo.IntField(primary_key=True)
    color = mongo.StringField(max_length=6)
    data = mongo.StringField()
    icon_url = mongo.StringField()
    not_found = mongo.BooleanField(default=False)

    meta = {
        "collection": "feed_icons",
        "allow_inheritance": False,
    }

    @classmethod
    def get_feed(cls, feed_id, create=True):
        try:
            feed_icon = cls.objects.read_preference(pymongo.ReadPreference.PRIMARY).get(feed_id=feed_id)
        except cls.DoesNotExist:
            if create:
                feed_icon = cls.objects.create(feed_id=feed_id)
            else:
                feed_icon = None

        return feed_icon

    def save(self, *args, **kwargs):
        if self.icon_url:
            self.icon_url = str(self.icon_url)
        try:
            return super(MFeedIcon, self).save(*args, **kwargs)
        except (IntegrityError, OperationError):
            # print "Error on Icon: %s" % e
            if hasattr(self, "_id"):
                self.delete()


class MFeedPage(mongo.Document):
    feed_id = mongo.IntField(primary_key=True)
    page_data = mongo.BinaryField()

    meta = {
        "collection": "feed_pages",
        "allow_inheritance": False,
    }

    def page(self):
        try:
            return zlib.decompress(self.page_data)
        except zlib.error as e:
            logging.debug(" ***> Zlib decompress error: %s" % e)
            self.page_data = None
            self.save()
            return

    @classmethod
    def get_data(cls, feed_id):
        data = None
        feed_page = cls.objects(feed_id=feed_id)
        if feed_page:
            page_data_z = feed_page[0].page_data
            if page_data_z:
                try:
                    data = zlib.decompress(page_data_z)
                except zlib.error as e:
                    logging.debug(" ***> Zlib decompress error: %s" % e)
                    feed_page.page_data = None
                    feed_page.save()
                    return

        if not data:
            dupe_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
            if dupe_feed:
                feed = dupe_feed[0].feed
                feed_page = MFeedPage.objects.filter(feed_id=feed.pk)
                if feed_page:
                    page_data_z = feed_page[0].page_data
                    if page_data_z:
                        data = zlib.decompress(feed_page[0].page_data)

        return data


class MStory(mongo.Document):
    """A feed item"""

    story_feed_id = mongo.IntField()
    story_date = mongo.DateTimeField()
    story_title = mongo.StringField(max_length=1024)
    story_content = mongo.StringField()
    story_content_z = mongo.BinaryField()
    story_original_content = mongo.StringField()
    story_original_content_z = mongo.BinaryField()
    story_latest_content = mongo.StringField()
    story_latest_content_z = mongo.BinaryField()
    original_text_z = mongo.BinaryField()
    original_page_z = mongo.BinaryField()
    story_content_type = mongo.StringField(max_length=255)
    story_author_name = mongo.StringField()
    story_permalink = mongo.StringField()
    story_guid = mongo.StringField()
    story_hash = mongo.StringField()
    image_urls = mongo.ListField(mongo.StringField(max_length=1024))
    story_tags = mongo.ListField(mongo.StringField(max_length=250))
    comment_count = mongo.IntField()
    comment_user_ids = mongo.ListField(mongo.IntField())
    share_count = mongo.IntField()
    share_user_ids = mongo.ListField(mongo.IntField())

    meta = {
        "collection": "stories",
        "indexes": [
            ("story_feed_id", "-story_date"),
            {
                "fields": ["story_hash"],
                "unique": True,
            },
        ],
        "ordering": ["-story_date"],
        "allow_inheritance": False,
        "cascade": False,
        "strict": False,
    }

    RE_STORY_HASH = re.compile(r"^(\d{1,10}):(\w{6})$")
    RE_RS_KEY = re.compile(r"^RS:(\d+):(\d+)$")

    def __str__(self):
        content = self.story_content_z if self.story_content_z else ""
        return f"{self.story_hash}: {self.story_title[:20]} ({len(self.story_content_z) if self.story_content_z else 0} bytes)"

    @property
    def guid_hash(self):
        return hashlib.sha1((self.story_guid).encode(encoding="utf-8")).hexdigest()[:6]

    @classmethod
    def guid_hash_unsaved(self, guid):
        return hashlib.sha1(guid.encode(encoding="utf-8")).hexdigest()[:6]

    @property
    def feed_guid_hash(self):
        return "%s:%s" % (self.story_feed_id, self.guid_hash)

    @classmethod
    def feed_guid_hash_unsaved(cls, feed_id, guid):
        return "%s:%s" % (feed_id, cls.guid_hash_unsaved(guid))

    @property
    def decoded_story_title(self):
        return html.unescape(self.story_title)

    @property
    def story_content_str(self):
        story_content = self.story_content
        if not story_content and self.story_content_z:
            story_content = smart_str(zlib.decompress(self.story_content_z))
        else:
            story_content = smart_str(story_content)

        return story_content

    def save(self, *args, **kwargs):
        story_title_max = MStory._fields["story_title"].max_length
        story_content_type_max = MStory._fields["story_content_type"].max_length
        self.story_hash = self.feed_guid_hash

        self.extract_image_urls()

        if self.story_content:
            self.story_content_z = zlib.compress(smart_bytes(self.story_content))
            self.story_content = None
        if self.story_original_content:
            self.story_original_content_z = zlib.compress(smart_bytes(self.story_original_content))
            self.story_original_content = None
        if self.story_latest_content:
            self.story_latest_content_z = zlib.compress(smart_bytes(self.story_latest_content))
            self.story_latest_content = None
        if self.story_title and len(self.story_title) > story_title_max:
            self.story_title = self.story_title[:story_title_max]
        if self.story_content_type and len(self.story_content_type) > story_content_type_max:
            self.story_content_type = self.story_content_type[:story_content_type_max]

        super(MStory, self).save(*args, **kwargs)

        self.sync_redis()

        return self

    def delete(self, *args, **kwargs):
        self.remove_from_redis()
        self.remove_from_search_index()

        super(MStory, self).delete(*args, **kwargs)

    def publish_to_subscribers(self):
        try:
            r = redis.Redis(connection_pool=settings.REDIS_PUBSUB_POOL)
            r.publish(
                "%s:story" % (self.story_feed_id), "%s,%s" % (self.story_hash, self.story_date.strftime("%s"))
            )
        except redis.ConnectionError:
            logging.debug(
                "   ***> [%-30s] ~BMRedis is unavailable for real-time."
                % (Feed.get_by_id(self.story_feed_id).title[:30],)
            )

    @classmethod
    def purge_feed_stories(cls, feed, cutoff, verbose=True):
        stories = cls.objects(story_feed_id=feed.pk)
        logging.debug(" ---> Deleting %s stories from %s" % (stories.count(), feed))
        if stories.count() > cutoff * 1.25:
            logging.debug(" ***> ~FRToo many stories in %s, not purging..." % (feed))
            return
        stories.delete()

    @classmethod
    def index_all_for_search(cls, offset=0):
        if not offset:
            SearchStory.create_elasticsearch_mapping(delete=True)

        last_pk = Feed.objects.latest("pk").pk
        for f in range(offset, last_pk, 1000):
            print(" ---> %s / %s (%.2s%%)" % (f, last_pk, float(f) / last_pk * 100))
            feeds = Feed.objects.filter(
                pk__in=list(range(f, f + 1000)), active=True, active_subscribers__gte=1
            ).values_list("pk")
            for (f,) in feeds:
                stories = cls.objects.filter(story_feed_id=f)
                if not len(stories):
                    continue
                print(f"Indexing {len(stories)} stories in feed {f}")
                for story in stories:
                    story.index_story_for_search()

    def index_story_for_search(self):
        story_content = self.story_content or ""
        if self.story_content_z:
            story_content = zlib.decompress(self.story_content_z)
        SearchStory.index(
            story_hash=self.story_hash,
            story_title=self.story_title,
            story_content=prep_for_search(story_content),
            story_tags=self.story_tags,
            story_author=self.story_author_name,
            story_feed_id=self.story_feed_id,
            story_date=self.story_date,
        )

    def remove_from_search_index(self):
        try:
            SearchStory.remove(self.story_hash)
        except Exception:
            pass

    @classmethod
    def trim_feed(cls, cutoff, feed_id=None, feed=None, verbose=True):
        extra_stories_count = 0
        cutoff = int(cutoff)
        if not feed_id and not feed:
            return extra_stories_count

        if not feed_id:
            feed_id = feed.pk
        if not feed:
            feed = feed_id

        stories = cls.objects(story_feed_id=feed_id).only("story_date").order_by("-story_date")

        if stories.count() > cutoff:
            logging.debug(
                "   ---> [%-30s] ~FMFound %s stories. Trimming to ~SB%s~SN..."
                % (str(feed)[:30], stories.count(), cutoff)
            )
            try:
                story_trim_date = stories[cutoff].story_date
                if story_trim_date == stories[0].story_date:
                    # Handle case where every story is the same time
                    story_trim_date = story_trim_date - datetime.timedelta(seconds=1)
            except IndexError as e:
                logging.debug(" ***> [%-30s] ~BRError trimming feed: %s" % (str(feed)[:30], e))
                return extra_stories_count

            extra_stories = cls.objects(story_feed_id=feed_id, story_date__lte=story_trim_date)
            extra_stories_count = extra_stories.count()
            shared_story_count = 0
            for story in extra_stories:
                if story.share_count:
                    shared_story_count += 1
                    extra_stories_count -= 1
                    continue
                story.delete()
            if verbose:
                existing_story_count = cls.objects(story_feed_id=feed_id).count()
                logging.debug(
                    "   ---> Deleted %s stories, %s (%s shared) left."
                    % (extra_stories_count, existing_story_count, shared_story_count)
                )

        return extra_stories_count

    @classmethod
    def find_story(cls, story_feed_id=None, story_id=None, story_hash=None, original_only=False):
        from apps.social.models import MSharedStory

        original_found = False
        if story_hash:
            story_id = story_hash
        story_hash = cls.ensure_story_hash(story_id, story_feed_id)
        if not story_feed_id:
            story_feed_id, _ = cls.split_story_hash(story_hash)
        if isinstance(story_id, ObjectId):
            story = cls.objects(id=story_id).limit(1).first()
        else:
            story = cls.objects(story_hash=story_hash).limit(1).first()

        if story:
            original_found = True
        if not story and not original_only:
            story = (
                MSharedStory.objects.filter(story_feed_id=story_feed_id, story_hash=story_hash)
                .limit(1)
                .first()
            )
        if not story and not original_only:
            story = (
                MStarredStory.objects.filter(story_feed_id=story_feed_id, story_hash=story_hash)
                .limit(1)
                .first()
            )

        return story, original_found

    @classmethod
    def find_by_id(cls, story_ids):
        from apps.social.models import MSharedStory

        count = len(story_ids)
        multiple = isinstance(story_ids, list) or isinstance(story_ids, tuple)

        stories = list(cls.objects(id__in=story_ids))
        if len(stories) < count:
            shared_stories = list(MSharedStory.objects(id__in=story_ids))
            stories.extend(shared_stories)

        if not multiple:
            stories = stories[0]

        return stories

    @classmethod
    def find_by_story_hashes(cls, story_hashes):
        from apps.social.models import MSharedStory

        count = len(story_hashes)
        multiple = isinstance(story_hashes, list) or isinstance(story_hashes, tuple)

        stories = list(cls.objects(story_hash__in=story_hashes))
        if len(stories) < count:
            hashes_found = [s.story_hash for s in stories]
            remaining_hashes = list(set(story_hashes) - set(hashes_found))
            story_feed_ids = [h.split(":")[0] for h in remaining_hashes]
            shared_stories = list(
                MSharedStory.objects(story_feed_id__in=story_feed_ids, story_hash__in=remaining_hashes)
            )
            stories.extend(shared_stories)

        if not multiple:
            stories = stories[0]

        return stories

    @classmethod
    def ensure_story_hash(cls, story_id, story_feed_id):
        if not cls.RE_STORY_HASH.match(story_id):
            story_id = "%s:%s" % (
                story_feed_id,
                hashlib.sha1(story_id.encode(encoding="utf-8")).hexdigest()[:6],
            )

        return story_id

    @classmethod
    def split_story_hash(cls, story_hash):
        matches = cls.RE_STORY_HASH.match(story_hash)
        if matches:
            groups = matches.groups()
            return groups[0], groups[1]
        return None, None

    @classmethod
    def split_rs_key(cls, rs_key):
        matches = cls.RE_RS_KEY.match(rs_key)
        if matches:
            groups = matches.groups()
            return groups[0], groups[1]
        return None, None

    @classmethod
    def story_hashes(cls, story_ids):
        story_hashes = []
        for story_id in story_ids:
            story_hash = cls.ensure_story_hash(story_id)
            if not story_hash:
                continue
            story_hashes.append(story_hash)

        return story_hashes

    def sync_redis(self, r=None):
        if not r:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        feed = Feed.get_by_id(self.story_feed_id)

        if self.id and self.story_date > feed.unread_cutoff:
            feed_key = "F:%s" % self.story_feed_id
            r.sadd(feed_key, self.story_hash)
            r.expire(feed_key, feed.days_of_story_hashes * 24 * 60 * 60)

            r.zadd("z" + feed_key, {self.story_hash: time.mktime(self.story_date.timetuple())})
            r.expire("z" + feed_key, feed.days_of_story_hashes * 24 * 60 * 60)

    def remove_from_redis(self, r=None):
        if not r:
            r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        if self.id:
            r.srem("F:%s" % self.story_feed_id, self.story_hash)
            r.zrem("zF:%s" % self.story_feed_id, self.story_hash)

    @classmethod
    def sync_feed_redis(cls, story_feed_id, allow_skip_resync=False):
        r = redis.Redis(connection_pool=settings.REDIS_STORY_HASH_POOL)
        feed = Feed.get_by_id(story_feed_id)
        stories = cls.objects.filter(story_feed_id=story_feed_id, story_date__gte=feed.unread_cutoff)

        if allow_skip_resync and stories.count() > 1000:
            logging.debug(
                f" ---> [{feed.log_title[:30]}] ~FYSkipping resync of ~SB{stories.count()}~SN stories because it already had archive subscribers"
            )
            return

        # Don't delete redis keys because they take time to rebuild and subs can
        # be counted incorrectly during that time.
        # r.delete('F:%s' % story_feed_id)
        # r.delete('zF:%s' % story_feed_id)

        logging.info(
            "   ---> [%-30s] ~FMSyncing ~SB%s~SN stories to redis"
            % (feed and feed.log_title[:30] or story_feed_id, stories.count())
        )
        p = r.pipeline()
        for story in stories:
            story.sync_redis(r=p)
        p.execute()

    def count_comments(self):
        from apps.social.models import MSharedStory

        params = {
            "story_guid": self.story_guid,
            "story_feed_id": self.story_feed_id,
        }
        comments = MSharedStory.objects.filter(has_comments=True, **params).only("user_id")
        shares = MSharedStory.objects.filter(**params).only("user_id")
        self.comment_count = comments.count()
        self.comment_user_ids = [c["user_id"] for c in comments]
        self.share_count = shares.count()
        self.share_user_ids = [s["user_id"] for s in shares]
        self.save()

    def extract_image_urls(self, force=False, text=False):
        if self.image_urls and not force and not text:
            return self.image_urls

        story_content = None
        if not text:
            story_content = self.story_content_str
        elif text:
            if self.original_text_z:
                story_content = smart_str(zlib.decompress(self.original_text_z))
        if not story_content:
            return

        try:
            soup = BeautifulSoup(story_content, features="lxml")
        except UserWarning as e:
            logging.debug(" ---> ~FBWarning on BS4: ~SB%s" % str(e)[:100])
            return
        except ValueError:
            if not text:
                return self.extract_image_urls(force=force, text=True)
            else:
                return

        images = soup.findAll("img")

        # Add youtube thumbnail and insert appropriately before/after images.
        # Give the Youtube a bit of an edge.
        video_thumbnails = soup.findAll(
            "iframe", src=lambda x: x and any(y in x for y in ["youtube.com", "ytimg.com"])
        )
        for video_thumbnail in video_thumbnails:
            video_src = video_thumbnail.get("src")
            video_id = re.search(".*?youtube.com/embed/([A-Za-z0-9\-_]+)", video_src)
            if not video_id:
                video_id = re.search(".*?youtube.com/v/([A-Za-z0-9\-_]+)", video_src)
            if not video_id:
                video_id = re.search(".*?ytimg.com/vi/([A-Za-z0-9\-_]+)", video_src)
            if not video_id:
                video_id = re.search(".*?youtube.com/watch\?v=([A-Za-z0-9\-_]+)", video_src)
            if not video_id:
                logging.debug(f" ***> Couldn't find youtube url in {video_thumbnail}: {video_src}")
                continue
            video_img_url = f"https://img.youtube.com/vi/{video_id.groups()[0]}/0.jpg"
            iframe_index = story_content.index("<iframe")
            try:
                img_index = story_content.index("<img") * 3
            except ValueError:
                img_index = None
            if not img_index or iframe_index < img_index:
                images.insert(0, video_img_url)
            else:
                images.append(video_img_url)

        if not images:
            if not text:
                return self.extract_image_urls(force=force, text=True)
            else:
                return

        image_urls = self.image_urls
        if not image_urls:
            image_urls = []

        for image in images:
            if isinstance(image, str):
                image_url = image
            else:
                image_url = image.get("src")
            if not image_url:
                continue
            if image_url and len(image_url) >= 1024:
                continue
            if "feedburner.com" in image_url:
                continue
            try:
                image_url = urllib.parse.urljoin(self.story_permalink, image_url)
            except ValueError:
                continue
            image_urls.append(image_url)

        if not image_urls:
            if not text:
                return self.extract_image_urls(force=force, text=True)
            else:
                return

        if text:
            urls = []
            for url in image_urls:
                if "http://" in url[1:] or "https://" in url[1:]:
                    continue
                urls.append(url)
            image_urls = urls

        ordered_image_urls = []
        for image_url in list(set(image_urls)):
            if "feedburner" in image_url:
                ordered_image_urls.append(image_url)
            else:
                ordered_image_urls.insert(0, image_url)
        image_urls = ordered_image_urls

        if len(image_urls):
            self.image_urls = [u for u in image_urls if u]
        else:
            return

        max_length = MStory.image_urls.field.max_length
        while len("".join(self.image_urls)) > max_length:
            if len(self.image_urls) <= 1:
                self.image_urls[0] = self.image_urls[0][: max_length - 1]
                break
            else:
                self.image_urls.pop()

        return self.image_urls

    def fetch_original_text(self, force=False, request=None, debug=False):
        original_text_z = self.original_text_z

        if not original_text_z or force:
            feed = Feed.get_by_id(self.story_feed_id)
            self.extract_image_urls(force=force, text=False)
            ti = TextImporter(self, feed=feed, request=request, debug=debug)
            original_doc = ti.fetch(return_document=True)
            original_text = original_doc.get("content") if original_doc else None
            self.extract_image_urls(force=force, text=True)
            self.save()
        else:
            logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.")
            original_text = zlib.decompress(original_text_z)

        return original_text

    def fetch_original_page(self, force=False, request=None, debug=False):
        from apps.rss_feeds.page_importer import PageImporter

        if not self.original_page_z or force:
            feed = Feed.get_by_id(self.story_feed_id)
            importer = PageImporter(request=request, feed=feed, story=self)
            original_page = importer.fetch_story()
        else:
            logging.user(request, "~FYFetching ~FGoriginal~FY story page, ~SBfound.")
            original_page = zlib.decompress(self.original_page_z)

        return original_page


class MStarredStory(mongo.DynamicDocument):
    """Like MStory, but not inherited due to large overhead of _cls and _type in
    mongoengine's inheritance model on every single row."""

    user_id = mongo.IntField(unique_with=("story_guid",))
    starred_date = mongo.DateTimeField()
    starred_updated = mongo.DateTimeField()
    story_feed_id = mongo.IntField()
    story_date = mongo.DateTimeField()
    story_title = mongo.StringField(max_length=1024)
    story_content = mongo.StringField()
    story_content_z = mongo.BinaryField()
    story_original_content = mongo.StringField()
    story_original_content_z = mongo.BinaryField()
    original_text_z = mongo.BinaryField()
    story_content_type = mongo.StringField(max_length=255)
    story_author_name = mongo.StringField()
    story_permalink = mongo.StringField()
    story_guid = mongo.StringField()
    story_hash = mongo.StringField()
    story_tags = mongo.ListField(mongo.StringField(max_length=250))
    user_notes = mongo.StringField()
    user_tags = mongo.ListField(mongo.StringField(max_length=128))
    highlights = mongo.ListField(mongo.StringField(max_length=16384))
    image_urls = mongo.ListField(mongo.StringField(max_length=1024))

    meta = {
        "collection": "starred_stories",
        "indexes": [
            ("user_id", "-starred_date"),
            ("user_id", "story_feed_id"),
            ("user_id", "story_hash"),
            "story_feed_id",
        ],
        "ordering": ["-starred_date"],
        "allow_inheritance": False,
        "strict": False,
    }

    def __unicode__(self):
        try:
            user = User.objects.get(pk=self.user_id)
            username = user.username
        except User.DoesNotExist:
            username = "[deleted]"
        return "%s: %s (%s)" % (username, self.story_title[:20], self.story_feed_id)

    def save(self, *args, **kwargs):
        if self.story_content:
            self.story_content_z = zlib.compress(smart_bytes(self.story_content))
            self.story_content = None
        if self.story_original_content:
            self.story_original_content_z = zlib.compress(smart_bytes(self.story_original_content))
            self.story_original_content = None
        self.story_hash = self.feed_guid_hash
        self.starred_updated = datetime.datetime.now()

        return super(MStarredStory, self).save(*args, **kwargs)

    @classmethod
    def find_stories(cls, query, user_id, tag=None, offset=0, limit=25, order="newest"):
        stories_db = cls.objects(
            Q(user_id=user_id)
            & (
                Q(story_title__icontains=query)
                | Q(story_author_name__icontains=query)
                | Q(story_tags__icontains=query)
            )
        )
        if tag:
            stories_db = stories_db.filter(user_tags__contains=tag)

        stories_db = stories_db.order_by("%sstarred_date" % ("-" if order == "newest" else ""))[
            offset : offset + limit
        ]
        stories = Feed.format_stories(stories_db)

        return stories

    @classmethod
    def find_stories_by_user_tag(cls, user_tag, user_id, offset=0, limit=25):
        stories_db = cls.objects(Q(user_id=user_id), Q(user_tags__icontains=user_tag)).order_by(
            "-starred_date"
        )[offset : offset + limit]
        stories = Feed.format_stories(stories_db)

        return stories

    @classmethod
    def trim_old_stories(cls, stories=10, days=90, dryrun=False):
        print(" ---> Fetching starred story counts...")
        stats = settings.MONGODB.newsblur.starred_stories.aggregate(
            [
                {
                    "$group": {
                        "_id": "$user_id",
                        "stories": {"$sum": 1},
                    },
                },
                {
                    "$match": {"stories": {"$gte": stories}},
                },
            ]
        )
        month_ago = datetime.datetime.now() - datetime.timedelta(days=days)
        user_ids = list(stats)
        user_ids = sorted(user_ids, key=lambda x: x["stories"], reverse=True)
        print(" ---> Found %s users with more than %s starred stories" % (len(user_ids), stories))

        total = 0
        for stat in user_ids:
            try:
                user = User.objects.select_related("profile").get(pk=stat["_id"])
            except User.DoesNotExist:
                user = None

            if user and (user.profile.is_premium or user.profile.last_seen_on > month_ago):
                continue

            total += stat["stories"]
            username = "%s (%s)" % (user and user.username or " - ", stat["_id"])
            print(
                " ---> %19.19s: %-20.20s %s stories"
                % (user and user.profile.last_seen_on or "Deleted", username, stat["stories"])
            )
            if not dryrun and stat["_id"]:
                cls.objects.filter(user_id=stat["_id"]).delete()
            elif not dryrun and stat["_id"] == 0:
                print(" ---> Deleting unstarred stories (user_id = 0)")
                cls.objects.filter(user_id=stat["_id"]).delete()

        print(" ---> Deleted %s stories in total." % total)

    @property
    def guid_hash(self):
        return hashlib.sha1(self.story_guid.encode(encoding="utf-8")).hexdigest()[:6]

    @property
    def feed_guid_hash(self):
        return "%s:%s" % (self.story_feed_id or "0", self.guid_hash)

    def fetch_original_text(self, force=False, request=None, debug=False):
        original_text_z = self.original_text_z
        feed = Feed.get_by_id(self.story_feed_id)

        if not original_text_z or force:
            ti = TextImporter(self, feed=feed, request=request, debug=debug)
            original_text = ti.fetch()
        else:
            logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.")
            original_text = zlib.decompress(original_text_z)

        return original_text

    def fetch_original_page(self, force=False, request=None, debug=False):
        return None


class MStarredStoryCounts(mongo.Document):
    user_id = mongo.IntField()
    tag = mongo.StringField(max_length=128)
    feed_id = mongo.IntField()
    is_highlights = mongo.BooleanField()
    slug = mongo.StringField(max_length=128)
    count = mongo.IntField(default=0)

    meta = {
        "collection": "starred_stories_counts",
        "indexes": ["user_id"],
        "ordering": ["tag"],
        "allow_inheritance": False,
    }

    def __unicode__(self):
        if self.tag:
            return "Tag: %s (%s)" % (self.tag, self.count)
        elif self.feed_id:
            return "Feed: %s (%s)" % (self.feed_id, self.count)
        elif self.is_highlights:
            return "Highlights: %s (%s)" % (self.is_highlights, self.count)

        return "%s/%s/%s" % (self.tag, self.feed_id, self.is_highlights)

    @property
    def rss_url(self, secret_token=None):
        if self.feed_id:
            return

        if not secret_token:
            user = User.objects.select_related("profile").get(pk=self.user_id)
            secret_token = user.profile.secret_token

        slug = self.slug if self.slug else ""
        if not self.slug and self.tag:
            slug = slugify(self.tag)
            self.slug = slug
            self.save()

        return "%s/reader/starred_rss/%s/%s/%s" % (settings.NEWSBLUR_URL, self.user_id, secret_token, slug)

    @classmethod
    def user_counts(cls, user_id, include_total=False, try_counting=True):
        counts = cls.objects.filter(user_id=user_id)
        counts = sorted(
            [
                {
                    "tag": c.tag,
                    "count": c.count,
                    "is_highlights": c.is_highlights,
                    "feed_address": c.rss_url,
                    "active": True,
                    "feed_id": c.feed_id,
                }
                for c in counts
            ],
            key=lambda x: (x.get("tag", "") or "").lower(),
        )

        total = 0
        feed_total = 0
        for c in counts:
            if not c["tag"] and not c["feed_id"] and not c["is_highlights"]:
                total = c["count"]
            if c["feed_id"]:
                feed_total += c["count"]

        if try_counting and (total != feed_total or not len(counts)):
            user = User.objects.get(pk=user_id)
            logging.user(
                user, "~FC~SBCounting~SN saved stories (%s total vs. %s counted)..." % (total, feed_total)
            )
            cls.count_for_user(user_id)
            return cls.user_counts(user_id, include_total=include_total, try_counting=False)

        if include_total:
            return counts, total
        return counts

    @classmethod
    def schedule_count_tags_for_user(cls, user_id):
        ScheduleCountTagsForUser.apply_async(kwargs=dict(user_id=user_id))

    @classmethod
    def count_for_user(cls, user_id, total_only=False):
        user_tags = []
        user_feeds = []
        highlights = 0

        if not total_only:
            cls.objects(user_id=user_id).delete()
            try:
                user_tags = cls.count_tags_for_user(user_id)
                highlights = cls.count_highlights_for_user(user_id)
                user_feeds = cls.count_feeds_for_user(user_id)
            except pymongo.errors.OperationFailure as e:
                logging.debug(" ---> ~FBOperationError on mongo: ~SB%s" % e)

        total_stories_count = MStarredStory.objects(user_id=user_id).count()
        cls.objects(user_id=user_id, tag=None, feed_id=None, is_highlights=None).update_one(
            set__count=total_stories_count, upsert=True
        )

        return dict(total=total_stories_count, tags=user_tags, feeds=user_feeds, highlights=highlights)

    @classmethod
    def count_tags_for_user(cls, user_id):
        all_tags = MStarredStory.objects(user_id=user_id, user_tags__exists=True).item_frequencies(
            "user_tags"
        )
        user_tags = sorted(
            [(k, v) for k, v in list(all_tags.items()) if int(v) > 0 and k],
            key=lambda x: x[0].lower(),
            reverse=True,
        )

        for tag, count in list(dict(user_tags).items()):
            cls.objects(user_id=user_id, tag=tag, slug=slugify(tag)).update_one(set__count=count, upsert=True)

        return user_tags

    @classmethod
    def count_highlights_for_user(cls, user_id):
        highlighted_count = MStarredStory.objects(
            user_id=user_id, highlights__exists=True, __raw__={"$where": "this.highlights.length > 0"}
        ).count()
        if highlighted_count > 0:
            cls.objects(user_id=user_id, is_highlights=True, slug="highlights").update_one(
                set__count=highlighted_count, upsert=True
            )
        else:
            cls.objects(user_id=user_id, is_highlights=True, slug="highlights").delete()

        return highlighted_count

    @classmethod
    def count_feeds_for_user(cls, user_id):
        all_feeds = MStarredStory.objects(user_id=user_id).item_frequencies("story_feed_id")
        user_feeds = dict([(k, v) for k, v in list(all_feeds.items()) if v])

        # Clean up None'd and 0'd feed_ids, so they can be counted against the total
        if user_feeds.get(None, False):
            user_feeds[0] = user_feeds.get(0, 0)
            user_feeds[0] += user_feeds.get(None)
            del user_feeds[None]
        if user_feeds.get(0, False):
            user_feeds[-1] = user_feeds.get(0, 0)
            del user_feeds[0]

        too_many_feeds = False if len(user_feeds) < 1000 else True
        for feed_id, count in list(user_feeds.items()):
            if too_many_feeds and count <= 1:
                continue
            cls.objects(user_id=user_id, feed_id=feed_id, slug="feed:%s" % feed_id).update_one(
                set__count=count, upsert=True
            )

        return user_feeds

    @classmethod
    def adjust_count(cls, user_id, feed_id=None, tag=None, highlights=None, amount=0):
        params = dict(user_id=user_id)
        if feed_id:
            params["feed_id"] = feed_id
        if tag:
            params["tag"] = tag
        if highlights:
            params["is_highlights"] = True

        cls.objects(**params).update_one(inc__count=amount, upsert=True)
        try:
            story_count = cls.objects.get(**params)
        except cls.MultipleObjectsReturned:
            story_count = cls.objects(**params).first()
        if story_count and story_count.count <= 0:
            story_count.delete()


class MSavedSearch(mongo.Document):
    user_id = mongo.IntField()
    query = mongo.StringField(max_length=1024)
    feed_id = mongo.StringField()
    slug = mongo.StringField(max_length=128)

    meta = {
        "collection": "saved_searches",
        "indexes": [
            "user_id",
            {
                "fields": ["user_id", "feed_id", "query"],
                "unique": True,
            },
        ],
        "ordering": ["query"],
        "allow_inheritance": False,
    }

    @property
    def rss_url(self, secret_token=None):
        if not secret_token:
            user = User.objects.select_related("profile").get(pk=self.user_id)
            secret_token = user.profile.secret_token

        slug = self.slug if self.slug else ""
        return "%s/reader/saved_search/%s/%s/%s" % (settings.NEWSBLUR_URL, self.user_id, secret_token, slug)

    @classmethod
    def user_searches(cls, user_id):
        searches = cls.objects.filter(user_id=user_id)
        searches = sorted(
            [
                {
                    "query": s.query,
                    "feed_address": s.rss_url,
                    "feed_id": s.feed_id,
                    "active": True,
                }
                for s in searches
            ],
            key=lambda x: (x.get("query", "") or "").lower(),
        )
        return searches

    @classmethod
    def save_search(cls, user_id, feed_id, query):
        user = User.objects.get(pk=user_id)
        params = dict(user_id=user_id, feed_id=feed_id, query=query, slug=slugify(query))
        try:
            saved_search = cls.objects.get(**params)
            logging.user(user, "~FRSaved search already exists: ~SB%s" % query)
        except cls.DoesNotExist:
            logging.user(user, "~FCCreating a saved search: ~SB%s~SN/~SB%s" % (feed_id, query))
            saved_search = cls.objects.create(**params)

        return saved_search

    @classmethod
    def delete_search(cls, user_id, feed_id, query):
        user = User.objects.get(pk=user_id)
        params = dict(user_id=user_id, feed_id=feed_id, query=query)
        try:
            saved_search = cls.objects.get(**params)
            logging.user(user, "~FCDeleting saved search: ~SB%s" % query)
            saved_search.delete()
        except cls.DoesNotExist:
            logging.user(user, "~FRCan't delete saved search, missing: ~SB%s~SN/~SB%s" % (feed_id, query))
        except cls.MultipleObjectsReturned:
            logging.user(
                user, "~FRFound multiple saved searches, deleting: ~SB%s~SN/~SB%s" % (feed_id, query)
            )
            cls.objects(**params).delete()


class MFetchHistory(mongo.Document):
    feed_id = mongo.IntField(unique=True)
    feed_fetch_history = mongo.DynamicField()
    page_fetch_history = mongo.DynamicField()
    push_history = mongo.DynamicField()
    raw_feed_history = mongo.DynamicField()

    meta = {
        "db_alias": "nbanalytics",
        "collection": "fetch_history",
        "allow_inheritance": False,
    }

    @classmethod
    def feed(cls, feed_id, timezone=None, fetch_history=None):
        if not fetch_history:
            try:
                fetch_history = cls.objects.read_preference(pymongo.ReadPreference.PRIMARY).get(
                    feed_id=feed_id
                )
            except cls.DoesNotExist:
                fetch_history = cls.objects.create(feed_id=feed_id)
        history = {}

        for fetch_type in ["feed_fetch_history", "page_fetch_history", "push_history"]:
            history[fetch_type] = getattr(fetch_history, fetch_type)
            if not history[fetch_type]:
                history[fetch_type] = []
            for f, fetch in enumerate(history[fetch_type]):
                date_key = "push_date" if fetch_type == "push_history" else "fetch_date"
                history[fetch_type][f] = {
                    date_key: localtime_for_timezone(fetch[0], timezone).strftime("%Y-%m-%d %H:%M:%S"),
                    "status_code": fetch[1],
                    "message": fetch[2],
                }
        return history

    @classmethod
    def add(cls, feed_id, fetch_type, date=None, message=None, code=None, exception=None):
        if not date:
            date = datetime.datetime.now()
        try:
            fetch_history = cls.objects.read_preference(pymongo.ReadPreference.PRIMARY).get(feed_id=feed_id)
        except cls.DoesNotExist:
            fetch_history = cls.objects.create(feed_id=feed_id)

        if fetch_type == "feed":
            history = fetch_history.feed_fetch_history or []
        elif fetch_type == "page":
            history = fetch_history.page_fetch_history or []
        elif fetch_type == "push":
            history = fetch_history.push_history or []
        elif fetch_type == "raw_feed":
            history = fetch_history.raw_feed_history or []

        history = [[date, code, message]] + history
        any_exceptions = any([c for d, c, m in history if c not in [200, 304]])
        if any_exceptions:
            history = history[:25]
        elif fetch_type == "raw_feed":
            history = history[:10]
        else:
            history = history[:5]

        if fetch_type == "feed":
            fetch_history.feed_fetch_history = history
        elif fetch_type == "page":
            fetch_history.page_fetch_history = history
        elif fetch_type == "push":
            fetch_history.push_history = history
        elif fetch_type == "raw_feed":
            fetch_history.raw_feed_history = history

        fetch_history.save()

        if fetch_type == "feed":
            RStats.add("feed_fetch")

        return cls.feed(feed_id, fetch_history=fetch_history)


class DuplicateFeed(models.Model):
    duplicate_address = models.CharField(max_length=764, db_index=True)
    duplicate_link = models.CharField(max_length=764, null=True, db_index=True)
    duplicate_feed_id = models.CharField(max_length=255, null=True, db_index=True)
    feed = models.ForeignKey(Feed, related_name="duplicate_addresses", on_delete=models.CASCADE)

    def __str__(self):
        return "%s: %s / %s" % (self.feed, self.duplicate_address, self.duplicate_link)

    def canonical(self):
        return {
            "duplicate_address": self.duplicate_address,
            "duplicate_link": self.duplicate_link,
            "duplicate_feed_id": self.duplicate_feed_id,
            "feed_id": self.feed_id,
        }

    def save(self, *args, **kwargs):
        max_address = DuplicateFeed._meta.get_field("duplicate_address").max_length
        if len(self.duplicate_address) > max_address:
            self.duplicate_address = self.duplicate_address[:max_address]
        max_link = DuplicateFeed._meta.get_field("duplicate_link").max_length
        if self.duplicate_link and len(self.duplicate_link) > max_link:
            self.duplicate_link = self.duplicate_link[:max_link]

        super(DuplicateFeed, self).save(*args, **kwargs)


def merge_feeds(original_feed_id, duplicate_feed_id, force=False):
    from apps.reader.models import UserSubscription
    from apps.social.models import MSharedStory

    if original_feed_id == duplicate_feed_id:
        logging.info(" ***> Merging the same feed. Ignoring...")
        return original_feed_id
    try:
        original_feed = Feed.objects.get(pk=original_feed_id)
        duplicate_feed = Feed.objects.get(pk=duplicate_feed_id)
    except Feed.DoesNotExist:
        logging.info(" ***> Already deleted feed: %s" % duplicate_feed_id)
        return original_feed_id

    heavier_dupe = original_feed.num_subscribers < duplicate_feed.num_subscribers
    branched_original = original_feed.branch_from_feed and not duplicate_feed.branch_from_feed
    if (heavier_dupe or branched_original) and not force:
        original_feed, duplicate_feed = duplicate_feed, original_feed
        original_feed_id, duplicate_feed_id = duplicate_feed_id, original_feed_id
        if branched_original:
            original_feed.feed_address = strip_underscore_from_feed_address(duplicate_feed.feed_address)

    logging.info(
        " ---> Feed: [%s - %s] %s - %s"
        % (original_feed_id, duplicate_feed_id, original_feed, original_feed.feed_link)
    )
    logging.info(
        "            Orig ++> %s: (%s subs) %s / %s %s"
        % (
            original_feed.pk,
            original_feed.num_subscribers,
            original_feed.feed_address,
            original_feed.feed_link,
            " [B: %s]" % original_feed.branch_from_feed.pk if original_feed.branch_from_feed else "",
        )
    )
    logging.info(
        "            Dupe --> %s: (%s subs) %s / %s %s"
        % (
            duplicate_feed.pk,
            duplicate_feed.num_subscribers,
            duplicate_feed.feed_address,
            duplicate_feed.feed_link,
            " [B: %s]" % duplicate_feed.branch_from_feed.pk if duplicate_feed.branch_from_feed else "",
        )
    )

    original_feed.branch_from_feed = None

    user_subs = UserSubscription.objects.filter(feed=duplicate_feed).order_by("-pk")
    for user_sub in user_subs:
        user_sub.switch_feed(original_feed, duplicate_feed)

    def delete_story_feed(model, feed_field="feed_id"):
        duplicate_stories = model.objects(**{feed_field: duplicate_feed.pk})
        # if duplicate_stories.count():
        #     logging.info(" ---> Deleting %s %s" % (duplicate_stories.count(), model))
        duplicate_stories.delete()

    delete_story_feed(MStory, "story_feed_id")
    delete_story_feed(MFeedPage, "feed_id")

    try:
        DuplicateFeed.objects.create(
            duplicate_address=duplicate_feed.feed_address,
            duplicate_link=duplicate_feed.feed_link,
            duplicate_feed_id=duplicate_feed.pk,
            feed=original_feed,
        )
    except (IntegrityError, OperationError) as e:
        logging.info(" ***> Could not save DuplicateFeed: %s" % e)

    # Switch this dupe feed's dupe feeds over to the new original.
    duplicate_feeds_duplicate_feeds = DuplicateFeed.objects.filter(feed=duplicate_feed)
    for dupe_feed in duplicate_feeds_duplicate_feeds:
        dupe_feed.feed = original_feed
        dupe_feed.duplicate_feed_id = duplicate_feed.pk
        dupe_feed.save()

    logging.debug(
        " ---> Dupe subscribers (%s): %s, Original subscribers (%s): %s"
        % (duplicate_feed.pk, duplicate_feed.num_subscribers, original_feed.pk, original_feed.num_subscribers)
    )
    if duplicate_feed.pk != original_feed.pk:
        duplicate_feed.delete()
    else:
        logging.debug(" ***> Duplicate feed is the same as original feed. Panic!")
    logging.debug(" ---> Deleted duplicate feed: %s/%s" % (duplicate_feed, duplicate_feed_id))
    original_feed.branch_from_feed = None
    original_feed.count_subscribers()
    original_feed.save()
    logging.debug(" ---> Now original subscribers: %s" % (original_feed.num_subscribers))

    MSharedStory.switch_feed(original_feed_id, duplicate_feed_id)

    return original_feed_id


def rewrite_folders(folders, original_feed, duplicate_feed):
    new_folders = []

    for k, folder in enumerate(folders):
        if isinstance(folder, int):
            if folder == duplicate_feed.pk:
                # logging.info("              ===> Rewrote %s'th item: %s" % (k+1, folders))
                new_folders.append(original_feed.pk)
            else:
                new_folders.append(folder)
        elif isinstance(folder, dict):
            for f_k, f_v in list(folder.items()):
                new_folders.append({f_k: rewrite_folders(f_v, original_feed, duplicate_feed)})

    return new_folders