mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Feed update cascading. Feeds update relative to how often they have new content. This should save a LOT of processing power. Whew.
This commit is contained in:
parent
67cc20c272
commit
71b746a48d
3 changed files with 28 additions and 9 deletions
|
@ -8,6 +8,7 @@ import logging
|
|||
import socket
|
||||
import os
|
||||
import math
|
||||
import datetime
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
|
@ -26,23 +27,21 @@ class Command(BaseCommand):
|
|||
def handle(self, *args, **options):
|
||||
if options['daemonize']:
|
||||
daemonize()
|
||||
|
||||
feeds = Feed.objects.all().order_by('?')
|
||||
|
||||
socket.setdefaulttimeout(options['timeout'])
|
||||
now = datetime.datetime.now()
|
||||
feeds = Feed.objects.filter(next_scheduled_update__lte=now).order_by('?')
|
||||
|
||||
num_workers = min(len(feeds), options['workerthreads'])
|
||||
|
||||
if options['single_threaded']:
|
||||
num_workers = 1
|
||||
|
||||
# setting socket timeout (default = 10 seconds)
|
||||
socket.setdefaulttimeout(options['timeout'])
|
||||
|
||||
disp = feed_fetcher.Dispatcher(options, num_workers)
|
||||
|
||||
|
||||
|
||||
feeds_queue = []
|
||||
for _ in range(num_workers):
|
||||
feeds_queue.append([])
|
||||
|
||||
i = 0
|
||||
for feed in feeds:
|
||||
feeds_queue[i%num_workers].append(feed)
|
||||
|
|
|
@ -18,6 +18,7 @@ from django.db.models import Q
|
|||
import settings
|
||||
import logging
|
||||
import difflib
|
||||
import datetime
|
||||
from utils.diff import HTMLDiff
|
||||
|
||||
USER_AGENT = 'NewsBlur v1.0 - newsblur.com'
|
||||
|
@ -38,6 +39,8 @@ class Feed(models.Model):
|
|||
etag = models.CharField(max_length=50, blank=True, null=True)
|
||||
last_modified = models.DateTimeField(null=True, blank=True)
|
||||
page_data = StoryField(null=True, blank=True)
|
||||
stories_per_month = models.IntegerField(default=0)
|
||||
next_scheduled_update = models.DateTimeField(default=datetime.datetime.now)
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
|
|
|
@ -15,6 +15,8 @@ import datetime
|
|||
import traceback
|
||||
import multiprocessing
|
||||
import Queue
|
||||
import datetime
|
||||
import random
|
||||
|
||||
# Refresh feed code adapted from Feedjack.
|
||||
# http://feedjack.googlecode.com
|
||||
|
@ -107,6 +109,19 @@ class ProcessFeed:
|
|||
logging.debug(u'[%d] Processing %s' % (self.feed.id,
|
||||
self.feed.feed_title))
|
||||
|
||||
# Count stories in past month to calculate next scheduled update
|
||||
month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
|
||||
stories_count = Story.objects.filter(story_feed=self.feed, story_date__gte=month_ago).count()
|
||||
stories_count = stories_count
|
||||
self.feed.stories_per_month = stories_count
|
||||
updates_per_day = max(30, stories_count) / 30.0 * 12
|
||||
minutes_to_next_update = 60 * 24 / updates_per_day
|
||||
random_factor = random.randint(0,int(minutes_to_next_update/4))
|
||||
next_scheduled_update = datetime.datetime.now() + datetime.timedelta(
|
||||
minutes=minutes_to_next_update+random_factor
|
||||
)
|
||||
self.feed.next_scheduled_update = next_scheduled_update
|
||||
|
||||
if hasattr(self.fpf, 'status'):
|
||||
if self.options['verbose']:
|
||||
logging.debug(u'[%d] HTTP status %d: %s' % (self.feed.id,
|
||||
|
@ -118,6 +133,7 @@ class ProcessFeed:
|
|||
logging.debug('[%d] Feed has not changed since ' \
|
||||
'last check: %s' % (self.feed.id,
|
||||
self.feed.feed_address))
|
||||
self.feed.save()
|
||||
return FEED_SAME, ret_values
|
||||
|
||||
if self.fpf.status >= 400:
|
||||
|
@ -125,6 +141,7 @@ class ProcessFeed:
|
|||
logging.error('[%d] !HTTP_ERROR! %d: %s' % (self.feed.id,
|
||||
self.fpf.status,
|
||||
self.feed.feed_address))
|
||||
self.feed.save()
|
||||
return FEED_ERRHTTP, ret_values
|
||||
|
||||
if hasattr(self.fpf, 'bozo') and self.fpf.bozo:
|
||||
|
@ -147,6 +164,7 @@ class ProcessFeed:
|
|||
self.feed.feed_tagline = self.fpf.feed.get('tagline', self.feed.feed_tagline)
|
||||
self.feed.feed_link = self.fpf.feed.get('link', self.feed.feed_link)
|
||||
self.feed.last_update = datetime.datetime.now()
|
||||
|
||||
|
||||
if False and self.options['verbose']:
|
||||
logging.debug(u'[%d] Feed info for: %s\n' \
|
||||
|
@ -242,7 +260,6 @@ class Dispatcher:
|
|||
identity = "X"
|
||||
if current_process._identity:
|
||||
identity = current_process._identity[0]
|
||||
# print feed_queue
|
||||
for feed in feed_queue:
|
||||
# print "Process Feed: [%s] %s" % (current_process.name, feed)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue