Feed update cascading. Feeds update relative to how often they have new content. This should save a LOT of processing power. Whew.
parent 67cc20c272
commit 71b746a48d
3 changed files with 28 additions and 9 deletions
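The heart of the change is the scheduling math added to ProcessFeed (third file below): a feed's update interval is derived from how many stories it published in the last 30 days, plus a little random jitter. As a rough standalone sketch, not part of the commit itself, the calculation looks like this; schedule_next_update is a hypothetical helper name, and the formula simply mirrors the lines in the diff.

import datetime
import random

def schedule_next_update(stories_per_month):
    # Busier feeds are polled more often: a feed with 30 or fewer
    # stories a month gets 12 checks a day, and the rate scales up
    # linearly with story volume from there.
    updates_per_day = max(30, stories_per_month) / 30.0 * 12
    minutes_to_next_update = 60 * 24 / updates_per_day
    # Jitter of up to a quarter of the interval keeps feeds from all
    # coming due at the same moment.
    random_factor = random.randint(0, int(minutes_to_next_update / 4))
    return datetime.datetime.now() + datetime.timedelta(
        minutes=minutes_to_next_update + random_factor)

Before jitter, that works out to roughly one fetch every 2 hours for a feed with 30 or fewer stories a month, every hour at 60 stories, and every 15 minutes at 240 stories.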
@@ -8,6 +8,7 @@ import logging
 import socket
 import os
 import math
+import datetime


 class Command(BaseCommand):
@@ -26,23 +27,21 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         if options['daemonize']:
             daemonize()

-        feeds = Feed.objects.all().order_by('?')
+        socket.setdefaulttimeout(options['timeout'])
+        now = datetime.datetime.now()
+        feeds = Feed.objects.filter(next_scheduled_update__lte=now).order_by('?')

         num_workers = min(len(feeds), options['workerthreads'])

         if options['single_threaded']:
             num_workers = 1

-        # settting socket timeout (default= 10 seconds)
-        socket.setdefaulttimeout(options['timeout'])
-
         disp = feed_fetcher.Dispatcher(options, num_workers)

         feeds_queue = []
         for _ in range(num_workers):
             feeds_queue.append([])

         i = 0
         for feed in feeds:
             feeds_queue[i%num_workers].append(feed)
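Read together, the command-side hunks above reduce to: fetch only the feeds whose next_scheduled_update has already passed, shuffle them, and deal them round-robin onto one queue per worker. A minimal sketch of that flow, assuming the Feed model from this commit is importable; build_feed_queues is a hypothetical name, and the daemonize and single-threaded options are ignored.

import datetime

def build_feed_queues(num_workers):
    # Only feeds that are due for a refresh, in random order so one
    # slow host does not pile up on a single worker.
    # Feed is the model defined in the second file of this commit.
    now = datetime.datetime.now()
    feeds = Feed.objects.filter(next_scheduled_update__lte=now).order_by('?')

    # Never use more workers than there are due feeds, then deal the
    # feeds out round-robin, one list per worker.
    num_workers = min(len(feeds), num_workers)
    queues = [[] for _ in range(num_workers)]
    for i, feed in enumerate(feeds):
        queues[i % num_workers].append(feed)
    return queues

Each per-worker list is then handed to the feed_fetcher.Dispatcher created in the same handler.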
@@ -18,6 +18,7 @@ from django.db.models import Q
 import settings
 import logging
 import difflib
+import datetime
 from utils.diff import HTMLDiff

 USER_AGENT = 'NewsBlur v1.0 - newsblur.com'
@@ -38,6 +39,8 @@ class Feed(models.Model):
     etag = models.CharField(max_length=50, blank=True, null=True)
     last_modified = models.DateTimeField(null=True, blank=True)
     page_data = StoryField(null=True, blank=True)
+    stories_per_month = models.IntegerField(default=0)
+    next_scheduled_update = models.DateTimeField(default=datetime.datetime.now)


     def __unicode__(self):
@@ -15,6 +15,8 @@ import datetime
 import traceback
 import multiprocessing
 import Queue
+import datetime
+import random

 # Refresh feed code adapted from Feedjack.
 # http://feedjack.googlecode.com
@@ -107,6 +109,19 @@ class ProcessFeed:
         logging.debug(u'[%d] Processing %s' % (self.feed.id,
                                                self.feed.feed_title))

+        # Count stories in past month to calculate next scheduled update
+        month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
+        stories_count = Story.objects.filter(story_feed=self.feed, story_date__gte=month_ago).count()
+        stories_count = stories_count
+        self.feed.stories_per_month = stories_count
+        updates_per_day = max(30, stories_count) / 30.0 * 12
+        minutes_to_next_update = 60 * 24 / updates_per_day
+        random_factor = random.randint(0, int(minutes_to_next_update/4))
+        next_scheduled_update = datetime.datetime.now() + datetime.timedelta(
+            minutes=minutes_to_next_update+random_factor
+        )
+        self.feed.next_scheduled_update = next_scheduled_update
+
         if hasattr(self.fpf, 'status'):
             if self.options['verbose']:
                 logging.debug(u'[%d] HTTP status %d: %s' % (self.feed.id,
@@ -118,6 +133,7 @@
                     logging.debug('[%d] Feed has not changed since ' \
                                   'last check: %s' % (self.feed.id,
                                                       self.feed.feed_address))
+                self.feed.save()
                 return FEED_SAME, ret_values

             if self.fpf.status >= 400:
@@ -125,6 +141,7 @@
                 logging.error('[%d] !HTTP_ERROR! %d: %s' % (self.feed.id,
                                                             self.fpf.status,
                                                             self.feed.feed_address))
+                self.feed.save()
                 return FEED_ERRHTTP, ret_values

         if hasattr(self.fpf, 'bozo') and self.fpf.bozo:
@@ -147,6 +164,7 @@
         self.feed.feed_tagline = self.fpf.feed.get('tagline', self.feed.feed_tagline)
         self.feed.feed_link = self.fpf.feed.get('link', self.feed.feed_link)
         self.feed.last_update = datetime.datetime.now()

+
         if False and self.options['verbose']:
             logging.debug(u'[%d] Feed info for: %s\n' \
@@ -242,7 +260,6 @@ class Dispatcher:
         identity = "X"
         if current_process._identity:
             identity = current_process._identity[0]
-        # print feed_queue
         for feed in feed_queue:
             # print "Process Feed: [%s] %s" % (current_process.name, feed)