mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
Reading stories from slave db during feed update.
This commit is contained in:
parent
dd440c0080
commit
040b755694
5 changed files with 59 additions and 14 deletions
|
@ -63,6 +63,12 @@ class Command(BaseCommand):
|
||||||
|
|
||||||
options['compute_scores'] = True
|
options['compute_scores'] = True
|
||||||
|
|
||||||
|
|
||||||
|
import pymongo
|
||||||
|
db = pymongo.Connection(settings.MONGODB_SLAVE['host'], slave_okay=True, replicaset='nbset').newsblur
|
||||||
|
|
||||||
|
options['slave_db'] = db
|
||||||
|
|
||||||
disp = feed_fetcher.Dispatcher(options, num_workers)
|
disp = feed_fetcher.Dispatcher(options, num_workers)
|
||||||
|
|
||||||
feeds_queue = []
|
feeds_queue = []
|
||||||
|
|
|
@ -8,7 +8,6 @@ import zlib
|
||||||
import urllib
|
import urllib
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from BeautifulSoup import BeautifulStoneSoup
|
|
||||||
# from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures
|
# from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.db import IntegrityError
|
from django.db import IntegrityError
|
||||||
|
@ -550,7 +549,7 @@ class Feed(models.Model):
|
||||||
self.data.feed_classifier_counts = json.encode(scores)
|
self.data.feed_classifier_counts = json.encode(scores)
|
||||||
self.data.save()
|
self.data.save()
|
||||||
|
|
||||||
def update(self, force=False, single_threaded=True, compute_scores=True):
|
def update(self, force=False, single_threaded=True, compute_scores=True, slave_db=None):
|
||||||
from utils import feed_fetcher
|
from utils import feed_fetcher
|
||||||
try:
|
try:
|
||||||
self.feed_address = self.feed_address % {'NEWSBLUR_DIR': settings.NEWSBLUR_DIR}
|
self.feed_address = self.feed_address % {'NEWSBLUR_DIR': settings.NEWSBLUR_DIR}
|
||||||
|
@ -566,6 +565,7 @@ class Feed(models.Model):
|
||||||
'single_threaded': single_threaded,
|
'single_threaded': single_threaded,
|
||||||
'force': force,
|
'force': force,
|
||||||
'compute_scores': compute_scores,
|
'compute_scores': compute_scores,
|
||||||
|
'slave_db': slave_db,
|
||||||
}
|
}
|
||||||
disp = feed_fetcher.Dispatcher(options, 1)
|
disp = feed_fetcher.Dispatcher(options, 1)
|
||||||
disp.add_jobs([[self.pk]])
|
disp.add_jobs([[self.pk]])
|
||||||
|
@ -624,6 +624,7 @@ class Feed(models.Model):
|
||||||
# logging.debug('- Updated story in feed (%s - %s): %s / %s' % (self.feed_title, story.get('title'), len(existing_story.story_content), len(story_content)))
|
# logging.debug('- Updated story in feed (%s - %s): %s / %s' % (self.feed_title, story.get('title'), len(existing_story.story_content), len(story_content)))
|
||||||
story_guid = story.get('guid') or story.get('id') or story.get('link')
|
story_guid = story.get('guid') or story.get('id') or story.get('link')
|
||||||
original_content = None
|
original_content = None
|
||||||
|
existing_story = MStory.objects.get(story_feed_id=existing_story.story_feed_id, story_guid=existing_story.story_guid)
|
||||||
if existing_story.story_original_content_z:
|
if existing_story.story_original_content_z:
|
||||||
original_content = zlib.decompress(existing_story.story_original_content_z)
|
original_content = zlib.decompress(existing_story.story_original_content_z)
|
||||||
elif existing_story.story_content_z:
|
elif existing_story.story_content_z:
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from celery.task import Task
|
from celery.task import Task
|
||||||
from utils import log as logging
|
from utils import log as logging
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
class UpdateFeeds(Task):
|
class UpdateFeeds(Task):
|
||||||
name = 'update-feeds'
|
name = 'update-feeds'
|
||||||
|
@ -11,10 +12,13 @@ class UpdateFeeds(Task):
|
||||||
if not isinstance(feed_pks, list):
|
if not isinstance(feed_pks, list):
|
||||||
feed_pks = [feed_pks]
|
feed_pks = [feed_pks]
|
||||||
|
|
||||||
|
import pymongo
|
||||||
|
db = pymongo.Connection(settings.MONGODB_SLAVE['host'], slave_okay=True, replicaset='nbset').newsblur
|
||||||
|
|
||||||
for feed_pk in feed_pks:
|
for feed_pk in feed_pks:
|
||||||
try:
|
try:
|
||||||
feed = Feed.objects.get(pk=feed_pk)
|
feed = Feed.objects.get(pk=feed_pk)
|
||||||
feed.update()
|
feed.update(slave_db=db)
|
||||||
except Feed.DoesNotExist:
|
except Feed.DoesNotExist:
|
||||||
logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
|
logging.info(" ---> Feed doesn't exist: [%s]" % feed_pk)
|
||||||
# logging.debug(' Updating: [%s] %s' % (feed_pks, feed))
|
# logging.debug(' Updating: [%s] %s' % (feed_pks, feed))
|
||||||
|
|
|
@ -421,6 +421,14 @@ class MasterSlaveRouter(object):
|
||||||
"Explicitly put all models on all databases."
|
"Explicitly put all models on all databases."
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# ===========
|
||||||
|
# = MongoDB =
|
||||||
|
# ===========
|
||||||
|
|
||||||
|
MONGODB_SLAVE = {
|
||||||
|
'host': 'db01'
|
||||||
|
}
|
||||||
|
|
||||||
# ==================
|
# ==================
|
||||||
# = Configurations =
|
# = Configurations =
|
||||||
# ==================
|
# ==================
|
||||||
|
|
|
@ -11,6 +11,7 @@ from utils import feedparser
|
||||||
from utils.story_functions import pre_process_story
|
from utils.story_functions import pre_process_story
|
||||||
from utils import log as logging
|
from utils import log as logging
|
||||||
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
|
from utils.feed_functions import timelimit, TimeoutError, mail_feed_error_to_admin, utf8encode
|
||||||
|
from utils.story_functions import bunch
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
import traceback
|
import traceback
|
||||||
|
@ -214,6 +215,19 @@ class ProcessFeed:
|
||||||
# if story.get('published') > end_date:
|
# if story.get('published') > end_date:
|
||||||
# end_date = story.get('published')
|
# end_date = story.get('published')
|
||||||
story_guids.append(story.get('guid') or story.get('link'))
|
story_guids.append(story.get('guid') or story.get('link'))
|
||||||
|
|
||||||
|
if self.options['slave_db']:
|
||||||
|
slave_db = self.options['slave_db']
|
||||||
|
stories_db_orig = slave_db.stories.find({
|
||||||
|
"story_feed_id": self.feed.pk,
|
||||||
|
"story_date": {
|
||||||
|
"$gte": start_date,
|
||||||
|
},
|
||||||
|
}).limit(len(story_guids))
|
||||||
|
existing_stories = []
|
||||||
|
for story in stories_db_orig:
|
||||||
|
existing_stories.append(bunch(story))
|
||||||
|
else:
|
||||||
existing_stories = list(MStory.objects(
|
existing_stories = list(MStory.objects(
|
||||||
# story_guid__in=story_guids,
|
# story_guid__in=story_guids,
|
||||||
story_date__gte=start_date,
|
story_date__gte=start_date,
|
||||||
|
@ -227,10 +241,9 @@ class ProcessFeed:
|
||||||
# ).order_by('-story_date')
|
# ).order_by('-story_date')
|
||||||
ret_values = self.feed.add_update_stories(self.fpf.entries, existing_stories)
|
ret_values = self.feed.add_update_stories(self.fpf.entries, existing_stories)
|
||||||
|
|
||||||
logging.debug(u' ---> [%-30s] Parsed Feed: %s' % (
|
logging.debug(u' ---> [%-30s] ~FYParsed Feed: new~FG=~FG~SB%s~SN~FY up~FG=~FY~SB%s~SN same~FG=~FY%s err~FG=~FR~SB%s' % (
|
||||||
unicode(self.feed)[:30],
|
unicode(self.feed)[:30],
|
||||||
u' '.join(u'%s=%d' % (self.entry_trans[key],
|
ret_values[ENTRY_NEW], ret_values[ENTRY_UPDATED], ret_values[ENTRY_SAME], ret_values[ENTRY_ERR]))
|
||||||
ret_values[key]) for key in self.entry_keys),))
|
|
||||||
self.feed.update_all_statistics()
|
self.feed.update_all_statistics()
|
||||||
self.feed.trim_feed()
|
self.feed.trim_feed()
|
||||||
self.feed.save_feed_history(200, "OK")
|
self.feed.save_feed_history(200, "OK")
|
||||||
|
@ -401,6 +414,19 @@ class Dispatcher:
|
||||||
unicode(feed)[:30], user_subs.count(),
|
unicode(feed)[:30], user_subs.count(),
|
||||||
feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
|
feed.num_subscribers, feed.active_subscribers, feed.premium_subscribers))
|
||||||
|
|
||||||
|
if self.options['slave_db']:
|
||||||
|
slave_db = self.options['slave_db']
|
||||||
|
|
||||||
|
stories_db_orig = slave_db.stories.find({
|
||||||
|
"story_feed_id": feed.pk,
|
||||||
|
"story_date": {
|
||||||
|
"$gte": UNREAD_CUTOFF,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
stories_db = []
|
||||||
|
for story in stories_db_orig:
|
||||||
|
stories_db.append(bunch(story))
|
||||||
|
else:
|
||||||
stories_db = MStory.objects(story_feed_id=feed.pk,
|
stories_db = MStory.objects(story_feed_id=feed.pk,
|
||||||
story_date__gte=UNREAD_CUTOFF)
|
story_date__gte=UNREAD_CUTOFF)
|
||||||
for sub in user_subs:
|
for sub in user_subs:
|
||||||
|
|
Loading…
Add table
Reference in a new issue