mirror of
https://github.com/viq/NewsBlur.git
synced 2025-09-18 21:43:31 +00:00
Merge branch 'feed_settings' into jammit
* feed_settings: Correcting for feeds that have incorrect media content. Adding media enclosures in an audio tag. Using story date when there is no guid. Using story date when there is no guid. Adding verbose flag to feed updates to diagnose errors. Fixing issue with stories that have no permalink.
This commit is contained in:
commit
e586c1736f
3 changed files with 51 additions and 17 deletions
|
@ -575,7 +575,7 @@ class Feed(models.Model):
|
|||
self.data.feed_classifier_counts = json.encode(scores)
|
||||
self.data.save()
|
||||
|
||||
def update(self, force=False, single_threaded=True, compute_scores=True, slave_db=None):
|
||||
def update(self, verbose=False, force=False, single_threaded=True, compute_scores=True, slave_db=None):
|
||||
from utils import feed_fetcher
|
||||
try:
|
||||
self.feed_address = self.feed_address % {'NEWSBLUR_DIR': settings.NEWSBLUR_DIR}
|
||||
|
@ -586,7 +586,7 @@ class Feed(models.Model):
|
|||
self.set_next_scheduled_update()
|
||||
|
||||
options = {
|
||||
'verbose': 1 if not force else 2,
|
||||
'verbose': verbose,
|
||||
'timeout': 10,
|
||||
'single_threaded': single_threaded,
|
||||
'force': force,
|
||||
|
@ -607,7 +607,7 @@ class Feed(models.Model):
|
|||
|
||||
return feed
|
||||
|
||||
def add_update_stories(self, stories, existing_stories):
|
||||
def add_update_stories(self, stories, existing_stories, verbose=False):
|
||||
ret_values = {
|
||||
ENTRY_NEW:0,
|
||||
ENTRY_UPDATED:0,
|
||||
|
@ -635,20 +635,21 @@ class Feed(models.Model):
|
|||
story_content = story_content,
|
||||
story_author_name = story.get('author'),
|
||||
story_permalink = story.get('link'),
|
||||
story_guid = story.get('guid') or story.get('id') or story.get('link'),
|
||||
story_guid = story.get('guid'),
|
||||
story_tags = story_tags
|
||||
)
|
||||
try:
|
||||
s.save()
|
||||
ret_values[ENTRY_NEW] += 1
|
||||
cache.set('updated_feed:%s' % self.id, 1)
|
||||
except (IntegrityError, OperationError):
|
||||
except (IntegrityError, OperationError), e:
|
||||
ret_values[ENTRY_ERR] += 1
|
||||
# logging.info('Saving new story, IntegrityError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
|
||||
if verbose:
|
||||
logging.info('Saving new story, IntegrityError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
|
||||
elif existing_story and story_has_changed:
|
||||
# update story
|
||||
# logging.debug('- Updated story in feed (%s - %s): %s / %s' % (self.feed_title, story.get('title'), len(existing_story.story_content), len(story_content)))
|
||||
story_guid = story.get('guid') or story.get('id') or story.get('link')
|
||||
|
||||
original_content = None
|
||||
try:
|
||||
if existing_story and existing_story.id:
|
||||
|
@ -657,8 +658,10 @@ class Feed(models.Model):
|
|||
existing_story = MStory.objects.get(story_feed_id=existing_story.story_feed_id, story_guid=existing_story.story_guid)
|
||||
else:
|
||||
raise MStory.DoesNotExist
|
||||
except (MStory.DoesNotExist, OperationError):
|
||||
except (MStory.DoesNotExist, OperationError), e:
|
||||
ret_values[ENTRY_ERR] += 1
|
||||
if verbose:
|
||||
logging.info('Saving existing story, OperationError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
|
||||
continue
|
||||
if existing_story.story_original_content_z:
|
||||
original_content = zlib.decompress(existing_story.story_original_content_z)
|
||||
|
@ -674,8 +677,8 @@ class Feed(models.Model):
|
|||
# logging.debug("\t\tDiff content: %s" % diff.getDiff())
|
||||
# if existing_story.story_title != story.get('title'):
|
||||
# logging.debug('\tExisting title / New: : \n\t\t- %s\n\t\t- %s' % (existing_story.story_title, story.get('title')))
|
||||
if existing_story.story_guid != story_guid:
|
||||
self.update_read_stories_with_new_guid(existing_story.story_guid, story_guid)
|
||||
if existing_story.story_guid != story.get('guid'):
|
||||
self.update_read_stories_with_new_guid(existing_story.story_guid, story.get('guid'))
|
||||
|
||||
existing_story.story_feed = self.pk
|
||||
existing_story.story_date = story.get('published')
|
||||
|
@ -684,7 +687,7 @@ class Feed(models.Model):
|
|||
existing_story.story_original_content = original_content
|
||||
existing_story.story_author_name = story.get('author')
|
||||
existing_story.story_permalink = story.get('link')
|
||||
existing_story.story_guid = story_guid
|
||||
existing_story.story_guid = story.get('guid')
|
||||
existing_story.story_tags = story_tags
|
||||
try:
|
||||
existing_story.save()
|
||||
|
@ -692,10 +695,12 @@ class Feed(models.Model):
|
|||
cache.set('updated_feed:%s' % self.id, 1)
|
||||
except (IntegrityError, OperationError):
|
||||
ret_values[ENTRY_ERR] += 1
|
||||
logging.info('Saving updated story, IntegrityError: %s - %s' % (self.feed_title, story.get('title')))
|
||||
if verbose:
|
||||
logging.info('Saving updated story, IntegrityError: %s - %s' % (self.feed_title, story.get('title')))
|
||||
except ValidationError, e:
|
||||
ret_values[ENTRY_ERR] += 1
|
||||
logging.info('Saving updated story, ValidationError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
|
||||
if verbose:
|
||||
logging.info('Saving updated story, ValidationError: %s - %s: %s' % (self.feed_title, story.get('title'), e))
|
||||
else:
|
||||
ret_values[ENTRY_SAME] += 1
|
||||
# logging.debug("Unchanged story: %s " % story.get('title'))
|
||||
|
@ -817,7 +822,7 @@ class Feed(models.Model):
|
|||
story['story_content'] = story_db.story_content_z and zlib.decompress(story_db.story_content_z) or ''
|
||||
story['story_permalink'] = urllib.unquote(urllib.unquote(story_db.story_permalink))
|
||||
story['story_feed_id'] = feed_id or story_db.story_feed_id
|
||||
story['id'] = story_db.story_guid
|
||||
story['id'] = story_db.story_guid or story_db.story_date
|
||||
if hasattr(story_db, 'starred_date'):
|
||||
story['starred_date'] = story_db.starred_date
|
||||
if text:
|
||||
|
@ -827,8 +832,7 @@ class Feed(models.Model):
|
|||
text = re.sub(r'\n+', '\n\n', text)
|
||||
text = re.sub(r'\t+', '\t', text)
|
||||
story['text'] = text
|
||||
|
||||
|
||||
|
||||
return story
|
||||
|
||||
def get_tags(self, entry):
|
||||
|
@ -1334,3 +1338,18 @@ def merge_feeds(original_feed_id, duplicate_feed_id, force=False):
|
|||
duplicate_feed.delete()
|
||||
original_feed.count_subscribers()
|
||||
|
||||
def rewrite_folders(folders, original_feed, duplicate_feed):
|
||||
new_folders = []
|
||||
|
||||
for k, folder in enumerate(folders):
|
||||
if isinstance(folder, int):
|
||||
if folder == duplicate_feed.pk:
|
||||
# logging.info(" ===> Rewrote %s'th item: %s" % (k+1, folders))
|
||||
new_folders.append(original_feed.pk)
|
||||
else:
|
||||
new_folders.append(folder)
|
||||
elif isinstance(folder, dict):
|
||||
for f_k, f_v in folder.items():
|
||||
new_folders.append({f_k: rewrite_folders(f_v, original_feed, duplicate_feed)})
|
||||
|
||||
return new_folders
|
||||
|
|
|
@ -245,7 +245,7 @@ class ProcessFeed:
|
|||
# | (Q(story_guid__in=story_guids)),
|
||||
# story_feed=self.feed
|
||||
# ).order_by('-story_date')
|
||||
ret_values = self.feed.add_update_stories(self.fpf.entries, existing_stories)
|
||||
ret_values = self.feed.add_update_stories(self.fpf.entries, existing_stories, verbose=self.options['verbose'])
|
||||
|
||||
logging.debug(u' ---> [%-30s] ~FYParsed Feed: new~FG=~FG~SB%s~SN~FY up~FG=~FY~SB%s~SN same~FG=~FY%s err~FG=~FR~SB%s' % (
|
||||
unicode(self.feed)[:30],
|
||||
|
|
|
@ -67,6 +67,21 @@ def pre_process_story(entry):
|
|||
entry['link'] = urlquote(entry_link)
|
||||
if isinstance(entry.get('guid'), dict):
|
||||
entry['guid'] = unicode(entry['guid'])
|
||||
entry_content = ""
|
||||
if entry.get('content'):
|
||||
entry_content = entry['content'][0]['value']
|
||||
if entry.get('media_content') and 'audio controls' not in entry_content:
|
||||
media_url = entry['media_content'][0].get('url') and entry['media_content'][0]['url']
|
||||
media_type = entry['media_content'][0].get('type') and entry['media_content'][0]['type']
|
||||
if media_url and media_type:
|
||||
entry['content'][0]['value'] += """<br><br>
|
||||
<audio controls="controls">
|
||||
<source src="%(media_url)s" type="%(media_type)s" />
|
||||
<a href="%(media_url)s">%(media_url)s</a>
|
||||
</audio>""" % {'media_url': media_url, 'media_type': media_type}
|
||||
|
||||
entry['guid'] = entry.get('guid') or entry.get('id') or entry.get('link') or str(entry.get('published'))
|
||||
|
||||
return entry
|
||||
|
||||
class bunch(dict):
|
||||
|
|
Loading…
Add table
Reference in a new issue