mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-09-18 21:50:56 +00:00
Not perfect, but a temp fix for feeds that have no dates (like Hacker News).
This commit is contained in:
parent
4cfb2c2075
commit
27d5c1c421
3 changed files with 14 additions and 8 deletions
|
@@ -138,7 +138,7 @@ class Feed(models.Model):
|
|||
story_original_content = original_content,
|
||||
story_author = story_author,
|
||||
story_permalink = story.get('link'),
|
||||
story_guid = story.get('id') or story.get('link')
|
||||
story_guid = story.get('guid') or story.get('id') or story.get('link')
|
||||
)
|
||||
s.tags.clear()
|
||||
[s.tags.add(tcat) for tcat in story_tags]
|
||||
|
@@ -232,14 +232,16 @@ class Feed(models.Model):
|
|||
story_in_system = None
|
||||
story_has_changed = False
|
||||
story_pub_date = story.get('published')
|
||||
story_published_now = story.get('published_now', False)
|
||||
start_date = story_pub_date - datetime.timedelta(hours=8)
|
||||
end_date = story_pub_date + datetime.timedelta(hours=8)
|
||||
|
||||
for existing_story in existing_stories:
|
||||
content_ratio = 0
|
||||
|
||||
if story_pub_date > start_date and story_pub_date < end_date:
|
||||
if story.get('id') and story.get('id') == existing_story.story_guid:
|
||||
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
|
||||
if story_published_now or\
|
||||
(story_pub_date > start_date and story_pub_date < end_date):
|
||||
if story.get('guid') and story.get('guid') == existing_story.story_guid:
|
||||
story_in_system = existing_story
|
||||
elif story.get('link') and story.get('link') == existing_story.story_permalink:
|
||||
story_in_system = existing_story
|
||||
|
@@ -273,7 +275,9 @@ class Feed(models.Model):
|
|||
if story_content != existing_story.story_content:
|
||||
story_has_changed = True
|
||||
break
|
||||
|
||||
|
||||
if story_has_changed or not story_in_system:
|
||||
print 'New/updated story: %s' % (story),
|
||||
return story_in_system, story_has_changed
|
||||
|
||||
class Meta:
|
||||
|
|
|
@@ -179,12 +179,13 @@ class ProcessFeed:
|
|||
story_guids = []
|
||||
for entry in self.fpf.entries:
|
||||
story = pre_process_story(entry)
|
||||
if story.get('published') < start_date or not start_date:
|
||||
if story.get('published') < start_date:
|
||||
start_date = story.get('published')
|
||||
if story.get('published') > end_date or not end_date:
|
||||
if story.get('published') > end_date:
|
||||
end_date = story.get('published')
|
||||
story_guids.append(story.get('guid'))
|
||||
story_guids.append(story.get('guid') or story.get('link'))
|
||||
# print 'Story GUIDs: %s' % story_guids
|
||||
# print 'Story start/end: %s %s' % (start_date, end_date)
|
||||
existing_stories = Story.objects.filter(
|
||||
(Q(story_date__gte=start_date) & Q(story_date__lte=end_date))
|
||||
| (Q(story_guid__in=story_guids)),
|
||||
|
|
|
@@ -34,6 +34,7 @@ def pre_process_story(entry):
|
|||
date_published = entry.get('published', entry.get('updated'))
|
||||
if not date_published:
|
||||
date_published = str(datetime.datetime.now())
|
||||
entry['published_now'] = True
|
||||
if not isinstance(date_published, datetime.datetime):
|
||||
date_published = dateutil_parse(date_published)
|
||||
# Change the date to UTC and remove timezone info since
|
||||
|
|
Loading…
Add table
Reference in a new issue