NewsBlur/utils/feeds.py
Samuel Clay 48ba40e29b Initial
2009-06-16 03:08:55 +00:00


# Originally adapted from IanLewis / dlife
from django.shortcuts import get_list_or_404
from apps.rss_feeds.models import Feed, Story
from utils import feedparser
from utils.dateutil.parser import parse as dateutil_parse
from django.utils.http import urlquote
from django.db.models import Q
import datetime
import time

class FeedInjest(object):
    feed = None

    def __init__(self, feed):
        self.feed = feed

    def update(self):
        stories = []
        feed_items = feedparser.parse(self.feed.feed_address)
        for entry in feed_items['entries']:
            stories.append(entry)
        self.feed.feed_last_update = time.time()
        self.feed.save()
        return stories

    def save_story(self, story):
        # Prefer the entry's full content block; fall back to its summary.
        story_contents = story.get('content')
        if story_contents is not None:
            story_content = story_contents[0]['value']
        else:
            story_content = story.get('summary')
        print 'Story: ', story_content
        s = Story(story_feed=self.feed,
                  story_date=story.get('published'),
                  story_title=story.get('title'),
                  story_content=story_content,
                  story_author=story.get('author'),
                  story_permalink=story.get('link'))
        s.save()

    def include_story(self, entry):
        # Treat the entry as new only if no story from this feed already
        # shares its published date or permalink.
        story_count = Story.objects.filter(
            Q(story_date=entry['published']) | Q(story_permalink=entry['link'])
        ).filter(
            story_feed=self.feed
        ).count()
        return story_count == 0

    def pre_process(self, entry):
        '''
        A hook used to clean up feed entry data before it is processed,
        e.g. normalizing dates and media data before a story is saved.
        '''
        date_published = entry.get('published', entry.get('updated'))
        if not date_published:
            date_published = str(datetime.datetime.utcnow())
        date_published = dateutil_parse(date_published)
        # Convert the date to UTC and strip the timezone info, since MySQL
        # doesn't support timezone-aware datetimes.
        if date_published.utcoffset() is not None:
            date_published = date_published - date_published.utcoffset()
        date_published = date_published.replace(tzinfo=None)
        entry['published'] = date_published

        # URL-quote the link, leaving the protocol prefix intact.
        protocol_index = entry['link'].find("://")
        if protocol_index != -1:
            entry['link'] = (entry['link'][:protocol_index + 3] +
                             urlquote(entry['link'][protocol_index + 3:]))
        else:
            entry['link'] = urlquote(entry['link'])
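
# --- Usage sketch (hypothetical; not part of the original module) ---
# A minimal example of how the methods above presumably fit together in a
# fetch task: pull the entries, normalize each one, skip duplicates, and
# save the rest. The `ingest_feed` name and the assumption that a Feed row
# with a valid feed_address already exists are illustrative, not the
# author's.
def ingest_feed(feed):
    injester = FeedInjest(feed)
    for entry in injester.update():
        injester.pre_process(entry)
        if injester.include_story(entry):
            injester.save_story(entry)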