2017-05-22 16:46:56 -07:00
|
|
|
import datetime
|
|
|
|
import dateutil.parser
|
|
|
|
from django.conf import settings
|
|
|
|
from django.utils import feedgenerator
|
|
|
|
from utils import log as logging
|
|
|
|
from utils.json_functions import decode
|
|
|
|
|
|
|
|
class JSONFetcher:
    """Convert a JSON Feed document into a serialized Atom feed.

    The fetcher decodes the raw response body, maps the feed-level and
    item-level JSON Feed fields onto Django's ``feedgenerator`` keyword
    arguments and returns the resulting XML string.
    """

    def __init__(self, feed, options=None):
        """Store the feed being fetched and optional fetch options.

        ``feed`` must expose ``feed_address`` and ``log_title`` (both are
        read by :meth:`fetch`). ``options`` defaults to an empty dict.
        """
        self.feed = feed
        self.options = options or {}

    def fetch(self, address, raw_feed):
        """Build an Atom document from the JSON Feed in ``raw_feed.content``.

        ``address`` is only used for logging; when falsy it falls back to
        ``self.feed.feed_address``.

        Returns the value of ``Atom1Feed.writeString('utf-8')``, or ``None``
        when the payload decodes to nothing usable.
        """
        if not address:
            address = self.feed.feed_address

        json_feed = decode(raw_feed.content)
        # A JSON Feed document is a JSON object. Anything else (empty result,
        # array, scalar) cannot be mapped and is treated as a failed fetch.
        if not json_feed or not isinstance(json_feed, dict):
            logging.debug(' ***> [%-30s] ~FRJSON fetch failed: %s' %
                          (self.feed.log_title[:30], address))
            return

        data = {}
        data['title'] = json_feed.get('title', '[Untitled]')
        data['link'] = json_feed.get('home_page_url', "")
        # Prefer the feed's own description; keep the historical fallback of
        # reusing the title when the feed does not provide one.
        data['description'] = (json_feed.get('description')
                               or json_feed.get('title', ""))
        data['lastBuildDate'] = datetime.datetime.utcnow()
        data['generator'] = 'NewsBlur JSON Feed - %s' % settings.NEWSBLUR_URL
        data['docs'] = None
        data['feed_url'] = json_feed.get('feed_url')

        rss = feedgenerator.Atom1Feed(**data)

        # ``or []`` also covers an explicit ``"items": null``.
        for item in json_feed.get('items') or []:
            story_data = self.json_feed_story(item)
            rss.add_item(**story_data)

        return rss.writeString('utf-8')

    @staticmethod
    def _author_name(item):
        """Return a display name for the item's author(s), or ``""``.

        Accepts both the JSON Feed 1.1 ``authors`` list and the 1.0 single
        ``author`` object. Multiple names are joined with ``", "``.
        """
        authors = item.get('authors') or item.get('author') or {}
        if isinstance(authors, dict):
            authors = [authors]
        elif not isinstance(authors, (list, tuple)):
            return ""
        names = [author.get('name') for author in authors
                 if isinstance(author, dict)]
        return ", ".join(str(name) for name in names if name)

    def json_feed_story(self, item):
        """Map one JSON Feed item onto ``feedgenerator.add_item`` kwargs.

        Returns a dict with the keys ``title``, ``link``, ``description``,
        ``author_name``, ``categories``, ``unique_id`` and ``pubdate``.
        ``pubdate`` is the parsed ``date_published`` value, or the current
        local time when that field is missing or cannot be parsed.
        """
        date_published = datetime.datetime.now()
        pubdate = item.get('date_published', None)
        if pubdate:
            try:
                date_published = dateutil.parser.parse(pubdate)
            except (ValueError, OverflowError, TypeError):
                # One malformed date must not abort the whole feed; keep the
                # same default that is used when the field is absent.
                pass

        story = {
            'title': item.get('title', ""),
            'link': item.get('external_url', item.get('url', "")),
            'description': item.get('content_html', item.get('content_text', "")),
            'author_name': self._author_name(item),
            'categories': item.get('tags', []),
            'unique_id': str(item.get('id', item.get('url', ""))),
            'pubdate': date_published,
        }

        return story
|