NewsBlur/utils/json_fetcher.py

63 lines
2.1 KiB
Python
Raw Permalink Normal View History

2017-05-22 16:46:56 -07:00
import datetime
2024-04-24 09:50:42 -04:00
2017-05-22 16:46:56 -07:00
import dateutil.parser
from django.conf import settings
from django.utils import feedgenerator
2024-04-24 09:50:42 -04:00
2017-05-22 16:46:56 -07:00
from utils import log as logging
from utils.json_functions import decode
2024-04-24 09:43:56 -04:00
2017-05-22 16:46:56 -07:00
class JSONFetcher:
    """Convert a fetched JSON Feed document into an Atom XML document.

    The JSON payload is decoded, its feed-level and item-level fields are
    mapped onto Django ``feedgenerator`` arguments, and the resulting Atom
    feed is serialized so it can be handled like any other XML feed.
    """

    def __init__(self, feed, options=None):
        """Remember the feed being fetched and any fetch options.

        Args:
            feed: Feed object; ``fetch`` reads its ``feed_address`` and
                ``log_title`` attributes.
            options: Optional dict of options; an empty dict is used when
                omitted or falsy.
        """
        self.feed = feed
        self.options = options or {}

    def fetch(self, address, raw_feed):
        """Build an Atom document from the JSON Feed held in ``raw_feed``.

        Args:
            address: URL used in the failure log line; falls back to
                ``self.feed.feed_address`` when falsy.
            raw_feed: Response-like object whose ``content`` attribute is
                passed to ``decode``.

        Returns:
            The value of ``Atom1Feed.writeString("utf-8")``, or ``None`` when
            the payload could not be decoded into a JSON object.
        """
        if not address:
            address = self.feed.feed_address

        json_feed = decode(raw_feed.content)
        # Every lookup below uses ``.get``, so anything other than a JSON
        # object (e.g. a top-level list) is treated as a failed fetch instead
        # of raising AttributeError.
        if not json_feed or not isinstance(json_feed, dict):
            logging.debug(" ***> [%-30s] ~FRJSON fetch failed: %s" % (self.feed.log_title[:30], address))
            return

        title = json_feed.get("title", "[Untitled]")
        data = {
            "title": title,
            "link": json_feed.get("home_page_url", ""),
            # Prefer the feed's own description; fall back to the title (the
            # previous behaviour) when the feed does not provide one.
            "description": json_feed.get("description", json_feed.get("title", "")),
            "lastBuildDate": datetime.datetime.utcnow(),
            "generator": "NewsBlur JSON Feed - %s" % settings.NEWSBLUR_URL,
            "docs": None,
            "feed_url": json_feed.get("feed_url"),
        }

        rss = feedgenerator.Atom1Feed(**data)

        # ``items`` may be present but null; treat that like an empty feed.
        for item in json_feed.get("items") or []:
            if not isinstance(item, dict):
                # A malformed entry should not abort the whole feed.
                continue
            story_data = self.json_feed_story(item)
            rss.add_item(**story_data)

        return rss.writeString("utf-8")

    def json_feed_story(self, item):
        """Map one JSON Feed item onto ``feedgenerator.add_item`` arguments.

        Args:
            item: Dict describing a single JSON Feed item.

        Returns:
            Dict with the keys ``title``, ``link``, ``description``,
            ``author_name``, ``categories``, ``unique_id`` and ``pubdate``.
            ``pubdate`` is always a timezone-aware ``datetime``.
        """
        # "authors" (a list) takes precedence; the singular "author" object is
        # used when "authors" is missing or empty.
        authors = item.get("authors") or item.get("author") or {}

        return {
            "title": item.get("title", ""),
            "link": item.get("external_url", item.get("url", "")),
            "description": item.get("content_html", item.get("content_text", "")),
            "author_name": self._author_name(authors),
            "categories": item.get("tags", []),
            "unique_id": str(item.get("id", item.get("url", ""))),
            "pubdate": self._published_date(item.get("date_published")),
        }

    @staticmethod
    def _author_name(authors):
        """Return a comma-separated author string from a JSON Feed author value."""
        if isinstance(authors, str):
            return authors
        if isinstance(authors, dict):
            authors = [authors]
        if not isinstance(authors, list):
            return ""

        names = []
        for author in authors:
            if isinstance(author, dict):
                name = author.get("name")
            elif isinstance(author, str):
                name = author
            else:
                name = None
            # Skip null/empty names so ``join`` never sees a non-string and the
            # result has no dangling separators.
            if name:
                names.append(str(name))
        return ", ".join(names)

    @staticmethod
    def _published_date(raw_date):
        """Parse ``date_published`` into an aware datetime, defaulting to now."""
        fallback = datetime.datetime.now(datetime.timezone.utc)
        if not raw_date:
            return fallback

        try:
            parsed = dateutil.parser.parse(raw_date)
        except (TypeError, ValueError, OverflowError):
            # One unparseable date should not abort the whole feed.
            return fallback

        # Django's feed generator compares item dates to find the latest one,
        # and comparing naive with aware datetimes raises TypeError. Naive
        # values are serialized as UTC by the generator anyway, so tag them
        # as UTC explicitly.
        if parsed.utcoffset() is None:
            parsed = parsed.replace(tzinfo=datetime.timezone.utc)
        return parsed