NewsBlur-viq/utils/json_fetcher.py
2024-04-24 09:50:42 -04:00

62 lines
2.1 KiB
Python

import datetime
import dateutil.parser
from django.conf import settings
from django.utils import feedgenerator
from utils import log as logging
from utils.json_functions import decode
class JSONFetcher:
def __init__(self, feed, options=None):
self.feed = feed
self.options = options or {}
def fetch(self, address, raw_feed):
if not address:
address = self.feed.feed_address
json_feed = decode(raw_feed.content)
if not json_feed:
logging.debug(" ***> [%-30s] ~FRJSON fetch failed: %s" % (self.feed.log_title[:30], address))
return
data = {}
data["title"] = json_feed.get("title", "[Untitled]")
data["link"] = json_feed.get("home_page_url", "")
data["description"] = json_feed.get("title", "")
data["lastBuildDate"] = datetime.datetime.utcnow()
data["generator"] = "NewsBlur JSON Feed - %s" % settings.NEWSBLUR_URL
data["docs"] = None
data["feed_url"] = json_feed.get("feed_url")
rss = feedgenerator.Atom1Feed(**data)
for item in json_feed.get("items", []):
story_data = self.json_feed_story(item)
rss.add_item(**story_data)
return rss.writeString("utf-8")
def json_feed_story(self, item):
date_published = datetime.datetime.now()
pubdate = item.get("date_published", None)
if pubdate:
date_published = dateutil.parser.parse(pubdate)
authors = item.get("authors", item.get("author", {}))
if isinstance(authors, list):
author_name = ", ".join([author.get("name", "") for author in authors])
else:
author_name = authors.get("name", "")
story = {
"title": item.get("title", ""),
"link": item.get("external_url", item.get("url", "")),
"description": item.get("content_html", item.get("content_text", "")),
"author_name": author_name,
"categories": item.get("tags", []),
"unique_id": str(item.get("id", item.get("url", ""))),
"pubdate": date_published,
}
return story