From 5c1adfd403eb0595c652bba1c48e84845a9f7a8b Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Thu, 31 Oct 2024 10:52:44 -0700
Subject: [PATCH 1/2] Don't guess RSS feed urls on openrss/feedburner domains.

---
 utils/feedfinder_forman.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/utils/feedfinder_forman.py b/utils/feedfinder_forman.py
index 9ee7b34ee..627278e38 100755
--- a/utils/feedfinder_forman.py
+++ b/utils/feedfinder_forman.py
@@ -133,8 +133,9 @@ def find_feeds(url, check_all=False, user_agent=None):
         return sort_urls(urls)
 
     # Guessing potential URLs.
-    fns = ["atom.xml", "index.atom", "index.rdf", "rss.xml", "index.xml", "index.rss", "index.json"]
-    urls += list(filter(finder.is_feed, [urlparse.urljoin(url, f) for f in fns]))
+    if not any(ignored_domain in url for ignored_domain in ["openrss", "feedburner"]):
+        fns = ["atom.xml", "index.atom", "index.rdf", "rss.xml", "index.xml", "index.rss", "index.json"]
+        urls += list(filter(finder.is_feed, [urlparse.urljoin(url, f) for f in fns]))
     return sort_urls(urls)
 
 
From f4d559187b73c69c68c5af2ba2729b1045393539 Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Fri, 1 Nov 2024 10:16:11 -0700
Subject: [PATCH 2/2] Handling no content when finding feeds.

---
 utils/feedfinder_forman.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/feedfinder_forman.py b/utils/feedfinder_forman.py
index 627278e38..8927cd76a 100755
--- a/utils/feedfinder_forman.py
+++ b/utils/feedfinder_forman.py
@@ -44,7 +44,7 @@ class FeedFinder(object):
             logging.warn("Error while getting '{0}'".format(url))
             logging.warn("{0}".format(e))
             return None
-        if not skip_user_agent and r.status_code == 403:
+        if not skip_user_agent and r.status_code in [403, 204]:
             return self.get_feed(url, skip_user_agent=True)
         return r.text
 