mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-09-18 21:50:56 +00:00
High volume feeds
This commit is contained in:
parent
e632f13c68
commit
66c5355eac
1 changed file with 15 additions and 3 deletions
|
|
@@ -143,6 +143,8 @@ def preprocess_feed_encoding(raw_xml):
|
||||||
# http://feedjack.googlecode.com
|
# http://feedjack.googlecode.com
|
||||||
|
|
||||||
MAX_ENTRIES_TO_PROCESS = 100
|
MAX_ENTRIES_TO_PROCESS = 100
|
||||||
|
MAX_ENTRIES_HIGH_VOLUME = 250
|
||||||
|
HIGH_VOLUME_FEED_URLS = ['arxiv.org'] # Feeds that can handle more stories per fetch
|
||||||
|
|
||||||
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = list(range(5))
|
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = list(range(5))
|
||||||
|
|
||||||
|
|
@@ -566,10 +568,20 @@ class ProcessFeed:
|
||||||
)
|
)
|
||||||
|
|
||||||
self.feed_entries = self.fpf.entries
|
self.feed_entries = self.fpf.entries
|
||||||
# If there are more than 100 entries, we should sort the entries in date descending order and cut them off
|
|
||||||
if len(self.feed_entries) > MAX_ENTRIES_TO_PROCESS:
|
# Check if this is a high-volume feed that can handle more stories
|
||||||
|
max_entries = MAX_ENTRIES_TO_PROCESS
|
||||||
|
feed_address_lower = self.feed.feed_address.lower()
|
||||||
|
for high_volume_url in HIGH_VOLUME_FEED_URLS:
|
||||||
|
if high_volume_url in feed_address_lower:
|
||||||
|
max_entries = MAX_ENTRIES_HIGH_VOLUME
|
||||||
|
logging.debug(f" ---> [{self.feed.log_title[:30]:<30}] High-volume feed detected ({high_volume_url}), allowing up to {max_entries} stories")
|
||||||
|
break
|
||||||
|
|
||||||
|
# If there are more than max_entries, we should sort the entries in date descending order and cut them off
|
||||||
|
if len(self.feed_entries) > max_entries:
|
||||||
self.feed_entries = sorted(self.feed_entries, key=lambda x: extract_story_date(x), reverse=True)[
|
self.feed_entries = sorted(self.feed_entries, key=lambda x: extract_story_date(x), reverse=True)[
|
||||||
:MAX_ENTRIES_TO_PROCESS
|
:max_entries
|
||||||
]
|
]
|
||||||
|
|
||||||
if not self.options.get("archive_page", None):
|
if not self.options.get("archive_page", None):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue