mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-09-18 21:50:56 +00:00
High volume feeds
This commit is contained in:
parent
e632f13c68
commit
66c5355eac
1 changed file with 15 additions and 3 deletions
|
@@ -143,6 +143,8 @@ def preprocess_feed_encoding(raw_xml):
|
|||
# http://feedjack.googlecode.com
|
||||
|
||||
MAX_ENTRIES_TO_PROCESS = 100
|
||||
MAX_ENTRIES_HIGH_VOLUME = 250
|
||||
HIGH_VOLUME_FEED_URLS = ['arxiv.org'] # Feeds that can handle more stories per fetch
|
||||
|
||||
FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = list(range(5))
|
||||
|
||||
|
@@ -566,10 +568,20 @@ class ProcessFeed:
|
|||
)
|
||||
|
||||
self.feed_entries = self.fpf.entries
|
||||
# If there are more than 100 entries, we should sort the entries in date descending order and cut them off
|
||||
if len(self.feed_entries) > MAX_ENTRIES_TO_PROCESS:
|
||||
|
||||
# Check if this is a high-volume feed that can handle more stories
|
||||
max_entries = MAX_ENTRIES_TO_PROCESS
|
||||
feed_address_lower = self.feed.feed_address.lower()
|
||||
for high_volume_url in HIGH_VOLUME_FEED_URLS:
|
||||
if high_volume_url in feed_address_lower:
|
||||
max_entries = MAX_ENTRIES_HIGH_VOLUME
|
||||
logging.debug(f" ---> [{self.feed.log_title[:30]:<30}] High-volume feed detected ({high_volume_url}), allowing up to {max_entries} stories")
|
||||
break
|
||||
|
||||
# If there are more than max_entries, we should sort the entries in date descending order and cut them off
|
||||
if len(self.feed_entries) > max_entries:
|
||||
self.feed_entries = sorted(self.feed_entries, key=lambda x: extract_story_date(x), reverse=True)[
|
||||
:MAX_ENTRIES_TO_PROCESS
|
||||
:max_entries
|
||||
]
|
||||
|
||||
if not self.options.get("archive_page", None):
|
||||
|
|
Loading…
Add table
Reference in a new issue