NewsBlur/apps/rss_feeds/management/commands/trim_feeds.py

41 lines
1.3 KiB
Python
Raw Normal View History

2024-04-24 09:50:42 -04:00
import gc
2010-01-26 19:59:43 -05:00
from django.core.management.base import BaseCommand
2024-04-24 09:50:42 -04:00
2010-01-26 19:59:43 -05:00
from apps.rss_feeds.models import Feed
2024-04-24 09:43:56 -04:00
class Command(BaseCommand):
    """Management command that calls ``trim_feed`` on a selection of feeds.

    Without ``--feed`` the selection is every feed that has been fetched at
    least once and has neither active nor premium subscribers. With
    ``--feed`` only the feed matching the given id is processed.
    """

    help = (
        "Trim feeds that have been fetched once and have no active or premium "
        "subscribers, or only the feed given with --feed."
    )

    def add_arguments(self, parser):
        """Register the optional ``-f/--feed`` argument."""
        parser.add_argument(
            "-f",
            "--feed",
            dest="feed",
            default=None,
            help="Only trim the feed with this id.",
        )

    def handle(self, *args, **options):
        """Select the feeds to process and trim each one verbosely."""
        if not options["feed"]:
            feeds = Feed.objects.filter(fetched_once=True, active_subscribers=0, premium_subscribers=0)
        else:
            # NOTE(review): this filters on a `feed_id` lookup; confirm that
            # Feed exposes that field (as opposed to `pk`/`id`).
            feeds = Feed.objects.filter(feed_id=options["feed"])

        # Walk the selection in primary-key chunks rather than loading every
        # row at once.
        for f in queryset_iterator(feeds):
            f.trim_feed(verbose=True)
2024-04-24 09:43:56 -04:00
def queryset_iterator(queryset, chunksize=100):
    """Iterate over a Django queryset in primary-key order, chunk by chunk.

    At most ``chunksize`` (default: 100) rows are fetched per query, so only
    one chunk of model instances is held by this generator at a time. Each
    chunk starts strictly after the last primary key already yielded (keyset
    pagination), so every row is yielded exactly once even when primary keys
    are sparse.

    Iteration is capped at the highest primary key present when the generator
    starts; rows with a higher key that appear later are not visited.

    Note that any ordering already applied to ``queryset`` is replaced by
    ordering on ``pk``; ordered querysets are therefore not supported.

    Args:
        queryset: The queryset to iterate. Its primary keys must be
            comparable with ``<``.
        chunksize: Maximum number of rows fetched per query.

    Yields:
        Each row of ``queryset`` in ascending primary-key order. An empty
        queryset yields nothing.
    """
    # Slice instead of indexing with [0] so an empty queryset produces an
    # empty list rather than raising IndexError.
    newest = list(queryset.order_by("-pk")[:1])
    if not newest:
        return
    last_pk = newest[0].pk

    ordered = queryset.order_by("pk").filter(pk__lte=last_pk)
    cursor = None  # primary key of the last row yielded so far
    while cursor is None or cursor < last_pk:
        page = ordered if cursor is None else ordered.filter(pk__gt=cursor)
        rows = list(page[:chunksize])
        if not rows:
            # Nothing left (e.g. rows were deleted while iterating).
            break
        for row in rows:
            yield row
        cursor = rows[-1].pk
        # Drop the finished chunk before collecting so it can be reclaimed.
        del rows
        gc.collect()