diff --git a/apps/rss_feeds/icon_importer.py b/apps/rss_feeds/icon_importer.py
index 9db42f1a4..0cf594098 100644
--- a/apps/rss_feeds/icon_importer.py
+++ b/apps/rss_feeds/icon_importer.py
@@ -10,6 +10,7 @@ import urllib.request
 from io import BytesIO
 from socket import error as SocketError
 
+import numpy as np
 import boto3
 import lxml.html
 import numpy
@@ -380,16 +381,16 @@ class IconImporter(object):
 
         # Reshape array of values to merge color bands. [[R], [G], [B], [A]] => [R, G, B, A]
         if len(shape) > 2:
-            ar = ar.reshape(scipy.product(shape[:2]), shape[2])
+            ar = ar.reshape(np.product(shape[:2]), shape[2])
 
         # Get NUM_CLUSTERS worth of centroids.
-        ar = ar.astype(numpy.float)
+        ar = ar.astype(float)
         codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
 
         # Pare centroids, removing blacks and whites and shades of really dark and really light.
         original_codes = codes
         for low, hi in [(60, 200), (35, 230), (10, 250)]:
-            codes = scipy.array(
+            codes = np.array(
                 [
                     code
                     for code in codes
diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index 6a4d9bde2..181795016 100755
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -46,6 +46,7 @@ from utils.feed_functions import (
     levenshtein_distance,
     relative_timesince,
     seconds_timesince,
+    strip_underscore_from_feed_address,
     timelimit,
 )
 from utils.fields import AutoOneToOneField
@@ -744,7 +745,7 @@ class Feed(models.Model):
                     logging.debug(" ---> Feed points to 'Wierdo' or 'feedsportal', ignoring.")
                     return False, self
                 try:
-                    self.feed_address = feed_address
+                    self.feed_address = strip_underscore_from_feed_address(feed_address)
                     feed = self.save()
                     feed.count_subscribers()
                     # feed.schedule_feed_fetch_immediately() # Don't fetch as it can get stuck in a loop
@@ -3936,7 +3937,7 @@ def merge_feeds(original_feed_id, duplicate_feed_id, force=False):
         original_feed, duplicate_feed = duplicate_feed, original_feed
         original_feed_id, duplicate_feed_id = duplicate_feed_id, original_feed_id
         if branched_original:
-            original_feed.feed_address = duplicate_feed.feed_address
+            original_feed.feed_address = strip_underscore_from_feed_address(duplicate_feed.feed_address)
 
     logging.info(
         " ---> Feed: [%s - %s] %s - %s"
diff --git a/utils/feed_fetcher.py b/utils/feed_fetcher.py
index 2955ac203..5b2c74701 100644
--- a/utils/feed_fetcher.py
+++ b/utils/feed_fetcher.py
@@ -52,7 +52,7 @@ from sentry_sdk import capture_exception, flush
 from utils import json_functions as json
 from utils import log as logging
 from utils.facebook_fetcher import FacebookFetcher
-from utils.feed_functions import TimeoutError, timelimit
+from utils.feed_functions import TimeoutError, strip_underscore_from_feed_address, timelimit
 from utils.json_fetcher import JSONFetcher
 from utils.story_functions import linkify, pre_process_story, strip_tags
 from utils.twitter_fetcher import TwitterFetcher
@@ -66,6 +66,8 @@ from utils.youtube_fetcher import YoutubeFetcher
 
 FEED_OK, FEED_SAME, FEED_ERRPARSE, FEED_ERRHTTP, FEED_ERREXC = list(range(5))
 
+NO_UNDERSCORE_ADDRESSES = ["jwz"]
+
 
 class FetchFeed:
     def __init__(self, feed_id, options):
@@ -111,7 +113,7 @@ class FetchFeed:
                 address = self.options["archive_page_link"]
             elif self.options.get("archive_page", None):
                 address = qurl(address, add={self.options["archive_page_key"]: self.options["archive_page"]})
-            elif address.startswith("http"):
+            elif address.startswith("http") and not any(item in address for item in NO_UNDERSCORE_ADDRESSES):
                 address = qurl(address, add={"_": random.randint(0, 10000)})
             logging.debug(" ---> [%-30s] ~FBForcing fetch: %s" % (self.feed.log_title[:30], address))
         elif not self.feed.fetched_once or not self.feed.known_good:
@@ -521,7 +523,7 @@ class ProcessFeed:
             address = self.fpf.href
             if self.options["force"] and address:
                 address = qurl(address, remove=["_"])
-            self.feed.feed_address = address
+            self.feed.feed_address = strip_underscore_from_feed_address(address)
         if not self.feed.known_good:
             self.feed.fetched_once = True
             logging.debug(
diff --git a/utils/feed_functions.py b/utils/feed_functions.py
index 74e84b7d5..cd00d3455 100644
--- a/utils/feed_functions.py
+++ b/utils/feed_functions.py
@@ -9,6 +9,7 @@ import urllib.parse
 import urllib.request
 import warnings
 
+from qurl import qurl
 from django.utils.encoding import smart_str
 from django.utils.translation import ungettext
 
@@ -436,3 +437,8 @@ if __name__ == "__main__":
 def chunks(l, n):
     for i in range(0, len(l), n):
         yield l[i : i + n]
+
+
+def strip_underscore_from_feed_address(feed_address):
+    # Strip the _=#### cache buster from feed_address
+    return qurl(feed_address, remove=["_"])
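Usage sketch (not part of the patch): how the forced-fetch cache buster and the new helper are expected to interact, assuming the qurl(url, add=..., remove=...) signature already used in feed_fetcher.py; the feed URL and the fixed "_" value below are illustrative only.

from qurl import qurl

from utils.feed_functions import strip_underscore_from_feed_address

address = "http://example.com/rss"  # illustrative feed URL, not a real feed

# Forced fetches append a random cache-busting `_` query param, unless the
# address matches an entry in NO_UNDERSCORE_ADDRESSES (e.g. "jwz").
busted = qurl(address, add={"_": 4242})
print(busted)  # expected: http://example.com/rss?_=4242

# Before the address is written back to Feed.feed_address, the `_` param is
# stripped so the cache buster is never persisted on the feed record.
print(strip_underscore_from_feed_address(busted))  # expected: http://example.com/rss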