From c7f4cbb13d5be6d8fc638d26c38dc62a53584976 Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Sun, 24 Jan 2021 21:05:22 -0500 Subject: [PATCH] Fixing a substantial number of encoding errors. Also fixing OPML uploads. --- apps/feed_import/models.py | 3 +++ apps/reader/managers.py | 5 ++--- apps/rss_feeds/models.py | 2 +- apps/rss_feeds/page_importer.py | 17 ++++++++++------- config/mongodb.prod.conf | 4 ++++ utils/exception_middleware.py | 4 ++-- utils/feed_functions.py | 13 ++++++++----- 7 files changed, 30 insertions(+), 18 deletions(-) diff --git a/apps/feed_import/models.py b/apps/feed_import/models.py index fe75e61b8..eb2dfba75 100644 --- a/apps/feed_import/models.py +++ b/apps/feed_import/models.py @@ -188,6 +188,9 @@ class OPMLImporter(Importer): feed=feed_db, user=self.user) except UserSubscription.DoesNotExist: + us = None + + if not us: us = UserSubscription( feed=feed_db, user=self.user, diff --git a/apps/reader/managers.py b/apps/reader/managers.py index c808a4ad7..697fda287 100644 --- a/apps/reader/managers.py +++ b/apps/reader/managers.py @@ -8,7 +8,7 @@ class UserSubscriptionManager(models.Manager): def get(self, *args, **kwargs): try: return super(UserSubscriptionManager, self).get(*args, **kwargs) - except self.model.DoesNotExist: + except self.model.DoesNotExist as exception: if isinstance(kwargs.get('feed'), int): feed_id = kwargs.get('feed') elif 'feed' in kwargs: @@ -32,5 +32,4 @@ class UserSubscriptionManager(models.Manager): logging.debug(" ---> [%s] ~BRFound dupe UserSubscription: ~SB%s (%s)" % (user and user.username, feed, feed_id)) return super(UserSubscriptionManager, self).get(*args, **kwargs) else: - exc_info = sys.exc_info() - raise exc_info[1] + raise exception diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index 1c5ee6962..18150c074 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -365,7 +365,7 @@ class Feed(models.Model): return feed, False except cls.DoesNotExist: feed = cls(**defaults) - feed.save() + feed = feed.save() return feed, True @classmethod diff --git a/apps/rss_feeds/page_importer.py b/apps/rss_feeds/page_importer.py index 4e2904e25..f6247139c 100644 --- a/apps/rss_feeds/page_importer.py +++ b/apps/rss_feeds/page_importer.py @@ -199,15 +199,18 @@ class PageImporter(object): requests.adapters.ReadTimeout) as e: logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e)) return - try: - data = response.text - except (LookupError, TypeError): - data = response.content + # try: + data = response.text + # except (LookupError, TypeError): + # data = response.content + # import pdb; pdb.set_trace() - if response.encoding and response.encoding != 'utf-8': + if response.encoding and response.encoding.lower() != 'utf-8': + logging.debug(f" -> ~FBEncoding is {response.encoding}, re-encoding...") try: - data = data.encode(response.encoding) + data = data.encode('utf-8').decode('utf-8') except (LookupError, UnicodeEncodeError): + logging.debug(f" -> ~FRRe-encoding failed!") pass if data: @@ -221,7 +224,7 @@ class PageImporter(object): return html def save_story(self, html): - self.story.original_page_z = zlib.compress(html) + self.story.original_page_z = zlib.compress(html.encode('utf-8')) try: self.story.save() except NotUniqueError: diff --git a/config/mongodb.prod.conf b/config/mongodb.prod.conf index 331026c41..7e6730ac9 100644 --- a/config/mongodb.prod.conf +++ b/config/mongodb.prod.conf @@ -88,5 +88,9 @@ noauth = true #master = true #source = slave.example.com +# Mongo 3.6 requires binding to non-localhost, but because we use ufw for a firewall, +# allow all IPs to connect +net.bindIpAll = true + # in replica set configuration, specify the name of the replica set replSet = nbset diff --git a/utils/exception_middleware.py b/utils/exception_middleware.py index 859cb7359..de282de65 100644 --- a/utils/exception_middleware.py +++ b/utils/exception_middleware.py @@ -9,7 +9,7 @@ class ConsoleExceptionMiddleware: print("######################## Exception #############################") print(('\n'.join(traceback.format_exception(*(exc_info or sys.exc_info()))))) print("----------------------------------------------------------------") - pprint(inspect.trace()[-1][0].f_locals) + # pprint(inspect.trace()[-1][0].f_locals) print("################################################################") #pprint(request) @@ -22,4 +22,4 @@ class ConsoleExceptionMiddleware: response = self.get_response(request) - return response \ No newline at end of file + return response diff --git a/utils/feed_functions.py b/utils/feed_functions.py index dc5c19476..6482c8180 100644 --- a/utils/feed_functions.py +++ b/utils/feed_functions.py @@ -23,6 +23,7 @@ def timelimit(timeout): threading.Thread.__init__(self) self.result = None self.error = None + self.exc_info = None self.setDaemon(True) self.start() @@ -30,17 +31,19 @@ def timelimit(timeout): def run(self): try: self.result = function(*args, **kw) - except: - self.error = sys.exc_info() + except BaseException as e: + self.error = e + self.exc_info = sys.exc_info() c = Dispatch() + dispatch = c c.join(timeout) if c.is_alive(): raise TimeoutError('took too long') if c.error: - tb = ''.join(traceback.format_exception(c.error[0], c.error[1], c.error[2])) + tb = ''.join(traceback.format_exception(c.exc_info[0], c.exc_info[1], c.exc_info[2])) logging.debug(tb) - mail_admins('Error in timeout: %s' % c.error[0], tb) - raise c.error[0](c.error[1]).with_traceback(c.error[2]) + mail_admins('Error in timeout: %s' % c.exc_info[0], tb) + raise c.error return c.result return _2 return _1