Adding timeouts to most outbound requests.

Samuel Clay 2020-12-06 11:37:01 -05:00
parent 88b425f515
commit 1a5d440582
6 changed files with 25 additions and 16 deletions
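
Every call site below follows the same two-step pattern: pass an explicit
timeout= to requests.get() (10 seconds for page and image fetches, 15 for
feed and text fetches), and add ReadTimeout to the surrounding except clause
where one exists. Note that requests' timeout bounds the connect phase and
each individual socket read, not the total request time. A minimal sketch of
the pattern (the helper name is illustrative, not from this codebase):

    import requests

    def fetch(url, timeout=10):
        # timeout= applies to the connect phase and to each socket read;
        # without it, requests can block indefinitely on a stalled server.
        try:
            return requests.get(url, timeout=timeout)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout):
            return None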

@@ -214,7 +214,7 @@ class IconImporter(object):
         url = self._url_from_html(content)
         if not url:
             try:
-                content = requests.get(self.cleaned_feed_link).content
+                content = requests.get(self.cleaned_feed_link, timeout=10).content
                 url = self._url_from_html(content)
             except (AttributeError, SocketError, requests.ConnectionError,
                     requests.models.MissingSchema, requests.sessions.InvalidSchema,
@@ -222,6 +222,7 @@ class IconImporter(object):
                     requests.models.InvalidURL,
                     requests.models.ChunkedEncodingError,
                     requests.models.ContentDecodingError,
+                    requests.adapters.ReadTimeout,
                     httplib.IncompleteRead,
                     LocationParseError, OpenSSLError, PyAsn1Error,
                     ValueError), e:
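
A note on the import path used above: requests.adapters.ReadTimeout works
because adapters.py re-exports the exception, but the canonical name is
requests.exceptions.ReadTimeout. In requests 2.x (contemporary with this
commit) they are the same class, so catching either is equivalent:

    import requests

    # Same class under three names in requests 2.x (assumption: a stock
    # requests install, no vendored copies).
    assert requests.adapters.ReadTimeout is requests.exceptions.ReadTimeout
    assert requests.ReadTimeout is requests.exceptions.ReadTimeout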

@@ -90,11 +90,12 @@ class PageImporter(object):
             data = response.read()
         else:
             try:
-                response = requests.get(feed_link, headers=self.headers)
+                response = requests.get(feed_link, headers=self.headers, timeout=10)
                 response.connection.close()
             except requests.exceptions.TooManyRedirects:
-                response = requests.get(feed_link)
-            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError), e:
+                response = requests.get(feed_link, timeout=10)
+            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError,
+                    requests.adapters.ReadTimeout), e:
                 logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
                 self.save_no_page()
                 return
@@ -184,12 +185,18 @@ class PageImporter(object):
             return
         try:
-            response = requests.get(story_permalink, headers=self.headers)
+            response = requests.get(story_permalink, headers=self.headers, timeout=10)
             response.connection.close()
-        except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
+        except (AttributeError, SocketError, OpenSSLError, PyAsn1Error,
+                requests.exceptions.ConnectionError,
+                requests.exceptions.TooManyRedirects,
+                requests.adapters.ReadTimeout), e:
             try:
-                response = requests.get(story_permalink)
-            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
+                response = requests.get(story_permalink, timeout=10)
+            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error,
+                    requests.exceptions.ConnectionError,
+                    requests.exceptions.TooManyRedirects,
+                    requests.adapters.ReadTimeout), e:
                 logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e))
                 return
         try:
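
Both story fetches above keep the existing fallback shape: try with the
custom headers first, retry bare if the first attempt fails, with the
timeout now applied to both attempts. Condensed into a hypothetical helper
(not in the codebase):

    import requests

    def fetch_page(url, headers, timeout=10):
        try:
            return requests.get(url, headers=headers, timeout=timeout)
        except requests.exceptions.TooManyRedirects:
            # Retry without custom headers; some servers redirect-loop
            # when they see an unfamiliar header set.
            return requests.get(url, timeout=timeout)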

@@ -201,7 +201,7 @@ class TextImporter:
             url = "https://www.newsblur.com/rss_feeds/original_text_fetcher?url=%s" % url
         try:
-            r = requests.get(url, headers=headers, verify=False)
+            r = requests.get(url, headers=headers, verify=False, timeout=15)
             r.connection.close()
         except (AttributeError, SocketError, requests.ConnectionError,
                 requests.models.MissingSchema, requests.sessions.InvalidSchema,
@@ -209,6 +209,7 @@ class TextImporter:
                 requests.models.InvalidURL,
                 requests.models.ChunkedEncodingError,
                 requests.models.ContentDecodingError,
+                requests.adapters.ReadTimeout,
                 urllib3.exceptions.LocationValueError,
                 LocationParseError, OpenSSLError, PyAsn1Error), e:
             logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
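
The original-text proxy gets the longer 15-second budget. For reference
(not used in this commit), requests also accepts a (connect, read) tuple
when the two phases deserve separate budgets:

    import requests

    # ~3 seconds to establish the connection, 15 per socket read.
    r = requests.get("https://www.newsblur.com/", timeout=(3.05, 15))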

@@ -2330,7 +2330,7 @@ class MSharedStory(mongo.DynamicDocument):
         for image_source in self.image_urls[:10]:
             if any(ignore in image_source for ignore in IGNORE_IMAGE_SOURCES):
                 continue
-            req = requests.get(image_source, headers=headers, stream=True)
+            req = requests.get(image_source, headers=headers, stream=True, timeout=10)
             try:
                 datastream = StringIO(req.content)
                 width, height = ImageOps.image_size(datastream)
@@ -2715,7 +2715,7 @@ class MSocialServices(mongo.Document):
                 os.remove(filename)
             else:
                 api.update_status(status=message)
-        except tweepy.TweepError, e:
+        except (tweepy.TweepError, requests.adapters.ReadError), e:
             user = User.objects.get(pk=self.user_id)
             logging.user(user, "~FRTwitter error: ~SB%s" % e)
             return
@@ -2730,7 +2730,7 @@ class MSocialServices(mongo.Document):
             url = shared_story.image_urls[0]
             image_filename = os.path.basename(urlparse.urlparse(url).path)
-            req = requests.get(url, stream=True)
+            req = requests.get(url, stream=True, timeout=10)
             filename = "/tmp/%s-%s" % (shared_story.story_hash, image_filename)
             if req.status_code == 200:
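
Two caveats in this file. First, with stream=True the timeout bounds the
connect phase and each read, but not the total download, so a server that
trickles bytes can still hold the worker well past 10 seconds. A sketch of
capping the total (hypothetical helper, not in the codebase):

    import time
    import requests

    def fetch_capped(url, timeout=10, max_seconds=30, max_bytes=5 * 2 ** 20):
        # Bound the *total* download, which timeout= alone does not do.
        req = requests.get(url, stream=True, timeout=timeout)
        start, size, chunks = time.time(), 0, []
        for chunk in req.iter_content(chunk_size=8192):
            chunks.append(chunk)
            size += len(chunk)
            if time.time() - start > max_seconds or size > max_bytes:
                req.close()
                break
        return b''.join(chunks)

Second, requests.adapters.ReadError in the tweepy handler looks suspect:
requests 2.x re-exports ReadTimeout from adapters.py but does not appear to
export a ReadError there, so evaluating that except tuple would itself raise
AttributeError; requests.adapters.ReadTimeout, as used everywhere else in
this commit, was probably intended.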

@@ -133,10 +133,10 @@ class FetchFeed:
                 headers['If-Modified-Since'] = modified_header
             if etag or modified:
                 headers['A-IM'] = 'feed'
-            raw_feed = requests.get(address, headers=headers)
+            raw_feed = requests.get(address, headers=headers, timeout=15)
             if raw_feed.status_code >= 400:
                 logging.debug(" ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (self.feed.log_title[:30], raw_feed.status_code, raw_feed.headers))
-                raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True))
+                raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True), timeout=15)
             if raw_feed.content and 'application/json' in raw_feed.headers.get('Content-Type', ""):
                 # JSON Feed
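
Condensed, the fetch-with-fallback above (hypothetical helper, not in the
codebase): the first attempt sends conditional-GET headers, an error status
triggers one retry with browser-like headers, and both attempts are now
bounded by the 15-second timeout:

    import requests

    def fetch_feed(address, headers, fake_headers, timeout=15):
        raw_feed = requests.get(address, headers=headers, timeout=timeout)
        if raw_feed.status_code >= 400:
            # Some servers reject unknown crawlers; retry with fake headers.
            raw_feed = requests.get(address, headers=fake_headers, timeout=timeout)
        return raw_feed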

@@ -38,7 +38,7 @@ class FeedFinder(object):
     def get_feed(self, url, skip_user_agent=False):
         try:
-            r = requests.get(url, headers={"User-Agent": self.user_agent if not skip_user_agent else None})
+            r = requests.get(url, headers={"User-Agent": self.user_agent if not skip_user_agent else None}, timeout=15)
         except Exception as e:
             logging.warn("Error while getting '{0}'".format(url))
             logging.warn("{0}".format(e))
@@ -162,4 +162,4 @@ if __name__ == "__main__":
     print(find_feeds("dan.iel.fm", check_all=True))
     print(find_feeds("kapadia.github.io"))
     print(find_feeds("blog.jonathansick.ca"))
-    print(find_feeds("asdasd"))
\ No newline at end of file
+    print(find_feeds("asdasd"))