Adding timeouts to most outbound requests.

Samuel Clay 2020-12-06 11:37:01 -05:00
parent 88b425f515
commit 1a5d440582
6 changed files with 25 additions and 16 deletions
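
Every call site below follows the same two-step pattern: pass an explicit
timeout= to requests.get() (10 seconds for page and image fetches, 15 for
feed and text fetches), and add ReadTimeout to the surrounding except clause
where one exists. Note that requests' timeout bounds the connect phase and
each individual socket read, not the total request time. A minimal sketch of
the pattern (the helper name is illustrative, not from this codebase):

    import requests

    def fetch(url, timeout=10):
        # timeout= applies to the connect phase and to each socket read;
        # without it, requests can block indefinitely on a stalled server.
        try:
            return requests.get(url, timeout=timeout)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout):
            return None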

@@ -214,7 +214,7 @@ class IconImporter(object):
         url = self._url_from_html(content)
         if not url:
             try:
-                content = requests.get(self.cleaned_feed_link).content
+                content = requests.get(self.cleaned_feed_link, timeout=10).content
                 url = self._url_from_html(content)
             except (AttributeError, SocketError, requests.ConnectionError,
                     requests.models.MissingSchema, requests.sessions.InvalidSchema,
@@ -222,6 +222,7 @@ class IconImporter(object):
                     requests.models.InvalidURL,
                     requests.models.ChunkedEncodingError,
                     requests.models.ContentDecodingError,
+                    requests.adapters.ReadTimeout,
                     httplib.IncompleteRead,
                     LocationParseError, OpenSSLError, PyAsn1Error,
                     ValueError), e:
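
A note on the import path used above: requests.adapters.ReadTimeout works
because adapters.py re-exports the exception, but the canonical name is
requests.exceptions.ReadTimeout. In requests 2.x (contemporary with this
commit) they are the same class, so catching either is equivalent:

    import requests

    # Same class under three names in requests 2.x (assumption: a stock
    # requests install, no vendored copies).
    assert requests.adapters.ReadTimeout is requests.exceptions.ReadTimeout
    assert requests.ReadTimeout is requests.exceptions.ReadTimeout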

@@ -90,11 +90,12 @@ class PageImporter(object):
             data = response.read()
         else:
             try:
-                response = requests.get(feed_link, headers=self.headers)
+                response = requests.get(feed_link, headers=self.headers, timeout=10)
                 response.connection.close()
             except requests.exceptions.TooManyRedirects:
-                response = requests.get(feed_link)
-            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError), e:
+                response = requests.get(feed_link, timeout=10)
+            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError,
+                    requests.adapters.ReadTimeout), e:
                 logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
                 self.save_no_page()
                 return
@@ -184,12 +185,18 @@ class PageImporter(object):
             return
         try:
-            response = requests.get(story_permalink, headers=self.headers)
+            response = requests.get(story_permalink, headers=self.headers, timeout=10)
             response.connection.close()
-        except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
+        except (AttributeError, SocketError, OpenSSLError, PyAsn1Error,
+                requests.exceptions.ConnectionError,
+                requests.exceptions.TooManyRedirects,
+                requests.adapters.ReadTimeout), e:
             try:
-                response = requests.get(story_permalink)
-            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
+                response = requests.get(story_permalink, timeout=10)
+            except (AttributeError, SocketError, OpenSSLError, PyAsn1Error,
+                    requests.exceptions.ConnectionError,
+                    requests.exceptions.TooManyRedirects,
+                    requests.adapters.ReadTimeout), e:
                 logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e))
                 return
         try:
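
Both story fetches above keep the existing fallback shape: try with the
custom headers first, retry bare if the first attempt fails, with the
timeout now applied to both attempts. Condensed into a hypothetical helper
(not in the codebase):

    import requests

    def fetch_page(url, headers, timeout=10):
        try:
            return requests.get(url, headers=headers, timeout=timeout)
        except requests.exceptions.TooManyRedirects:
            # Retry without custom headers; some servers redirect-loop
            # when they see an unfamiliar header set.
            return requests.get(url, timeout=timeout)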

@@ -201,7 +201,7 @@ class TextImporter:
             url = "https://www.newsblur.com/rss_feeds/original_text_fetcher?url=%s" % url
         try:
-            r = requests.get(url, headers=headers, verify=False)
+            r = requests.get(url, headers=headers, verify=False, timeout=15)
             r.connection.close()
         except (AttributeError, SocketError, requests.ConnectionError,
                 requests.models.MissingSchema, requests.sessions.InvalidSchema,
@@ -209,6 +209,7 @@ class TextImporter:
                 requests.models.InvalidURL,
                 requests.models.ChunkedEncodingError,
                 requests.models.ContentDecodingError,
+                requests.adapters.ReadTimeout,
                 urllib3.exceptions.LocationValueError,
                 LocationParseError, OpenSSLError, PyAsn1Error), e:
             logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
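
The original-text proxy gets the longer 15-second budget. For reference
(not used in this commit), requests also accepts a (connect, read) tuple
when the two phases deserve separate budgets:

    import requests

    # ~3 seconds to establish the connection, 15 per socket read.
    r = requests.get("https://www.newsblur.com/", timeout=(3.05, 15))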

@@ -2330,7 +2330,7 @@ class MSharedStory(mongo.DynamicDocument):
         for image_source in self.image_urls[:10]:
             if any(ignore in image_source for ignore in IGNORE_IMAGE_SOURCES):
                 continue
-            req = requests.get(image_source, headers=headers, stream=True)
+            req = requests.get(image_source, headers=headers, stream=True, timeout=10)
             try:
                 datastream = StringIO(req.content)
                 width, height = ImageOps.image_size(datastream)
@@ -2715,7 +2715,7 @@ class MSocialServices(mongo.Document):
                 os.remove(filename)
             else:
                 api.update_status(status=message)
-        except tweepy.TweepError, e:
+        except (tweepy.TweepError, requests.adapters.ReadError), e:
             user = User.objects.get(pk=self.user_id)
             logging.user(user, "~FRTwitter error: ~SB%s" % e)
             return
@@ -2730,7 +2730,7 @@ class MSocialServices(mongo.Document):
             url = shared_story.image_urls[0]
             image_filename = os.path.basename(urlparse.urlparse(url).path)
-            req = requests.get(url, stream=True)
+            req = requests.get(url, stream=True, timeout=10)
             filename = "/tmp/%s-%s" % (shared_story.story_hash, image_filename)
             if req.status_code == 200:
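
Two caveats in this file. First, with stream=True the timeout bounds the
connect phase and each read, but not the total download, so a server that
trickles bytes can still hold the worker well past 10 seconds. A sketch of
capping the total (hypothetical helper, not in the codebase):

    import time
    import requests

    def fetch_capped(url, timeout=10, max_seconds=30, max_bytes=5 * 2 ** 20):
        # Bound the *total* download, which timeout= alone does not do.
        req = requests.get(url, stream=True, timeout=timeout)
        start, size, chunks = time.time(), 0, []
        for chunk in req.iter_content(chunk_size=8192):
            chunks.append(chunk)
            size += len(chunk)
            if time.time() - start > max_seconds or size > max_bytes:
                req.close()
                break
        return b''.join(chunks)

Second, requests.adapters.ReadError in the tweepy handler looks suspect:
requests 2.x re-exports ReadTimeout from adapters.py but does not appear to
export a ReadError there, so evaluating that except tuple would itself raise
AttributeError; requests.adapters.ReadTimeout, as used everywhere else in
this commit, was probably intended.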

@@ -133,10 +133,10 @@ class FetchFeed:
                 headers['If-Modified-Since'] = modified_header
             if etag or modified:
                 headers['A-IM'] = 'feed'
-            raw_feed = requests.get(address, headers=headers)
+            raw_feed = requests.get(address, headers=headers, timeout=15)
             if raw_feed.status_code >= 400:
                 logging.debug(" ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (self.feed.log_title[:30], raw_feed.status_code, raw_feed.headers))
-                raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True))
+                raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True), timeout=15)
             if raw_feed.content and 'application/json' in raw_feed.headers.get('Content-Type', ""):
                 # JSON Feed
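
Condensed, the fetch-with-fallback above (hypothetical helper, not in the
codebase): the first attempt sends conditional-GET headers, an error status
triggers one retry with browser-like headers, and both attempts are now
bounded by the 15-second timeout:

    import requests

    def fetch_feed(address, headers, fake_headers, timeout=15):
        raw_feed = requests.get(address, headers=headers, timeout=timeout)
        if raw_feed.status_code >= 400:
            # Some servers reject unknown crawlers; retry with fake headers.
            raw_feed = requests.get(address, headers=fake_headers, timeout=timeout)
        return raw_feed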

@@ -38,7 +38,7 @@ class FeedFinder(object):
     def get_feed(self, url, skip_user_agent=False):
         try:
-            r = requests.get(url, headers={"User-Agent": self.user_agent if not skip_user_agent else None})
+            r = requests.get(url, headers={"User-Agent": self.user_agent if not skip_user_agent else None}, timeout=15)
         except Exception as e:
             logging.warn("Error while getting '{0}'".format(url))
             logging.warn("{0}".format(e))
@@ -162,4 +162,4 @@ if __name__ == "__main__":
     print(find_feeds("dan.iel.fm", check_all=True))
     print(find_feeds("kapadia.github.io"))
     print(find_feeds("blog.jonathansick.ca"))
-    print(find_feeds("asdasd"))
\ No newline at end of file
+    print(find_feeds("asdasd"))