Fixing issue with both requests and urllib fallback.

This commit is contained in:
Samuel Clay 2021-04-02 12:51:48 -04:00
parent 29238e3a67
commit 918ddb5200

View file

@@ -6,6 +6,7 @@ import time
import urllib.request, urllib.error, urllib.parse import urllib.request, urllib.error, urllib.parse
import http.client import http.client
import zlib import zlib
from django.contrib.sites.models import Site
from mongoengine.queryset import NotUniqueError from mongoengine.queryset import NotUniqueError
from socket import error as SocketError from socket import error as SocketError
from boto.s3.key import Key from boto.s3.key import Key
@@ -87,7 +88,7 @@ class PageImporter(object):
request = urllib.request.Request(feed_link, headers=self.headers) request = urllib.request.Request(feed_link, headers=self.headers)
response = urllib.request.urlopen(request) response = urllib.request.urlopen(request)
time.sleep(0.01) # Grrr, GIL. time.sleep(0.01) # Grrr, GIL.
data = response.read() data = response.read().decode(response.headers.get_content_charset() or 'utf-8')
else: else:
try: try:
response = requests.get(feed_link, headers=self.headers, timeout=10) response = requests.get(feed_link, headers=self.headers, timeout=10)
@@ -99,10 +100,7 @@ class PageImporter(object):
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e)) logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
self.save_no_page(reason="Page fetch failed") self.save_no_page(reason="Page fetch failed")
return return
# try:
data = response.text data = response.text
# except (LookupError, TypeError):
# data = response.content
if response.encoding and response.encoding.lower() != 'utf-8': if response.encoding and response.encoding.lower() != 'utf-8':
logging.debug(f" -> ~FBEncoding is {response.encoding}, re-encoding...") logging.debug(f" -> ~FBEncoding is {response.encoding}, re-encoding...")
try: try:
@@ -110,11 +108,6 @@ class PageImporter(object):
except (LookupError, UnicodeEncodeError): except (LookupError, UnicodeEncodeError):
logging.debug(f" -> ~FRRe-encoding failed!") logging.debug(f" -> ~FRRe-encoding failed!")
pass pass
# if response.encoding and response.encoding != 'utf-8':
# try:
# data = data.encode(response.encoding)
# except LookupError:
# pass
else: else:
try: try:
data = open(feed_link, 'r').read() data = open(feed_link, 'r').read()