Removing gamespot.com feed from page and text fetchers.

This commit is contained in:
Samuel Clay 2015-08-03 20:09:36 -07:00
parent 78d1bfeeee
commit 22ae0e65e4
3 changed files with 11 additions and 1 deletion

View file

@@ -36,6 +36,7 @@ BROKEN_PAGE_URLS = [
'stackexchange.com', 'stackexchange.com',
'twitter.com', 'twitter.com',
'rankexploits', 'rankexploits',
'gamespot.com',
] ]
class PageImporter(object): class PageImporter(object):

View file

@@ -10,6 +10,10 @@ from utils.feed_functions import timelimit, TimeoutError
from OpenSSL.SSL import Error as OpenSSLError from OpenSSL.SSL import Error as OpenSSLError
from pyasn1.error import PyAsn1Error from pyasn1.error import PyAsn1Error
BROKEN_URLS = [
"gamespot.com",
]
class TextImporter: class TextImporter:
def __init__(self, story=None, feed=None, story_url=None, request=None, debug=False): def __init__(self, story=None, feed=None, story_url=None, request=None, debug=False):
@@ -33,6 +37,10 @@ class TextImporter:
} }
def fetch(self, skip_save=False, return_document=False): def fetch(self, skip_save=False, return_document=False):
if any(broken_url in self.story_url for broken_url in BROKEN_URLS):
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: banned")
return
try: try:
resp = self.fetch_request() resp = self.fetch_request()
except TimeoutError: except TimeoutError:

View file

@@ -195,7 +195,8 @@ NEWSBLUR.utils = {
'stackoverflow.com', 'stackoverflow.com',
'stackexchange.com', 'stackexchange.com',
'twitter.com', 'twitter.com',
'rankexploits' 'rankexploits',
'gamespot.com'
]; ];
return _.any(BROKEN_URLS, function(broken_url) { return _.any(BROKEN_URLS, function(broken_url) {
return _.string.contains(url, broken_url); return _.string.contains(url, broken_url);