Removing gamespot.com feed from page and text fetchers.

This commit is contained in:
Samuel Clay 2015-08-03 20:09:36 -07:00
parent 78d1bfeeee
commit 22ae0e65e4
3 changed files with 11 additions and 1 deletion

View file

@@ -36,6 +36,7 @@ BROKEN_PAGE_URLS = [
'stackexchange.com',
'twitter.com',
'rankexploits',
'gamespot.com',
]
class PageImporter(object):

View file

@@ -10,6 +10,10 @@ from utils.feed_functions import timelimit, TimeoutError
from OpenSSL.SSL import Error as OpenSSLError
from pyasn1.error import PyAsn1Error
BROKEN_URLS = [
"gamespot.com",
]
class TextImporter:
def __init__(self, story=None, feed=None, story_url=None, request=None, debug=False):
@@ -33,6 +37,10 @@ class TextImporter:
}
def fetch(self, skip_save=False, return_document=False):
if any(broken_url in self.story_url for broken_url in BROKEN_URLS):
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: banned")
return
try:
resp = self.fetch_request()
except TimeoutError:

View file

@@ -195,7 +195,8 @@ NEWSBLUR.utils = {
'stackoverflow.com',
'stackexchange.com',
'twitter.com',
'rankexploits'
'rankexploits',
'gamespot.com'
];
return _.any(BROKEN_URLS, function(broken_url) {
return _.string.contains(url, broken_url);