Mirror of https://github.com/viq/NewsBlur.git, synced 2025-09-18 21:43:31 +00:00
Fixing bug where publisher-disabled original pages were not properly disabled.

parent 5216fa88b0
commit 9d379377d0

3 changed files with 23 additions and 21 deletions
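The change, in short: the BROKEN_PAGE_URLS blacklist moves out of the Feed model and into the page importer, so a feed whose link matches a blacklisted publisher is flagged has_page = False once at fetch time, and Feed.canonical() then reports disabled_page from that stored flag instead of re-scanning the URL on every call. A minimal, self-contained sketch of the matching rule; the helper name check_broken_page is hypothetical and only for illustration:

# Sketch of the substring blacklist added to the page importer below.
# check_broken_page is a hypothetical helper, not part of this commit.

# Also change in reader_utils.js.
BROKEN_PAGE_URLS = [
    'nytimes.com',
    'stackoverflow.com',
    'stackexchange.com',
    'twitter.com',
    'rankexploits',
]

def check_broken_page(feed_link):
    # Case-insensitive substring match anywhere in the link, so a feed
    # like 'http://www.NYTimes.com/world' is caught as well.
    return any(s in feed_link.lower() for s in BROKEN_PAGE_URLS)

assert check_broken_page('http://www.nytimes.com/pages/world/')
assert not check_broken_page('http://example.com/blog')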
apps/rss_feeds/models.py
@@ -34,14 +34,6 @@ from utils.diff import HTMLDiff
 
 ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
 
-# Also change in reader_utils.js.
-BROKEN_PAGE_URLS = [
-    'nytimes.com',
-    'stackoverflow.com',
-    'stackexchange.com',
-    'twitter.com',
-    'rankexploits',
-]
 
 class Feed(models.Model):
     feed_address = models.URLField(max_length=255, db_index=True)
@@ -139,12 +131,9 @@ class Feed(models.Model):
             feed['exception_type'] = None
             feed['exception_code'] = self.exception_code
 
-        if self.feed_link:
-            for broken_page in BROKEN_PAGE_URLS:
-                if broken_page in self.feed_link:
-                    feed['disabled_page'] = True
-                    break
-
+        if not self.has_page:
+            feed['disabled_page'] = True
+        print feed
         if full:
             feed['feed_tags'] = json.decode(self.data.popular_tags) if self.data.popular_tags else []
             feed['feed_authors'] = json.decode(self.data.popular_authors) if self.data.popular_authors else []
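With the per-call loop removed, canonical() keys disabled_page off the persisted has_page flag alone. A rough stand-in showing the resulting behavior; FakeFeed is an illustrative substitute for the real Django model, not code from this commit:

class FakeFeed(object):
    # Illustrative stand-in: only the field this check touches.
    def __init__(self, has_page):
        self.has_page = has_page

    def canonical(self):
        feed = {'disabled_page': False}
        if not self.has_page:
            # Mirrors the new check added in the hunk above.
            feed['disabled_page'] = True
        return feed

assert FakeFeed(has_page=False).canonical()['disabled_page'] is True
assert FakeFeed(has_page=True).canonical()['disabled_page'] is False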
apps/rss_feeds/page_importer.py
@@ -19,6 +19,15 @@ BROKEN_PAGES = [
     '[]',
 ]
 
+# Also change in reader_utils.js.
+BROKEN_PAGE_URLS = [
+    'nytimes.com',
+    'stackoverflow.com',
+    'stackexchange.com',
+    'twitter.com',
+    'rankexploits',
+]
+
 class PageImporter(object):
 
     def __init__(self, feed):
@@ -47,11 +56,17 @@ class PageImporter(object):
         if not feed_link:
             self.save_no_page()
             return
-
-        if feed_link.startswith('www'):
-            self.feed.feed_link = 'http://' + feed_link
+
         try:
-            if feed_link.startswith('http'):
+            if feed_link.startswith('www'):
+                self.feed.feed_link = 'http://' + feed_link
+            if any(feed_link.startswith(s) for s in BROKEN_PAGES):
+                self.save_no_page()
+                return
+            elif any(s in feed_link.lower() for s in BROKEN_PAGE_URLS):
+                self.save_no_page()
+                return
+            elif feed_link.startswith('http'):
                 if urllib_fallback:
                     request = urllib2.Request(feed_link, headers=self.headers)
                     response = urllib2.urlopen(request)
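After this restructuring, fetch_page() short-circuits before any network I/O: exact-prefix junk links hit BROKEN_PAGES, blacklisted publishers hit BROKEN_PAGE_URLS, and only a surviving http link proceeds to an actual request. A condensed sketch of that dispatch order; classify is a hypothetical reduction of the real method, and BROKEN_PAGES shows only the '[]' entry visible in the hunk above:

BROKEN_PAGES = ['[]']  # remaining prefix entries are elided in the hunk above
BROKEN_PAGE_URLS = ['nytimes.com', 'stackoverflow.com', 'stackexchange.com',
                    'twitter.com', 'rankexploits']

def classify(feed_link):
    # Mirrors the new if/elif chain in fetch_page().
    if any(feed_link.startswith(s) for s in BROKEN_PAGES):
        return 'no_page'      # junk pseudo-URL, skip entirely
    elif any(s in feed_link.lower() for s in BROKEN_PAGE_URLS):
        return 'no_page'      # publisher-disabled original page
    elif feed_link.startswith('http'):
        return 'fetch'        # the real code issues the HTTP request here
    return 'local_file'       # fall through to reading a local path

assert classify('[]') == 'no_page'
assert classify('http://twitter.com/newsblur') == 'no_page'
assert classify('http://example.com/') == 'fetch'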
@@ -66,9 +81,6 @@ class PageImporter(object):
                         data = response.text
                     except (LookupError, TypeError):
                         data = response.content
-            elif any(feed_link.startswith(s) for s in BROKEN_PAGES):
-                self.save_no_page()
-                return
             else:
                 try:
                     data = open(feed_link, 'r').read()
@@ -112,6 +124,7 @@ class PageImporter(object):
         return html
 
     def save_no_page(self):
         logging.debug('   --->> [%-30s] ~FYNo original page: %s' % (self.feed, self.feed.feed_link))
+        self.feed.has_page = False
         self.feed.save()
         self.feed.save_page_history(404, "Feed has no original page.")
BIN dump.rdb
Binary file not shown.