2to3 apps/rss_feeds

commit 6021afaec3 (parent 2d62e80344)
17 changed files with 269 additions and 178 deletions
@@ -1,20 +1,20 @@
-import urllib2
+import urllib.request, urllib.error, urllib.parse
 import lxml.html
 import numpy
 import scipy
 import scipy.misc
 import scipy.cluster
-import urlparse
+import urllib.parse
 import struct
 import operator
 import gzip
 import datetime
 import requests
-import httplib
+import http.client
 from PIL import BmpImagePlugin, PngImagePlugin, Image
 from socket import error as SocketError
 from boto.s3.key import Key
-from StringIO import StringIO
+from io import StringIO
 from django.conf import settings
 from apps.rss_feeds.models import MFeedPage, MFeedIcon
 from utils.facebook_fetcher import FacebookFetcher
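The import renames in the hunk above are the standard Python 3 stdlib moves: urllib2 and urlparse fold into urllib.request/urllib.error/urllib.parse, httplib becomes http.client, and StringIO.StringIO comes from io (note that io.StringIO accepts only text on Python 3, so binary favicon data generally wants io.BytesIO instead). As an illustrative sketch only, not part of this commit, code that still has to run on both interpreters typically guards such imports:

    try:
        from urllib.parse import urljoin      # Python 3
    except ImportError:
        from urlparse import urljoin          # Python 2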
@@ -127,7 +127,7 @@ class IconImporter(object):
 try:
 image_file.seek(0)
 header = struct.unpack('<3H', image_file.read(6))
-except Exception, e:
+except Exception as e:
 return
 
 # Check magic
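The `except Exception, e:` spelling is a syntax error on Python 3; `except ... as e:` is the only accepted form, and it already works on Python 2.6+. A minimal sketch, with risky() and handle() as hypothetical placeholders:

    try:
        risky()
    except ValueError as e:   # valid on Python 2.6+ and all of Python 3
        handle(e)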
@@ -136,9 +136,9 @@ class IconImporter(object):
 
 # Collect icon directories
 directories = []
-for i in xrange(header[2]):
+for i in range(header[2]):
 directory = list(struct.unpack('<4B2H2I', image_file.read(16)))
-for j in xrange(3):
+for j in range(3):
 if not directory[j]:
 directory[j] = 256
 
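xrange() is gone in Python 3; range() now returns a lazy sequence with the same memory behaviour, so swapping the names is safe wherever the result is only iterated, as in the loops above. When an actual list is needed it must be materialised explicitly, for example:

    indexes = list(range(header[2]))   # range() alone is not a list on Python 3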
@@ -222,9 +222,9 @@ class IconImporter(object):
 requests.models.InvalidURL,
 requests.models.ChunkedEncodingError,
 requests.models.ContentDecodingError,
-httplib.IncompleteRead,
+http.client.IncompleteRead,
 LocationParseError, OpenSSLError, PyAsn1Error,
-ValueError), e:
+ValueError) as e:
 logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
 if url:
 image, image_file = self.get_image_from_url(url)
@@ -244,7 +244,7 @@ class IconImporter(object):
 url = self.feed_icon.icon_url
 if not url and self.feed.feed_link and len(self.feed.feed_link) > 6:
 try:
-url = urlparse.urljoin(self.feed.feed_link, 'favicon.ico')
+url = urllib.parse.urljoin(self.feed.feed_link, 'favicon.ico')
 except ValueError:
 url = None
 if not url:
@@ -252,7 +252,7 @@ class IconImporter(object):
 
 image, image_file = self.get_image_from_url(url)
 if not image:
-url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')
+url = urllib.parse.urljoin(self.feed.feed_link, '/favicon.ico')
 image, image_file = self.get_image_from_url(url)
 # print 'Found: %s - %s' % (url, image)
 return image, image_file, url
@@ -262,7 +262,7 @@ class IconImporter(object):
 url = facebook_fetcher.favicon_url()
 image, image_file = self.get_image_from_url(url)
 if not image:
-url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')
+url = urllib.parse.urljoin(self.feed.feed_link, '/favicon.ico')
 image, image_file = self.get_image_from_url(url)
 # print 'Found: %s - %s' % (url, image)
 return image, image_file, url
@@ -288,8 +288,8 @@ class IconImporter(object):
 'Accept': 'image/png,image/x-icon,image/*;q=0.9,*/*;q=0.8'
 }
 try:
-request = urllib2.Request(url, headers=headers)
-icon = urllib2.urlopen(request).read()
+request = urllib.request.Request(url, headers=headers)
+icon = urllib.request.urlopen(request).read()
 except Exception:
 return None
 return icon
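urllib2.Request and urllib2.urlopen map one-to-one onto urllib.request in Python 3. A minimal sketch of the fetch above (error handling elided; url and headers as in the hunk):

    import urllib.request

    request = urllib.request.Request(url, headers=headers)
    icon = urllib.request.urlopen(request).read()   # returns bytes on Python 3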
@@ -311,7 +311,7 @@ class IconImporter(object):
 if not content:
 return url
 try:
-if isinstance(content, unicode):
+if isinstance(content, str):
 content = content.encode('utf-8')
 icon_path = lxml.html.fromstring(content).xpath(
 '//link[@rel="icon" or @rel="shortcut icon"]/@href'
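Python 3 has no separate `unicode` type: text is `str` and raw byte strings are `bytes`, which is why the isinstance() guard above now checks for str before encoding. A sketch of the same pattern:

    if isinstance(content, str):            # Python 2 code would test for unicode here
        content = content.encode('utf-8')   # hand lxml bytes, not text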
@@ -323,7 +323,7 @@ class IconImporter(object):
 if str(icon_path[0]).startswith('http'):
 url = icon_path[0]
 else:
-url = urlparse.urljoin(self.feed.feed_link, icon_path[0])
+url = urllib.parse.urljoin(self.feed.feed_link, icon_path[0])
 return url
 
 def normalize_image(self, image):
@@ -37,12 +37,12 @@ class Command(BaseCommand):
 usersubs = UserSubscription.objects.filter(user=u, active=True)
 else:
 usersubs = UserSubscription.objects.filter(user=u, needs_unread_recalc=True)
-print " ---> %s has %s feeds (%s/%s)" % (u.username, usersubs.count(), i+1, user_count)
+print(" ---> %s has %s feeds (%s/%s)" % (u.username, usersubs.count(), i+1, user_count))
 for sub in usersubs:
 try:
 sub.calculate_feed_scores(silent=options['silent'])
-except Exception, e:
-print " ***> Exception: %s" % e
+except Exception as e:
+print(" ***> Exception: %s" % e)
 continue
 
 def daemonize():
@@ -56,12 +56,12 @@ def daemonize():
 os.setsid()
 if os.fork(): # launch child and...
 os._exit(0) # kill off parent again.
-os.umask(077)
+os.umask(0o77)
 null = os.open("/dev/null", os.O_RDWR)
 for i in range(3):
 try:
 os.dup2(null, i)
-except OSError, e:
+except OSError as e:
 if e.errno != errno.EBADF:
 raise
 os.close(null)
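Bare leading-zero octal literals were removed from Python 3, so 077 must be written 0o77; the numeric value of the umask is unchanged, and the 0o form also parses on Python 2.6+. For example:

    os.umask(0o77)   # equivalent to the old 077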
@@ -20,4 +20,4 @@ class Command(BaseCommand):
 for feed in feeds:
 feed.count_stories(verbose=options['verbose'])
 
-print "\nCounted %s feeds" % feeds.count()
+print("\nCounted %s feeds" % feeds.count())
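print changes from a statement to a function in Python 3, which accounts for most of the mechanical churn in this commit; the old trailing comma that suppressed the newline becomes the end=' ' keyword. Sketch:

    print("\nCounted %s feeds" % feeds.count())   # statement -> function call
    print('.', end=' ')                           # Python 2: print '.',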
@@ -19,16 +19,16 @@ class Command(BaseCommand):
 
 feeds_count = feeds.count()
 
-for i in xrange(0, feeds_count, 100):
+for i in range(0, feeds_count, 100):
 feeds = Feed.objects.all()[i:i+100]
 for feed in feeds.iterator():
 feed.count_subscribers(verbose=options['verbose'])
 
 if options['delete']:
-print "# Deleting old feeds..."
+print("# Deleting old feeds...")
 old_feeds = Feed.objects.filter(num_subscribers=0)
 for feed in old_feeds:
 feed.count_subscribers(verbose=True)
 if feed.num_subscribers == 0:
-print ' ---> Deleting: [%s] %s' % (feed.pk, feed)
+print(' ---> Deleting: [%s] %s' % (feed.pk, feed))
 feed.delete()
@@ -16,7 +16,7 @@ class Command(BaseCommand):
 elif options['username']:
 user = User.objects.get(username__icontains=options['username'])
 else:
-raise Exception, "Need username or user id."
+raise Exception("Need username or user id.")
 
 user.profile.last_seen_on = datetime.datetime.utcnow()
 user.profile.save()
@@ -39,10 +39,10 @@ class Command(BaseCommand):
 feeds = Feed.objects.filter(next_scheduled_update__lte=now,
 average_stories_per_month__lt=options['skip'],
 active=True)
-print " ---> Skipping %s feeds" % feeds.count()
+print(" ---> Skipping %s feeds" % feeds.count())
 for feed in feeds:
 feed.set_next_scheduled_update()
-print '.',
+print('.', end=' ')
 return
 
 socket.setdefaulttimeout(options['timeout'])
@@ -82,5 +82,5 @@ class Command(BaseCommand):
 
 django.db.connection.close()
 
-print " ---> Fetching %s feeds..." % feeds.count()
+print(" ---> Fetching %s feeds..." % feeds.count())
 disp.run_jobs()
@@ -56,11 +56,11 @@ class Command(BaseCommand):
 execution_time = time.time() - starttime
 raw_sql = self.db.ops.last_executed_query(self.cursor, sql, params)
 if sqlparse:
-print(sqlparse.format(raw_sql, reindent=True))
+print((sqlparse.format(raw_sql, reindent=True)))
 else:
 print(raw_sql)
 print("")
-print('Execution time: %.6fs [Database: %s]' % (execution_time, self.db.alias))
+print(('Execution time: %.6fs [Database: %s]' % (execution_time, self.db.alias)))
 print("")
 
 util.CursorDebugWrapper = PrintQueryWrapper
@@ -154,7 +154,7 @@ class Command(BaseCommand):
 except ImportError:
 import traceback
 traceback.print_exc()
-print(self.style.ERROR("Could not load '%s' Python environment." % SETTINGS_SHELL_PLUS))
+print((self.style.ERROR("Could not load '%s' Python environment." % SETTINGS_SHELL_PLUS)))
 else:
 for shell_name, func in shells:
 try:
@@ -166,5 +166,5 @@ class Command(BaseCommand):
 else:
 import traceback
 traceback.print_exc()
-print(self.style.ERROR("Could not load any interactive Python environment."))
+print((self.style.ERROR("Could not load any interactive Python environment.")))
 
@@ -4,4 +4,4 @@ import redis
 from apps.social.models import *
 
 r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
-print "Redis: %s" % r
+print("Redis: %s" % r)
@@ -21,7 +21,7 @@ def import_objects(options, style):
 
 model_aliases = getattr(settings, 'SHELL_PLUS_MODEL_ALIASES', {})
 
-for app_mod in apps.app_configs.items():
+for app_mod in list(apps.app_configs.items()):
 app_models = apps.get_models(app_mod)
 if not app_models:
 continue
@@ -50,9 +50,9 @@ def import_objects(options, style):
 
 except AttributeError as e:
 if not quiet_load:
-print(style.ERROR("Failed to import '%s' from '%s' reason: %s" % (model.__name__, app_name, str(e))))
+print((style.ERROR("Failed to import '%s' from '%s' reason: %s" % (model.__name__, app_name, str(e)))))
 continue
 if not quiet_load:
-print(style.SQL_COLTYPE("From '%s' autoload: %s" % (app_mod.__name__.split('.')[-2], ", ".join(model_labels))))
+print((style.SQL_COLTYPE("From '%s' autoload: %s" % (app_mod.__name__.split('.')[-2], ", ".join(model_labels)))))
 
 return imported_objects
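On Python 3, dict.items(), .keys() and .values() return live view objects rather than lists, so 2to3 wraps call sites in list() whenever the result might be indexed, sliced, or mutated during iteration; a plain for-loop over the view needs no wrapper. Illustrative sketch with made-up data:

    aliases = {'Feed': 'f', 'MStory': 's'}
    for name, alias in aliases.items():      # iterating a view is fine
        print(name, alias)
    first = list(aliases.items())[0]         # indexing needs a real list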
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import unicode_literals
+
 
 from django.db import models, migrations
 import utils.fields
apps/rss_feeds/migrations/0001_initial.py.bak (new file, 94 lines)
@ -0,0 +1,94 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import models, migrations
|
||||
import utils.fields
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name='DuplicateFeed',
|
||||
fields=[
|
||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||
('duplicate_address', models.CharField(max_length=764, db_index=True)),
|
||||
('duplicate_link', models.CharField(max_length=764, null=True, db_index=True)),
|
||||
('duplicate_feed_id', models.CharField(max_length=255, null=True, db_index=True)),
|
||||
],
|
||||
options={
|
||||
},
|
||||
bases=(models.Model,),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='Feed',
|
||||
fields=[
|
||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||
('feed_address', models.URLField(max_length=764, db_index=True)),
|
||||
('feed_address_locked', models.NullBooleanField(default=False)),
|
||||
('feed_link', models.URLField(default=b'', max_length=1000, null=True, blank=True)),
|
||||
('feed_link_locked', models.BooleanField(default=False)),
|
||||
('hash_address_and_link', models.CharField(unique=True, max_length=64)),
|
||||
('feed_title', models.CharField(default=b'[Untitled]', max_length=255, null=True, blank=True)),
|
||||
('is_push', models.NullBooleanField(default=False)),
|
||||
('active', models.BooleanField(default=True, db_index=True)),
|
||||
('num_subscribers', models.IntegerField(default=-1)),
|
||||
('active_subscribers', models.IntegerField(default=-1, db_index=True)),
|
||||
('premium_subscribers', models.IntegerField(default=-1)),
|
||||
('active_premium_subscribers', models.IntegerField(default=-1)),
|
||||
('last_update', models.DateTimeField(db_index=True)),
|
||||
('next_scheduled_update', models.DateTimeField()),
|
||||
('last_story_date', models.DateTimeField(null=True, blank=True)),
|
||||
('fetched_once', models.BooleanField(default=False)),
|
||||
('known_good', models.BooleanField(default=False)),
|
||||
('has_feed_exception', models.BooleanField(default=False, db_index=True)),
|
||||
('has_page_exception', models.BooleanField(default=False, db_index=True)),
|
||||
('has_page', models.BooleanField(default=True)),
|
||||
('exception_code', models.IntegerField(default=0)),
|
||||
('errors_since_good', models.IntegerField(default=0)),
|
||||
('min_to_decay', models.IntegerField(default=0)),
|
||||
('days_to_trim', models.IntegerField(default=90)),
|
||||
('creation', models.DateField(auto_now_add=True)),
|
||||
('etag', models.CharField(max_length=255, null=True, blank=True)),
|
||||
('last_modified', models.DateTimeField(null=True, blank=True)),
|
||||
('stories_last_month', models.IntegerField(default=0)),
|
||||
('average_stories_per_month', models.IntegerField(default=0)),
|
||||
('last_load_time', models.IntegerField(default=0)),
|
||||
('favicon_color', models.CharField(max_length=6, null=True, blank=True)),
|
||||
('favicon_not_found', models.BooleanField(default=False)),
|
||||
('s3_page', models.NullBooleanField(default=False)),
|
||||
('s3_icon', models.NullBooleanField(default=False)),
|
||||
('search_indexed', models.NullBooleanField(default=None)),
|
||||
('branch_from_feed', models.ForeignKey(blank=True, to='rss_feeds.Feed', null=True)),
|
||||
],
|
||||
options={
|
||||
'ordering': ['feed_title'],
|
||||
'db_table': 'feeds',
|
||||
},
|
||||
bases=(models.Model,),
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name='FeedData',
|
||||
fields=[
|
||||
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
|
||||
('feed_tagline', models.CharField(max_length=1024, null=True, blank=True)),
|
||||
('story_count_history', models.TextField(null=True, blank=True)),
|
||||
('feed_classifier_counts', models.TextField(null=True, blank=True)),
|
||||
('popular_tags', models.CharField(max_length=1024, null=True, blank=True)),
|
||||
('popular_authors', models.CharField(max_length=2048, null=True, blank=True)),
|
||||
('feed', utils.fields.AutoOneToOneField(related_name=b'data', to='rss_feeds.Feed')),
|
||||
],
|
||||
options={
|
||||
},
|
||||
bases=(models.Model,),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='duplicatefeed',
|
||||
name='feed',
|
||||
field=models.ForeignKey(related_name=b'duplicate_addresses', to='rss_feeds.Feed'),
|
||||
preserve_default=True,
|
||||
),
|
||||
]
|
|
@ -10,26 +10,24 @@ import zlib
|
|||
import hashlib
|
||||
import redis
|
||||
import pymongo
|
||||
import HTMLParser
|
||||
import urlparse
|
||||
import html.parser
|
||||
import urllib.parse
|
||||
from collections import defaultdict
|
||||
from operator import itemgetter
|
||||
from bson.objectid import ObjectId
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from pyes.exceptions import NotFoundException
|
||||
from bs4 import BeautifulSoup
|
||||
# from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures
|
||||
from django.db import models
|
||||
from django.db import IntegrityError
|
||||
from django.conf import settings
|
||||
from django.db.models.query import QuerySet
|
||||
from django.db.utils import DatabaseError
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.urls import reverse
|
||||
from django.contrib.auth.models import User
|
||||
from django.contrib.sites.models import Site
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.utils.encoding import smart_str, smart_unicode
|
||||
from django.utils.encoding import smart_bytes, smart_text
|
||||
from mongoengine.queryset import OperationError, Q, NotUniqueError
|
||||
from mongoengine.base import ValidationError
|
||||
from vendor.timezones.utilities import localtime_for_timezone
|
||||
from apps.rss_feeds.tasks import UpdateFeeds, PushFeeds, ScheduleCountTagsForUser
|
||||
from apps.rss_feeds.text_importer import TextImporter
|
||||
|
@ -49,7 +47,7 @@ from utils.story_functions import strip_tags, htmldiff, strip_comments, strip_co
|
|||
from utils.story_functions import prep_for_search
|
||||
from utils.story_functions import create_imageproxy_signed_url
|
||||
|
||||
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
|
||||
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = list(range(4))
|
||||
|
||||
|
||||
class Feed(models.Model):
|
||||
|
@ -250,7 +248,7 @@ class Feed(models.Model):
|
|||
|
||||
try:
|
||||
super(Feed, self).save(*args, **kwargs)
|
||||
except IntegrityError, e:
|
||||
except IntegrityError as e:
|
||||
logging.debug(" ---> ~FRFeed save collision (%s), checking dupe hash..." % e)
|
||||
feed_address = self.feed_address or ""
|
||||
feed_link = self.feed_link or ""
|
||||
|
@ -274,7 +272,7 @@ class Feed(models.Model):
|
|||
return feed
|
||||
else:
|
||||
logging.debug(" ---> ~FRFeed is its own dupe? %s == %s" % (self, duplicate_feeds))
|
||||
except DatabaseError, e:
|
||||
except DatabaseError as e:
|
||||
logging.debug(" ---> ~FBFeed update failed, no change: %s / %s..." % (kwargs.get('update_fields', None), e))
|
||||
pass
|
||||
|
||||
|
@ -287,7 +285,7 @@ class Feed(models.Model):
|
|||
|
||||
last_pk = cls.objects.latest('pk').pk
|
||||
for f in xrange(offset, last_pk, 1000):
|
||||
print " ---> %s / %s (%.2s%%)" % (f, last_pk, float(f)/last_pk*100)
|
||||
print(f" ---> {f} / {last_pk} ({str(float(f)/last_pk*100)[:2]}%")
|
||||
feeds = Feed.objects.filter(pk__in=range(f, f+1000),
|
||||
active=True,
|
||||
active_subscribers__gte=subscribers)\
|
||||
|
@ -673,7 +671,7 @@ class Feed(models.Model):
|
|||
|
||||
try:
|
||||
feed_address, feed = _1()
|
||||
except TimeoutError, e:
|
||||
except TimeoutError as e:
|
||||
logging.debug(' ---> [%-30s] Feed address check timed out...' % (self.log_title[:30]))
|
||||
self.save_feed_history(505, 'Timeout', e)
|
||||
feed = self
|
||||
|
@ -886,14 +884,14 @@ class Feed(models.Model):
|
|||
|
||||
if verbose:
|
||||
if self.num_subscribers <= 1:
|
||||
print '.',
|
||||
print('.', end=' ')
|
||||
else:
|
||||
print "\n %s> %s subscriber%s: %s" % (
|
||||
print("\n %s> %s subscriber%s: %s" % (
|
||||
'-' * min(self.num_subscribers, 20),
|
||||
self.num_subscribers,
|
||||
'' if self.num_subscribers == 1 else 's',
|
||||
self.feed_title,
|
||||
),
|
||||
), end=' ')
|
||||
|
||||
def _split_favicon_color(self):
|
||||
color = self.favicon_color
|
||||
|
@ -971,8 +969,7 @@ class Feed(models.Model):
|
|||
self.save(update_fields=['stories_last_month'])
|
||||
|
||||
if verbose:
|
||||
print " ---> %s [%s]: %s stories last month" % (self.feed_title, self.pk,
|
||||
self.stories_last_month)
|
||||
print(f" ---> {self.feed} [{self.pk}]: {self.stories_last_month} stories last month")
|
||||
|
||||
def save_feed_story_history_statistics(self, current_counts=None):
|
||||
"""
|
||||
|
@ -1037,7 +1034,7 @@ class Feed(models.Model):
|
|||
for year in range(min_year, now.year+1):
|
||||
for month in range(1, 12+1):
|
||||
if datetime.datetime(year, month, 1) < now:
|
||||
key = u'%s-%s' % (year, month)
|
||||
key = '%s-%s' % (year, month)
|
||||
if dates.get(key) or start:
|
||||
start = True
|
||||
months.append((key, dates.get(key, 0)))
|
||||
|
@ -1083,7 +1080,7 @@ class Feed(models.Model):
|
|||
scores = []
|
||||
res = cls.objects(feed_id=self.pk).map_reduce(map_f, reduce_f, output='inline')
|
||||
for r in res:
|
||||
facet_values = dict([(k, int(v)) for k,v in r.value.iteritems()])
|
||||
facet_values = dict([(k, int(v)) for k,v in r.value.items()])
|
||||
facet_values[facet] = r.key
|
||||
if facet_values['pos'] + facet_values['neg'] >= 1:
|
||||
scores.append(facet_values)
|
||||
|
@ -1111,7 +1108,7 @@ class Feed(models.Model):
|
|||
|
||||
@property
|
||||
def user_agent(self):
|
||||
feed_parts = urlparse.urlparse(self.feed_address)
|
||||
feed_parts = urllib.parse.urlparse(self.feed_address)
|
||||
if feed_parts.netloc.find('.tumblr.com') != -1:
|
||||
# Certain tumblr feeds will redirect to tumblr's login page when fetching.
|
||||
# A known workaround is using facebook's user agent.
|
||||
|
@ -1146,7 +1143,7 @@ class Feed(models.Model):
|
|||
def update(self, **kwargs):
|
||||
try:
|
||||
from utils import feed_fetcher
|
||||
except ImportError, e:
|
||||
except ImportError as e:
|
||||
logging.info(" ***> ~BR~FRImportError: %s" % e)
|
||||
return
|
||||
r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
|
||||
|
@ -1170,7 +1167,7 @@ class Feed(models.Model):
|
|||
}
|
||||
|
||||
if getattr(settings, 'TEST_DEBUG', False):
|
||||
print " ---> Testing feed fetch: %s" % self.log_title
|
||||
print(" ---> Testing feed fetch: %s" % self.log_title)
|
||||
# options['force_fp'] = True # No, why would this be needed?
|
||||
original_feed_address = self.feed_address
|
||||
original_feed_link = self.feed_link
|
||||
|
@ -1245,7 +1242,7 @@ class Feed(models.Model):
|
|||
logging.debug(" ---> [%-30s] ~FBChecking ~SB%s~SN new/updated against ~SB%s~SN stories" % (
|
||||
self.log_title[:30],
|
||||
len(stories),
|
||||
len(existing_stories.keys())))
|
||||
len(list(existing_stories.keys()))))
|
||||
@timelimit(2)
|
||||
def _1(story, story_content, existing_stories, new_story_hashes):
|
||||
existing_story, story_has_changed = self._exists_story(story, story_content,
|
||||
|
@ -1271,7 +1268,7 @@ class Feed(models.Model):
|
|||
try:
|
||||
existing_story, story_has_changed = _1(story, story_content,
|
||||
existing_stories, new_story_hashes)
|
||||
except TimeoutError, e:
|
||||
except TimeoutError as e:
|
||||
logging.debug(' ---> [%-30s] ~SB~FRExisting story check timed out...' % (self.log_title[:30]))
|
||||
existing_story = None
|
||||
story_has_changed = False
|
||||
|
@ -1293,7 +1290,7 @@ class Feed(models.Model):
|
|||
s.save()
|
||||
ret_values['new'] += 1
|
||||
s.publish_to_subscribers()
|
||||
except (IntegrityError, OperationError), e:
|
||||
except (IntegrityError, OperationError) as e:
|
||||
ret_values['error'] += 1
|
||||
if settings.DEBUG:
|
||||
logging.info(' ---> [%-30s] ~SN~FRIntegrityError on new story: %s - %s' % (self.feed_title[:30], story.get('guid'), e))
|
||||
|
@ -1316,7 +1313,7 @@ class Feed(models.Model):
|
|||
original_only=True)
|
||||
else:
|
||||
raise MStory.DoesNotExist
|
||||
except (MStory.DoesNotExist, OperationError), e:
|
||||
except (MStory.DoesNotExist, OperationError) as e:
|
||||
ret_values['error'] += 1
|
||||
if verbose:
|
||||
logging.info(' ---> [%-30s] ~SN~FROperation on existing story: %s - %s' % (self.feed_title[:30], story.get('guid'), e))
|
||||
|
@ -1331,7 +1328,7 @@ class Feed(models.Model):
|
|||
# Don't mangle stories with code, just use new
|
||||
story_content_diff = story_content
|
||||
else:
|
||||
story_content_diff = htmldiff(smart_unicode(original_content), smart_unicode(story_content))
|
||||
story_content_diff = htmldiff(smart_text(original_content), smart_text(story_content))
|
||||
else:
|
||||
story_content_diff = original_content
|
||||
# logging.debug("\t\tDiff: %s %s %s" % diff.getStats())
|
||||
|
@ -1405,12 +1402,12 @@ class Feed(models.Model):
|
|||
if not feed_tags:
|
||||
all_tags = MStory.objects(story_feed_id=self.pk,
|
||||
story_tags__exists=True).item_frequencies('story_tags')
|
||||
feed_tags = sorted([(k, v) for k, v in all_tags.items() if int(v) > 0],
|
||||
feed_tags = sorted([(k, v) for k, v in list(all_tags.items()) if int(v) > 0],
|
||||
key=itemgetter(1),
|
||||
reverse=True)[:25]
|
||||
popular_tags = json.encode(feed_tags)
|
||||
if verbose:
|
||||
print "Found %s tags: %s" % (len(feed_tags), popular_tags)
|
||||
print("Found %s tags: %s" % (len(feed_tags), popular_tags))
|
||||
|
||||
# TODO: This len() bullshit will be gone when feeds move to mongo
|
||||
# On second thought, it might stay, because we don't want
|
||||
|
@ -1423,7 +1420,7 @@ class Feed(models.Model):
|
|||
return
|
||||
|
||||
tags_list = []
|
||||
if feed_tags and isinstance(feed_tags, unicode):
|
||||
if feed_tags and isinstance(feed_tags, str):
|
||||
tags_list = json.decode(feed_tags)
|
||||
if len(tags_list) >= 1:
|
||||
self.save_popular_tags(tags_list[:-1])
|
||||
|
@ -1433,7 +1430,7 @@ class Feed(models.Model):
|
|||
authors = defaultdict(int)
|
||||
for story in MStory.objects(story_feed_id=self.pk).only('story_author_name'):
|
||||
authors[story.story_author_name] += 1
|
||||
feed_authors = sorted([(k, v) for k, v in authors.items() if k],
|
||||
feed_authors = sorted([(k, v) for k, v in list(authors.items()) if k],
|
||||
key=itemgetter(1),
|
||||
reverse=True)[:20]
|
||||
|
||||
|
@ -1453,9 +1450,9 @@ class Feed(models.Model):
|
|||
month_ago = now - datetime.timedelta(days=settings.DAYS_OF_STORY_HASHES)
|
||||
feed_count = Feed.objects.latest('pk').pk
|
||||
|
||||
for feed_id in xrange(start, feed_count):
|
||||
for feed_id in range(start, feed_count):
|
||||
if feed_id % 1000 == 0:
|
||||
print "\n\n -------------------------- %s (%s deleted so far) --------------------------\n\n" % (feed_id, total)
|
||||
print("\n\n -------------------------- %s (%s deleted so far) --------------------------\n\n" % (feed_id, total))
|
||||
try:
|
||||
feed = Feed.objects.get(pk=feed_id)
|
||||
except Feed.DoesNotExist:
|
||||
|
@ -1466,17 +1463,17 @@ class Feed(models.Model):
|
|||
months_ago = int((now - feed.last_story_date).days / 30.0)
|
||||
cutoff = max(1, 6 - months_ago)
|
||||
if dryrun:
|
||||
print " DRYRUN: %s cutoff - %s" % (cutoff, feed)
|
||||
print(" DRYRUN: %s cutoff - %s" % (cutoff, feed))
|
||||
else:
|
||||
total += MStory.trim_feed(feed=feed, cutoff=cutoff, verbose=verbose)
|
||||
else:
|
||||
if dryrun:
|
||||
print " DRYRUN: %s/%s cutoff - %s" % (cutoff, feed.story_cutoff, feed)
|
||||
print(" DRYRUN: %s/%s cutoff - %s" % (cutoff, feed.story_cutoff, feed))
|
||||
else:
|
||||
total += feed.trim_feed(verbose=verbose)
|
||||
|
||||
|
||||
print " ---> Deleted %s stories in total." % total
|
||||
print(" ---> Deleted %s stories in total." % total)
|
||||
|
||||
@property
|
||||
def story_cutoff(self):
|
||||
|
@ -1517,7 +1514,7 @@ class Feed(models.Model):
|
|||
cutoff = min(cutoff, 10)
|
||||
try:
|
||||
logging.debug(" ---> [%-30s] ~FBTrimming down to ~SB%s (instead of %s)~SN stories (~FM%s~FB)" % (self.log_title[:30], cutoff, original_cutoff, self.last_story_date.strftime("%Y-%m-%d") if self.last_story_date else "No last story date"))
|
||||
except ValueError, e:
|
||||
except ValueError as e:
|
||||
logging.debug(" ***> [%-30s] Error trimming: %s" % (self.log_title[:30], e))
|
||||
pass
|
||||
|
||||
|
@ -1621,7 +1618,7 @@ class Feed(models.Model):
|
|||
popularity[feed_id]['ng'] = -1 * classifier['neg']
|
||||
popularity[feed_id]['story_ids'].append(story_hash)
|
||||
|
||||
sorted_popularity = sorted(popularity.values(), key=lambda x: x['reach_score'],
|
||||
sorted_popularity = sorted(list(popularity.values()), key=lambda x: x['reach_score'],
|
||||
reverse=True)
|
||||
|
||||
# Extract story authors from feeds
|
||||
|
@ -1671,7 +1668,7 @@ class Feed(models.Model):
|
|||
author['tags'][tag]['ps'] = classifier['pos']
|
||||
author['tags'][tag]['ng'] = -1 * classifier['neg']
|
||||
|
||||
sorted_authors = sorted(feed['authors'].values(), key=lambda x: x['count'])
|
||||
sorted_authors = sorted(list(feed['authors'].values()), key=lambda x: x['count'])
|
||||
feed['authors'] = sorted_authors
|
||||
|
||||
# pprint(sorted_popularity)
|
||||
|
@ -1716,7 +1713,7 @@ class Feed(models.Model):
|
|||
import xlsxwriter
|
||||
from xlsxwriter.utility import xl_rowcol_to_cell
|
||||
|
||||
if isinstance(queries, unicode):
|
||||
if isinstance(queries, str):
|
||||
queries = [q.strip() for q in queries.split(',')]
|
||||
|
||||
title = 'NewsBlur-%s.xlsx' % slugify('-'.join(queries))
|
||||
|
@ -1818,7 +1815,7 @@ class Feed(models.Model):
|
|||
worksheet.write_url(row, col+4, story['url'])
|
||||
worksheet.write_datetime(row, col+5, story['date'], date_format)
|
||||
row += 1
|
||||
for tag in author['tags'].values():
|
||||
for tag in list(author['tags'].values()):
|
||||
worksheet.conditional_format(row, col+7, row, col+9, {'type': 'cell',
|
||||
'criteria': '==',
|
||||
'value': 0,
|
||||
|
@ -1855,7 +1852,7 @@ class Feed(models.Model):
|
|||
@classmethod
|
||||
def format_story(cls, story_db, feed_id=None, text=False, include_permalinks=False,
|
||||
show_changes=False):
|
||||
if isinstance(story_db.story_content_z, unicode):
|
||||
if isinstance(story_db.story_content_z, str):
|
||||
story_db.story_content_z = story_db.story_content_z.decode('base64')
|
||||
|
||||
story_content = ''
|
||||
|
@ -1864,9 +1861,9 @@ class Feed(models.Model):
|
|||
if (not show_changes and
|
||||
hasattr(story_db, 'story_latest_content_z') and
|
||||
story_db.story_latest_content_z):
|
||||
latest_story_content = smart_unicode(zlib.decompress(story_db.story_latest_content_z))
|
||||
latest_story_content = smart_text(zlib.decompress(story_db.story_latest_content_z))
|
||||
if story_db.story_content_z:
|
||||
story_content = smart_unicode(zlib.decompress(story_db.story_content_z))
|
||||
story_content = smart_text(zlib.decompress(story_db.story_content_z))
|
||||
|
||||
if '<ins' in story_content or '<del' in story_content:
|
||||
has_changes = True
|
||||
|
@ -1934,7 +1931,7 @@ class Feed(models.Model):
|
|||
signed_urls = [create_imageproxy_signed_url(settings.IMAGES_URL,
|
||||
settings.IMAGES_SECRET_KEY,
|
||||
url) for url in urls]
|
||||
return dict(zip(urls, signed_urls))
|
||||
return dict(list(zip(urls, signed_urls)))
|
||||
|
||||
@classmethod
|
||||
def secure_image_thumbnails(cls, urls, size=192):
|
||||
|
@ -1942,11 +1939,11 @@ class Feed(models.Model):
|
|||
settings.IMAGES_SECRET_KEY,
|
||||
url,
|
||||
size) for url in urls]
|
||||
return dict(zip(urls, signed_urls))
|
||||
return dict(list(zip(urls, signed_urls)))
|
||||
|
||||
def get_tags(self, entry):
|
||||
fcat = []
|
||||
if entry.has_key('tags'):
|
||||
if 'tags' in entry:
|
||||
for tcat in entry.tags:
|
||||
term = None
|
||||
if hasattr(tcat, 'label') and tcat.label:
|
||||
|
@ -1986,18 +1983,18 @@ class Feed(models.Model):
|
|||
story_in_system = None
|
||||
story_has_changed = False
|
||||
story_link = self.get_permalink(story)
|
||||
existing_stories_hashes = existing_stories.keys()
|
||||
existing_stories_hashes = list(existing_stories.keys())
|
||||
story_pub_date = story.get('published')
|
||||
# story_published_now = story.get('published_now', False)
|
||||
# start_date = story_pub_date - datetime.timedelta(hours=8)
|
||||
# end_date = story_pub_date + datetime.timedelta(hours=8)
|
||||
|
||||
for existing_story in existing_stories.values():
|
||||
for existing_story in list(existing_stories.values()):
|
||||
content_ratio = 0
|
||||
# existing_story_pub_date = existing_story.story_date
|
||||
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
|
||||
|
||||
if isinstance(existing_story.id, unicode):
|
||||
if isinstance(existing_story.id, str):
|
||||
# Correcting a MongoDB bug
|
||||
existing_story.story_guid = existing_story.id
|
||||
|
||||
|
@ -2013,15 +2010,15 @@ class Feed(models.Model):
|
|||
continue
|
||||
|
||||
if 'story_latest_content_z' in existing_story:
|
||||
existing_story_content = smart_unicode(zlib.decompress(existing_story.story_latest_content_z))
|
||||
existing_story_content = smart_text(zlib.decompress(existing_story.story_latest_content_z))
|
||||
elif 'story_latest_content' in existing_story:
|
||||
existing_story_content = existing_story.story_latest_content
|
||||
elif 'story_content_z' in existing_story:
|
||||
existing_story_content = smart_unicode(zlib.decompress(existing_story.story_content_z))
|
||||
existing_story_content = smart_text(zlib.decompress(existing_story.story_content_z))
|
||||
elif 'story_content' in existing_story:
|
||||
existing_story_content = existing_story.story_content
|
||||
else:
|
||||
existing_story_content = u''
|
||||
existing_story_content = ''
|
||||
|
||||
|
||||
# Title distance + content distance, checking if story changed
|
||||
|
@ -2304,7 +2301,7 @@ class FeedData(models.Model):
|
|||
super(FeedData, self).save(*args, **kwargs)
|
||||
except (IntegrityError, OperationError):
|
||||
if hasattr(self, 'id') and self.id: self.delete()
|
||||
except DatabaseError, e:
|
||||
except DatabaseError as e:
|
||||
# Nothing updated
|
||||
logging.debug(" ---> ~FRNothing updated in FeedData (%s): %s" % (self.feed, e))
|
||||
pass
|
||||
|
@ -2337,7 +2334,7 @@ class MFeedIcon(mongo.Document):
|
|||
|
||||
def save(self, *args, **kwargs):
|
||||
if self.icon_url:
|
||||
self.icon_url = unicode(self.icon_url)
|
||||
self.icon_url = str(self.icon_url)
|
||||
try:
|
||||
return super(MFeedIcon, self).save(*args, **kwargs)
|
||||
except (IntegrityError, OperationError):
|
||||
|
@ -2441,7 +2438,7 @@ class MStory(mongo.Document):
|
|||
|
||||
@property
|
||||
def decoded_story_title(self):
|
||||
h = HTMLParser.HTMLParser()
|
||||
h = html.parser.HTMLParser()
|
||||
return h.unescape(self.story_title)
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
|
@ -2452,13 +2449,13 @@ class MStory(mongo.Document):
|
|||
self.extract_image_urls()
|
||||
|
||||
if self.story_content:
|
||||
self.story_content_z = zlib.compress(smart_str(self.story_content))
|
||||
self.story_content_z = zlib.compress(smart_bytes(self.story_content))
|
||||
self.story_content = None
|
||||
if self.story_original_content:
|
||||
self.story_original_content_z = zlib.compress(smart_str(self.story_original_content))
|
||||
self.story_original_content_z = zlib.compress(smart_bytes(self.story_original_content))
|
||||
self.story_original_content = None
|
||||
if self.story_latest_content:
|
||||
self.story_latest_content_z = zlib.compress(smart_str(self.story_latest_content))
|
||||
self.story_latest_content_z = zlib.compress(smart_bytes(self.story_latest_content))
|
||||
self.story_latest_content = None
|
||||
if self.story_title and len(self.story_title) > story_title_max:
|
||||
self.story_title = self.story_title[:story_title_max]
|
||||
|
@ -2499,9 +2496,9 @@ class MStory(mongo.Document):
|
|||
SearchStory.create_elasticsearch_mapping(delete=True)
|
||||
|
||||
last_pk = Feed.objects.latest('pk').pk
|
||||
for f in xrange(offset, last_pk, 1000):
|
||||
print " ---> %s / %s (%.2s%%)" % (f, last_pk, float(f)/last_pk*100)
|
||||
feeds = Feed.objects.filter(pk__in=range(f, f+1000),
|
||||
for f in range(offset, last_pk, 1000):
|
||||
print(" ---> %s / %s (%.2s%%)" % (f, last_pk, float(f)/last_pk*100))
|
||||
feeds = Feed.objects.filter(pk__in=list(range(f, f+1000)),
|
||||
active=True,
|
||||
active_subscribers__gte=1)\
|
||||
.values_list('pk')
|
||||
|
@ -2525,7 +2522,7 @@ class MStory(mongo.Document):
|
|||
def remove_from_search_index(self):
|
||||
try:
|
||||
SearchStory.remove(self.story_hash)
|
||||
except NotFoundException:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
|
@ -2545,14 +2542,14 @@ class MStory(mongo.Document):
|
|||
|
||||
if stories.count() > cutoff:
|
||||
logging.debug(' ---> [%-30s] ~FMFound %s stories. Trimming to ~SB%s~SN...' %
|
||||
(unicode(feed)[:30], stories.count(), cutoff))
|
||||
(str(feed)[:30], stories.count(), cutoff))
|
||||
try:
|
||||
story_trim_date = stories[cutoff].story_date
|
||||
if story_trim_date == stories[0].story_date:
|
||||
# Handle case where every story is the same time
|
||||
story_trim_date = story_trim_date - datetime.timedelta(seconds=1)
|
||||
except IndexError, e:
|
||||
logging.debug(' ***> [%-30s] ~BRError trimming feed: %s' % (unicode(feed)[:30], e))
|
||||
except IndexError as e:
|
||||
logging.debug(' ***> [%-30s] ~BRError trimming feed: %s' % (str(feed)[:30], e))
|
||||
return extra_stories_count
|
||||
|
||||
extra_stories = cls.objects(story_feed_id=feed_id,
|
||||
|
@ -2901,7 +2898,7 @@ class MStarredStory(mongo.DynamicDocument):
|
|||
|
||||
@classmethod
|
||||
def trim_old_stories(cls, stories=10, days=90, dryrun=False):
|
||||
print " ---> Fetching starred story counts..."
|
||||
print(" ---> Fetching starred story counts...")
|
||||
stats = settings.MONGODB.newsblur.starred_stories.aggregate([{
|
||||
"$group": {
|
||||
"_id": "$user_id",
|
||||
|
@ -2915,7 +2912,7 @@ class MStarredStory(mongo.DynamicDocument):
|
|||
month_ago = datetime.datetime.now() - datetime.timedelta(days=days)
|
||||
user_ids = list(stats)
|
||||
user_ids = sorted(user_ids, key=lambda x:x['stories'], reverse=True)
|
||||
print " ---> Found %s users with more than %s starred stories" % (len(user_ids), stories)
|
||||
print(" ---> Found %s users with more than %s starred stories" % (len(user_ids), stories))
|
||||
|
||||
total = 0
|
||||
for stat in user_ids:
|
||||
|
@ -2929,17 +2926,17 @@ class MStarredStory(mongo.DynamicDocument):
|
|||
|
||||
total += stat['stories']
|
||||
username = "%s (%s)" % (user and user.username or " - ", stat['_id'])
|
||||
print " ---> %19.19s: %-20.20s %s stories" % (user and user.profile.last_seen_on or "Deleted",
|
||||
print(" ---> %19.19s: %-20.20s %s stories" % (user and user.profile.last_seen_on or "Deleted",
|
||||
username,
|
||||
stat['stories'])
|
||||
stat['stories']))
|
||||
if not dryrun and stat['_id']:
|
||||
cls.objects.filter(user_id=stat['_id']).delete()
|
||||
elif not dryrun and stat['_id'] == 0:
|
||||
print " ---> Deleting unstarred stories (user_id = 0)"
|
||||
print(" ---> Deleting unstarred stories (user_id = 0)")
|
||||
cls.objects.filter(user_id=stat['_id']).delete()
|
||||
|
||||
|
||||
print " ---> Deleted %s stories in total." % total
|
||||
print(" ---> Deleted %s stories in total." % total)
|
||||
|
||||
@property
|
||||
def guid_hash(self):
|
||||
|
@ -3036,7 +3033,7 @@ class MStarredStoryCounts(mongo.Document):
|
|||
try:
|
||||
user_tags = cls.count_tags_for_user(user_id)
|
||||
user_feeds = cls.count_feeds_for_user(user_id)
|
||||
except pymongo.errors.OperationFailure, e:
|
||||
except pymongo.errors.OperationFailure as e:
|
||||
logging.debug(" ---> ~FBOperationError on mongo: ~SB%s" % e)
|
||||
|
||||
total_stories_count = MStarredStory.objects(user_id=user_id).count()
|
||||
|
@ -3049,11 +3046,11 @@ class MStarredStoryCounts(mongo.Document):
|
|||
def count_tags_for_user(cls, user_id):
|
||||
all_tags = MStarredStory.objects(user_id=user_id,
|
||||
user_tags__exists=True).item_frequencies('user_tags')
|
||||
user_tags = sorted([(k, v) for k, v in all_tags.items() if int(v) > 0 and k],
|
||||
user_tags = sorted([(k, v) for k, v in list(all_tags.items()) if int(v) > 0 and k],
|
||||
key=lambda x: x[0].lower(),
|
||||
reverse=True)
|
||||
|
||||
for tag, count in dict(user_tags).items():
|
||||
for tag, count in list(dict(user_tags).items()):
|
||||
cls.objects(user_id=user_id, tag=tag, slug=slugify(tag)).update_one(set__count=count,
|
||||
upsert=True)
|
||||
|
||||
|
@ -3062,7 +3059,7 @@ class MStarredStoryCounts(mongo.Document):
|
|||
@classmethod
|
||||
def count_feeds_for_user(cls, user_id):
|
||||
all_feeds = MStarredStory.objects(user_id=user_id).item_frequencies('story_feed_id')
|
||||
user_feeds = dict([(k, v) for k, v in all_feeds.items() if v])
|
||||
user_feeds = dict([(k, v) for k, v in list(all_feeds.items()) if v])
|
||||
|
||||
# Clean up None'd and 0'd feed_ids, so they can be counted against the total
|
||||
if user_feeds.get(None, False):
|
||||
|
@ -3074,7 +3071,7 @@ class MStarredStoryCounts(mongo.Document):
|
|||
del user_feeds[0]
|
||||
|
||||
too_many_feeds = False if len(user_feeds) < 1000 else True
|
||||
for feed_id, count in user_feeds.items():
|
||||
for feed_id, count in list(user_feeds.items()):
|
||||
if too_many_feeds and count <= 1: continue
|
||||
cls.objects(user_id=user_id,
|
||||
feed_id=feed_id,
|
||||
|
@ -3334,7 +3331,7 @@ def merge_feeds(original_feed_id, duplicate_feed_id, force=False):
|
|||
duplicate_feed_id=duplicate_feed.pk,
|
||||
feed=original_feed
|
||||
)
|
||||
except (IntegrityError, OperationError), e:
|
||||
except (IntegrityError, OperationError) as e:
|
||||
logging.info(" ***> Could not save DuplicateFeed: %s" % e)
|
||||
|
||||
# Switch this dupe feed's dupe feeds over to the new original.
|
||||
|
@ -3374,7 +3371,7 @@ def rewrite_folders(folders, original_feed, duplicate_feed):
|
|||
else:
|
||||
new_folders.append(folder)
|
||||
elif isinstance(folder, dict):
|
||||
for f_k, f_v in folder.items():
|
||||
for f_k, f_v in list(folder.items()):
|
||||
new_folders.append({f_k: rewrite_folders(f_v, original_feed, duplicate_feed)})
|
||||
|
||||
return new_folders
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
import requests
|
||||
import re
|
||||
import urlparse
|
||||
import urllib.parse
|
||||
import traceback
|
||||
import feedparser
|
||||
import time
|
||||
import urllib2
|
||||
import httplib
|
||||
import urllib.request, urllib.error, urllib.parse
|
||||
import http.client
|
||||
import zlib
|
||||
from mongoengine.queryset import NotUniqueError
|
||||
from socket import error as SocketError
|
||||
|
@ -86,8 +86,8 @@ class PageImporter(object):
|
|||
return
|
||||
elif feed_link.startswith('http'):
|
||||
if urllib_fallback:
|
||||
request = urllib2.Request(feed_link, headers=self.headers)
|
||||
response = urllib2.urlopen(request)
|
||||
request = urllib.request.Request(feed_link, headers=self.headers)
|
||||
response = urllib.request.urlopen(request)
|
||||
time.sleep(0.01) # Grrr, GIL.
|
||||
data = response.read()
|
||||
else:
|
||||
|
@ -96,7 +96,7 @@ class PageImporter(object):
|
|||
response.connection.close()
|
||||
except requests.exceptions.TooManyRedirects:
|
||||
response = requests.get(feed_link)
|
||||
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError), e:
|
||||
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError) as e:
|
||||
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
|
||||
self.save_no_page()
|
||||
return
|
||||
|
@ -127,23 +127,23 @@ class PageImporter(object):
|
|||
else:
|
||||
self.save_no_page()
|
||||
return
|
||||
except (ValueError, urllib2.URLError, httplib.BadStatusLine, httplib.InvalidURL,
|
||||
requests.exceptions.ConnectionError), e:
|
||||
except (ValueError, urllib.error.URLError, http.client.BadStatusLine, http.client.InvalidURL,
|
||||
requests.exceptions.ConnectionError) as e:
|
||||
self.feed.save_page_history(401, "Bad URL", e)
|
||||
fp = feedparser.parse(self.feed.feed_address)
|
||||
feed_link = fp.feed.get('link', "")
|
||||
self.feed.save()
|
||||
logging.debug(' ***> [%-30s] Page fetch failed: %s' % (self.feed.log_title[:30], e))
|
||||
except (urllib2.HTTPError), e:
|
||||
except (urllib.error.HTTPError) as e:
|
||||
self.feed.save_page_history(e.code, e.msg, e.fp.read())
|
||||
except (httplib.IncompleteRead), e:
|
||||
except (http.client.IncompleteRead) as e:
|
||||
self.feed.save_page_history(500, "IncompleteRead", e)
|
||||
except (requests.exceptions.RequestException,
|
||||
requests.packages.urllib3.exceptions.HTTPError), e:
|
||||
requests.packages.urllib3.exceptions.HTTPError) as e:
|
||||
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
|
||||
# mail_feed_error_to_admin(self.feed, e, local_vars=locals())
|
||||
return self.fetch_page(urllib_fallback=True, requests_exception=e)
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
logging.debug('[%d] ! -------------------------' % (self.feed.id,))
|
||||
tb = traceback.format_exc()
|
||||
logging.debug(tb)
|
||||
|
@ -188,10 +188,10 @@ class PageImporter(object):
|
|||
try:
|
||||
response = requests.get(story_permalink, headers=self.headers)
|
||||
response.connection.close()
|
||||
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
|
||||
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects) as e:
|
||||
try:
|
||||
response = requests.get(story_permalink)
|
||||
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
|
||||
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects) as e:
|
||||
logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e))
|
||||
return
|
||||
try:
|
||||
|
@ -207,7 +207,7 @@ class PageImporter(object):
|
|||
|
||||
if data:
|
||||
data = data.replace("\xc2\xa0", " ") # Non-breaking space, is mangled when encoding is not utf-8
|
||||
data = data.replace("\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
|
||||
data = data.replace("\\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
|
||||
html = self.rewrite_page(data)
|
||||
if not html:
|
||||
return
|
||||
|
@ -231,7 +231,7 @@ class PageImporter(object):
|
|||
|
||||
def rewrite_page(self, response):
|
||||
BASE_RE = re.compile(r'<head(.*?\>)', re.I)
|
||||
base_code = u'<base href="%s" />' % (self.feed.feed_link,)
|
||||
base_code = '<base href="%s" />' % (self.feed.feed_link,)
|
||||
try:
|
||||
html = BASE_RE.sub(r'<head\1 '+base_code, response)
|
||||
except:
|
||||
|
@ -258,9 +258,9 @@ class PageImporter(object):
|
|||
url = match.group(2)
|
||||
if url[0] in "\"'":
|
||||
url = url.strip(url[0])
|
||||
parsed = urlparse.urlparse(url)
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
if parsed.scheme == parsed.netloc == '': #relative to domain
|
||||
url = urlparse.urljoin(self.feed.feed_link, url)
|
||||
url = urllib.parse.urljoin(self.feed.feed_link, url)
|
||||
ret.append(document[last_end:match.start(2)])
|
||||
ret.append('"%s"' % (url,))
|
||||
last_end = match.end(2)
|
||||
|
|
|
@ -167,7 +167,7 @@ class UpdateFeeds(Task):
|
|||
continue
|
||||
try:
|
||||
feed.update(**options)
|
||||
except SoftTimeLimitExceeded, e:
|
||||
except SoftTimeLimitExceeded as e:
|
||||
feed.save_feed_history(505, 'Timeout', e)
|
||||
logging.info(" ---> [%-30s] ~BR~FWTime limit hit!~SB~FR Moving on to next feed..." % feed)
|
||||
if profiler_activated: profiler.process_celery_finished()
|
||||
|
|
|
@ -37,12 +37,12 @@ class FeedTest(TestCase):
|
|||
|
||||
feed = Feed.objects.get(feed_link__contains='gawker')
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 0)
|
||||
self.assertEqual(stories.count(), 0)
|
||||
|
||||
feed.update(force=True)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 38)
|
||||
self.assertEqual(stories.count(), 38)
|
||||
|
||||
management.call_command('loaddata', 'gawker2.json', verbosity=0, skip_checks=False)
|
||||
|
||||
|
@ -50,12 +50,12 @@ class FeedTest(TestCase):
|
|||
|
||||
# Test: 1 changed char in content
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 38)
|
||||
self.assertEqual(stories.count(), 38)
|
||||
|
||||
url = reverse('load-single-feed', kwargs=dict(feed_id=1))
|
||||
response = self.client.get(url)
|
||||
feed = json.decode(response.content)
|
||||
self.assertEquals(len(feed['stories']), 6)
|
||||
self.assertEqual(len(feed['stories']), 6)
|
||||
|
||||
def test_load_feeds__gothamist(self):
|
||||
self.client.login(username='conesus', password='test')
|
||||
|
@ -63,30 +63,30 @@ class FeedTest(TestCase):
|
|||
management.call_command('loaddata', 'gothamist_aug_2009_1.json', verbosity=0, skip_checks=False)
|
||||
feed = Feed.objects.get(feed_link__contains='gothamist')
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 0)
|
||||
self.assertEqual(stories.count(), 0)
|
||||
|
||||
feed.update(force=True)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 42)
|
||||
self.assertEqual(stories.count(), 42)
|
||||
|
||||
url = reverse('load-single-feed', kwargs=dict(feed_id=4))
|
||||
response = self.client.get(url)
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(len(content['stories']), 6)
|
||||
self.assertEqual(len(content['stories']), 6)
|
||||
|
||||
management.call_command('loaddata', 'gothamist_aug_2009_2.json', verbosity=0, skip_checks=False)
|
||||
feed.update(force=True)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 42)
|
||||
self.assertEqual(stories.count(), 42)
|
||||
|
||||
url = reverse('load-single-feed', kwargs=dict(feed_id=4))
|
||||
response = self.client.get(url)
|
||||
# print [c['story_title'] for c in json.decode(response.content)]
|
||||
content = json.decode(response.content)
|
||||
# Test: 1 changed char in title
|
||||
self.assertEquals(len(content['stories']), 6)
|
||||
self.assertEqual(len(content['stories']), 6)
|
||||
|
||||
def test_load_feeds__slashdot(self):
|
||||
self.client.login(username='conesus', password='test')
|
||||
|
@ -97,28 +97,28 @@ class FeedTest(TestCase):
|
|||
|
||||
feed = Feed.objects.get(feed_link__contains='slashdot')
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 0)
|
||||
self.assertEqual(stories.count(), 0)
|
||||
|
||||
management.call_command('refresh_feed', force=1, feed=5, single_threaded=True, daemonize=False, skip_checks=False)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 38)
|
||||
self.assertEqual(stories.count(), 38)
|
||||
|
||||
response = self.client.get(reverse('load-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds']['5']['nt'], 38)
|
||||
self.assertEqual(content['feeds']['5']['nt'], 38)
|
||||
|
||||
self.client.post(reverse('mark-story-as-read'), {'story_id': old_story_guid, 'feed_id': 5})
|
||||
|
||||
response = self.client.get(reverse('refresh-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds']['5']['nt'], 37)
|
||||
self.assertEqual(content['feeds']['5']['nt'], 37)
|
||||
|
||||
management.call_command('loaddata', 'slashdot2.json', verbosity=0, skip_checks=False)
|
||||
management.call_command('refresh_feed', force=1, feed=5, single_threaded=True, daemonize=False, skip_checks=False)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 38)
|
||||
self.assertEqual(stories.count(), 38)
|
||||
|
||||
url = reverse('load-single-feed', kwargs=dict(feed_id=5))
|
||||
response = self.client.get(url)
|
||||
|
@ -127,11 +127,11 @@ class FeedTest(TestCase):
|
|||
feed = json.decode(response.content)
|
||||
|
||||
# Test: 1 changed char in title
|
||||
self.assertEquals(len(feed['stories']), 6)
|
||||
self.assertEqual(len(feed['stories']), 6)
|
||||
|
||||
response = self.client.get(reverse('refresh-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds']['5']['nt'], 37)
|
||||
self.assertEqual(content['feeds']['5']['nt'], 37)
|
||||
|
||||
def test_load_feeds__motherjones(self):
|
||||
self.client.login(username='conesus', password='test')
|
||||
|
@ -140,28 +140,28 @@ class FeedTest(TestCase):
|
|||
|
||||
feed = Feed.objects.get(feed_link__contains='motherjones')
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 0)
|
||||
self.assertEqual(stories.count(), 0)
|
||||
|
||||
management.call_command('refresh_feed', force=1, feed=feed.pk, single_threaded=True, daemonize=False, skip_checks=False)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 10)
|
||||
self.assertEqual(stories.count(), 10)
|
||||
|
||||
response = self.client.get(reverse('load-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds'][str(feed.pk)]['nt'], 10)
|
||||
self.assertEqual(content['feeds'][str(feed.pk)]['nt'], 10)
|
||||
|
||||
self.client.post(reverse('mark-story-as-read'), {'story_id': stories[0].story_guid, 'feed_id': feed.pk})
|
||||
|
||||
response = self.client.get(reverse('refresh-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds'][str(feed.pk)]['nt'], 9)
|
||||
self.assertEqual(content['feeds'][str(feed.pk)]['nt'], 9)
|
||||
|
||||
management.call_command('loaddata', 'motherjones2.json', verbosity=0, skip_checks=False)
|
||||
management.call_command('refresh_feed', force=1, feed=feed.pk, single_threaded=True, daemonize=False, skip_checks=False)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 10)
|
||||
self.assertEqual(stories.count(), 10)
|
||||
|
||||
url = reverse('load-single-feed', kwargs=dict(feed_id=feed.pk))
|
||||
response = self.client.get(url)
|
||||
|
@ -170,11 +170,11 @@ class FeedTest(TestCase):
|
|||
feed = json.decode(response.content)
|
||||
|
||||
# Test: 1 changed char in title
|
||||
self.assertEquals(len(feed['stories']), 6)
|
||||
self.assertEqual(len(feed['stories']), 6)
|
||||
|
||||
response = self.client.get(reverse('refresh-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds'][str(feed['feed_id'])]['nt'], 9)
|
||||
self.assertEqual(content['feeds'][str(feed['feed_id'])]['nt'], 9)
|
||||
|
||||
def test_load_feeds__google(self):
|
||||
# Freezegun the date to 2017-04-30
|
||||
|
@ -183,33 +183,33 @@ class FeedTest(TestCase):
|
|||
old_story_guid = "blog.google:443/topics/inside-google/google-earths-incredible-3d-imagery-explained/"
|
||||
|
||||
management.call_command('loaddata', 'google1.json', verbosity=1, skip_checks=False)
|
||||
print Feed.objects.all()
|
||||
print(Feed.objects.all())
|
||||
feed = Feed.objects.get(pk=766)
|
||||
print " Testing test_load_feeds__google: %s" % feed
|
||||
print(" Testing test_load_feeds__google: %s" % feed)
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 0)
|
||||
self.assertEqual(stories.count(), 0)
|
||||
|
||||
management.call_command('refresh_feed', force=False, feed=766, single_threaded=True, daemonize=False, skip_checks=False)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 20)
|
||||
self.assertEqual(stories.count(), 20)
|
||||
|
||||
response = self.client.get(reverse('load-feeds')+"?update_counts=true")
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds']['766']['nt'], 20)
|
||||
self.assertEqual(content['feeds']['766']['nt'], 20)
|
||||
|
||||
old_story = MStory.objects.get(story_feed_id=feed.pk, story_guid__contains=old_story_guid)
|
||||
self.client.post(reverse('mark-story-hashes-as-read'), {'story_hash': old_story.story_hash})
|
||||
|
||||
response = self.client.get(reverse('refresh-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds']['766']['nt'], 19)
|
||||
self.assertEqual(content['feeds']['766']['nt'], 19)
|
||||
|
||||
management.call_command('loaddata', 'google2.json', verbosity=1, skip_checks=False)
|
||||
management.call_command('refresh_feed', force=False, feed=766, single_threaded=True, daemonize=False, skip_checks=False)
|
||||
|
||||
stories = MStory.objects(story_feed_id=feed.pk)
|
||||
self.assertEquals(stories.count(), 20)
|
||||
self.assertEqual(stories.count(), 20)
|
||||
|
||||
url = reverse('load-single-feed', kwargs=dict(feed_id=766))
|
||||
response = self.client.get(url)
|
||||
|
@ -218,11 +218,11 @@ class FeedTest(TestCase):
|
|||
feed = json.decode(response.content)
|
||||
|
||||
# Test: 1 changed char in title
|
||||
self.assertEquals(len(feed['stories']), 6)
|
||||
self.assertEqual(len(feed['stories']), 6)
|
||||
|
||||
response = self.client.get(reverse('refresh-feeds'))
|
||||
content = json.decode(response.content)
|
||||
self.assertEquals(content['feeds']['766']['nt'], 19)
|
||||
self.assertEqual(content['feeds']['766']['nt'], 19)
|
||||
|
||||
def test_load_feeds__brokelyn__invalid_xml(self):
|
||||
self.client.login(username='conesus', password='test')
|
||||
|
@ -237,7 +237,7 @@ class FeedTest(TestCase):
|
|||
feed = json.decode(response.content)
|
||||
|
||||
# Test: 1 changed char in title
|
||||
self.assertEquals(len(feed['stories']), 6)
|
||||
self.assertEqual(len(feed['stories']), 6)
|
||||
|
||||
def test_all_feeds(self):
|
||||
pass
|
||||
|
|
|
@ -13,8 +13,8 @@ from OpenSSL.SSL import Error as OpenSSLError
|
|||
from pyasn1.error import PyAsn1Error
|
||||
from django.utils.encoding import smart_str
|
||||
from django.conf import settings
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
from urlparse import urljoin
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
|
||||
BROKEN_URLS = [
|
||||
"gamespot.com",
|
||||
|
@ -122,13 +122,13 @@ class TextImporter:
|
|||
|
||||
if text:
|
||||
text = text.replace("\xc2\xa0", " ") # Non-breaking space, is mangled when encoding is not utf-8
|
||||
text = text.replace("\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
|
||||
text = text.replace("\\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
|
||||
|
||||
original_text_doc = readability.Document(text, url=resp.url,
|
||||
positive_keywords="post, entry, postProp, article, postContent, postField")
|
||||
try:
|
||||
content = original_text_doc.summary(html_partial=True)
|
||||
except (readability.Unparseable, ParserError), e:
|
||||
except (readability.Unparseable, ParserError) as e:
|
||||
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
|
||||
return
|
||||
|
||||
|
@ -151,7 +151,7 @@ class TextImporter:
|
|||
self.story.original_text_z = zlib.compress(smart_str(content))
|
||||
try:
|
||||
self.story.save()
|
||||
except NotUniqueError, e:
|
||||
except NotUniqueError as e:
|
||||
logging.user(self.request, ("~SN~FYFetched ~FGoriginal text~FY: %s" % (e)), warn_color=False)
|
||||
pass
|
||||
logging.user(self.request, ("~SN~FYFetched ~FGoriginal text~FY: now ~SB%s bytes~SN vs. was ~SB%s bytes" % (
|
||||
|
@ -179,7 +179,7 @@ class TextImporter:
|
|||
if len(noscript.contents) > 0:
|
||||
noscript.replaceWith(noscript.contents[0])
|
||||
|
||||
content = unicode(soup)
|
||||
content = str(soup)
|
||||
|
||||
images = set([img['src'] for img in soup.findAll('img') if 'src' in img])
|
||||
for image_url in images:
|
||||
|
@ -212,7 +212,7 @@ class TextImporter:
|
|||
requests.models.ChunkedEncodingError,
|
||||
requests.models.ContentDecodingError,
|
||||
urllib3.exceptions.LocationValueError,
|
||||
LocationParseError, OpenSSLError, PyAsn1Error), e:
|
||||
LocationParseError, OpenSSLError, PyAsn1Error) as e:
|
||||
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
|
||||
return
|
||||
return r
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import datetime
|
||||
from urlparse import urlparse
|
||||
from urllib.parse import urlparse
|
||||
from utils import log as logging
|
||||
from django.shortcuts import get_object_or_404, render
|
||||
from django.views.decorators.http import condition
|
||||
|
@ -235,7 +235,7 @@ def assemble_statistics(user, feed_id):
|
|||
localoffset = timezone.utcoffset(datetime.datetime.utcnow())
|
||||
hours_offset = int(localoffset.total_seconds() / 3600)
|
||||
rotated_hours = {}
|
||||
for hour, value in stats['story_hours_history'].items():
|
||||
for hour, value in list(stats['story_hours_history'].items()):
|
||||
rotated_hours[str(int(hour)+hours_offset)] = value
|
||||
stats['story_hours_history'] = rotated_hours
|
||||
|
||||
|
|