2to3 apps/rss_feeds

jmath1 2020-06-15 02:54:37 -04:00
parent 2d62e80344
commit 6021afaec3
17 changed files with 269 additions and 178 deletions

View file

@ -1,20 +1,20 @@
import urllib2
import urllib.request, urllib.error, urllib.parse
import lxml.html
import numpy
import scipy
import scipy.misc
import scipy.cluster
import urlparse
import urllib.parse
import struct
import operator
import gzip
import datetime
import requests
import httplib
import http.client
from PIL import BmpImagePlugin, PngImagePlugin, Image
from socket import error as SocketError
from boto.s3.key import Key
from StringIO import StringIO
from io import StringIO
from django.conf import settings
from apps.rss_feeds.models import MFeedPage, MFeedIcon
from utils.facebook_fetcher import FacebookFetcher
@ -127,7 +127,7 @@ class IconImporter(object):
try:
image_file.seek(0)
header = struct.unpack('<3H', image_file.read(6))
except Exception, e:
except Exception as e:
return
# Check magic
@ -136,9 +136,9 @@ class IconImporter(object):
# Collect icon directories
directories = []
for i in xrange(header[2]):
for i in range(header[2]):
directory = list(struct.unpack('<4B2H2I', image_file.read(16)))
for j in xrange(3):
for j in range(3):
if not directory[j]:
directory[j] = 256
@ -222,9 +222,9 @@ class IconImporter(object):
requests.models.InvalidURL,
requests.models.ChunkedEncodingError,
requests.models.ContentDecodingError,
httplib.IncompleteRead,
http.client.IncompleteRead,
LocationParseError, OpenSSLError, PyAsn1Error,
ValueError), e:
ValueError) as e:
logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)
if url:
image, image_file = self.get_image_from_url(url)
@ -244,7 +244,7 @@ class IconImporter(object):
url = self.feed_icon.icon_url
if not url and self.feed.feed_link and len(self.feed.feed_link) > 6:
try:
url = urlparse.urljoin(self.feed.feed_link, 'favicon.ico')
url = urllib.parse.urljoin(self.feed.feed_link, 'favicon.ico')
except ValueError:
url = None
if not url:
@ -252,7 +252,7 @@ class IconImporter(object):
image, image_file = self.get_image_from_url(url)
if not image:
url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')
url = urllib.parse.urljoin(self.feed.feed_link, '/favicon.ico')
image, image_file = self.get_image_from_url(url)
# print 'Found: %s - %s' % (url, image)
return image, image_file, url
@ -262,7 +262,7 @@ class IconImporter(object):
url = facebook_fetcher.favicon_url()
image, image_file = self.get_image_from_url(url)
if not image:
url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')
url = urllib.parse.urljoin(self.feed.feed_link, '/favicon.ico')
image, image_file = self.get_image_from_url(url)
# print 'Found: %s - %s' % (url, image)
return image, image_file, url
@ -288,8 +288,8 @@ class IconImporter(object):
'Accept': 'image/png,image/x-icon,image/*;q=0.9,*/*;q=0.8'
}
try:
request = urllib2.Request(url, headers=headers)
icon = urllib2.urlopen(request).read()
request = urllib.request.Request(url, headers=headers)
icon = urllib.request.urlopen(request).read()
except Exception:
return None
return icon
@ -311,7 +311,7 @@ class IconImporter(object):
if not content:
return url
try:
if isinstance(content, unicode):
if isinstance(content, str):
content = content.encode('utf-8')
icon_path = lxml.html.fromstring(content).xpath(
'//link[@rel="icon" or @rel="shortcut icon"]/@href'
@ -323,7 +323,7 @@ class IconImporter(object):
if str(icon_path[0]).startswith('http'):
url = icon_path[0]
else:
url = urlparse.urljoin(self.feed.feed_link, icon_path[0])
url = urllib.parse.urljoin(self.feed.feed_link, icon_path[0])
return url
def normalize_image(self, image):
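
Throughout this file the conversion follows two mechanical rules: urllib2 is split across urllib.request / urllib.error / urllib.parse, and the Python 2 `except E, e` clause becomes `except E as e`. A minimal, self-contained sketch of the new fetch idiom (the URL, headers and function name are illustrative, not taken from this commit):

    import urllib.request
    import urllib.error

    def fetch_bytes(url, timeout=10):
        # Python 3 spelling of the old urllib2.Request / urllib2.urlopen pair.
        request = urllib.request.Request(url, headers={'User-Agent': 'favicon-fetcher'})
        try:
            return urllib.request.urlopen(request, timeout=timeout).read()
        except urllib.error.URLError as e:  # Py2: "except urllib2.URLError, e:"
            return None

One caveat, offered as an observation rather than part of the commit: `from StringIO import StringIO` was rewritten to `from io import StringIO`, but the icon data handled here is binary, so on Python 3 io.BytesIO is usually the type that keeps struct.unpack and PIL happy.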

View file

@ -37,12 +37,12 @@ class Command(BaseCommand):
usersubs = UserSubscription.objects.filter(user=u, active=True)
else:
usersubs = UserSubscription.objects.filter(user=u, needs_unread_recalc=True)
print " ---> %s has %s feeds (%s/%s)" % (u.username, usersubs.count(), i+1, user_count)
print(" ---> %s has %s feeds (%s/%s)" % (u.username, usersubs.count(), i+1, user_count))
for sub in usersubs:
try:
sub.calculate_feed_scores(silent=options['silent'])
except Exception, e:
print " ***> Exception: %s" % e
except Exception as e:
print(" ***> Exception: %s" % e)
continue
def daemonize():
@ -56,12 +56,12 @@ def daemonize():
os.setsid()
if os.fork(): # launch child and...
os._exit(0) # kill off parent again.
os.umask(077)
os.umask(0o77)
null = os.open("/dev/null", os.O_RDWR)
for i in range(3):
try:
os.dup2(null, i)
except OSError, e:
except OSError as e:
if e.errno != errno.EBADF:
raise
os.close(null)
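
Two unrelated Python 3 requirements meet in this daemonize helper: print is now a function, and bare octal literals such as 077 are a syntax error and must carry the 0o prefix. Illustrative, standalone:

    import os

    # Python 2: os.umask(077); print " ---> daemonized"
    # Python 3: same numeric value (63), new spellings.
    previous_mask = os.umask(0o77)
    print(" ---> daemonized, previous umask was %o" % previous_mask)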

View file

@ -20,4 +20,4 @@ class Command(BaseCommand):
for feed in feeds:
feed.count_stories(verbose=options['verbose'])
print "\nCounted %s feeds" % feeds.count()
print("\nCounted %s feeds" % feeds.count())

View file

@ -19,16 +19,16 @@ class Command(BaseCommand):
feeds_count = feeds.count()
for i in xrange(0, feeds_count, 100):
for i in range(0, feeds_count, 100):
feeds = Feed.objects.all()[i:i+100]
for feed in feeds.iterator():
feed.count_subscribers(verbose=options['verbose'])
if options['delete']:
print "# Deleting old feeds..."
print("# Deleting old feeds...")
old_feeds = Feed.objects.filter(num_subscribers=0)
for feed in old_feeds:
feed.count_subscribers(verbose=True)
if feed.num_subscribers == 0:
print ' ---> Deleting: [%s] %s' % (feed.pk, feed)
print(' ---> Deleting: [%s] %s' % (feed.pk, feed))
feed.delete()
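
The xrange → range substitution here (and in models.py further down) works because Python 3's range is a lazy sequence, so batching over a large feed count costs no extra memory. A sketch of the chunking idiom with a made-up batch size:

    def batches(total, size=100):
        # range() is lazy in Python 3, like xrange() was in Python 2,
        # so the index list is never materialised.
        for start in range(0, total, size):
            yield start, min(start + size, total)

    # list(batches(250)) -> [(0, 100), (100, 200), (200, 250)]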

View file

@ -16,7 +16,7 @@ class Command(BaseCommand):
elif options['username']:
user = User.objects.get(username__icontains=options['username'])
else:
raise Exception, "Need username or user id."
raise Exception("Need username or user id.")
user.profile.last_seen_on = datetime.datetime.utcnow()
user.profile.save()
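
The removed line used the Python 2-only `raise Exception, "message"` form; Python 3 accepts only an instantiated exception. For reference, in a hypothetical helper:

    def require_user(user_id=None, username=None):
        # Python 2: raise Exception, "Need username or user id."
        # Python 3: the exception is constructed explicitly.
        if not user_id and not username:
            raise Exception("Need username or user id.")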

View file

@ -39,10 +39,10 @@ class Command(BaseCommand):
feeds = Feed.objects.filter(next_scheduled_update__lte=now,
average_stories_per_month__lt=options['skip'],
active=True)
print " ---> Skipping %s feeds" % feeds.count()
print(" ---> Skipping %s feeds" % feeds.count())
for feed in feeds:
feed.set_next_scheduled_update()
print '.',
print('.', end=' ')
return
socket.setdefaulttimeout(options['timeout'])
@ -82,5 +82,5 @@ class Command(BaseCommand):
django.db.connection.close()
print " ---> Fetching %s feeds..." % feeds.count()
print(" ---> Fetching %s feeds..." % feeds.count())
disp.run_jobs()
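
Python 2's trailing-comma print (`print '.',`) suppressed the newline and emitted a trailing space; 2to3 reproduces that behaviour exactly with end=' '. A quick illustration:

    import sys

    for _ in range(3):
        print('.', end=' ')   # Py2: print '.',
    sys.stdout.flush()        # dots appear once the buffer is flushed
    print()                   # terminate the line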

View file

@ -56,11 +56,11 @@ class Command(BaseCommand):
execution_time = time.time() - starttime
raw_sql = self.db.ops.last_executed_query(self.cursor, sql, params)
if sqlparse:
print(sqlparse.format(raw_sql, reindent=True))
print((sqlparse.format(raw_sql, reindent=True)))
else:
print(raw_sql)
print("")
print('Execution time: %.6fs [Database: %s]' % (execution_time, self.db.alias))
print(('Execution time: %.6fs [Database: %s]' % (execution_time, self.db.alias)))
print("")
util.CursorDebugWrapper = PrintQueryWrapper
@ -154,7 +154,7 @@ class Command(BaseCommand):
except ImportError:
import traceback
traceback.print_exc()
print(self.style.ERROR("Could not load '%s' Python environment." % SETTINGS_SHELL_PLUS))
print((self.style.ERROR("Could not load '%s' Python environment." % SETTINGS_SHELL_PLUS)))
else:
for shell_name, func in shells:
try:
@ -166,5 +166,5 @@ class Command(BaseCommand):
else:
import traceback
traceback.print_exc()
print(self.style.ERROR("Could not load any interactive Python environment."))
print((self.style.ERROR("Could not load any interactive Python environment.")))

View file

@ -4,4 +4,4 @@ import redis
from apps.social.models import *
r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
print "Redis: %s" % r
print("Redis: %s" % r)

View file

@ -21,7 +21,7 @@ def import_objects(options, style):
model_aliases = getattr(settings, 'SHELL_PLUS_MODEL_ALIASES', {})
for app_mod in apps.app_configs.items():
for app_mod in list(apps.app_configs.items()):
app_models = apps.get_models(app_mod)
if not app_models:
continue
@ -50,9 +50,9 @@ def import_objects(options, style):
except AttributeError as e:
if not quiet_load:
print(style.ERROR("Failed to import '%s' from '%s' reason: %s" % (model.__name__, app_name, str(e))))
print((style.ERROR("Failed to import '%s' from '%s' reason: %s" % (model.__name__, app_name, str(e)))))
continue
if not quiet_load:
print(style.SQL_COLTYPE("From '%s' autoload: %s" % (app_mod.__name__.split('.')[-2], ", ".join(model_labels))))
print((style.SQL_COLTYPE("From '%s' autoload: %s" % (app_mod.__name__.split('.')[-2], ", ".join(model_labels)))))
return imported_objects
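
2to3 conservatively wraps dict.items(), dict.values(), zip() and range() in list() because they return lazy views or iterators on Python 3. For loops that make a single pass, as most of the loops in this commit do, the wrapper is not strictly required; it matters only when the result is indexed or the dict is mutated while iterating. A sketch with a throwaway dict:

    aliases = {'feed': 'Feed', 'story': 'MStory'}

    # Fine without list(): one pass over a view.
    for name, model in aliases.items():
        print(name, model)

    # list() is needed when the dict changes during the loop,
    # which is what 2to3 cannot rule out statically.
    for name in list(aliases.keys()):
        if name == 'story':
            del aliases[name]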

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
import utils.fields

View file

@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
import utils.fields
class Migration(migrations.Migration):
dependencies = [
]
operations = [
migrations.CreateModel(
name='DuplicateFeed',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('duplicate_address', models.CharField(max_length=764, db_index=True)),
('duplicate_link', models.CharField(max_length=764, null=True, db_index=True)),
('duplicate_feed_id', models.CharField(max_length=255, null=True, db_index=True)),
],
options={
},
bases=(models.Model,),
),
migrations.CreateModel(
name='Feed',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('feed_address', models.URLField(max_length=764, db_index=True)),
('feed_address_locked', models.NullBooleanField(default=False)),
('feed_link', models.URLField(default=b'', max_length=1000, null=True, blank=True)),
('feed_link_locked', models.BooleanField(default=False)),
('hash_address_and_link', models.CharField(unique=True, max_length=64)),
('feed_title', models.CharField(default=b'[Untitled]', max_length=255, null=True, blank=True)),
('is_push', models.NullBooleanField(default=False)),
('active', models.BooleanField(default=True, db_index=True)),
('num_subscribers', models.IntegerField(default=-1)),
('active_subscribers', models.IntegerField(default=-1, db_index=True)),
('premium_subscribers', models.IntegerField(default=-1)),
('active_premium_subscribers', models.IntegerField(default=-1)),
('last_update', models.DateTimeField(db_index=True)),
('next_scheduled_update', models.DateTimeField()),
('last_story_date', models.DateTimeField(null=True, blank=True)),
('fetched_once', models.BooleanField(default=False)),
('known_good', models.BooleanField(default=False)),
('has_feed_exception', models.BooleanField(default=False, db_index=True)),
('has_page_exception', models.BooleanField(default=False, db_index=True)),
('has_page', models.BooleanField(default=True)),
('exception_code', models.IntegerField(default=0)),
('errors_since_good', models.IntegerField(default=0)),
('min_to_decay', models.IntegerField(default=0)),
('days_to_trim', models.IntegerField(default=90)),
('creation', models.DateField(auto_now_add=True)),
('etag', models.CharField(max_length=255, null=True, blank=True)),
('last_modified', models.DateTimeField(null=True, blank=True)),
('stories_last_month', models.IntegerField(default=0)),
('average_stories_per_month', models.IntegerField(default=0)),
('last_load_time', models.IntegerField(default=0)),
('favicon_color', models.CharField(max_length=6, null=True, blank=True)),
('favicon_not_found', models.BooleanField(default=False)),
('s3_page', models.NullBooleanField(default=False)),
('s3_icon', models.NullBooleanField(default=False)),
('search_indexed', models.NullBooleanField(default=None)),
('branch_from_feed', models.ForeignKey(blank=True, to='rss_feeds.Feed', null=True)),
],
options={
'ordering': ['feed_title'],
'db_table': 'feeds',
},
bases=(models.Model,),
),
migrations.CreateModel(
name='FeedData',
fields=[
('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
('feed_tagline', models.CharField(max_length=1024, null=True, blank=True)),
('story_count_history', models.TextField(null=True, blank=True)),
('feed_classifier_counts', models.TextField(null=True, blank=True)),
('popular_tags', models.CharField(max_length=1024, null=True, blank=True)),
('popular_authors', models.CharField(max_length=2048, null=True, blank=True)),
('feed', utils.fields.AutoOneToOneField(related_name=b'data', to='rss_feeds.Feed')),
],
options={
},
bases=(models.Model,),
),
migrations.AddField(
model_name='duplicatefeed',
name='feed',
field=models.ForeignKey(related_name=b'duplicate_addresses', to='rss_feeds.Feed'),
preserve_default=True,
),
]

View file

@ -10,26 +10,24 @@ import zlib
import hashlib
import redis
import pymongo
import HTMLParser
import urlparse
import html.parser
import urllib.parse
from collections import defaultdict
from operator import itemgetter
from bson.objectid import ObjectId
from BeautifulSoup import BeautifulSoup
from pyes.exceptions import NotFoundException
from bs4 import BeautifulSoup
# from nltk.collocations import TrigramCollocationFinder, BigramCollocationFinder, TrigramAssocMeasures, BigramAssocMeasures
from django.db import models
from django.db import IntegrityError
from django.conf import settings
from django.db.models.query import QuerySet
from django.db.utils import DatabaseError
from django.core.urlresolvers import reverse
from django.urls import reverse
from django.contrib.auth.models import User
from django.contrib.sites.models import Site
from django.template.defaultfilters import slugify
from django.utils.encoding import smart_str, smart_unicode
from django.utils.encoding import smart_bytes, smart_text
from mongoengine.queryset import OperationError, Q, NotUniqueError
from mongoengine.base import ValidationError
from vendor.timezones.utilities import localtime_for_timezone
from apps.rss_feeds.tasks import UpdateFeeds, PushFeeds, ScheduleCountTagsForUser
from apps.rss_feeds.text_importer import TextImporter
@ -49,7 +47,7 @@ from utils.story_functions import strip_tags, htmldiff, strip_comments, strip_co
from utils.story_functions import prep_for_search
from utils.story_functions import create_imageproxy_signed_url
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = range(4)
ENTRY_NEW, ENTRY_UPDATED, ENTRY_SAME, ENTRY_ERR = list(range(4))
class Feed(models.Model):
@ -250,7 +248,7 @@ class Feed(models.Model):
try:
super(Feed, self).save(*args, **kwargs)
except IntegrityError, e:
except IntegrityError as e:
logging.debug(" ---> ~FRFeed save collision (%s), checking dupe hash..." % e)
feed_address = self.feed_address or ""
feed_link = self.feed_link or ""
@ -274,7 +272,7 @@ class Feed(models.Model):
return feed
else:
logging.debug(" ---> ~FRFeed is its own dupe? %s == %s" % (self, duplicate_feeds))
except DatabaseError, e:
except DatabaseError as e:
logging.debug(" ---> ~FBFeed update failed, no change: %s / %s..." % (kwargs.get('update_fields', None), e))
pass
@ -287,7 +285,7 @@ class Feed(models.Model):
last_pk = cls.objects.latest('pk').pk
for f in xrange(offset, last_pk, 1000):
print " ---> %s / %s (%.2s%%)" % (f, last_pk, float(f)/last_pk*100)
print(f" ---> {f} / {last_pk} ({str(float(f)/last_pk*100)[:2]}%")
feeds = Feed.objects.filter(pk__in=range(f, f+1000),
active=True,
active_subscribers__gte=subscribers)\
@ -673,7 +671,7 @@ class Feed(models.Model):
try:
feed_address, feed = _1()
except TimeoutError, e:
except TimeoutError as e:
logging.debug(' ---> [%-30s] Feed address check timed out...' % (self.log_title[:30]))
self.save_feed_history(505, 'Timeout', e)
feed = self
@ -886,14 +884,14 @@ class Feed(models.Model):
if verbose:
if self.num_subscribers <= 1:
print '.',
print('.', end=' ')
else:
print "\n %s> %s subscriber%s: %s" % (
print("\n %s> %s subscriber%s: %s" % (
'-' * min(self.num_subscribers, 20),
self.num_subscribers,
'' if self.num_subscribers == 1 else 's',
self.feed_title,
),
), end=' ')
def _split_favicon_color(self):
color = self.favicon_color
@ -971,8 +969,7 @@ class Feed(models.Model):
self.save(update_fields=['stories_last_month'])
if verbose:
print " ---> %s [%s]: %s stories last month" % (self.feed_title, self.pk,
self.stories_last_month)
print(f" ---> {self.feed} [{self.pk}]: {self.stories_last_month} stories last month")
def save_feed_story_history_statistics(self, current_counts=None):
"""
@ -1037,7 +1034,7 @@ class Feed(models.Model):
for year in range(min_year, now.year+1):
for month in range(1, 12+1):
if datetime.datetime(year, month, 1) < now:
key = u'%s-%s' % (year, month)
key = '%s-%s' % (year, month)
if dates.get(key) or start:
start = True
months.append((key, dates.get(key, 0)))
@ -1083,7 +1080,7 @@ class Feed(models.Model):
scores = []
res = cls.objects(feed_id=self.pk).map_reduce(map_f, reduce_f, output='inline')
for r in res:
facet_values = dict([(k, int(v)) for k,v in r.value.iteritems()])
facet_values = dict([(k, int(v)) for k,v in r.value.items()])
facet_values[facet] = r.key
if facet_values['pos'] + facet_values['neg'] >= 1:
scores.append(facet_values)
@ -1111,7 +1108,7 @@ class Feed(models.Model):
@property
def user_agent(self):
feed_parts = urlparse.urlparse(self.feed_address)
feed_parts = urllib.parse.urlparse(self.feed_address)
if feed_parts.netloc.find('.tumblr.com') != -1:
# Certain tumblr feeds will redirect to tumblr's login page when fetching.
# A known workaround is using facebook's user agent.
@ -1146,7 +1143,7 @@ class Feed(models.Model):
def update(self, **kwargs):
try:
from utils import feed_fetcher
except ImportError, e:
except ImportError as e:
logging.info(" ***> ~BR~FRImportError: %s" % e)
return
r = redis.Redis(connection_pool=settings.REDIS_FEED_UPDATE_POOL)
@ -1170,7 +1167,7 @@ class Feed(models.Model):
}
if getattr(settings, 'TEST_DEBUG', False):
print " ---> Testing feed fetch: %s" % self.log_title
print(" ---> Testing feed fetch: %s" % self.log_title)
# options['force_fp'] = True # No, why would this be needed?
original_feed_address = self.feed_address
original_feed_link = self.feed_link
@ -1245,7 +1242,7 @@ class Feed(models.Model):
logging.debug(" ---> [%-30s] ~FBChecking ~SB%s~SN new/updated against ~SB%s~SN stories" % (
self.log_title[:30],
len(stories),
len(existing_stories.keys())))
len(list(existing_stories.keys()))))
@timelimit(2)
def _1(story, story_content, existing_stories, new_story_hashes):
existing_story, story_has_changed = self._exists_story(story, story_content,
@ -1271,7 +1268,7 @@ class Feed(models.Model):
try:
existing_story, story_has_changed = _1(story, story_content,
existing_stories, new_story_hashes)
except TimeoutError, e:
except TimeoutError as e:
logging.debug(' ---> [%-30s] ~SB~FRExisting story check timed out...' % (self.log_title[:30]))
existing_story = None
story_has_changed = False
@ -1293,7 +1290,7 @@ class Feed(models.Model):
s.save()
ret_values['new'] += 1
s.publish_to_subscribers()
except (IntegrityError, OperationError), e:
except (IntegrityError, OperationError) as e:
ret_values['error'] += 1
if settings.DEBUG:
logging.info(' ---> [%-30s] ~SN~FRIntegrityError on new story: %s - %s' % (self.feed_title[:30], story.get('guid'), e))
@ -1316,7 +1313,7 @@ class Feed(models.Model):
original_only=True)
else:
raise MStory.DoesNotExist
except (MStory.DoesNotExist, OperationError), e:
except (MStory.DoesNotExist, OperationError) as e:
ret_values['error'] += 1
if verbose:
logging.info(' ---> [%-30s] ~SN~FROperation on existing story: %s - %s' % (self.feed_title[:30], story.get('guid'), e))
@ -1331,7 +1328,7 @@ class Feed(models.Model):
# Don't mangle stories with code, just use new
story_content_diff = story_content
else:
story_content_diff = htmldiff(smart_unicode(original_content), smart_unicode(story_content))
story_content_diff = htmldiff(smart_text(original_content), smart_text(story_content))
else:
story_content_diff = original_content
# logging.debug("\t\tDiff: %s %s %s" % diff.getStats())
@ -1405,12 +1402,12 @@ class Feed(models.Model):
if not feed_tags:
all_tags = MStory.objects(story_feed_id=self.pk,
story_tags__exists=True).item_frequencies('story_tags')
feed_tags = sorted([(k, v) for k, v in all_tags.items() if int(v) > 0],
feed_tags = sorted([(k, v) for k, v in list(all_tags.items()) if int(v) > 0],
key=itemgetter(1),
reverse=True)[:25]
popular_tags = json.encode(feed_tags)
if verbose:
print "Found %s tags: %s" % (len(feed_tags), popular_tags)
print("Found %s tags: %s" % (len(feed_tags), popular_tags))
# TODO: This len() bullshit will be gone when feeds move to mongo
# On second thought, it might stay, because we don't want
@ -1423,7 +1420,7 @@ class Feed(models.Model):
return
tags_list = []
if feed_tags and isinstance(feed_tags, unicode):
if feed_tags and isinstance(feed_tags, str):
tags_list = json.decode(feed_tags)
if len(tags_list) >= 1:
self.save_popular_tags(tags_list[:-1])
@ -1433,7 +1430,7 @@ class Feed(models.Model):
authors = defaultdict(int)
for story in MStory.objects(story_feed_id=self.pk).only('story_author_name'):
authors[story.story_author_name] += 1
feed_authors = sorted([(k, v) for k, v in authors.items() if k],
feed_authors = sorted([(k, v) for k, v in list(authors.items()) if k],
key=itemgetter(1),
reverse=True)[:20]
@ -1453,9 +1450,9 @@ class Feed(models.Model):
month_ago = now - datetime.timedelta(days=settings.DAYS_OF_STORY_HASHES)
feed_count = Feed.objects.latest('pk').pk
for feed_id in xrange(start, feed_count):
for feed_id in range(start, feed_count):
if feed_id % 1000 == 0:
print "\n\n -------------------------- %s (%s deleted so far) --------------------------\n\n" % (feed_id, total)
print("\n\n -------------------------- %s (%s deleted so far) --------------------------\n\n" % (feed_id, total))
try:
feed = Feed.objects.get(pk=feed_id)
except Feed.DoesNotExist:
@ -1466,17 +1463,17 @@ class Feed(models.Model):
months_ago = int((now - feed.last_story_date).days / 30.0)
cutoff = max(1, 6 - months_ago)
if dryrun:
print " DRYRUN: %s cutoff - %s" % (cutoff, feed)
print(" DRYRUN: %s cutoff - %s" % (cutoff, feed))
else:
total += MStory.trim_feed(feed=feed, cutoff=cutoff, verbose=verbose)
else:
if dryrun:
print " DRYRUN: %s/%s cutoff - %s" % (cutoff, feed.story_cutoff, feed)
print(" DRYRUN: %s/%s cutoff - %s" % (cutoff, feed.story_cutoff, feed))
else:
total += feed.trim_feed(verbose=verbose)
print " ---> Deleted %s stories in total." % total
print(" ---> Deleted %s stories in total." % total)
@property
def story_cutoff(self):
@ -1517,7 +1514,7 @@ class Feed(models.Model):
cutoff = min(cutoff, 10)
try:
logging.debug(" ---> [%-30s] ~FBTrimming down to ~SB%s (instead of %s)~SN stories (~FM%s~FB)" % (self.log_title[:30], cutoff, original_cutoff, self.last_story_date.strftime("%Y-%m-%d") if self.last_story_date else "No last story date"))
except ValueError, e:
except ValueError as e:
logging.debug(" ***> [%-30s] Error trimming: %s" % (self.log_title[:30], e))
pass
@ -1621,7 +1618,7 @@ class Feed(models.Model):
popularity[feed_id]['ng'] = -1 * classifier['neg']
popularity[feed_id]['story_ids'].append(story_hash)
sorted_popularity = sorted(popularity.values(), key=lambda x: x['reach_score'],
sorted_popularity = sorted(list(popularity.values()), key=lambda x: x['reach_score'],
reverse=True)
# Extract story authors from feeds
@ -1671,7 +1668,7 @@ class Feed(models.Model):
author['tags'][tag]['ps'] = classifier['pos']
author['tags'][tag]['ng'] = -1 * classifier['neg']
sorted_authors = sorted(feed['authors'].values(), key=lambda x: x['count'])
sorted_authors = sorted(list(feed['authors'].values()), key=lambda x: x['count'])
feed['authors'] = sorted_authors
# pprint(sorted_popularity)
@ -1716,7 +1713,7 @@ class Feed(models.Model):
import xlsxwriter
from xlsxwriter.utility import xl_rowcol_to_cell
if isinstance(queries, unicode):
if isinstance(queries, str):
queries = [q.strip() for q in queries.split(',')]
title = 'NewsBlur-%s.xlsx' % slugify('-'.join(queries))
@ -1818,7 +1815,7 @@ class Feed(models.Model):
worksheet.write_url(row, col+4, story['url'])
worksheet.write_datetime(row, col+5, story['date'], date_format)
row += 1
for tag in author['tags'].values():
for tag in list(author['tags'].values()):
worksheet.conditional_format(row, col+7, row, col+9, {'type': 'cell',
'criteria': '==',
'value': 0,
@ -1855,7 +1852,7 @@ class Feed(models.Model):
@classmethod
def format_story(cls, story_db, feed_id=None, text=False, include_permalinks=False,
show_changes=False):
if isinstance(story_db.story_content_z, unicode):
if isinstance(story_db.story_content_z, str):
story_db.story_content_z = story_db.story_content_z.decode('base64')
story_content = ''
@ -1864,9 +1861,9 @@ class Feed(models.Model):
if (not show_changes and
hasattr(story_db, 'story_latest_content_z') and
story_db.story_latest_content_z):
latest_story_content = smart_unicode(zlib.decompress(story_db.story_latest_content_z))
latest_story_content = smart_text(zlib.decompress(story_db.story_latest_content_z))
if story_db.story_content_z:
story_content = smart_unicode(zlib.decompress(story_db.story_content_z))
story_content = smart_text(zlib.decompress(story_db.story_content_z))
if '<ins' in story_content or '<del' in story_content:
has_changes = True
@ -1934,7 +1931,7 @@ class Feed(models.Model):
signed_urls = [create_imageproxy_signed_url(settings.IMAGES_URL,
settings.IMAGES_SECRET_KEY,
url) for url in urls]
return dict(zip(urls, signed_urls))
return dict(list(zip(urls, signed_urls)))
@classmethod
def secure_image_thumbnails(cls, urls, size=192):
@ -1942,11 +1939,11 @@ class Feed(models.Model):
settings.IMAGES_SECRET_KEY,
url,
size) for url in urls]
return dict(zip(urls, signed_urls))
return dict(list(zip(urls, signed_urls)))
def get_tags(self, entry):
fcat = []
if entry.has_key('tags'):
if 'tags' in entry:
for tcat in entry.tags:
term = None
if hasattr(tcat, 'label') and tcat.label:
@ -1986,18 +1983,18 @@ class Feed(models.Model):
story_in_system = None
story_has_changed = False
story_link = self.get_permalink(story)
existing_stories_hashes = existing_stories.keys()
existing_stories_hashes = list(existing_stories.keys())
story_pub_date = story.get('published')
# story_published_now = story.get('published_now', False)
# start_date = story_pub_date - datetime.timedelta(hours=8)
# end_date = story_pub_date + datetime.timedelta(hours=8)
for existing_story in existing_stories.values():
for existing_story in list(existing_stories.values()):
content_ratio = 0
# existing_story_pub_date = existing_story.story_date
# print 'Story pub date: %s %s' % (story_published_now, story_pub_date)
if isinstance(existing_story.id, unicode):
if isinstance(existing_story.id, str):
# Correcting a MongoDB bug
existing_story.story_guid = existing_story.id
@ -2013,15 +2010,15 @@ class Feed(models.Model):
continue
if 'story_latest_content_z' in existing_story:
existing_story_content = smart_unicode(zlib.decompress(existing_story.story_latest_content_z))
existing_story_content = smart_text(zlib.decompress(existing_story.story_latest_content_z))
elif 'story_latest_content' in existing_story:
existing_story_content = existing_story.story_latest_content
elif 'story_content_z' in existing_story:
existing_story_content = smart_unicode(zlib.decompress(existing_story.story_content_z))
existing_story_content = smart_text(zlib.decompress(existing_story.story_content_z))
elif 'story_content' in existing_story:
existing_story_content = existing_story.story_content
else:
existing_story_content = u''
existing_story_content = ''
# Title distance + content distance, checking if story changed
@ -2304,7 +2301,7 @@ class FeedData(models.Model):
super(FeedData, self).save(*args, **kwargs)
except (IntegrityError, OperationError):
if hasattr(self, 'id') and self.id: self.delete()
except DatabaseError, e:
except DatabaseError as e:
# Nothing updated
logging.debug(" ---> ~FRNothing updated in FeedData (%s): %s" % (self.feed, e))
pass
@ -2337,7 +2334,7 @@ class MFeedIcon(mongo.Document):
def save(self, *args, **kwargs):
if self.icon_url:
self.icon_url = unicode(self.icon_url)
self.icon_url = str(self.icon_url)
try:
return super(MFeedIcon, self).save(*args, **kwargs)
except (IntegrityError, OperationError):
@ -2441,7 +2438,7 @@ class MStory(mongo.Document):
@property
def decoded_story_title(self):
h = HTMLParser.HTMLParser()
h = html.parser.HTMLParser()
return h.unescape(self.story_title)
def save(self, *args, **kwargs):
@ -2452,13 +2449,13 @@ class MStory(mongo.Document):
self.extract_image_urls()
if self.story_content:
self.story_content_z = zlib.compress(smart_str(self.story_content))
self.story_content_z = zlib.compress(smart_bytes(self.story_content))
self.story_content = None
if self.story_original_content:
self.story_original_content_z = zlib.compress(smart_str(self.story_original_content))
self.story_original_content_z = zlib.compress(smart_bytes(self.story_original_content))
self.story_original_content = None
if self.story_latest_content:
self.story_latest_content_z = zlib.compress(smart_str(self.story_latest_content))
self.story_latest_content_z = zlib.compress(smart_bytes(self.story_latest_content))
self.story_latest_content = None
if self.story_title and len(self.story_title) > story_title_max:
self.story_title = self.story_title[:story_title_max]
@ -2499,9 +2496,9 @@ class MStory(mongo.Document):
SearchStory.create_elasticsearch_mapping(delete=True)
last_pk = Feed.objects.latest('pk').pk
for f in xrange(offset, last_pk, 1000):
print " ---> %s / %s (%.2s%%)" % (f, last_pk, float(f)/last_pk*100)
feeds = Feed.objects.filter(pk__in=range(f, f+1000),
for f in range(offset, last_pk, 1000):
print(" ---> %s / %s (%.2s%%)" % (f, last_pk, float(f)/last_pk*100))
feeds = Feed.objects.filter(pk__in=list(range(f, f+1000)),
active=True,
active_subscribers__gte=1)\
.values_list('pk')
@ -2525,7 +2522,7 @@ class MStory(mongo.Document):
def remove_from_search_index(self):
try:
SearchStory.remove(self.story_hash)
except NotFoundException:
except Exception:
pass
@classmethod
@ -2545,14 +2542,14 @@ class MStory(mongo.Document):
if stories.count() > cutoff:
logging.debug(' ---> [%-30s] ~FMFound %s stories. Trimming to ~SB%s~SN...' %
(unicode(feed)[:30], stories.count(), cutoff))
(str(feed)[:30], stories.count(), cutoff))
try:
story_trim_date = stories[cutoff].story_date
if story_trim_date == stories[0].story_date:
# Handle case where every story is the same time
story_trim_date = story_trim_date - datetime.timedelta(seconds=1)
except IndexError, e:
logging.debug(' ***> [%-30s] ~BRError trimming feed: %s' % (unicode(feed)[:30], e))
except IndexError as e:
logging.debug(' ***> [%-30s] ~BRError trimming feed: %s' % (str(feed)[:30], e))
return extra_stories_count
extra_stories = cls.objects(story_feed_id=feed_id,
@ -2901,7 +2898,7 @@ class MStarredStory(mongo.DynamicDocument):
@classmethod
def trim_old_stories(cls, stories=10, days=90, dryrun=False):
print " ---> Fetching starred story counts..."
print(" ---> Fetching starred story counts...")
stats = settings.MONGODB.newsblur.starred_stories.aggregate([{
"$group": {
"_id": "$user_id",
@ -2915,7 +2912,7 @@ class MStarredStory(mongo.DynamicDocument):
month_ago = datetime.datetime.now() - datetime.timedelta(days=days)
user_ids = list(stats)
user_ids = sorted(user_ids, key=lambda x:x['stories'], reverse=True)
print " ---> Found %s users with more than %s starred stories" % (len(user_ids), stories)
print(" ---> Found %s users with more than %s starred stories" % (len(user_ids), stories))
total = 0
for stat in user_ids:
@ -2929,17 +2926,17 @@ class MStarredStory(mongo.DynamicDocument):
total += stat['stories']
username = "%s (%s)" % (user and user.username or " - ", stat['_id'])
print " ---> %19.19s: %-20.20s %s stories" % (user and user.profile.last_seen_on or "Deleted",
print(" ---> %19.19s: %-20.20s %s stories" % (user and user.profile.last_seen_on or "Deleted",
username,
stat['stories'])
stat['stories']))
if not dryrun and stat['_id']:
cls.objects.filter(user_id=stat['_id']).delete()
elif not dryrun and stat['_id'] == 0:
print " ---> Deleting unstarred stories (user_id = 0)"
print(" ---> Deleting unstarred stories (user_id = 0)")
cls.objects.filter(user_id=stat['_id']).delete()
print " ---> Deleted %s stories in total." % total
print(" ---> Deleted %s stories in total." % total)
@property
def guid_hash(self):
@ -3036,7 +3033,7 @@ class MStarredStoryCounts(mongo.Document):
try:
user_tags = cls.count_tags_for_user(user_id)
user_feeds = cls.count_feeds_for_user(user_id)
except pymongo.errors.OperationFailure, e:
except pymongo.errors.OperationFailure as e:
logging.debug(" ---> ~FBOperationError on mongo: ~SB%s" % e)
total_stories_count = MStarredStory.objects(user_id=user_id).count()
@ -3049,11 +3046,11 @@ class MStarredStoryCounts(mongo.Document):
def count_tags_for_user(cls, user_id):
all_tags = MStarredStory.objects(user_id=user_id,
user_tags__exists=True).item_frequencies('user_tags')
user_tags = sorted([(k, v) for k, v in all_tags.items() if int(v) > 0 and k],
user_tags = sorted([(k, v) for k, v in list(all_tags.items()) if int(v) > 0 and k],
key=lambda x: x[0].lower(),
reverse=True)
for tag, count in dict(user_tags).items():
for tag, count in list(dict(user_tags).items()):
cls.objects(user_id=user_id, tag=tag, slug=slugify(tag)).update_one(set__count=count,
upsert=True)
@ -3062,7 +3059,7 @@ class MStarredStoryCounts(mongo.Document):
@classmethod
def count_feeds_for_user(cls, user_id):
all_feeds = MStarredStory.objects(user_id=user_id).item_frequencies('story_feed_id')
user_feeds = dict([(k, v) for k, v in all_feeds.items() if v])
user_feeds = dict([(k, v) for k, v in list(all_feeds.items()) if v])
# Clean up None'd and 0'd feed_ids, so they can be counted against the total
if user_feeds.get(None, False):
@ -3074,7 +3071,7 @@ class MStarredStoryCounts(mongo.Document):
del user_feeds[0]
too_many_feeds = False if len(user_feeds) < 1000 else True
for feed_id, count in user_feeds.items():
for feed_id, count in list(user_feeds.items()):
if too_many_feeds and count <= 1: continue
cls.objects(user_id=user_id,
feed_id=feed_id,
@ -3334,7 +3331,7 @@ def merge_feeds(original_feed_id, duplicate_feed_id, force=False):
duplicate_feed_id=duplicate_feed.pk,
feed=original_feed
)
except (IntegrityError, OperationError), e:
except (IntegrityError, OperationError) as e:
logging.info(" ***> Could not save DuplicateFeed: %s" % e)
# Switch this dupe feed's dupe feeds over to the new original.
@ -3374,7 +3371,7 @@ def rewrite_folders(folders, original_feed, duplicate_feed):
else:
new_folders.append(folder)
elif isinstance(folder, dict):
for f_k, f_v in folder.items():
for f_k, f_v in list(folder.items()):
new_folders.append({f_k: rewrite_folders(f_v, original_feed, duplicate_feed)})
return new_folders
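
models.py collects most of the remaining conversion patterns: smart_unicode/smart_str become smart_text/smart_bytes, iteritems() and has_key() become items() and the in operator, and unicode checks become str. One line left untouched by 2to3 is the `.decode('base64')` in format_story; the 'base64' string codec no longer exists on Python 3, so that path would typically go through the base64 module instead. A hedged sketch of that replacement (the helper and field handling are illustrative, not part of this commit):

    import base64
    import zlib

    def decompress_story(content_z):
        # Python 2 allowed content_z.decode('base64'); on Python 3 the
        # string codec is gone and base64.b64decode is the usual substitute.
        if isinstance(content_z, str):
            content_z = base64.b64decode(content_z)
        return zlib.decompress(content_z).decode('utf-8')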

View file

@ -1,11 +1,11 @@
import requests
import re
import urlparse
import urllib.parse
import traceback
import feedparser
import time
import urllib2
import httplib
import urllib.request, urllib.error, urllib.parse
import http.client
import zlib
from mongoengine.queryset import NotUniqueError
from socket import error as SocketError
@ -86,8 +86,8 @@ class PageImporter(object):
return
elif feed_link.startswith('http'):
if urllib_fallback:
request = urllib2.Request(feed_link, headers=self.headers)
response = urllib2.urlopen(request)
request = urllib.request.Request(feed_link, headers=self.headers)
response = urllib.request.urlopen(request)
time.sleep(0.01) # Grrr, GIL.
data = response.read()
else:
@ -96,7 +96,7 @@ class PageImporter(object):
response.connection.close()
except requests.exceptions.TooManyRedirects:
response = requests.get(feed_link)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError), e:
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError) as e:
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
self.save_no_page()
return
@ -127,23 +127,23 @@ class PageImporter(object):
else:
self.save_no_page()
return
except (ValueError, urllib2.URLError, httplib.BadStatusLine, httplib.InvalidURL,
requests.exceptions.ConnectionError), e:
except (ValueError, urllib.error.URLError, http.client.BadStatusLine, http.client.InvalidURL,
requests.exceptions.ConnectionError) as e:
self.feed.save_page_history(401, "Bad URL", e)
fp = feedparser.parse(self.feed.feed_address)
feed_link = fp.feed.get('link', "")
self.feed.save()
logging.debug(' ***> [%-30s] Page fetch failed: %s' % (self.feed.log_title[:30], e))
except (urllib2.HTTPError), e:
except (urllib.error.HTTPError) as e:
self.feed.save_page_history(e.code, e.msg, e.fp.read())
except (httplib.IncompleteRead), e:
except (http.client.IncompleteRead) as e:
self.feed.save_page_history(500, "IncompleteRead", e)
except (requests.exceptions.RequestException,
requests.packages.urllib3.exceptions.HTTPError), e:
requests.packages.urllib3.exceptions.HTTPError) as e:
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
# mail_feed_error_to_admin(self.feed, e, local_vars=locals())
return self.fetch_page(urllib_fallback=True, requests_exception=e)
except Exception, e:
except Exception as e:
logging.debug('[%d] ! -------------------------' % (self.feed.id,))
tb = traceback.format_exc()
logging.debug(tb)
@ -188,10 +188,10 @@ class PageImporter(object):
try:
response = requests.get(story_permalink, headers=self.headers)
response.connection.close()
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects) as e:
try:
response = requests.get(story_permalink)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects), e:
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects) as e:
logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e))
return
try:
@ -207,7 +207,7 @@ class PageImporter(object):
if data:
data = data.replace("\xc2\xa0", " ") # Non-breaking space, is mangled when encoding is not utf-8
data = data.replace("\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
data = data.replace("\\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
html = self.rewrite_page(data)
if not html:
return
@ -231,7 +231,7 @@ class PageImporter(object):
def rewrite_page(self, response):
BASE_RE = re.compile(r'<head(.*?\>)', re.I)
base_code = u'<base href="%s" />' % (self.feed.feed_link,)
base_code = '<base href="%s" />' % (self.feed.feed_link,)
try:
html = BASE_RE.sub(r'<head\1 '+base_code, response)
except:
@ -258,9 +258,9 @@ class PageImporter(object):
url = match.group(2)
if url[0] in "\"'":
url = url.strip(url[0])
parsed = urlparse.urlparse(url)
parsed = urllib.parse.urlparse(url)
if parsed.scheme == parsed.netloc == '': #relative to domain
url = urlparse.urljoin(self.feed.feed_link, url)
url = urllib.parse.urljoin(self.feed.feed_link, url)
ret.append(document[last_end:match.start(2)])
ret.append('"%s"' % (url,))
last_end = match.end(2)
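
urlparse lives at urllib.parse on Python 3, with urljoin and urlparse otherwise unchanged. A small standalone sketch of the relative-URL rewriting this method performs (the feed link is a placeholder):

    from urllib.parse import urljoin, urlparse

    feed_link = 'https://example.com/blog/'   # placeholder value
    url = '/favicon.ico'

    parsed = urlparse(url)
    if parsed.scheme == parsed.netloc == '':   # relative to the domain
        url = urljoin(feed_link, url)
    print(url)   # https://example.com/favicon.ico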

View file

@ -167,7 +167,7 @@ class UpdateFeeds(Task):
continue
try:
feed.update(**options)
except SoftTimeLimitExceeded, e:
except SoftTimeLimitExceeded as e:
feed.save_feed_history(505, 'Timeout', e)
logging.info(" ---> [%-30s] ~BR~FWTime limit hit!~SB~FR Moving on to next feed..." % feed)
if profiler_activated: profiler.process_celery_finished()

View file

@ -37,12 +37,12 @@ class FeedTest(TestCase):
feed = Feed.objects.get(feed_link__contains='gawker')
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 0)
self.assertEqual(stories.count(), 0)
feed.update(force=True)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 38)
self.assertEqual(stories.count(), 38)
management.call_command('loaddata', 'gawker2.json', verbosity=0, skip_checks=False)
@ -50,12 +50,12 @@ class FeedTest(TestCase):
# Test: 1 changed char in content
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 38)
self.assertEqual(stories.count(), 38)
url = reverse('load-single-feed', kwargs=dict(feed_id=1))
response = self.client.get(url)
feed = json.decode(response.content)
self.assertEquals(len(feed['stories']), 6)
self.assertEqual(len(feed['stories']), 6)
def test_load_feeds__gothamist(self):
self.client.login(username='conesus', password='test')
@ -63,30 +63,30 @@ class FeedTest(TestCase):
management.call_command('loaddata', 'gothamist_aug_2009_1.json', verbosity=0, skip_checks=False)
feed = Feed.objects.get(feed_link__contains='gothamist')
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 0)
self.assertEqual(stories.count(), 0)
feed.update(force=True)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 42)
self.assertEqual(stories.count(), 42)
url = reverse('load-single-feed', kwargs=dict(feed_id=4))
response = self.client.get(url)
content = json.decode(response.content)
self.assertEquals(len(content['stories']), 6)
self.assertEqual(len(content['stories']), 6)
management.call_command('loaddata', 'gothamist_aug_2009_2.json', verbosity=0, skip_checks=False)
feed.update(force=True)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 42)
self.assertEqual(stories.count(), 42)
url = reverse('load-single-feed', kwargs=dict(feed_id=4))
response = self.client.get(url)
# print [c['story_title'] for c in json.decode(response.content)]
content = json.decode(response.content)
# Test: 1 changed char in title
self.assertEquals(len(content['stories']), 6)
self.assertEqual(len(content['stories']), 6)
def test_load_feeds__slashdot(self):
self.client.login(username='conesus', password='test')
@ -97,28 +97,28 @@ class FeedTest(TestCase):
feed = Feed.objects.get(feed_link__contains='slashdot')
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 0)
self.assertEqual(stories.count(), 0)
management.call_command('refresh_feed', force=1, feed=5, single_threaded=True, daemonize=False, skip_checks=False)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 38)
self.assertEqual(stories.count(), 38)
response = self.client.get(reverse('load-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds']['5']['nt'], 38)
self.assertEqual(content['feeds']['5']['nt'], 38)
self.client.post(reverse('mark-story-as-read'), {'story_id': old_story_guid, 'feed_id': 5})
response = self.client.get(reverse('refresh-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds']['5']['nt'], 37)
self.assertEqual(content['feeds']['5']['nt'], 37)
management.call_command('loaddata', 'slashdot2.json', verbosity=0, skip_checks=False)
management.call_command('refresh_feed', force=1, feed=5, single_threaded=True, daemonize=False, skip_checks=False)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 38)
self.assertEqual(stories.count(), 38)
url = reverse('load-single-feed', kwargs=dict(feed_id=5))
response = self.client.get(url)
@ -127,11 +127,11 @@ class FeedTest(TestCase):
feed = json.decode(response.content)
# Test: 1 changed char in title
self.assertEquals(len(feed['stories']), 6)
self.assertEqual(len(feed['stories']), 6)
response = self.client.get(reverse('refresh-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds']['5']['nt'], 37)
self.assertEqual(content['feeds']['5']['nt'], 37)
def test_load_feeds__motherjones(self):
self.client.login(username='conesus', password='test')
@ -140,28 +140,28 @@ class FeedTest(TestCase):
feed = Feed.objects.get(feed_link__contains='motherjones')
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 0)
self.assertEqual(stories.count(), 0)
management.call_command('refresh_feed', force=1, feed=feed.pk, single_threaded=True, daemonize=False, skip_checks=False)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 10)
self.assertEqual(stories.count(), 10)
response = self.client.get(reverse('load-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds'][str(feed.pk)]['nt'], 10)
self.assertEqual(content['feeds'][str(feed.pk)]['nt'], 10)
self.client.post(reverse('mark-story-as-read'), {'story_id': stories[0].story_guid, 'feed_id': feed.pk})
response = self.client.get(reverse('refresh-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds'][str(feed.pk)]['nt'], 9)
self.assertEqual(content['feeds'][str(feed.pk)]['nt'], 9)
management.call_command('loaddata', 'motherjones2.json', verbosity=0, skip_checks=False)
management.call_command('refresh_feed', force=1, feed=feed.pk, single_threaded=True, daemonize=False, skip_checks=False)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 10)
self.assertEqual(stories.count(), 10)
url = reverse('load-single-feed', kwargs=dict(feed_id=feed.pk))
response = self.client.get(url)
@ -170,11 +170,11 @@ class FeedTest(TestCase):
feed = json.decode(response.content)
# Test: 1 changed char in title
self.assertEquals(len(feed['stories']), 6)
self.assertEqual(len(feed['stories']), 6)
response = self.client.get(reverse('refresh-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds'][str(feed['feed_id'])]['nt'], 9)
self.assertEqual(content['feeds'][str(feed['feed_id'])]['nt'], 9)
def test_load_feeds__google(self):
# Freezegun the date to 2017-04-30
@ -183,33 +183,33 @@ class FeedTest(TestCase):
old_story_guid = "blog.google:443/topics/inside-google/google-earths-incredible-3d-imagery-explained/"
management.call_command('loaddata', 'google1.json', verbosity=1, skip_checks=False)
print Feed.objects.all()
print(Feed.objects.all())
feed = Feed.objects.get(pk=766)
print " Testing test_load_feeds__google: %s" % feed
print(" Testing test_load_feeds__google: %s" % feed)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 0)
self.assertEqual(stories.count(), 0)
management.call_command('refresh_feed', force=False, feed=766, single_threaded=True, daemonize=False, skip_checks=False)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 20)
self.assertEqual(stories.count(), 20)
response = self.client.get(reverse('load-feeds')+"?update_counts=true")
content = json.decode(response.content)
self.assertEquals(content['feeds']['766']['nt'], 20)
self.assertEqual(content['feeds']['766']['nt'], 20)
old_story = MStory.objects.get(story_feed_id=feed.pk, story_guid__contains=old_story_guid)
self.client.post(reverse('mark-story-hashes-as-read'), {'story_hash': old_story.story_hash})
response = self.client.get(reverse('refresh-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds']['766']['nt'], 19)
self.assertEqual(content['feeds']['766']['nt'], 19)
management.call_command('loaddata', 'google2.json', verbosity=1, skip_checks=False)
management.call_command('refresh_feed', force=False, feed=766, single_threaded=True, daemonize=False, skip_checks=False)
stories = MStory.objects(story_feed_id=feed.pk)
self.assertEquals(stories.count(), 20)
self.assertEqual(stories.count(), 20)
url = reverse('load-single-feed', kwargs=dict(feed_id=766))
response = self.client.get(url)
@ -218,11 +218,11 @@ class FeedTest(TestCase):
feed = json.decode(response.content)
# Test: 1 changed char in title
self.assertEquals(len(feed['stories']), 6)
self.assertEqual(len(feed['stories']), 6)
response = self.client.get(reverse('refresh-feeds'))
content = json.decode(response.content)
self.assertEquals(content['feeds']['766']['nt'], 19)
self.assertEqual(content['feeds']['766']['nt'], 19)
def test_load_feeds__brokelyn__invalid_xml(self):
self.client.login(username='conesus', password='test')
@ -237,7 +237,7 @@ class FeedTest(TestCase):
feed = json.decode(response.content)
# Test: 1 changed char in title
self.assertEquals(len(feed['stories']), 6)
self.assertEqual(len(feed['stories']), 6)
def test_all_feeds(self):
pass
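
assertEquals is a long-deprecated alias of assertEqual in unittest, so these test edits are rename-only and do not change what is asserted. For example:

    import unittest

    class RenameExample(unittest.TestCase):
        def test_counts(self):
            stories = []
            # Py2-era spelling: self.assertEquals(len(stories), 0)
            self.assertEqual(len(stories), 0)

    if __name__ == '__main__':
        unittest.main()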

View file

@ -13,8 +13,8 @@ from OpenSSL.SSL import Error as OpenSSLError
from pyasn1.error import PyAsn1Error
from django.utils.encoding import smart_str
from django.conf import settings
from BeautifulSoup import BeautifulSoup
from urlparse import urljoin
from bs4 import BeautifulSoup
from urllib.parse import urljoin
BROKEN_URLS = [
"gamespot.com",
@ -122,13 +122,13 @@ class TextImporter:
if text:
text = text.replace("\xc2\xa0", " ") # Non-breaking space, is mangled when encoding is not utf-8
text = text.replace("\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
text = text.replace("\\u00a0", " ") # Non-breaking space, is mangled when encoding is not utf-8
original_text_doc = readability.Document(text, url=resp.url,
positive_keywords="post, entry, postProp, article, postContent, postField")
try:
content = original_text_doc.summary(html_partial=True)
except (readability.Unparseable, ParserError), e:
except (readability.Unparseable, ParserError) as e:
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
return
@ -151,7 +151,7 @@ class TextImporter:
self.story.original_text_z = zlib.compress(smart_str(content))
try:
self.story.save()
except NotUniqueError, e:
except NotUniqueError as e:
logging.user(self.request, ("~SN~FYFetched ~FGoriginal text~FY: %s" % (e)), warn_color=False)
pass
logging.user(self.request, ("~SN~FYFetched ~FGoriginal text~FY: now ~SB%s bytes~SN vs. was ~SB%s bytes" % (
@ -179,7 +179,7 @@ class TextImporter:
if len(noscript.contents) > 0:
noscript.replaceWith(noscript.contents[0])
content = unicode(soup)
content = str(soup)
images = set([img['src'] for img in soup.findAll('img') if 'src' in img])
for image_url in images:
@ -212,7 +212,7 @@ class TextImporter:
requests.models.ChunkedEncodingError,
requests.models.ContentDecodingError,
urllib3.exceptions.LocationValueError,
LocationParseError, OpenSSLError, PyAsn1Error), e:
LocationParseError, OpenSSLError, PyAsn1Error) as e:
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)
return
return r
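
The old BeautifulSoup 3 package (`from BeautifulSoup import BeautifulSoup`) never ran on Python 3, so the import moves to bs4 and serialising the tree changes from unicode(soup) to str(soup). A minimal sketch of the noscript unwrapping done above, assuming bs4 is installed (the markup is illustrative):

    from bs4 import BeautifulSoup

    markup = '<div><noscript><img src="a.jpg"></noscript></div>'
    soup = BeautifulSoup(markup, 'html.parser')

    for noscript in soup.find_all('noscript'):
        if noscript.contents:
            noscript.replace_with(noscript.contents[0])

    content = str(soup)   # Py2 code used unicode(soup)
    print(content)        # the <noscript> wrapper is gone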

View file

@ -1,5 +1,5 @@
import datetime
from urlparse import urlparse
from urllib.parse import urlparse
from utils import log as logging
from django.shortcuts import get_object_or_404, render
from django.views.decorators.http import condition
@ -235,7 +235,7 @@ def assemble_statistics(user, feed_id):
localoffset = timezone.utcoffset(datetime.datetime.utcnow())
hours_offset = int(localoffset.total_seconds() / 3600)
rotated_hours = {}
for hour, value in stats['story_hours_history'].items():
for hour, value in list(stats['story_hours_history'].items()):
rotated_hours[str(int(hour)+hours_offset)] = value
stats['story_hours_history'] = rotated_hours