Merge branch 'django3.0' into django3.1

* django3.0: (27 commits)
  Removing log override
  Moving logging over to the newsblur log.
  Fixing search indexer background task for new celery.
  Attempting to add gunicorn errors to console/log.
  Better handling of missing subs.
  Handling missing user sub on feed delete.
  Correct encoding for strings on systems that don't have utf-8 as default encoding.
  Writing in the real urllib3 dependency for requests.
  Upgrading requests due to urllib3 incompatibility.
  Login required should use the next parameter.
  Upgrading django oauth toolkit for django 1.11.
  Handling newsletters with multiple recipients.
  Extracting image urls sometimes fails.
  Handling ajax errors in json views.
  Adding timeouts to most outbound requests.
  Sentry SDK 0.19.4.
  Removing imperfect proxy warning for every story.
  Found four more GET/POST crosses.
  Feed unread count may need a POST.
  Namespacing settings.
  ...
Samuel Clay, 2020-12-08 09:10:51 -05:00
commit aea4fcbe5b
31 changed files with 115 additions and 71 deletions

View file

@ -98,7 +98,7 @@ def login(request):
return render(request, 'accounts/login.html', {
'form': form,
'next': request.POST.get('next', "")
'next': request.POST.get('next', "") or request.GET.get('next', "")
})
@csrf_exempt
@ -716,5 +716,6 @@ def ios_subscription_status(request):
}
def trigger_error(request):
logging.user(request.user, "~BR~FW~SBTriggering division by zero")
division_by_zero = 1 / 0
return HttpResponseRedirect(reverse('index'))

View file

@ -1431,15 +1431,16 @@ class UserSubscriptionFolders(models.Model):
self.save()
if not multiples_found and deleted and commit_delete:
user_sub = None
try:
user_sub = UserSubscription.objects.get(user=self.user, feed=feed_id)
except Feed.DoesNotExist:
except (Feed.DoesNotExist, UserSubscription.DoesNotExist):
duplicate_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
if duplicate_feed:
try:
user_sub = UserSubscription.objects.get(user=self.user,
feed=duplicate_feed[0].feed)
except Feed.DoesNotExist:
except (Feed.DoesNotExist, UserSubscription.DoesNotExist):
return
if user_sub:
user_sub.delete()

View file

@ -559,9 +559,11 @@ def interactions_count(request):
@ajax_login_required
@json.json_view
def feed_unread_count(request):
get_post = getattr(request, request.method)
start = time.time()
user = request.user
feed_ids = request.GET.getlist('feed_id') or request.GET.getlist('feed_id[]')
feed_ids = get_post.getlist('feed_id') or get_post.getlist('feed_id[]')
force = request.GET.get('force', False)
social_feed_ids = [feed_id for feed_id in feed_ids if 'social:' in feed_id]
feed_ids = list(set(feed_ids) - set(social_feed_ids))
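
The getattr(request, request.method) line above lets feed_unread_count read its parameters from whichever QueryDict matches the request method, so the endpoint now accepts both GET and POST. A minimal sketch of the same idea in an ordinary Django view (view name and response shape are illustrative, not NewsBlur's):

# Illustrative only: pick the QueryDict that matches the request method,
# then read list-valued parameters from it, as the hunk above does.
from django.http import JsonResponse

def unread_count_example(request):
    params = request.POST if request.method == "POST" else request.GET
    feed_ids = params.getlist('feed_id') or params.getlist('feed_id[]')
    return JsonResponse({'feed_ids': feed_ids})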

View file

@ -215,7 +215,7 @@ class IconImporter(object):
url = self._url_from_html(content)
if not url:
try:
content = requests.get(self.cleaned_feed_link).content
content = requests.get(self.cleaned_feed_link, timeout=10).content
url = self._url_from_html(content)
except (AttributeError, SocketError, requests.ConnectionError,
requests.models.MissingSchema, requests.sessions.InvalidSchema,
@ -224,6 +224,7 @@ class IconImporter(object):
requests.models.ChunkedEncodingError,
requests.models.ContentDecodingError,
http.client.IncompleteRead,
requests.adapters.ReadTimeout,
LocationParseError, OpenSSLError, PyAsn1Error,
ValueError) as e:
logging.debug(" ---> ~SN~FRFailed~FY to fetch ~FGfeed icon~FY: %s" % e)

View file

@ -409,6 +409,10 @@ class Feed(models.Model):
def favicon_fetching(self):
return bool(not (self.favicon_not_found or self.favicon_color))
@classmethod
def get_feed_by_url(self, *args, **kwargs):
return self.get_feed_from_url(*args, **kwargs)
@classmethod
def get_feed_from_url(cls, url, create=True, aggressive=False, fetch=True, offset=0, user=None, interactive=False):
feed = None
@ -416,7 +420,10 @@ class Feed(models.Model):
original_url = url
if url and url.startswith('newsletter:'):
return cls.objects.get(feed_address=url)
try:
return cls.objects.get(feed_address=url)
except cls.MultipleObjectsReturned:
return cls.objects.filter(feed_address=url)[0]
if url and re.match('(https?://)?twitter.com/\w+/?', url):
without_rss = True
if url and re.match(r'(https?://)?(www\.)?facebook.com/\w+/?$', url):
@ -2797,6 +2804,8 @@ class MStory(mongo.Document):
if len(image_urls):
self.image_urls = [u for u in image_urls if u]
else:
return
max_length = MStory.image_urls.field.max_length
while len(''.join(self.image_urls)) > max_length:
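
The hunk is truncated at the start of the trimming loop; presumably it drops URLs from the end of the list until the joined string fits the MongoEngine field's max_length. A hedged sketch of that assumed behavior, with illustrative values:

# Assumed trimming behavior -- the loop body is not shown in this hunk.
max_length = 1024  # illustrative; the real value comes from MStory.image_urls.field.max_length
image_urls = ["https://example.com/a.jpg", "https://example.com/b.jpg"]
while image_urls and len(''.join(image_urls)) > max_length:
    image_urls.pop()  # drop the last URL until the total length fits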

View file

@ -90,11 +90,12 @@ class PageImporter(object):
data = response.read()
else:
try:
response = requests.get(feed_link, headers=self.headers)
response = requests.get(feed_link, headers=self.headers, timeout=10)
response.connection.close()
except requests.exceptions.TooManyRedirects:
response = requests.get(feed_link)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError) as e:
response = requests.get(feed_link, timeout=10)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, TypeError,
requests.adapters.ReadTimeout) as e:
logging.debug(' ***> [%-30s] Page fetch failed using requests: %s' % (self.feed.log_title[:30], e))
self.save_no_page()
return
@ -184,12 +185,18 @@ class PageImporter(object):
return
try:
response = requests.get(story_permalink, headers=self.headers)
response = requests.get(story_permalink, headers=self.headers, timeout=10)
response.connection.close()
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects) as e:
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error,
requests.exceptions.ConnectionError,
requests.exceptions.TooManyRedirects,
requests.adapters.ReadTimeout) as e:
try:
response = requests.get(story_permalink)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error, requests.exceptions.ConnectionError, requests.exceptions.TooManyRedirects) as e:
response = requests.get(story_permalink, timeout=10)
except (AttributeError, SocketError, OpenSSLError, PyAsn1Error,
requests.exceptions.ConnectionError,
requests.exceptions.TooManyRedirects,
requests.adapters.ReadTimeout) as e:
logging.debug(' ***> [%-30s] Original story fetch failed using requests: %s' % (self.feed.log_title[:30], e))
return
try:

View file

@ -201,7 +201,7 @@ class TextImporter:
url = "https://www.newsblur.com/rss_feeds/original_text_fetcher?url=%s" % url
try:
r = requests.get(url, headers=headers)
r = requests.get(url, headers=headers, verify=False, timeout=15)
r.connection.close()
except (AttributeError, SocketError, requests.ConnectionError,
requests.models.MissingSchema, requests.sessions.InvalidSchema,
@ -209,6 +209,7 @@ class TextImporter:
requests.models.InvalidURL,
requests.models.ChunkedEncodingError,
requests.models.ContentDecodingError,
requests.adapters.ReadTimeout,
urllib3.exceptions.LocationValueError,
LocationParseError, OpenSSLError, PyAsn1Error) as e:
logging.user(self.request, "~SN~FRFailed~FY to fetch ~FGoriginal text~FY: %s" % e)

View file

@ -544,7 +544,7 @@ def original_text(request):
'failed': not original_text or len(original_text) < 100,
}
@required_params('story_hash')
@required_params('story_hash', method="GET")
def original_story(request):
story_hash = request.GET.get('story_hash')
force = request.GET.get('force', False)
@ -561,7 +561,7 @@ def original_story(request):
return HttpResponse(original_page or "")
@required_params('story_hash')
@required_params('story_hash', method="GET")
@json.json_view
def story_changes(request):
story_hash = request.GET.get('story_hash', None)
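
The required_params decorator itself is not part of this diff; the new method="GET" argument evidently tells it which QueryDict to validate. A hypothetical sketch of such a method-aware decorator (names, signature, and error format are made up for illustration, not NewsBlur's actual helper):

# Hypothetical sketch of a method-aware required_params decorator.
from functools import wraps
from django.http import HttpResponseBadRequest

def required_params(*params, method="POST", **typed_params):
    def decorator(view):
        @wraps(view)
        def wrapped(request, *args, **kwargs):
            source = request.GET if method == "GET" else request.POST
            missing = [p for p in params if p not in source]
            for name, cast in typed_params.items():
                try:
                    cast(source.get(name, ''))
                except (TypeError, ValueError):
                    missing.append(name)
            if missing:
                return HttpResponseBadRequest("Missing or invalid: %s" % ", ".join(missing))
            return view(request, *args, **kwargs)
        return wrapped
    return decorator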

View file

@ -78,7 +78,7 @@ class MUserSearch(mongo.Document):
logging.user(user, "~FCIndexing ~SB%s feeds~SN in %s chunks..." %
(total, len(feed_id_chunks)))
tasks = [IndexSubscriptionsChunkForSearch().s(feed_ids=feed_id_chunk,
tasks = [IndexSubscriptionsChunkForSearch.s(feed_ids=feed_id_chunk,
user_id=self.user_id
).set(queue='search_indexer')
for feed_id_chunk in feed_id_chunks]
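
The one-character change above matters because, with the new Celery, signatures are created with .s() on the registered task object itself rather than on an instance made by calling the task. A self-contained illustration of the chunked fan-out pattern (app, task, and queue names here are made up):

# Illustrative Celery 4+ fan-out: build signatures with .s() and dispatch as a group.
from celery import Celery, group

app = Celery('example', broker='redis://localhost:6379/0')

@app.task
def index_chunk(feed_ids, user_id):
    return len(feed_ids)

def index_all(feed_ids, user_id, chunk_size=50):
    chunks = [feed_ids[i:i + chunk_size] for i in range(0, len(feed_ids), chunk_size)]
    tasks = [index_chunk.s(feed_ids=chunk, user_id=user_id).set(queue='search_indexer')
             for chunk in chunks]
    return group(tasks).apply_async()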

View file

@ -1,4 +1,5 @@
from newsblur.celeryapp import app
from utils import log as logging
@app.task()
def IndexSubscriptionsForSearch(user_id):
@ -9,6 +10,7 @@ def IndexSubscriptionsForSearch(user_id):
@app.task()
def IndexSubscriptionsChunkForSearch(feed_ids, user_id):
logging.debug(" ---> Indexing: %s for %s" % (feed_ids, user_id))
from apps.search.models import MUserSearch
user_search = MUserSearch.get_user(user_id)

View file

@ -2332,7 +2332,7 @@ class MSharedStory(mongo.DynamicDocument):
for image_source in self.image_urls[:10]:
if any(ignore in image_source for ignore in IGNORE_IMAGE_SOURCES):
continue
req = requests.get(image_source, headers=headers, stream=True)
req = requests.get(image_source, headers=headers, stream=True, timeout=10)
try:
datastream = BytesIO(req.content)
width, height = ImageOps.image_size(datastream)
@ -2717,7 +2717,7 @@ class MSocialServices(mongo.Document):
os.remove(filename)
else:
api.update_status(status=message)
except tweepy.TweepError as e:
except (tweepy.TweepError, requests.adapters.ReadError) as e:
user = User.objects.get(pk=self.user_id)
logging.user(user, "~FRTwitter error: ~SB%s" % e)
return
@ -2732,7 +2732,7 @@ class MSocialServices(mongo.Document):
url = shared_story.image_urls[0]
image_filename = os.path.basename(urllib.parse.urlparse(url).path)
req = requests.get(url, stream=True)
req = requests.get(url, stream=True, timeout=10)
filename = "/tmp/%s-%s" % (shared_story.story_hash, image_filename)
if req.status_code == 200:

View file

@ -507,7 +507,7 @@ def load_social_page(request, user_id, username=None, **kwargs):
return render(request, template, params)
@required_params('story_id', feed_id=int)
@required_params('story_id', feed_id=int, method="GET")
def story_public_comments(request):
format = request.GET.get('format', 'json')
relative_user_id = request.GET.get('user_id', None)
@ -1372,7 +1372,7 @@ def shared_stories_rss_feed(request, user_id, username):
))
return HttpResponse(rss.writeString('utf-8'), content_type='application/rss+xml')
@required_params('user_id')
@required_params('user_id', method="GET")
@json.json_view
def social_feed_trainer(request):
social_user_id = request.GET['user_id']

View file

@ -1,7 +1,7 @@
from newsblur.celeryapp import app
from apps.statistics.models import MStatistics
from apps.statistics.models import MFeedback
# from utils import log as logging
from utils import log as logging

View file

@ -2,7 +2,7 @@
# Plugin to monitor pg_stat_activity
#
# Copyright Dalibo <cedric.villemain@dalibo.com> 2007
# Based on a plugin (postgres_block_read_) from Björn Ruberg <bjorn@linpro.no>
# Based on a plugin (postgres_block_read_) from Bj<EFBFBD>rn Ruberg <bjorn@linpro.no>
#
# Licenced under GPL v2.
#
@ -131,8 +131,8 @@ else {
unless($dbh) {
die ("no Unable to access Database $dbname on host $dbhost as user $dbuser.\nError returned was: ". $DBI::errstr."\n");
}
my $sql = "select count(*), waiting from pg_stat_activity ";
$sql .= " where datname = ? group by waiting ";
my $sql = "select count(*), wait_event from pg_stat_activity ";
$sql .= " where datname = ? group by wait_event ";
print "# $sql\n" if $debug;
my $sth = $dbh->prepare($sql);
$sth->execute($dbname);
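
The query change reflects PostgreSQL 9.6+, where pg_stat_activity no longer has a boolean waiting column and instead exposes wait_event (and wait_event_type). A small hedged check of the updated query from Python, with placeholder connection details:

# Illustrative only: run the plugin's updated query against PostgreSQL 9.6+.
import psycopg2

conn = psycopg2.connect(dbname="newsblur", host="localhost", user="postgres")
with conn, conn.cursor() as cur:
    cur.execute(
        "SELECT count(*), wait_event FROM pg_stat_activity "
        "WHERE datname = %s GROUP BY wait_event",
        ("newsblur",),
    )
    for count, wait_event in cur.fetchall():
        print(count, wait_event or "not waiting")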

View file

@ -63,3 +63,4 @@ subdomains==3.0.1
tweepy==3.9.0
# -e git://github.com/tweepy/tweepy.git#egg=tweepy
xlsxwriter==1.3.7
urllib3==1.26.2

View file

@ -1,7 +1,6 @@
[program:celerybeat]
command=celery -A newsblur beat --schedule=/srv/newsblur/data/celerybeat-schedule.db --loglevel=INFO
command=/srv/newsblur/venv/newsblur3/bin/celery -A newsblur beat --schedule=/srv/newsblur/data/celerybeat-schedule.db --loglevel=INFO
directory=/srv/newsblur
environment=PATH="/srv/newsblur/venv/newsblur3/bin"
user=sclay
numprocs=1
stdout_logfile=/var/log/celerybeat.log

View file

@ -1,7 +1,6 @@
[program:celeryd_beat]
command=celery -A newsblur worker --loglevel=INFO -Q beat_tasks -c 3
command=/srv/newsblur/venv/newsblur3/bin/celery -A newsblur worker --loglevel=INFO -Q beat_tasks -c 3
directory=/srv/newsblur
environment=PATH="/srv/newsblur/venv/newsblur3/bin"
user=sclay
numprocs=1
stdout_logfile=/var/log/celeryd_beat.log

View file

@ -1,7 +1,6 @@
[program:celeryd_beat_feeds]
command=celery -A newsblur worker --loglevel=INFO -Q beat_feeds_task -c 1
command=/srv/newsblur/venv/newsblur3/bin/celery -A newsblur worker --loglevel=INFO -Q beat_feeds_task -c 1
directory=/srv/newsblur
environment=PATH="/srv/newsblur/venv/newsblur3/bin"
user=sclay
numprocs=1
stdout_logfile=/var/log/celeryd_beat_feeds.log

View file

@ -1,7 +1,6 @@
[program:celery]
command=celery -A newsblur worker --loglevel=INFO -Q new_feeds,push_feeds
command=/srv/newsblur/venv/newsblur3/bin/celery -A newsblur worker --loglevel=INFO -Q new_feeds,push_feeds
directory=/srv/newsblur
environment=PATH="/srv/newsblur/venv/newsblur3/bin"
user=sclay
numprocs=1
stdout_logfile=/var/log/celeryd.log

View file

@ -1,7 +1,6 @@
[program:celeryd_work_queue]
command=celery -A newsblur worker --loglevel=INFO -Q work_queue
command=/srv/newsblur/venv/newsblur3/bin/celery -A newsblur worker --loglevel=INFO -Q work_queue
directory=/srv/newsblur
environment=PATH="/srv/newsblur/venv/newsblur3/bin"
user=sclay
numprocs=1
stdout_logfile=/var/log/celeryd_workqueue.log

View file

@ -4,6 +4,7 @@ directory=/srv/newsblur
user=sclay
autostart=true
autorestart=true
#redirect_stderr=True
# redirect_stderr=True
# stdout_logfile=/srv/newsblur/logs/newsblur.log
priority=991
stopsignal=HUP

fabfile.py (vendored)
View file

@ -140,7 +140,8 @@ def assign_digitalocean_roledefs(split=False):
return droplets
def app():
web()
assign_digitalocean_roledefs()
env.roles = ['app']
def web():
assign_digitalocean_roledefs()
@ -148,7 +149,7 @@ def web():
def work():
assign_digitalocean_roledefs()
env.roles = ['work', 'search']
env.roles = ['work']
def www():
assign_digitalocean_roledefs()
@ -430,7 +431,7 @@ def setup_repo():
def setup_repo_local_settings():
with virtualenv():
run('cp local_settings.py.template local_settings.py')
run('cp newsblur/local_settings.py.template newsblur/local_settings.py')
run('mkdir -p logs')
run('touch logs/newsblur.log')
@ -494,20 +495,24 @@ def setup_pip():
@parallel
def pip():
role = role_for_host()
pull()
with virtualenv():
with settings(warn_only=True):
sudo('fallocate -l 4G /swapfile')
sudo('chmod 600 /swapfile')
sudo('mkswap /swapfile')
sudo('swapon /swapfile')
if role == "task":
with settings(warn_only=True):
sudo('fallocate -l 4G /swapfile')
sudo('chmod 600 /swapfile')
sudo('mkswap /swapfile')
sudo('swapon /swapfile')
sudo('chown %s.%s -R %s' % (env.user, env.user, os.path.join(env.NEWSBLUR_PATH, 'venv')))
run('easy_install -U pip')
run('pip install --upgrade pip')
run('pip install --upgrade setuptools')
run('pip install -r requirements.txt')
with settings(warn_only=True):
sudo('swapoff /swapfile')
if role == "task":
with settings(warn_only=True):
sudo('swapoff /swapfile')
def solo_pip(role):
if role == "app":
@ -751,7 +756,7 @@ def setup_gunicorn(supervisor=True, restart=True):
put('config/supervisor_gunicorn.conf', '/etc/supervisor/conf.d/gunicorn.conf', use_sudo=True)
sudo('supervisorctl reread')
if restart:
restart_gunicorn()
sudo('supervisorctl update')
# with cd(env.VENDOR_PATH):
# sudo('rm -fr gunicorn')
# run('git clone git://github.com/benoitc/gunicorn.git')
@ -961,7 +966,10 @@ def build_haproxy():
f.write(haproxy_template)
f.close()
def upgrade_django(role):
def upgrade_django(role=None):
if not role:
role = role_for_host()
with virtualenv(), settings(warn_only=True):
sudo('sudo dpkg --configure -a')
setup_supervisor()
@ -972,15 +980,24 @@ def upgrade_django(role):
run('./utils/kill_celery.sh')
copy_task_settings()
enable_celery_supervisor(update=False)
elif role == "app":
elif role == "work":
copy_app_settings()
enable_celerybeat()
elif role == "web" or role == "app":
sudo('supervisorctl stop gunicorn')
run('./utils/kill_gunicorn.sh')
copy_app_settings()
setup_gunicorn(restart=False)
elif role == "node":
copy_app_settings()
config_node(full=True)
else:
copy_task_settings()
pip()
clean()
sudo('reboot')
# sudo('reboot')
def clean():
with virtualenv(), settings(warn_only=True):
@ -1634,9 +1651,12 @@ def setup_ec2():
# ==========
@parallel
def pull():
def pull(master=False):
with virtualenv():
run('git pull')
if master:
run('git checkout master')
run('git pull')
def pre_deploy():
compress_assets(bundle=True)

View file

@ -103,9 +103,9 @@ NEWSBLUR.Views.StoryTabView = Backbone.View.extend({
var correct = this.$iframe.contents().find('body').children().length;
console.log(['correct?', this.$iframe.contents(), this.$iframe.contents().find('body').children().length]);
if (correct && this.flags.proxied_https) {
NEWSBLUR.app.taskbar_info.show_stories_error({
proxied_https: true
}, "Imperfect proxy due<br />to http over https");
// NEWSBLUR.app.taskbar_info.show_stories_error({
// proxied_https: true
// }, "Imperfect proxy due<br />to http over https");
} else if (!correct && this.flags.proxied_https) {
NEWSBLUR.reader.switch_taskbar_view('text', {skip_save_type: 'story'});
NEWSBLUR.app.taskbar_info.show_stories_error({}, "Sorry, the original story<br />could not be proxied.");
@ -123,4 +123,4 @@ NEWSBLUR.Views.StoryTabView = Backbone.View.extend({
}
}
});
});

View file

@ -133,10 +133,10 @@ class FetchFeed:
headers['If-Modified-Since'] = modified_header
if etag or modified:
headers['A-IM'] = 'feed'
raw_feed = requests.get(address, headers=headers)
raw_feed = requests.get(address, headers=headers, timeout=15)
if raw_feed.status_code >= 400:
logging.debug(" ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (self.feed.log_title[:30], raw_feed.status_code, raw_feed.headers))
raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True))
raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True), timeout=15)
if raw_feed.content and 'application/json' in raw_feed.headers.get('Content-Type', ""):
# JSON Feed

View file

@ -38,7 +38,7 @@ class FeedFinder(object):
def get_feed(self, url, skip_user_agent=False):
try:
r = requests.get(url, headers={"User-Agent": self.user_agent if not skip_user_agent else None})
r = requests.get(url, headers={"User-Agent": self.user_agent if not skip_user_agent else None}, timeout=15)
except Exception as e:
logging.warn("Error while getting '{0}'".format(url))
logging.warn("{0}".format(e))
@ -156,10 +156,10 @@ def sort_urls(feeds):
if __name__ == "__main__":
print((find_feeds("www.preposterousuniverse.com/blog/")))
print((find_feeds("http://xkcd.com")))
print((find_feeds("dan.iel.fm/atom.xml")))
print((find_feeds("dan.iel.fm", check_all=True)))
print((find_feeds("kapadia.github.io")))
print((find_feeds("blog.jonathansick.ca")))
print((find_feeds("asdasd")))
print(find_feeds("www.preposterousuniverse.com/blog/"))
print(find_feeds("http://xkcd.com"))
print(find_feeds("dan.iel.fm/atom.xml"))
print(find_feeds("dan.iel.fm", check_all=True))
print(find_feeds("kapadia.github.io"))
print(find_feeds("blog.jonathansick.ca"))
print(find_feeds("asdasd"))

View file

@ -126,7 +126,7 @@ def json_view(func):
def json_response(request, response=None):
code = 200
if isinstance(response, HttpResponseForbidden):
if isinstance(response, HttpResponseForbidden) or isinstance(response, HttpResponse):
return response
try:
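
This widening lets a view wrapped by the json_view decorator return a fully formed HttpResponse (a redirect, a rendered page, raw bytes) and have it passed through instead of being re-serialized as JSON; since HttpResponseForbidden already subclasses HttpResponse, the single isinstance(response, HttpResponse) check would suffice on its own. A hedged sketch of the passthrough idea, not NewsBlur's actual decorator:

# Sketch of a JSON-view decorator that passes real HttpResponse objects through.
import json
from functools import wraps
from django.http import HttpResponse

def json_view(func):
    @wraps(func)
    def wrapper(request, *args, **kwargs):
        result = func(request, *args, **kwargs)
        if isinstance(result, HttpResponse):
            return result  # already a complete response; do not serialize
        return HttpResponse(json.dumps(result), content_type='application/json')
    return wrapper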

View file

@ -5,7 +5,7 @@ sys.path.append('/srv/newsblur')
import subprocess
import requests
import settings
from newsblur import settings
import socket
def main():

View file

@ -5,7 +5,7 @@ sys.path.append('/srv/newsblur')
import subprocess
import requests
import settings
from newsblur import settings
import socket
def main():

View file

@ -6,7 +6,7 @@ sys.path.append('/srv/newsblur')
import os
import datetime
import requests
import settings
from newsblur import settings
import socket
def main():

View file

@ -5,7 +5,7 @@ sys.path.append('/srv/newsblur')
import subprocess
import requests
import settings
from newsblur import settings
import socket
import pymongo

View file

@ -4,6 +4,7 @@ import struct
import dateutil
import hashlib
import base64
import sys
from random import randint
from html.parser import HTMLParser
from lxml.html.diff import tokenize, fixup_ins_del_tags, htmldiff_tokens
@ -401,6 +402,8 @@ def create_imageproxy_signed_url(base_url, hmac_key, url, options=None):
if not options: options = []
if isinstance(options, int): options = [str(options)]
if not isinstance(options, list): options = [options]
if sys.getdefaultencoding() == 'ascii':
url = url.encode('utf-8')
base_url = base_url.rstrip('/')
signature = base64.urlsafe_b64encode(hmac.new(hmac_key.encode(), msg=url.encode(), digestmod=hashlib.sha256).digest())
options.append('sc')