Merge branch 'master' into pro

* master: (31 commits)
  Adding postgres backup log.
  Handling operation failures in flask metrics.
  Locking consul to a working version. Also changing flask status code response to handle custom status codes.
  Explicitly uploading mongo backups
  Only need date once in filename
  Rotating mongo backups
  Using full path for progress.
  Showing full path of postgres backup.
  Separating out restoring backups and storing backups.
  Moving to postgres backup directory.
  Executable postgres backup script.
  Compressing Postgres db sql backup.
  Adding date to tarball to allow for versioning.
  Correcting permissions on mongo backup
  Fixing mongo upload to s3 by using host networking mode.
  Removing unused backup code.
  Forgot import
  Adding progress bar to mongo backup.
  Refactoring aws keys for mongo backups.
  Moving mongo backups
  ...
Samuel Clay · 2022-02-04 13:55:38 -05:00 · commit 88d07b143c
112 changed files with 470 additions and 326 deletions

.gitignore
View file

@@ -43,7 +43,8 @@ templates/maintenance_on.html
 vendor/mms-agent/settings.py
 apps/social/spam.py
 venv*
-/backups
+backup
+backups
 config/mongodb_keyfile.key
 # Docker Jinja templates

View file

@@ -16,12 +16,12 @@
 "media/ios": true,
 "**/*.map": true,
 "ansible/playbooks/*/*": true,
-"archive/*": true,
+// "archive/*": true,
 "logs/*": true,
-"static/*": true,
+// "static/*": true,
 "media/fonts": true,
 "static/*.css": true,
-"static/*.js": true,
+"static/js/*.*.js": true,
 "blog/.jekyll-cache": true,
 "blog/_site": true,
 "docker/volumes": true,

View file

@@ -70,7 +70,7 @@
 run_once: yes
 connection: local
 amazon.aws.aws_s3:
-  bucket: newsblur_backups
+  bucket: newsblur-backups
   object: /static_py3.tgz
   src: /srv/newsblur/static.tgz
   mode: put
@@ -94,7 +94,7 @@
 vars:
   ansible_python_interpreter: /usr/bin/python3
 amazon.aws.aws_s3:
-  bucket: newsblur_backups
+  bucket: newsblur-backups
   object: /static_py3.tgz
   dest: /srv/newsblur/static.tgz
   mode: get

View file

@@ -17,14 +17,14 @@
 - name: Set backup vars
   set_fact:
     redis_story_filename: backup_redis_story_2021-04-13-04-00.rdb.gz
-    postgres_filename: backup_postgresql_2022-01-06-19-46.sql.gz
+    postgres_filename: backup_postgresql_2022-02-03-04-00.sql.gz
     mongo_filename: backup_mongo_2021-03-15-04-00.tgz
     redis_filename: backup_redis_2021-03-15-04-00.rdb.gz
   tags: never, restore_postgres, restore_mongo, restore_redis, restore_redis_story
 - name: Download archives
   amazon.aws.aws_s3:
-    bucket: newsblur_backups
+    bucket: "newsblur-backups"
     object: "{{ item.dir }}{{ item.file }}"
     dest: "/srv/newsblur/backups/{{ item.file }}"
     mode: get
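The bucket rename from newsblur_backups to newsblur-backups recurs throughout this commit, likely because underscores are not allowed in DNS-compliant S3 bucket names. For reference, a minimal boto3 sketch of the same S3 get this aws_s3 task performs (not part of the commit; the object key is illustrative and credentials are assumed to come from the environment):

import boto3

# Same download as the aws_s3 task above, assuming ambient AWS credentials.
# The object key mirrors the postgres_filename fact; adjust dir/file as needed.
s3 = boto3.client("s3")
s3.download_file(
    "newsblur-backups",                                     # bucket (hyphens, not underscores)
    "postgres/backup_postgresql_2022-02-03-04-00.sql.gz",   # illustrative key
    "/srv/newsblur/backups/backup_postgresql_2022-02-03-04-00.sql.gz",
)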

View file

@@ -1,4 +1,4 @@
-#!/srv/newsblur/venv/newsblur3/bin/python
+#!/usr/bin/env python
 import os
 import digitalocean

View file

@@ -13,8 +13,9 @@
 - name: Installing Consul
   become: yes
   apt:
-    pkg: consul
-    state: latest
+    allow_downgrades: yes
+    pkg: consul=1.10.4
+    state: present
 - name: Register Manager IP
   run_once: yes

View file

@@ -58,9 +58,16 @@
 - name: Make backup directory
   become: yes
   file:
-    path: /opt/mongo/newsblur/backup/
+    path: "/mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/"
     state: directory
-    mode: 0666
+    mode: 0777
+- name: Create symlink to mounted volume for backups to live
+  file:
+    state: link
+    src: "/mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup"
+    path: /srv/newsblur/backup
+    force: yes
 - name: Start db-mongo docker container
   become: yes
@@ -86,7 +93,7 @@
 - /srv/newsblur/ansible/roles/mongo/templates/mongo.conf:/etc/mongod.conf
 - /srv/newsblur/config/mongodb_keyfile.key:/srv/newsblur/config/mongodb_keyfile.key
 - /var/log/mongodb/:/var/log/mongodb/
-- /opt/mongo/newsblur/backup/:/backup/
+- /mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/:/backup/
 when: (inventory_hostname | regex_replace('[0-9]+', '')) in ['db-mongo', 'db-mongo-primary', 'db-mongo-secondary']
 - name: Start db-mongo-analytics docker container
@@ -114,7 +121,7 @@
 - /srv/newsblur/ansible/roles/mongo/templates/mongo.analytics.conf:/etc/mongod.conf
 - /srv/newsblur/config/mongodb_keyfile.key:/srv/newsblur/config/mongodb_keyfile.key
 - /var/log/mongodb/:/var/log/mongodb/
-- /opt/mongo/newsblur/backup/:/backup/
+- /mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/:/backup/
 when: (inventory_hostname | regex_replace('[0-9]+', '')) == 'db-mongo-analytics'
 - name: Create mongo database user
@@ -185,12 +192,18 @@
 docker run --rm -it
 OUTPUT=$(eval sudo df / | head -n 2 | tail -1);
 -v /srv/newsblur:/srv/newsblur
---network=newsblurnet
+--network=host
 --hostname {{ ansible_hostname }}
 newsblur/newsblur_python3 /srv/newsblur/utils/monitor_disk_usage.py $OUTPUT
 tags:
 - sanity-checker
+- name: Copy common secrets
+  copy:
+    src: /srv/secrets-newsblur/settings/common_settings.py
+    dest: /srv/newsblur/newsblur_web/local_settings.py
+  register: app_changed
 - name: Add mongo backup
   cron:
     name: mongo backup
@@ -201,15 +214,15 @@
   tags:
   - mongo-backup
-- name: Add mongo starred_stories+stories backup
-  cron:
-    name: mongo starred/shared/all stories backup
-    minute: "0"
-    hour: "5"
-    job: /srv/newsblur/docker/mongo/backup_mongo_stories.sh
-  when: '"db-mongo-secondary1" in inventory_hostname'
-  tags:
-  - mongo-backup
+# - name: Add mongo starred_stories+stories backup
+#   cron:
+#     name: mongo starred/shared/all stories backup
+#     minute: "0"
+#     hour: "5"
+#     job: /srv/newsblur/docker/mongo/backup_mongo.sh stories
+#   when: '"db-mongo-secondary1" in inventory_hostname'
+#   tags:
+#   - mongo-backup
 # Renaming a db-mongo3 to db-mongo2:
 # - Change hostname to db-mongo2 on Digital Ocean (doctl)
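The new backup mount path is derived from the inventory hostname with Jinja's regex_replace filter. A standalone Python equivalent, not from the commit, showing what the filter produces:

import re

# regex_replace('db-|-', '') deletes the "db-" prefix and any remaining
# hyphens, yielding the per-host volume name under /mnt.
def backup_mount(inventory_hostname):
    volume = re.sub(r"db-|-", "", inventory_hostname)
    return "/mnt/%s/backup/" % volume

assert backup_mount("db-mongo-secondary1") == "/mnt/mongosecondary1/backup/"
assert backup_mount("db-mongo-analytics1") == "/mnt/mongoanalytics1/backup/"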

View file

@@ -1,6 +1,6 @@
 {
   "service": {
-    "name": "db-mongo",
+    "name": "db-mongo-staging",
     "id": "{{ inventory_hostname }}",
     "tags": [
       "db"

View file

@@ -14,6 +14,13 @@
     state: directory
     mode: 0777
+- name: Ensure postgres backup directory
+  become: yes
+  file:
+    path: /srv/newsblur/backups
+    state: directory
+    mode: 0777
 - name: Start postgres docker containers
   become: yes
   docker_container:
@@ -63,6 +70,12 @@
   notify:
   - reload consul
+- name: Copy common secrets
+  copy:
+    src: /srv/secrets-newsblur/settings/common_settings.py
+    dest: /srv/newsblur/newsblur_web/local_settings.py
+  register: app_changed
 - name: Add sanity checkers cronjob for disk usage
   become: yes
   cron:
@@ -78,19 +91,19 @@
 --hostname {{ ansible_hostname }}
 newsblur/newsblur_python3 /srv/newsblur/utils/monitor_disk_usage.py $OUTPUT
+- name: Add postgres backup log
+  become: yes
+  file:
+    path: /var/log/postgres_backup.log
+    state: touch
+    mode: 0777
+    owner: 1000
+    group: 1001
 - name: Add postgres backup
   cron:
     name: postgres backup
     minute: "0"
     hour: "4"
-    job: >-
-      NOW=$(eval date +%F-%H-%M);
-      BACKUP_FILE=backup_postgresql_${NOW}.sql;
-      sudo docker exec -it postgres
-      /usr/lib/postgresql/13/bin/pg_dump -U newsblur -h 127.0.0.1 -Fc newsblur > backup/$BACKUP_FILE;
-      sudo docker run --rm -it
-      -v /srv/newsblur:/srv/newsblur
-      -v /backup/:/backup/
-      --network=newsblurnet
-      newsblur/newsblur_python3
-      python /srv/newsblur/utils/backups/backup_psql.py
+    job: /srv/newsblur/docker/postgres/backup_postgres.sh 1> /var/log/postgres_backup.log 2>&1

View file

@@ -217,6 +217,10 @@ def check_share_on_site(request, token):
     logging.user(request.user, "~FBFinding feed (check_share_on_site): %s" % rss_url)
     feed = Feed.get_feed_from_url(rss_url, create=False, fetch=False)
+    if not feed:
+        rss_url = urllib.parse.urljoin(story_url, rss_url)
+        logging.user(request.user, "~FBFinding feed (check_share_on_site): %s" % rss_url)
+        feed = Feed.get_feed_from_url(rss_url, create=False, fetch=False)
     if not feed:
         logging.user(request.user, "~FBFinding feed (check_share_on_site): %s" % story_url)
         feed = Feed.get_feed_from_url(story_url, create=False, fetch=False)
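The new branch retries the feed lookup after resolving a relative rss_url against the story's page URL. A standalone illustration of the urljoin step (the URLs are made up):

import urllib.parse

story_url = "https://example.com/blog/some-post"
rss_url = "/feeds/all.atom.xml"  # relative href discovered on the page

# urljoin resolves the relative feed path against the page's origin.
print(urllib.parse.urljoin(story_url, rss_url))
# -> https://example.com/feeds/all.atom.xml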

View file

@@ -14,6 +14,7 @@ urlpatterns = [
     url(r'^page/(?P<feed_id>\d+)', views.load_feed_page, name='load-feed-page'),
     url(r'^refresh_feed/(?P<feed_id>\d+)', views.refresh_feed, name='refresh-feed'),
     url(r'^favicons', views.load_feed_favicons, name='load-feed-favicons'),
+    url(r'^river_stories_widget', views.load_river_stories_widget, name='load-river-stories-widget'),
     url(r'^river_stories', views.load_river_stories__redis, name='load-river-stories'),
     url(r'^complete_river', views.complete_river, name='complete-river'),
     url(r'^refresh_feeds', views.refresh_feeds, name='refresh-feeds'),

View file

@@ -4,7 +4,13 @@ import redis
 import requests
 import random
 import zlib
+import concurrent
 import re
+import ssl
+import socket
+import base64
+import urllib.parse
+import urllib.request
 from django.shortcuts import get_object_or_404
 from django.shortcuts import render
 from django.contrib.auth.decorators import login_required
@@ -1453,7 +1459,7 @@ def load_river_stories__redis(request):
         story_hashes = []
         unread_feed_story_hashes = []
-    mstories = MStory.objects(story_hash__in=story_hashes).order_by(story_date_order)
+    mstories = MStory.objects(story_hash__in=story_hashes[:limit]).order_by(story_date_order)
     stories = Feed.format_stories(mstories)
     found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
@@ -1595,6 +1601,57 @@ def load_river_stories__redis(request):
     return data
+
+@json.json_view
+def load_river_stories_widget(request):
+    logging.user(request, "Widget load")
+    river_stories_data = json.decode(load_river_stories__redis(request).content)
+    timeout = 3
+    start = time.time()
+
+    def load_url(url):
+        original_url = url
+        url = urllib.parse.urljoin(settings.NEWSBLUR_URL, url)
+        scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
+        scontext.verify_mode = ssl.VerifyMode.CERT_NONE
+        try:
+            conn = urllib.request.urlopen(url, context=scontext, timeout=timeout)
+        except urllib.request.URLError:
+            url = url.replace('localhost', 'haproxy')
+            conn = urllib.request.urlopen(url, context=scontext, timeout=timeout)
+        except urllib.request.URLError as e:
+            logging.user(request.user, '"%s" not fetched in %ss: %s' % (url, (time.time() - start), e))
+            return None
+        except socket.timeout:
+            logging.user(request.user, '"%s" not fetched in %ss' % (url, (time.time() - start)))
+            return None
+        data = conn.read()
+        logging.user(request.user, '"%s" fetched in %ss' % (url, (time.time() - start)))
+        return dict(url=original_url, data=data)
+
+    # Find the image thumbnails and download in parallel
+    thumbnail_urls = []
+    for story in river_stories_data['stories']:
+        thumbnail_values = list(story['secure_image_thumbnails'].values())
+        if thumbnail_values:
+            thumbnail_urls.append(thumbnail_values[0])
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
+        pages = executor.map(load_url, thumbnail_urls)
+
+    # Reassemble thumbnails back into stories
+    thumbnail_data = dict()
+    for page in pages:
+        if not page: continue
+        thumbnail_data[page['url']] = base64.b64encode(page['data']).decode('utf-8')
+    for story in river_stories_data['stories']:
+        thumbnail_values = list(story['secure_image_thumbnails'].values())
+        if thumbnail_values and thumbnail_values[0] in thumbnail_data:
+            story['select_thumbnail_data'] = thumbnail_data[thumbnail_values[0]]
+    logging.user(request, ("Elapsed Time: %ss" % (time.time() - start)))
+
+    return river_stories_data
+
 @json.json_view
 def complete_river(request):
     user = get_user(request)
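The new endpoint piggybacks on load_river_stories__redis, then fetches each story's first secure thumbnail in parallel (six worker threads) and inlines it base64-encoded as select_thumbnail_data, so a widget can render images with a single request. A client-side sketch of consuming it, not from the commit, assuming the usual /reader/ URL prefix and an already-authenticated session:

import base64
import requests

session = requests.Session()
# ... log the session in first ...
resp = session.get("https://newsblur.com/reader/river_stories_widget")

for story in resp.json()["stories"]:
    thumb = story.get("select_thumbnail_data")
    if thumb:
        image_bytes = base64.b64decode(thumb)  # inlined thumbnail, ready to render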

View file

@@ -1,34 +1,66 @@
 #!/usr/bin/env bash
 collections=(
-    classifier_tag
-    classifier_author
-    classifier_feed
-    classifier_title
-    # shared_stories
+    activities
     category
     category_site
-    sent_emails
-    social_profile
-    social_subscription
-    social_services
-    statistics
-    user_search
-    # feed_icons
-    # feed_pages
+    classifier_author
+    classifier_feed
+    classifier_tag
+    classifier_title
+    custom_styling
+    dashboard_river
     feedback
-    # fetch_exception_history
-    # fetch_history
+    follow_request
+    gift_codes
+    inline
+    interactions
+    m_dashboard_river
+    notification_tokens
+    notifications
+    popularity_query
+    redeemed_codes
+    saved_searches
+    sent_emails
+    # shared_stories
+    social_invites
+    social_profile
+    social_services
+    social_subscription
+    # starred_stories
+    starred_stories_counts
+    statistics
+    # stories
+    system.profile
+    system.users
+    # uploaded_opml
+    user_search
 )
-for collection in ${collections[@]}; do
+if [ "$1" = "stories" ]; then
+    collections+=(
+        shared_stories
+        starred_stories
+    )
+fi
 now=$(date '+%Y-%m-%d-%H-%M')
+for collection in ${collections[@]}; do
     echo "---> Dumping $collection - ${now}"
-    docker exec -it mongo mongodump -d newsblur -c $collection -o /backup/backup_mongo
+    docker exec -it mongo mongodump -d newsblur -c $collection -o /backup
 done;
-echo " ---> Compressing backup_mongo.tgz"
-tar -zcf /opt/mongo/newsblur/backup/backup_mongo.tgz /opt/mongo/newsblur/backup/backup_mongo
+echo " ---> Compressing /srv/newsblur/backup/newsblur into /srv/newsblur/backup/backup_mongo_${now}.tgz"
+tar -zcf /srv/newsblur/backup/backup_mongo_${now}.tgz -C / srv/newsblur/backup/newsblur
 echo " ---> Uploading backups to S3"
-docker run --rm -v /srv/newsblur:/srv/newsblur -v /opt/mongo/newsblur/backup/:/opt/mongo/newsblur/backup/ --network=newsblurnet newsblur/newsblur_python3:latest python /srv/newsblur/utils/backups/backup_mongo.py
+docker run --user 1000:1001 --rm -v /srv/newsblur:/srv/newsblur -v /srv/newsblur/backup/:/srv/newsblur/backup/ --network=host newsblur/newsblur_python3:latest python /srv/newsblur/utils/backups/backup_mongo.py
 # Don't delete backup since the backup_mongo.py script will rm them
 ## rm /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz
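backup_mongo.py itself is not shown in this diff; per the closing comment, it is responsible for removing the tarballs it uploads. A minimal sketch of that upload-then-delete flow under those assumptions (the paths and key layout are guesses, not the actual script):

import glob
import os
import boto3

s3 = boto3.client("s3")
for path in glob.glob("/srv/newsblur/backup/backup_mongo_*.tgz"):
    s3.upload_file(path, "newsblur-backups", "mongo/" + os.path.basename(path))
    os.remove(path)  # "the backup_mongo.py script will rm them"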

View file

@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+now=$(date '+%Y-%m-%d-%H-%M')
+echo "---> PG dumping - ${now}"
+BACKUP_FILE="/srv/newsblur/backup/backup_postgresql_${now}.sql"
+sudo docker exec -it postgres /usr/lib/postgresql/13/bin/pg_dump -U newsblur -h 127.0.0.1 -Fc newsblur > $BACKUP_FILE
+
+echo " ---> Uploading postgres backup to S3"
+sudo docker run --user 1000:1001 --rm \
+    -v /srv/newsblur:/srv/newsblur \
+    --network=host \
+    newsblur/newsblur_python3 \
+    python /srv/newsblur/utils/backups/backup_psql.py
+
+# Don't delete backup since the backup_mongo.py script will rm them
+## rm /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz
+## rm /opt/mongo/newsblur/backup/backup_mongo_${now}
+
+echo " ---> Finished uploading backups to S3: "

Some files were not shown because too many files have changed in this diff.