Merge branch 'master' into pro

* master: (31 commits)
  Adding postgres backup log.
  Handling operation failures in flask metrics.
  Locking consul to a working version. Also changing flask status code response to handle custom status codes.
  Explicitly uploading mongo backups
  Only need date once in filename
  Rotating mongo backups
  Using full path for progress.
  Showing full path of postgres backup.
  Separating out restoring backups and storing backups.
  Moving to postgres backup directory.
  Executable postgres backup script.
  Compressing Postgres db sql backup.
  Adding date to tarball to allow for versioning.
  Correcting permissions on mongo backup
  Fixing mongo upload to s3 by using host networking mode.
  Removing unused backup code.
  Forgot import
  Adding progress bar to mongo backup.
  Refactoring aws keys for mongo backups.
  Moving mongo backups
  ...
commit 88d07b143c
Author: Samuel Clay
Date:   2022-02-04 13:55:38 -05:00

112 changed files with 470 additions and 326 deletions

.gitignore
View file

@@ -43,7 +43,8 @@ templates/maintenance_on.html
 vendor/mms-agent/settings.py
 apps/social/spam.py
 venv*
-/backups
+backup
+backups
 config/mongodb_keyfile.key
 # Docker Jinja templates

View file

@@ -16,12 +16,12 @@
     "media/ios": true,
     "**/*.map": true,
     "ansible/playbooks/*/*": true,
-    "archive/*": true,
+    // "archive/*": true,
     "logs/*": true,
-    "static/*": true,
+    // "static/*": true,
     "media/fonts": true,
     "static/*.css": true,
     "static/*.js": true,
     "static/js/*.*.js": true,
     "blog/.jekyll-cache": true,
     "blog/_site": true,
     "docker/volumes": true,

View file

@@ -70,7 +70,7 @@
   run_once: yes
   connection: local
   amazon.aws.aws_s3:
-    bucket: newsblur_backups
+    bucket: newsblur-backups
     object: /static_py3.tgz
     src: /srv/newsblur/static.tgz
     mode: put
@@ -94,7 +94,7 @@
   vars:
     ansible_python_interpreter: /usr/bin/python3
   amazon.aws.aws_s3:
-    bucket: newsblur_backups
+    bucket: newsblur-backups
     object: /static_py3.tgz
     dest: /srv/newsblur/static.tgz
     mode: get

View file

@@ -17,14 +17,14 @@
 - name: Set backup vars
   set_fact:
     redis_story_filename: backup_redis_story_2021-04-13-04-00.rdb.gz
-    postgres_filename: backup_postgresql_2022-01-06-19-46.sql.gz
+    postgres_filename: backup_postgresql_2022-02-03-04-00.sql.gz
     mongo_filename: backup_mongo_2021-03-15-04-00.tgz
     redis_filename: backup_redis_2021-03-15-04-00.rdb.gz
   tags: never, restore_postgres, restore_mongo, restore_redis, restore_redis_story
 - name: Download archives
   amazon.aws.aws_s3:
-    bucket: newsblur_backups
+    bucket: "newsblur-backups"
     object: "{{ item.dir }}{{ item.file }}"
     dest: "/srv/newsblur/backups/{{ item.file }}"
     mode: get
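
The restore filenames above embed the dump time using the same pattern the backup scripts pass to date '+%Y-%m-%d-%H-%M', which is why the new postgres_filename ends in 04-00: it lines up with the 04:00 backup cron later in this diff. A minimal sketch of that equivalence in Python; the helper function is hypothetical, the filename is taken from the diff:

from datetime import datetime

# Hypothetical helper mirroring `date '+%Y-%m-%d-%H-%M'` from the backup scripts.
def backup_name(prefix: str, when: datetime) -> str:
    return f"{prefix}_{when.strftime('%Y-%m-%d-%H-%M')}.sql.gz"

# The 04:00 run on 2022-02-03 yields the filename referenced above.
print(backup_name("backup_postgresql", datetime(2022, 2, 3, 4, 0)))
# backup_postgresql_2022-02-03-04-00.sql.gz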

View file

@@ -1,4 +1,4 @@
-#!/srv/newsblur/venv/newsblur3/bin/python
+#!/usr/bin/env python
 
 import os
 import digitalocean

View file

@@ -13,8 +13,9 @@
 - name: Installing Consul
   become: yes
   apt:
-    pkg: consul
-    state: latest
+    allow_downgrades: yes
+    pkg: consul=1.10.4
+    state: present
 - name: Register Manager IP
   run_once: yes

View file

@@ -58,9 +58,16 @@
 - name: Make backup directory
   become: yes
   file:
-    path: /opt/mongo/newsblur/backup/
+    path: "/mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/"
     state: directory
-    mode: 0666
+    mode: 0777
+- name: Create symlink to mounted volume for backups to live
+  file:
+    state: link
+    src: "/mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup"
+    path: /srv/newsblur/backup
+    force: yes
 - name: Start db-mongo docker container
   become: yes
@@ -86,7 +93,7 @@
       - /srv/newsblur/ansible/roles/mongo/templates/mongo.conf:/etc/mongod.conf
       - /srv/newsblur/config/mongodb_keyfile.key:/srv/newsblur/config/mongodb_keyfile.key
      - /var/log/mongodb/:/var/log/mongodb/
-      - /opt/mongo/newsblur/backup/:/backup/
+      - /mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/:/backup/
   when: (inventory_hostname | regex_replace('[0-9]+', '')) in ['db-mongo', 'db-mongo-primary', 'db-mongo-secondary']
 - name: Start db-mongo-analytics docker container
@@ -114,7 +121,7 @@
       - /srv/newsblur/ansible/roles/mongo/templates/mongo.analytics.conf:/etc/mongod.conf
       - /srv/newsblur/config/mongodb_keyfile.key:/srv/newsblur/config/mongodb_keyfile.key
       - /var/log/mongodb/:/var/log/mongodb/
-      - /opt/mongo/newsblur/backup/:/backup/
+      - /mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/:/backup/
   when: (inventory_hostname | regex_replace('[0-9]+', '')) == 'db-mongo-analytics'
 - name: Create mongo database user
@@ -185,12 +192,18 @@
       OUTPUT=$(eval sudo df / | head -n 2 | tail -1);
       docker run --rm -it
       -v /srv/newsblur:/srv/newsblur
-      --network=newsblurnet
+      --network=host
       --hostname {{ ansible_hostname }}
       newsblur/newsblur_python3 /srv/newsblur/utils/monitor_disk_usage.py $OUTPUT
   tags:
     - sanity-checker
+- name: Copy common secrets
+  copy:
+    src: /srv/secrets-newsblur/settings/common_settings.py
+    dest: /srv/newsblur/newsblur_web/local_settings.py
+  register: app_changed
 - name: Add mongo backup
   cron:
     name: mongo backup
@@ -201,15 +214,15 @@
   tags:
     - mongo-backup
-- name: Add mongo starred_stories+stories backup
-  cron:
-    name: mongo starred/shared/all stories backup
-    minute: "0"
-    hour: "5"
-    job: /srv/newsblur/docker/mongo/backup_mongo_stories.sh
-  when: '"db-mongo-secondary1" in inventory_hostname'
-  tags:
-    - mongo-backup
+# - name: Add mongo starred_stories+stories backup
+#   cron:
+#     name: mongo starred/shared/all stories backup
+#     minute: "0"
+#     hour: "5"
+#     job: /srv/newsblur/docker/mongo/backup_mongo.sh stories
+#     when: '"db-mongo-secondary1" in inventory_hostname'
+#   tags:
+#     - mongo-backup
 
 # Renaming a db-mongo3 to db-mongo2:
 # - Change hostname to db-mongo2 on Digital Ocean (doctl)
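
The regex_replace('db-|-', '') filter used throughout this file derives the mounted-volume directory from the inventory hostname, while regex_replace('[0-9]+', '') in the when: clauses strips the trailing instance number to recover the host's role. A quick sketch of both substitutions in plain Python; the hostname and helper names are illustrative:

import re

def backup_mount(inventory_hostname: str) -> str:
    # Mirrors `/mnt/{{ inventory_hostname | regex_replace('db-|-', '') }}/backup/`
    return "/mnt/%s/backup/" % re.sub(r"db-|-", "", inventory_hostname)

def role_of(inventory_hostname: str) -> str:
    # Mirrors the `when:` clauses: strip the instance number to get the role
    return re.sub(r"[0-9]+", "", inventory_hostname)

print(backup_mount("db-mongo-secondary1"))  # /mnt/mongosecondary1/backup/
print(role_of("db-mongo-secondary1"))       # db-mongo-secondary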

View file

@@ -1,6 +1,6 @@
 {
   "service": {
-    "name": "db-mongo",
+    "name": "db-mongo-staging",
     "id": "{{ inventory_hostname }}",
     "tags": [
       "db"

View file

@@ -14,6 +14,13 @@
     state: directory
     mode: 0777
+- name: Ensure postgres backup directory
+  become: yes
+  file:
+    path: /srv/newsblur/backups
+    state: directory
+    mode: 0777
 - name: Start postgres docker containers
   become: yes
   docker_container:
@@ -63,6 +70,12 @@
   notify:
     - reload consul
+- name: Copy common secrets
+  copy:
+    src: /srv/secrets-newsblur/settings/common_settings.py
+    dest: /srv/newsblur/newsblur_web/local_settings.py
+  register: app_changed
 - name: Add sanity checkers cronjob for disk usage
   become: yes
   cron:
@@ -78,19 +91,19 @@
       --hostname {{ ansible_hostname }}
       newsblur/newsblur_python3 /srv/newsblur/utils/monitor_disk_usage.py $OUTPUT
+- name: Add postgres backup log
+  become: yes
+  file:
+    path: /var/log/postgres_backup.log
+    state: touch
+    mode: 0777
+    owner: 1000
+    group: 1001
 - name: Add postgres backup
   cron:
     name: postgres backup
     minute: "0"
     hour: "4"
-    job: >-
-      NOW=$(eval date +%F-%H-%M);
-      BACKUP_FILE=backup_postgresql_${NOW}.sql;
-      sudo docker exec -it postgres
-      /usr/lib/postgresql/13/bin/pg_dump -U newsblur -h 127.0.0.1 -Fc newsblur > backup/$BACKUP_FILE;
-      sudo docker run --rm -it
-      -v /srv/newsblur:/srv/newsblur
-      -v /backup/:/backup/
-      --network=newsblurnet
-      newsblur/newsblur_python3
-      python /srv/newsblur/utils/backups/backup_psql.py
+    job: /srv/newsblur/docker/postgres/backup_postgres.sh 1> /var/log/postgres_backup.log 2>&1

View file

@@ -217,6 +217,10 @@ def check_share_on_site(request, token):
     logging.user(request.user, "~FBFinding feed (check_share_on_site): %s" % rss_url)
     feed = Feed.get_feed_from_url(rss_url, create=False, fetch=False)
     if not feed:
+        rss_url = urllib.parse.urljoin(story_url, rss_url)
+        logging.user(request.user, "~FBFinding feed (check_share_on_site): %s" % rss_url)
+        feed = Feed.get_feed_from_url(rss_url, create=False, fetch=False)
+    if not feed:
         logging.user(request.user, "~FBFinding feed (check_share_on_site): %s" % story_url)
         feed = Feed.get_feed_from_url(story_url, create=False, fetch=False)
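
The inserted fallback handles pages that advertise a relative feed URL: resolving it against the story's own URL produces an absolute URL before retrying the lookup. A small illustration of the urljoin behavior being relied on; the URLs are made up:

from urllib.parse import urljoin

# Relative feed paths resolve against the page that referenced them.
print(urljoin("https://example.com/blog/post-1", "/rss.xml"))  # https://example.com/rss.xml
print(urljoin("https://example.com/blog/post-1", "feed.xml"))  # https://example.com/blog/feed.xml
# Absolute URLs pass through unchanged, so the retry is harmless for them.
print(urljoin("https://example.com/blog/post-1", "https://other.example/rss"))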

View file

@@ -14,6 +14,7 @@ urlpatterns = [
     url(r'^page/(?P<feed_id>\d+)', views.load_feed_page, name='load-feed-page'),
     url(r'^refresh_feed/(?P<feed_id>\d+)', views.refresh_feed, name='refresh-feed'),
     url(r'^favicons', views.load_feed_favicons, name='load-feed-favicons'),
+    url(r'^river_stories_widget', views.load_river_stories_widget, name='load-river-stories-widget'),
     url(r'^river_stories', views.load_river_stories__redis, name='load-river-stories'),
     url(r'^complete_river', views.complete_river, name='complete-river'),
     url(r'^refresh_feeds', views.refresh_feeds, name='refresh-feeds'),
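
Note that the new pattern lands above river_stories: Django tries urlpatterns in order, and the unanchored-end regex r'^river_stories' also matches the widget path, so the more specific pattern must come first. A two-line check of the prefix collision:

import re

# The broader pattern matches the widget path too, so order decides the view.
print(bool(re.match(r'^river_stories', 'river_stories_widget')))  # True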

View file

@@ -4,7 +4,13 @@ import redis
 import requests
 import random
 import zlib
+import concurrent
+import re
+import ssl
+import socket
+import base64
+import urllib.parse
+import urllib.request
 from django.shortcuts import get_object_or_404
 from django.shortcuts import render
 from django.contrib.auth.decorators import login_required
@@ -1453,7 +1459,7 @@ def load_river_stories__redis(request):
         story_hashes = []
         unread_feed_story_hashes = []
 
-    mstories = MStory.objects(story_hash__in=story_hashes).order_by(story_date_order)
+    mstories = MStory.objects(story_hash__in=story_hashes[:limit]).order_by(story_date_order)
     stories = Feed.format_stories(mstories)
     found_feed_ids = list(set([story['story_feed_id'] for story in stories]))
@@ -1595,6 +1601,57 @@
     return data
 
+
+@json.json_view
+def load_river_stories_widget(request):
+    logging.user(request, "Widget load")
+    river_stories_data = json.decode(load_river_stories__redis(request).content)
+    timeout = 3
+    start = time.time()
+
+    def load_url(url):
+        original_url = url
+        url = urllib.parse.urljoin(settings.NEWSBLUR_URL, url)
+        scontext = ssl.SSLContext(ssl.PROTOCOL_TLS)
+        scontext.verify_mode = ssl.VerifyMode.CERT_NONE
+        try:
+            conn = urllib.request.urlopen(url, context=scontext, timeout=timeout)
+        except urllib.request.URLError:
+            url = url.replace('localhost', 'haproxy')
+            conn = urllib.request.urlopen(url, context=scontext, timeout=timeout)
+        except urllib.request.URLError as e:
+            logging.user(request.user, '"%s" not fetched in %ss: %s' % (url, (time.time() - start), e))
+            return None
+        except socket.timeout:
+            logging.user(request.user, '"%s" not fetched in %ss' % (url, (time.time() - start)))
+            return None
+        data = conn.read()
+        logging.user(request.user, '"%s" fetched in %ss' % (url, (time.time() - start)))
+        return dict(url=original_url, data=data)
+
+    # Find the image thumbnails and download in parallel
+    thumbnail_urls = []
+    for story in river_stories_data['stories']:
+        thumbnail_values = list(story['secure_image_thumbnails'].values())
+        if thumbnail_values:
+            thumbnail_urls.append(thumbnail_values[0])
+    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
+        pages = executor.map(load_url, thumbnail_urls)
+
+    # Reassemble thumbnails back into stories
+    thumbnail_data = dict()
+    for page in pages:
+        if not page: continue
+        thumbnail_data[page['url']] = base64.b64encode(page['data']).decode('utf-8')
+    for story in river_stories_data['stories']:
+        thumbnail_values = list(story['secure_image_thumbnails'].values())
+        if thumbnail_values and thumbnail_values[0] in thumbnail_data:
+            story['select_thumbnail_data'] = thumbnail_data[thumbnail_values[0]]
+    logging.user(request, ("Elapsed Time: %ss" % (time.time() - start)))
+
+    return river_stories_data
+
+
 @json.json_view
 def complete_river(request):
     user = get_user(request)
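
One caveat with the import hunk above: import concurrent by itself does not load the concurrent.futures submodule, so concurrent.futures.ThreadPoolExecutor only resolves if some other module has already imported it (common under Django, but not guaranteed). A minimal check, assuming a fresh interpreter:

import concurrent

try:
    concurrent.futures  # AttributeError on a fresh interpreter
except AttributeError:
    import concurrent.futures  # the explicit form is always safe

# ThreadPoolExecutor.map preserves input order and yields results lazily,
# which is what lets the view match thumbnails back onto their stories.
with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
    print(list(executor.map(str.upper, ["a", "b"])))  # ['A', 'B']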

View file

@@ -1,34 +1,66 @@
 #!/usr/bin/env bash
 
 collections=(
-  classifier_tag
-  classifier_author
-  classifier_feed
-  classifier_title
-  # shared_stories
   activities
   category
   category_site
-  sent_emails
-  social_profile
-  social_subscription
-  social_services
-  statistics
-  user_search
+  classifier_author
+  classifier_feed
+  classifier_tag
+  classifier_title
+  custom_styling
+  dashboard_river
+  # feed_icons
+  # feed_pages
+  feedback
+  # fetch_exception_history
+  # fetch_history
+  follow_request
+  gift_codes
+  inline
+  interactions
+  m_dashboard_river
+  notification_tokens
+  notifications
+  popularity_query
+  redeemed_codes
+  saved_searches
+  sent_emails
+  # shared_stories
+  social_invites
+  social_profile
+  social_services
+  social_subscription
+  # starred_stories
+  starred_stories_counts
+  statistics
+  # stories
+  system.profile
+  system.users
+  # uploaded_opml
+  user_search
 )
 
-for collection in ${collections[@]}; do
+if [ "$1" = "stories" ]; then
+  collections+=(
+    shared_stories
+    starred_stories
+  )
+fi
+
+now=$(date '+%Y-%m-%d-%H-%M')
+
+for collection in ${collections[@]}; do
   echo "---> Dumping $collection - ${now}"
-  docker exec -it mongo mongodump -d newsblur -c $collection -o /backup/backup_mongo
+  docker exec -it mongo mongodump -d newsblur -c $collection -o /backup
 done;
 
-echo " ---> Compressing backup_mongo.tgz"
-tar -zcf /opt/mongo/newsblur/backup/backup_mongo.tgz /opt/mongo/newsblur/backup/backup_mongo
+echo " ---> Compressing /srv/newsblur/backup/newsblur into /srv/newsblur/backup/backup_mongo_${now}.tgz"
+tar -zcf /srv/newsblur/backup/backup_mongo_${now}.tgz -C / srv/newsblur/backup/newsblur
 
 echo " ---> Uploading backups to S3"
-docker run --rm -v /srv/newsblur:/srv/newsblur -v /opt/mongo/newsblur/backup/:/opt/mongo/newsblur/backup/ --network=newsblurnet newsblur/newsblur_python3:latest python /srv/newsblur/utils/backups/backup_mongo.py
+docker run --user 1000:1001 --rm -v /srv/newsblur:/srv/newsblur -v /srv/newsblur/backup/:/srv/newsblur/backup/ --network=host newsblur/newsblur_python3:latest python /srv/newsblur/utils/backups/backup_mongo.py
+
+# Don't delete backup since the backup_mongo.py script will rm them
+## rm /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz
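
The rewritten tar invocation uses -C / plus a relative member path so the archive stores srv/newsblur/backup/newsblur without a leading slash (and without tar's "Removing leading '/'" warning). A sketch of the equivalent in Python's tarfile, reusing the script's paths; the output filename is illustrative:

import tarfile

# Same effect as `tar -zcf ... -C / srv/newsblur/backup/newsblur`:
# archive the dump directory under a slash-free member path.
with tarfile.open("/srv/newsblur/backup/backup_mongo_example.tgz", "w:gz") as tar:
    tar.add("/srv/newsblur/backup/newsblur", arcname="srv/newsblur/backup/newsblur")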

View file

@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+now=$(date '+%Y-%m-%d-%H-%M')
+echo "---> PG dumping - ${now}"
+BACKUP_FILE="/srv/newsblur/backup/backup_postgresql_${now}.sql"
+sudo docker exec -it postgres /usr/lib/postgresql/13/bin/pg_dump -U newsblur -h 127.0.0.1 -Fc newsblur > $BACKUP_FILE
+
+echo " ---> Uploading postgres backup to S3"
+sudo docker run --user 1000:1001 --rm \
+    -v /srv/newsblur:/srv/newsblur \
+    --network=host \
+    newsblur/newsblur_python3 \
+    python /srv/newsblur/utils/backups/backup_psql.py
+
+# Don't delete backup since the backup_mongo.py script will rm them
+## rm /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz
+## rm /opt/mongo/newsblur/backup/backup_mongo_${now}
+
+echo " ---> Finished uploading backups to S3: "

Some files were not shown because too many files have changed in this diff.