Mirror of https://github.com/samuelclay/NewsBlur.git, synced 2025-08-31 21:41:33 +00:00
Rewriting mongo backup, adding in rotation.
Commit 5aaf4d5c38 (parent d7c8f5f79c)
4 changed files with 129 additions and 57 deletions
Python requirements (boto3 bumped):
@@ -8,7 +8,7 @@ beautifulsoup4==4.9.3
 billiard==3.6.4.0
 bleach==3.2.1
 boto==2.49.0
-boto3==1.17.67
+boto3==1.18.9
 botocore==1.20.67
 celery==4.4.7
 certifi==2020.12.5
Mongo backup shell script:
@@ -21,11 +21,11 @@ for collection in ${collections[@]}; do
 now=$(date '+%Y-%m-%d-%H-%M')
 echo "---> Dumping $collection - ${now}"

-    docker exec -it mongo mongodump -d newsblur -c $collection -o /backup/backup_mongo_${now}
+    docker exec -it mongo mongodump -d newsblur -c $collection -o /backup/backup_mongo
 done;

 echo " ---> Compressing backup_mongo_${now}.tgz"
-tar -zcf /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz /opt/mongo/newsblur/backup/backup_mongo_${now}
+tar -zcf /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz /opt/mongo/newsblur/backup/backup_mongo

 echo " ---> Uploading backups to S3"
 docker run --rm -v /srv/newsblur:/srv/newsblur -v /opt/mongo/newsblur/backup/:/opt/mongo/newsblur/backup/ --network=newsblurnet newsblur/newsblur_python3:latest python /srv/newsblur/utils/backups/backup_mongo.py
@@ -33,4 +33,4 @@ docker run --rm -v /srv/newsblur:/srv/newsblur -v /opt/mongo/newsblur/backup/:/o
 # Don't delete backup since the backup_mongo.py script will rm them
 ## rm /opt/mongo/newsblur/backup/backup_mongo_${now}.tgz
 ## rm /opt/mongo/newsblur/backup/backup_mongo_${now}
-echo " ---> Finished uploading backups to S3: backup_mongo_${now}.tgz"
+echo " ---> Finished uploading backups to S3: backup_mongo.tgz"
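Side note on the rename above: the mongodump output directory no longer carries ${now}, so every run reuses a single backup_mongo directory on disk, while the compressed tarball keeps a local timestamp and the uploader in the next hunk stamps the S3 key itself. A minimal sketch of that key construction, reusing the same strftime format as backup_mongo.py below (the basename and printed value are illustrative only):

    from datetime import datetime

    # Illustrative basename; the real tarball name also carries the shell script's ${now} stamp.
    key_base, key_ext = "backup_mongo", ".tgz"
    key_datestamp = datetime.utcnow().strftime("-%Y-%m-%d-%H-%M")
    key = "".join(["mongo/", key_base, key_datestamp, key_ext])
    print(key)  # e.g. mongo/backup_mongo-2021-07-29-04-00.tgz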
utils/backups/backup_mongo.py (the uploader invoked by the docker run line above), rewritten:
@@ -1,19 +1,130 @@
-#!/usr/bin/python3
-
-import os
-import shutil
-from newsblur_web import settings
-import boto3
-from botocore.exceptions import ClientError
-
-filenames = [f for f in os.listdir('/opt/mongo/newsblur/backup/') if '.tgz' in f]
-
-for filename in filenames:
-    print('Uploading %s to S3...' % filename)
-    try:
-        s3 = boto3.client('s3')
-        s3.upload_file(f"mongo/{filename}", settings.S3_BACKUP_BUCKET)
-    except ClientError as e:
-        print(" ****> Exceptions: %s" % e)
-    shutil.rmtree(filename[:-4])
-    os.remove(filename)
+#!/usr/bin/python3
+from datetime import datetime, timedelta
+import os
+import re
+import logging
+import mimetypes
+import boto3
+import shutil
+from boto3.s3.transfer import S3Transfer
+from newsblur_web import settings
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    BACKUP_DIR = '/opt/mongo/newsblur/backup/'
+    BACKUP_DIR = '/srv/newsblur'
+    filenames = [f for f in os.listdir(BACKUP_DIR) if '.tgz' in f]
+    for filename in filenames:
+        file_path = os.path.join(BACKUP_DIR, filename)
+        basename = os.path.basename(file_path)
+        key_base, key_ext = list(splitext(basename))
+        key_prefix = "".join(['mongo/', key_base])
+        key_datestamp = datetime.utcnow().strftime("-%Y-%m-%d-%H-%M")
+        key = "".join([key_prefix, key_datestamp, key_ext])
+
+        print("Uploading {0} to {1}".format(file_path, key))
+        upload(file_path, settings.S3_BACKUP_BUCKET, key)
+
+        print('Rotating file on S3 with key prefix {0} and extension {1}'.format(key_prefix, key_ext))
+        rotate(key_prefix, key_ext, settings.S3_BACKUP_BUCKET)
+
+        # shutil.rmtree(filename[:-4])
+        # os.remove(filename)
+
+
+def upload_rotate(file_path, s3_bucket, s3_key_prefix, aws_key=None, aws_secret=None):
+    '''
+    Upload file_path to an S3 bucket with the given key prefix.
+    Ex. upload_rotate('/tmp/file-2015-01-01.tar.bz2', 'backups', 'foo.net/')
+    would upload the file to bucket backups with key=foo.net/file-2015-01-01.tar.bz2
+    and then rotate all files starting with foo.net/file and with extension .tar.bz2.
+    Timestamps need to be present between the file root and the extension and in the same format as strftime("%Y-%m-%d").
+    Ex. file-2015-12-28.tar.bz2
+    '''
+    key = ''.join([s3_key_prefix, os.path.basename(file_path)])
+    logger.debug("Uploading {0} to {1}".format(file_path, key))
+    upload(file_path, s3_bucket, key, aws_access_key_id=aws_key, aws_secret_access_key=aws_secret)
+
+    file_root, file_ext = splitext(os.path.basename(file_path))
+    # strip timestamp from file_root
+    regex = '(?P<filename>.*)-(?P<year>[\d]+?)-(?P<month>[\d]+?)-(?P<day>[\d]+?)'
+    match = re.match(regex, file_root)
+    if not match:
+        raise Exception('File does not contain a timestamp')
+    key_prefix = ''.join([s3_key_prefix, match.group('filename')])
+    logger.debug('Rotating files on S3 with key prefix {0} and extension {1}'.format(key_prefix, file_ext))
+    rotate(key_prefix, file_ext, s3_bucket, aws_key=aws_key, aws_secret=aws_secret)
+
+
+def rotate(key_prefix, key_ext, bucket_name, daily_backups=7, weekly_backups=4, aws_key=None, aws_secret=None):
+    """ Delete old files we've uploaded to S3 according to a grandfather-father-son strategy """
+
+    session = boto3.Session(
+        aws_access_key_id=aws_key,
+        aws_secret_access_key=aws_secret
+    )
+    s3 = session.resource('s3')
+    bucket = s3.Bucket(bucket_name)
+    keys = bucket.objects.filter(Prefix=key_prefix)
+
+    regex = '{0}-(?P<year>[\d]+?)-(?P<month>[\d]+?)-(?P<day>[\d]+?)-(?P<hour>[\d]+?)-(?P<minute>[\d]+?){1}'.format(key_prefix, key_ext)
+    backups = []
+
+    for key in keys:
+        match = re.match(regex, str(key.key))
+        if not match:
+            continue
+        year = int(match.group('year'))
+        month = int(match.group('month'))
+        day = int(match.group('day'))
+        hour = int(match.group('hour'))
+        minute = int(match.group('minute'))
+        key_date = datetime(year, month, day, hour, minute)
+        backups[:0] = [key_date]
+    backups = sorted(backups, reverse=True)
+
+    if len(backups) > daily_backups+1 and backups[daily_backups] - backups[daily_backups+1] < timedelta(days=7):
+        key = bucket.Object("{0}{1}{2}".format(key_prefix, backups[daily_backups].strftime("-%Y-%m-%d-%H-%M"), key_ext))
+        logger.debug("deleting {0}".format(key))
+        key.delete()
+        del backups[daily_backups]
+
+    month_offset = daily_backups + weekly_backups
+    if len(backups) > month_offset+1 and backups[month_offset] - backups[month_offset+1] < timedelta(days=30):
+        key = bucket.Object("{0}{1}{2}".format(key_prefix, backups[month_offset].strftime("-%Y-%m-%d-%H-%M"), key_ext))
+        logger.debug("deleting {0}".format(key))
+        key.delete()
+        del backups[month_offset]
+
+
+def splitext(filename):
+    """ Return the filename and extension according to the first dot in the filename.
+    This helps date stamping .tar.bz2 or .ext.gz files properly.
+    """
+    index = filename.find('.')
+    if index == 0:
+        index = 1 + filename[1:].find('.')
+        if index == -1:
+            return filename, ''
+        return filename[:index], filename[index:]
+    return os.path.splitext(filename)
+
+
+def upload(source_path, bucketname, keyname, acl='private', guess_mimetype=True, aws_access_key_id=None, aws_secret_access_key=None):
+
+    client = boto3.client('s3', 'us-west-2', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
+    transfer = S3Transfer(client)
+    # Upload /tmp/myfile to s3://bucket/key
+    extra_args = {
+        'ACL': acl,
+    }
+    if guess_mimetype:
+        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
+        extra_args['ContentType'] = mtype
+
+    transfer.upload_file(source_path, bucketname, keyname, extra_args=extra_args)
+
+
+if __name__ == "__main__":
+    main()
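The new rotate() helper is what the commit message's "rotation" refers to: the newest daily_backups (7) datestamped keys under a prefix are left alone, the next weekly_backups (4) are thinned toward one per week, and older keys toward one per month, with at most one deletion per tier per run. A hand-run sketch of calling it directly, outside of main() (the import path, bucket name, and prefix/extension values are placeholders for illustration, not from the commit):

    # Sketch only: assumes backup_mongo.py is importable and AWS credentials
    # are available to boto3 (environment variables or instance profile).
    from backup_mongo import rotate  # hypothetical import path

    rotate(
        key_prefix="mongo/backup_mongo",   # placeholder; main() derives this from the tarball basename
        key_ext=".tgz",
        bucket_name="my-backup-bucket",    # placeholder for settings.S3_BACKUP_BUCKET
    )
    # Per call: the 8th-newest key is deleted if it is less than 7 days apart from the
    # 9th-newest, and the 12th-newest if it is less than 30 days apart from the 13th-newest.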
Old Mongo backup shell script, deleted:
@@ -1,39 +0,0 @@
-#!/bin/sh
-
-MONGODB_SHELL='mongo'
-DUMP_UTILITY='mongodump'
-DB_NAME='newsblur'
-COLLECTIONS="classifier_tag classifier_author classifier_feed classifier_title userstories starred_stories"
-
-date_now=`date +%Y_%m_%d_%H_%M`
-dir_name='backup_mongo_'${date_now}
-file_name='backup_mongo_'${date_now}'.bz2'
-
-log() {
-    echo $1
-}
-
-do_cleanup(){
-    rm -rf backup_mongo_*
-    log 'cleaning up....'
-}
-
-do_backup(){
-    log 'snapshotting the db and creating archive'
-    # ${MONGODB_SHELL} admin fsync_lock.js
-    for collection in $COLLECTIONS
-    do
-        ${DUMP_UTILITY} --db ${DB_NAME} --collection $collection -o ${dir_name}
-    done
-    tar -jcf $file_name ${dir_name}
-    # ${MONGODB_SHELL} admin fsync_unlock.js
-    log 'data backed up and created snapshot'
-}
-
-save_in_s3(){
-    log 'saving the backup archive in amazon S3' && \
-    python s3.py set ${file_name} && \
-    log 'data backup saved in amazon s3'
-}
-
-do_backup && save_in_s3 && do_cleanup