mirror of https://github.com/samuelclay/NewsBlur.git — synced 2025-08-05 16:58:59 +00:00 (135 lines, 5.2 KiB, Python, executable file)
#!/usr/bin/python3
|
|
from datetime import datetime, timedelta
|
|
import os
|
|
import sys
|
|
import re
|
|
import logging
|
|
import mimetypes
|
|
import boto3
|
|
import threading
|
|
import shutil
|
|
from boto3.s3.transfer import S3Transfer
|
|
from newsblur_web import settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def main():
    """Upload every .tgz backup archive in BACKUP_DIR to S3, rotate old
    copies on the bucket, then delete the local archive."""
    BACKUP_DIR = '/srv/newsblur/backup/'
    # endswith() rather than a substring test so partial/derived files
    # (e.g. "foo.tgz.tmp") are not uploaded — and then deleted — by mistake.
    filenames = [f for f in os.listdir(BACKUP_DIR) if f.endswith('.tgz')]
    for filename in filenames:
        file_path = os.path.join(BACKUP_DIR, filename)
        key_prefix = 'backup_db_mongo/'
        print("Uploading {0} to {1} on {2}".format(file_path, key_prefix, settings.S3_BACKUP_BUCKET))
        sys.stdout.flush()
        upload_rotate(file_path, settings.S3_BACKUP_BUCKET, key_prefix)

        # Any extracted directory is deliberately left in place.
        # shutil.rmtree(filename[:-4])
        os.remove(file_path)
|
def upload_rotate(file_path, s3_bucket, s3_key_prefix):
    '''
    Upload file_path to s3 bucket with prefix, then rotate old copies.

    Ex. upload_rotate('/tmp/file_2015-01-01-06-00.tar.bz2', 'backups', 'foo.net/')
    would upload the file to bucket "backups" with key
    foo.net/file_2015-01-01-06-00.tar.bz2 and then rotate all keys starting
    with foo.net/file and ending with .tar.bz2.

    A timestamp must be present between the file root and the extension,
    in strftime("_%Y-%m-%d-%H-%M") format (hour and minute included).

    Ex. file_2015-12-28-06-00.tar.bz2

    Raises:
        Exception: if the file name does not contain such a timestamp.
    '''
    key = ''.join([s3_key_prefix, os.path.basename(file_path)])
    print("Uploading {0} to {1}".format(file_path, key))
    upload(file_path, s3_bucket, key)

    # Strip the timestamp from the file root.  Raw string: "\d" in a plain
    # string is an invalid escape sequence (SyntaxWarning on Python 3.12+).
    file_root, file_ext = splitext(os.path.basename(file_path))
    regex = r'(?P<filename>.*)_(?P<year>[\d]+?)-(?P<month>[\d]+?)-(?P<day>[\d]+?)-(?P<hour>[\d]+?)-(?P<minute>[\d]+?)'
    match = re.match(regex, file_root)
    if not match:
        raise Exception('File does not contain a timestamp')
    key_prefix = ''.join([s3_key_prefix, match.group('filename')])
    print('Rotating files on S3 with key prefix {0} and extension {1}'.format(key_prefix, file_ext))
    rotate(key_prefix, file_ext, s3_bucket)
|
def rotate(key_prefix, key_ext, bucket_name, daily_backups=7, weekly_backups=4):
    """ Delete old files we've uploaded to S3 according to grandfather, father, son strategy """

    # Credentials come from Django settings rather than the environment.
    session = boto3.Session(
        aws_access_key_id=settings.S3_ACCESS_KEY,
        aws_secret_access_key=settings.S3_SECRET
    )
    s3 = session.resource('s3')
    bucket = s3.Bucket(bucket_name)
    keys = bucket.objects.filter(Prefix=key_prefix)

    # Expected key shape: "<key_prefix>_YYYY-MM-DD-HH-MM<key_ext>".
    # NOTE(review): key_prefix/key_ext are interpolated unescaped, so regex
    # metacharacters in them (e.g. the "." in ".tgz") act as patterns —
    # confirm callers only pass benign prefixes/extensions.
    regex = '{0}_(?P<year>[\d]+?)-(?P<month>[\d]+?)-(?P<day>[\d]+?)-(?P<hour>[\d]+?)-(?P<minute>[\d]+?){1}'.format(key_prefix, key_ext)
    backups = []  # timestamps parsed from keys that match the pattern

    for key in keys:
        match = re.match(regex, str(key.key))
        if not match:
            # Key under this prefix but without our timestamp format: skip.
            continue
        year = int(match.group('year'))
        month = int(match.group('month'))
        day = int(match.group('day'))
        hour = int(match.group('hour'))
        minute = int(match.group('minute'))
        key_date = datetime(year, month, day, hour, minute)
        backups[:0] = [key_date]
    # Newest first: backups[0] is the most recent timestamp.
    backups = sorted(backups, reverse=True)

    # Daily tier: if the backup just past the daily window is less than a
    # week newer than its neighbour, it is redundant for the weekly tier.
    if len(backups) > daily_backups+1 and backups[daily_backups] - backups[daily_backups+1] < timedelta(days=7):
        key = bucket.Object("{0}{1}{2}".format(key_prefix,backups[daily_backups].strftime("_%Y-%m-%d-%H-%M"), key_ext))
        logger.debug("[not] deleting daily {0}".format(key))
        # Deletion is intentionally disabled; we only log what would go.
        # key.delete()
        del backups[daily_backups]

    # Weekly tier: same idea one window further out, with monthly spacing.
    month_offset = daily_backups + weekly_backups
    if len(backups) > month_offset+1 and backups[month_offset] - backups[month_offset+1] < timedelta(days=30):
        key = bucket.Object("{0}{1}{2}".format(key_prefix,backups[month_offset].strftime("_%Y-%m-%d-%H-%M"), key_ext))
        logger.debug("[not] deleting weekly {0}".format(key))
        # Deletion is intentionally disabled here as well.
        # key.delete()
        del backups[month_offset]
|
def splitext( filename ):
    """ Return the filename and extension according to the first dot in the filename.

    This helps date stamping .tar.bz2 or .ext.gz files properly.

    >>> splitext('file-2015-01-01.tar.bz2')
    ('file-2015-01-01', '.tar.bz2')

    A leading dot (hidden file) is skipped when looking for the extension;
    a name with no (further) dot has an empty extension.
    """
    index = filename.find('.')
    if index == 0:
        # Hidden file: look for a dot after the leading one.
        index = 1 + filename[1:].find('.')
        if index == 0:
            # No second dot.  (The original tested == -1, which 1 + find()
            # can never produce — that branch was unreachable.)
            return filename, ''
    if index == -1:
        # No dot at all.  (The original fell through here and returned
        # (filename[:-1], filename[-1:]), chopping off the last character.)
        return filename, ''
    return filename[:index], filename[index:]
|
def upload(source_path, bucketname, keyname, acl='private', guess_mimetype=True):
    """Upload source_path to bucketname under keyname, printing progress.

    NOTE(review): the acl and guess_mimetype parameters are accepted but
    currently ignored — upload_file() is called without ExtraArgs.
    """

    client = boto3.client('s3', aws_access_key_id=settings.S3_ACCESS_KEY, aws_secret_access_key=settings.S3_SECRET)
    client.upload_file(source_path, bucketname, keyname, Callback=ProgressPercentage(source_path))
|
class ProgressPercentage(object):
    """Callable progress reporter for boto3 S3 transfers.

    boto3 invokes the instance with the number of bytes transferred since
    the previous call; we accumulate the running total and rewrite a
    single progress line on stdout.
    """

    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # boto3 may invoke the callback from multiple transfer threads.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        # To simplify, assume this is hooked up to a single filename
        with self._lock:
            self._seen_so_far += bytes_amount
            # Guard against ZeroDivisionError on an empty (0-byte) file.
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                percentage = 100.0
            sys.stdout.write(
                "\r%s %s / %s (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()
|
# Entry point: upload and rotate local backup archives when run as a script.
if __name__ == "__main__":
    main()