NewsBlur/utils/backups/copy_mongo_serialized.py
# Use this script to copy the contents of MongoDB from one server
# to another using only pymongo. This circumvents the mongod --repair
# option, which can fail.
import datetime
import sys

import pymongo

from apps.rss_feeds.models import Feed
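
# Collections copied wholesale, in _id order. Stories are handled separately
# below (per feed, with a date cutoff); the fetch-history collections are
# commented out and skipped.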
collections = [
    "classifier_author",
    "classifier_feed",
    "classifier_tag",
    "classifier_title",
    "feed_icons",
    "feed_pages",
    "feedback",
    "starred_stories",
    "statistics",
    "userstories",
    # "stories",
    # "feed_fetch_history",
    # "page_fetch_history",
]
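
# db03 (the source) is read from below; db02 is the destination.
# db01 is opened here but never used in this script.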
db01 = pymongo.MongoClient('db01:27018')
db02 = pymongo.MongoClient('db02:27018')
db03 = pymongo.MongoClient('db03:27017')
# Copy each collection wholesale: stream documents from db03 in _id order
# and insert them one at a time into db02, logging progress as we go.
for collection in collections:
    i = 0
    latest_item_id = 0
    total = db03.newsblur[collection].estimated_document_count()
    items = db03.newsblur[collection].find(sort=[('_id', pymongo.ASCENDING)])
    for item in items:
        if item.get('_id') != latest_item_id:
            latest_item_id = item['_id']
            print(" ---> Inserted %s items in %s (at: %s) (%2s%%)" % (
                i, collection, item['_id'], (round(i / float(total), 4) * 100)
            ))
            sys.stdout.flush()
        db02.newsblur[collection].insert_one(item)
        i += 1
# Stories: copied per feed, busiest feeds first, and only stories dated on
# or after the cutoff below.
feeds = Feed.objects.all().only('id', 'average_stories_per_month').order_by('-average_stories_per_month')
feed_count = feeds.count()
total_inserted = 0
for f, feed in enumerate(feeds):
    feed_inserted = 0
    latest_feed_id = feed.id
    latest_story_date = datetime.datetime(2011, 12, 18, 18, 50, 59, 614000)
    items = db03.newsblur.stories.find({'story_feed_id': feed.id, 'story_date': {'$gte': latest_story_date}})
    for item in items:
        db02.newsblur.stories.insert_one(item)
        total_inserted += 1
        feed_inserted += 1
    if feed_inserted:
        print(" ---> Inserted %s items (total: %s) in stories (at: %s -- %s/month) (%2s%%)" % (
            feed_inserted, total_inserted, latest_feed_id, feed.average_stories_per_month,
            (round(f / float(feed_count), 4) * 100)
        ))
        sys.stdout.flush()
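
# One way to run this script (an assumption, not part of the original file):
# the Feed import above needs a configured Django environment, so pipe the
# script through the Django shell from the NewsBlur project root:
#
#   python manage.py shell < utils/backups/copy_mongo_serialized.py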