NewsBlur/apps/rss_feeds/migrations/bootstrap_mongo.py

109 lines
4.4 KiB
Python
Raw Normal View History

from pprint import pprint
from django.conf import settings
from apps.reader.models import MUserStory, UserStory
from apps.rss_feeds.models import Feed, Story, MStory, StoryAuthor, Tag
from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
from apps.analyzer.models import ClassifierTitle, ClassifierAuthor, ClassifierFeed, ClassifierTag
import mongoengine
import sys
from utils import json
# MongoDB connection parameters, as configured in the Django settings.
MONGO_DB = settings.MONGO_DB

# Module-level handle returned by mongoengine.connect(); the bootstrap
# functions in this file use it to drop collections and inspect indexes.
db = mongoengine.connect(
    MONGO_DB['NAME'],
    host=MONGO_DB['HOST'],
    port=MONGO_DB['PORT'],
)
def bootstrap_stories():
    """Rebuild the Mongo ``stories`` collection from the SQL ``Story`` rows.

    Drops the existing ``stories`` collection, then walks the feeds (ordered
    by descending ``average_stories_per_month``) and saves one ``MStory`` per
    story row returned for that feed. Document counts are printed before the
    drop, after the drop, and at the end.

    Progress output: a ``.`` per story attempted and a ``!`` block for every
    story that is skipped because its tags could not be decoded or the Mongo
    save failed.
    """
    print("Mongo DB stories: %s" % MStory.objects().count())
    db.stories.drop()
    print("Dropped! Mongo DB stories: %s" % MStory.objects().count())
    print("Stories: %s" % Story.objects.all().count())
    pprint(db.stories.index_information())

    feeds = Feed.objects.all().order_by('-average_stories_per_month')
    for feed in feeds:
        feed_story_count = Story.objects.select_related(
            'story_author', 'tags').filter(story_feed=feed).count()
        print("%-5s: %s" % (feed_story_count, feed))
        sys.stdout.flush()

        # NOTE(review): average_stories_per_month is the field the feeds above
        # are ordered by; confirm Story also exposes it, otherwise this
        # condition probably belongs on the Feed query instead.
        stories = Story.objects.filter(
            story_feed=feed, average_stories_per_month__lte=55).values()
        for story in stories:
            # Progress marker; same byte stream as Python 2's ``print '.',``.
            sys.stdout.write('. ')

            # story['story_tags'] = [tag.name for tag in Tag.objects.filter(story=story['id'])]
            try:
                story['story_tags'] = json.decode(story['story_tags'])
            except Exception:
                # Only flag stories whose tags really failed to decode. The
                # marker used to be printed before the decode, so it showed
                # up for every story. ``Exception`` (not a bare except) keeps
                # Ctrl-C able to stop the migration.
                print('\n\n!\n\n')
                continue

            # Drop the SQL-only keys before handing the row to MStory.
            del story['id']
            del story['story_author_id']

            try:
                MStory(**story).save()
            except Exception:
                print('\n\n!\n\n')
                continue

    print("\nMongo DB stories: %s" % MStory.objects().count())
def bootstrap_userstories():
    """Rebuild the Mongo ``userstories`` collection from SQL ``UserStory`` rows.

    Drops the existing ``userstories`` collection, then for each SQL row
    resolves the matching ``MStory`` (by feed id and story guid) and saves a
    ``MUserStory`` pointing at it. Document counts are printed before the
    drop, after the drop, and at the end.

    Rows are skipped when the SQL story no longer exists (silently), when no
    Mongo copy of the story is found, or when the Mongo save fails (both
    flagged with a ``!`` block). A ``.`` is printed per row that reaches the
    save step.
    """
    print("Mongo DB userstories: %s" % MUserStory.objects().count())
    db.userstories.drop()
    print("Dropped! Mongo DB userstories: %s" % MUserStory.objects().count())
    print("UserStories: %s" % UserStory.objects.all().count())
    pprint(db.userstories.index_information())

    userstories = UserStory.objects.all().values()
    for userstory in userstories:
        try:
            story = Story.objects.get(pk=userstory['story_id'])
        except Story.DoesNotExist:
            continue

        try:
            # story_feed_id reads the foreign-key value already loaded on the
            # story row, avoiding an extra query to load the feed just for
            # its pk.
            userstory['story'] = MStory.objects(
                story_feed_id=story.story_feed_id,
                story_guid=story.story_guid)[0]
        except IndexError:
            # No Mongo copy of this story exists (e.g. it was skipped while
            # bootstrapping stories); skip the row instead of aborting the
            # whole migration.
            print('\n\n!\n\n')
            continue

        # Progress marker; same byte stream as Python 2's ``print '.',``.
        sys.stdout.write('. ')

        # Drop the SQL-only keys before handing the row to MUserStory.
        del userstory['id']
        del userstory['opinion']
        del userstory['story_id']

        try:
            MUserStory(**userstory).save()
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still stops
            # the run.
            print('\n\n!\n\n')
            continue

    print("\nMongo DB userstories: %s" % MUserStory.objects().count())
def bootstrap_classifiers():
    """Rebuild each Mongo classifier collection from its SQL counterpart.

    For every (SQL model, Mongo document) pair -- title, author, feed, tag --
    the Mongo collection is dropped and one Mongo document is saved per SQL
    row. Author and tag classifiers store the author name / tag name in Mongo
    in place of the SQL foreign key. Document counts are printed per
    collection before the drop, after the drop, and after the copy.

    A ``.`` is printed per row that reaches the save step; a ``!`` block is
    printed for rows skipped because the referenced author/tag is missing or
    the Mongo save failed.
    """
    classifier_pairs = (
        (ClassifierTitle, MClassifierTitle),
        (ClassifierAuthor, MClassifierAuthor),
        (ClassifierFeed, MClassifierFeed),
        (ClassifierTag, MClassifierTag),
    )
    for sql_classifier, mongo_classifier in classifier_pairs:
        collection = mongo_classifier.meta['collection']
        sql_name = sql_classifier._meta.object_name

        print("Mongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count()))
        db[collection].drop()
        print("Dropped! Mongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count()))
        print("%s: %s" % (sql_name, sql_classifier.objects.all().count()))
        pprint(db[collection].index_information())

        for userclassifier in sql_classifier.objects.all().values():
            del userclassifier['id']

            try:
                if sql_name == 'ClassifierAuthor':
                    # Replace the foreign key with the author's name.
                    author = StoryAuthor.objects.get(pk=userclassifier['author_id'])
                    userclassifier['author'] = author.author_name
                    del userclassifier['author_id']
                elif sql_name == 'ClassifierTag':
                    # Replace the foreign key with the tag's name.
                    tag = Tag.objects.get(pk=userclassifier['tag_id'])
                    userclassifier['tag'] = tag.name
                    del userclassifier['tag_id']
            except (StoryAuthor.DoesNotExist, Tag.DoesNotExist):
                # A dangling author/tag reference should cost one row, not
                # abort the whole migration.
                print('\n\n!\n\n')
                continue

            # Progress marker; same byte stream as Python 2's ``print '.',``.
            sys.stdout.write('. ')

            try:
                mongo_classifier(**userclassifier).save()
            except Exception:
                # ``Exception`` rather than a bare except, so Ctrl-C still
                # stops the run.
                print('\n\n!\n\n')
                continue

        print("\nMongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count()))
if __name__ == '__main__':
    # Run order is significant: bootstrap_userstories looks up the MStory
    # documents that bootstrap_stories creates.
    for bootstrap_step in (bootstrap_stories,
                           bootstrap_userstories,
                           bootstrap_classifiers):
        bootstrap_step()