# Mirror of https://github.com/samuelclay/NewsBlur.git
# Synced 2025-04-13 09:42:01 +00:00
# 271 lines, 8.3 KiB, Python
import sys
|
|
from pprint import pprint
|
|
|
|
import mongoengine
|
|
import pymongo
|
|
from django.conf import settings
|
|
from mongoengine.queryset import OperationError
|
|
|
|
from apps.analyzer.models import (
|
|
MClassifierAuthor,
|
|
MClassifierFeed,
|
|
MClassifierTag,
|
|
MClassifierTitle,
|
|
)
|
|
from apps.reader.models import MUserStory
|
|
from apps.rss_feeds.models import Feed, FeedIcon, MFeedIcon, MFeedPage, MStory
|
|
from utils import json_functions as json
|
|
|
|
# Mongo connection parameters are read from the Django settings module.
MONGO_DB = settings.MONGO_DB
# Module-level handle returned by mongoengine.connect(); the bootstrap
# functions below use it for collection-level calls (index info, drop).
db = mongoengine.connect(
    MONGO_DB["NAME"],
    host=MONGO_DB["HOST"],
    port=MONGO_DB["PORT"],
)
|
|
|
|
|
|
def bootstrap_stories():
    """Copy SQL ``Story`` rows into the Mongo ``MStory`` collection.

    Prints the Mongo story count before and after the run, the SQL story
    count, and the index information of ``db.stories``.  Feeds are visited in
    descending ``average_stories_per_month`` order.  A row is skipped when its
    ``story_tags`` value cannot be decoded or when saving the ``MStory`` fails.
    """
    print("Mongo DB stories: %s" % MStory.objects().count())
    # The drop is commented out, so the "Dropped!" count below is taken
    # without anything having been removed.
    # db.stories.drop()
    print("Dropped! Mongo DB stories: %s" % MStory.objects().count())

    # NOTE(review): `Story` is not among this module's visible imports --
    # confirm where it is expected to come from.
    print("Stories: %s" % Story.objects.all().count())
    pprint(db.stories.index_information())

    feeds = Feed.objects.all().order_by("-average_stories_per_month")
    feed_count = feeds.count()
    for i, feed in enumerate(feeds, start=1):
        feed_stories = Story.objects.filter(story_feed=feed)
        print("%s/%s: %s (%s stories)" % (i, feed_count, feed, feed_stories.count()))
        sys.stdout.flush()

        for story in feed_stories.values():
            # story['story_tags'] = [tag.name for tag in Tag.objects.filter(story=story['id'])]
            try:
                story["story_tags"] = json.decode(story["story_tags"])
            except Exception:
                # Best effort: skip rows with undecodable tags.  `Exception`
                # (not a bare except) keeps Ctrl-C able to stop the run.
                continue
            # Drop SQL-only columns before handing the row to MStory.
            del story["id"]
            del story["story_author_id"]
            try:
                MStory(**story).save()
            except Exception:
                # Best effort: a story that cannot be saved is skipped.
                continue

    print("\nMongo DB stories: %s" % MStory.objects().count())
|
|
|
|
|
|
def bootstrap_userstories():
    """Copy SQL ``UserStory`` rows into the Mongo ``MUserStory`` collection.

    For each row the referenced SQL ``Story`` is looked up, then the matching
    ``MStory`` (same feed pk and ``story_guid``) is attached as ``story``.
    Rows whose SQL story is missing are skipped silently; rows with no
    matching ``MStory`` print ``!``; rows that resolve print ``.``; rows that
    fail to save print a spaced-out ``!``.
    """
    print("Mongo DB userstories: %s" % MUserStory.objects().count())
    # The drop is commented out, so the "Dropped!" count below is taken
    # without anything having been removed.
    # db.userstories.drop()
    print("Dropped! Mongo DB userstories: %s" % MUserStory.objects().count())

    # NOTE(review): `UserStory` and `Story` are not among this module's
    # visible imports -- confirm where they are expected to come from.
    print("UserStories: %s" % UserStory.objects.all().count())
    pprint(db.userstories.index_information())

    for userstory in UserStory.objects.all().values():
        try:
            story = Story.objects.get(pk=userstory["story_id"])
        except Story.DoesNotExist:
            continue

        try:
            userstory["story"] = MStory.objects(
                story_feed_id=story.story_feed.pk, story_guid=story.story_guid
            )[0]
        except Exception:
            # No usable MStory for this row.  `Exception` (not a bare except)
            # keeps Ctrl-C able to stop the run.
            print("!")
            continue
        print(".")

        # Drop SQL-only columns before handing the row to MUserStory.
        del userstory["id"]
        del userstory["opinion"]
        del userstory["story_id"]
        try:
            MUserStory(**userstory).save()
        except Exception:
            print("\n\n!\n\n")
            continue

    print("\nMongo DB userstories: %s" % MUserStory.objects().count())
|
|
|
|
|
|
def bootstrap_classifiers():
    """Copy each SQL classifier table into its Mongo counterpart.

    Handles the title, author, feed and tag classifiers in turn.  For author
    and tag classifiers the foreign-key id is replaced by the referenced
    object's name (``author_name`` / ``name``).  Prints counts before and
    after, the collection's index information, a ``.`` per row and a
    spaced-out ``!`` for every row that fails to save.
    """
    # NOTE(review): the SQL-side names used here (ClassifierTitle,
    # ClassifierAuthor, ClassifierFeed, ClassifierTag, StoryAuthor, Tag) are
    # not among this module's visible imports -- confirm where they come from.
    for sql_classifier, mongo_classifier in (
        (ClassifierTitle, MClassifierTitle),
        (ClassifierAuthor, MClassifierAuthor),
        (ClassifierFeed, MClassifierFeed),
        (ClassifierTag, MClassifierTag),
    ):
        collection = mongo_classifier.meta["collection"]
        object_name = sql_classifier._meta.object_name
        print("Mongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count()))
        # The drop is commented out, so the "Dropped!" count below is taken
        # without anything having been removed.
        # db[collection].drop()
        print("Dropped! Mongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count()))

        print("%s: %s" % (object_name, sql_classifier.objects.all().count()))
        pprint(db[collection].index_information())

        for userclassifier in sql_classifier.objects.all().values():
            del userclassifier["id"]
            if object_name == "ClassifierAuthor":
                # Store the author's name instead of the SQL foreign key.
                author = StoryAuthor.objects.get(pk=userclassifier["author_id"])
                userclassifier["author"] = author.author_name
                del userclassifier["author_id"]
            if object_name == "ClassifierTag":
                # Store the tag's name instead of the SQL foreign key.
                tag = Tag.objects.get(pk=userclassifier["tag_id"])
                userclassifier["tag"] = tag.name
                del userclassifier["tag_id"]
            print(".")
            try:
                mongo_classifier(**userclassifier).save()
            except Exception:
                # Best effort: report and move on.  `Exception` (not a bare
                # except) keeps Ctrl-C able to stop the run.
                print("\n\n!\n\n")
                continue

        print("\nMongo DB classifiers: %s - %s" % (collection, mongo_classifier.objects().count()))
|
|
|
|
|
|
def bootstrap_feedpages():
    """Copy SQL ``FeedPage`` rows into the Mongo ``MFeedPage`` collection.

    Feeds are visited in descending ``average_stories_per_month`` order.  A
    feed that already has an ``MFeedPage`` is left alone; otherwise the first
    SQL ``FeedPage`` row for the feed (if any) is saved as an ``MFeedPage``.
    A spaced-out ``!`` is printed for every page that fails to save.
    """
    print("Mongo DB feed_pages: %s" % MFeedPage.objects().count())
    # The drop is commented out, so the "Dropped!" count below is taken
    # without anything having been removed.
    # db.feed_pages.drop()
    print("Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count())

    # NOTE(review): `FeedPage` is not among this module's visible imports --
    # confirm where it is expected to come from.
    print("FeedPages: %s" % FeedPage.objects.count())
    pprint(db.feed_pages.index_information())

    feeds = Feed.objects.all().order_by("-average_stories_per_month")
    feed_count = feeds.count()
    for i, feed in enumerate(feeds, start=1):
        print("%s/%s: %s" % (i, feed_count, feed))
        sys.stdout.flush()

        if MFeedPage.objects(feed_id=feed.pk):
            # Already migrated.
            continue
        feed_page = list(FeedPage.objects.filter(feed=feed).values())
        if not feed_page:
            continue

        # Replace the SQL primary key with an explicit feed_id.
        del feed_page[0]["id"]
        feed_page[0]["feed_id"] = feed.pk
        try:
            MFeedPage(**feed_page[0]).save()
        except Exception:
            # Best effort: report and move on.  `Exception` (not a bare
            # except) keeps Ctrl-C able to stop the run.
            print("\n\n!\n\n")

    print("\nMongo DB feed_pages: %s" % MFeedPage.objects().count())
|
|
|
|
|
|
def bootstrap_feedicons():
    """Rebuild the Mongo ``MFeedIcon`` collection from SQL ``FeedIcon`` rows.

    Unlike the other bootstrap functions, this one actually drops
    ``db.feed_icons`` first.  Feeds are then visited in descending
    ``average_stories_per_month`` order, and for every feed without an
    ``MFeedIcon`` the first SQL ``FeedIcon`` row (if any) is saved as one.
    A spaced-out ``!`` is printed for every icon that fails to save.
    """
    print("Mongo DB feed_icons: %s" % MFeedIcon.objects().count())
    db.feed_icons.drop()
    print("Dropped! Mongo DB feed_icons: %s" % MFeedIcon.objects().count())

    print("FeedIcons: %s" % FeedIcon.objects.count())
    pprint(db.feed_icons.index_information())

    feeds = Feed.objects.all().order_by("-average_stories_per_month")
    feed_count = feeds.count()
    for i, feed in enumerate(feeds, start=1):
        print("%s/%s: %s" % (i, feed_count, feed))
        sys.stdout.flush()

        if MFeedIcon.objects(feed_id=feed.pk):
            # Already migrated.
            continue
        feed_icon = list(FeedIcon.objects.filter(feed=feed).values())
        if not feed_icon:
            continue

        try:
            MFeedIcon(**feed_icon[0]).save()
        except Exception:
            # Best effort: report and move on.  `Exception` (not a bare
            # except) keeps Ctrl-C able to stop the run.
            print("\n\n!\n\n")

    print("\nMongo DB feed_icons: %s" % MFeedIcon.objects().count())
|
|
|
|
|
|
def compress_stories():
    """Re-save the ``MStory`` documents of every feed, printing progress.

    Feeds are visited in descending ``average_stories_per_month`` order.  A
    rounded percentage of stories processed (against the total counted at the
    start) is printed each time it changes.
    """
    total_stories = MStory.objects().count()
    print("Mongo DB stories: %s" % total_stories)
    last_percent = 0.0
    processed = 0

    ordered_feeds = Feed.objects.all().order_by("-average_stories_per_month")
    total_feeds = ordered_feeds.count()
    for feed_number, feed in enumerate(ordered_feeds, start=1):
        print("%s/%s: %s" % (feed_number, total_feeds, feed))
        sys.stdout.flush()

        for story in MStory.objects(story_feed_id=feed.pk):
            processed += 1.0
            percent = round(processed / total_stories * 100)
            if percent != last_percent:
                last_percent = percent
                print("%s%%" % last_percent)
            # presumably the model's save() performs the compression -- verify
            story.save()
|
|
|
|
|
|
def reindex_stories():
    """Re-key stories whose Mongo id is a string onto a fresh ObjectId.

    Feeds are visited in descending ``average_stories_per_month`` order.  For
    each ``MStory`` whose ``id`` is a ``str``, the old id is copied into
    ``story_guid``, a new ObjectId is assigned and the story is saved; the
    document stored under the old string ``_id`` is then removed from
    ``db.stories``.  Save failures are printed, never raised.  A rounded
    progress percentage is printed each time it changes.
    """
    # This local `db` shadows the module-level `db` inside this function.
    # NOTE(review): `pymongo.Connection`, `pymongo.objectid` and
    # `Collection.remove` are legacy PyMongo names -- confirm they exist in
    # the installed PyMongo version before running this.
    db = pymongo.Connection().newsblur
    count = MStory.objects().count()
    print("Mongo DB stories: %s" % count)
    p = 0.0
    i = 0

    feeds = Feed.objects.all().order_by("-average_stories_per_month")
    feed_count = feeds.count()
    for f, feed in enumerate(feeds, start=1):
        print("%s/%s: %s" % (f, feed_count, feed))
        sys.stdout.flush()
        for story in MStory.objects(story_feed_id=feed.pk):
            i += 1.0
            percent = round(i / count * 100)
            if percent != p:
                p = percent
                print("%s%%" % p)
            if isinstance(story.id, str):
                story.story_guid = story.id
                story.id = pymongo.objectid.ObjectId()
                try:
                    story.save()
                except OperationError as e:
                    print(" ***> OperationError: %s" % e)
                except Exception as e:
                    # Was `except e:`, which evaluates an unbound name and
                    # raises NameError instead of reporting the failure.
                    print(" ***> Unknown Error: %s" % e)
                # Runs whether or not the save succeeded: the document keyed
                # by the old string id is always removed.
                db.stories.remove({"_id": story.story_guid})
|
|
|
|
|
|
if __name__ == "__main__":
    # Only the feed-icon step is enabled; the other steps are kept below,
    # commented out, so they can be switched on by hand.
    # bootstrap_stories()
    # bootstrap_userstories()
    # bootstrap_classifiers()
    # bootstrap_feedpages()
    # compress_stories()
    # reindex_stories()
    bootstrap_feedicons()
|