mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-09-18 21:50:56 +00:00
Compressing stories. Also fixing compression of feed_pages bootstrap.
This commit is contained in:
parent
e26ee9dbcc
commit
c9d06f9358
3 changed files with 38 additions and 10 deletions
|
@ -332,10 +332,10 @@ class Feed(models.Model):
|
|||
# logging.debug('- Updated story in feed (%s - %s): %s / %s' % (self.feed_title, story.get('title'), len(existing_story.story_content), len(story_content)))
|
||||
|
||||
original_content = None
|
||||
if existing_story.get('story_original_content'):
|
||||
original_content = existing_story.get('story_original_content')
|
||||
if existing_story.get('story_original_content_z'):
|
||||
original_content = zlib.decompress(existing_story.get('story_original_content_z'))
|
||||
else:
|
||||
original_content = existing_story.get('story_content')
|
||||
original_content = zlib.decompress(existing_story.get('story_content_z'))
|
||||
# print 'Type: %s %s' % (type(original_content), type(story_content))
|
||||
if len(story_content) > 10:
|
||||
diff = HTMLDiff(unicode(original_content), story_content)
|
||||
|
@ -462,7 +462,7 @@ class Feed(models.Model):
|
|||
story['story_date'] = story_db.story_date
|
||||
story['story_authors'] = story_db.story_author_name
|
||||
story['story_title'] = story_db.story_title
|
||||
story['story_content'] = story_db.story_content
|
||||
story['story_content'] = story_db.story_content_z and zlib.decompress(story_db.story_content_z)
|
||||
story['story_permalink'] = story_db.story_permalink
|
||||
story['story_feed_id'] = self.pk
|
||||
story['id'] = story_db.id
|
||||
|
@ -720,7 +720,9 @@ class MStory(mongo.Document):
|
|||
story_date = mongo.DateTimeField()
|
||||
story_title = mongo.StringField(max_length=1024)
|
||||
story_content = mongo.StringField()
|
||||
story_content_z = mongo.BinaryField()
|
||||
story_original_content = mongo.StringField()
|
||||
story_original_content_z = mongo.BinaryField()
|
||||
story_content_type = mongo.StringField(max_length=255)
|
||||
story_author_name = mongo.StringField()
|
||||
story_permalink = mongo.StringField()
|
||||
|
@ -734,6 +736,15 @@ class MStory(mongo.Document):
|
|||
'ordering': ['-story_date'],
|
||||
'allow_inheritance': False,
|
||||
}
|
||||
|
||||
def save(self, *args, **kwargs):
    """Compress the story bodies into their ``*_z`` fields, then save.

    For each of ``story_content`` and ``story_original_content``: when the
    plain-text field holds a non-empty value, that value is zlib-compressed
    into the matching ``story_content_z`` / ``story_original_content_z``
    binary field and the plain-text field is cleared, so only the
    compressed form is persisted.

    Text values are encoded to UTF-8 before compression. Passing a unicode
    string straight to ``zlib.compress`` makes Python 2 encode it with the
    default ASCII codec, which raises ``UnicodeEncodeError`` for any story
    containing non-ASCII characters. As a consequence, the bytes obtained
    by decompressing a ``*_z`` field are UTF-8 encoded text.

    All positional and keyword arguments are forwarded unchanged to the
    parent class's ``save``.
    """
    for plain_field in ('story_content', 'story_original_content'):
        body = getattr(self, plain_field)
        if not body:
            # Nothing new to compress; keep any existing compressed value.
            continue
        if not isinstance(body, bytes):
            # Text (unicode) must become bytes explicitly; never rely on
            # the implicit ASCII conversion.
            body = body.encode('utf-8')
        setattr(self, plain_field + '_z', zlib.compress(body))
        setattr(self, plain_field, None)
    super(MStory, self).save(*args, **kwargs)
|
||||
|
||||
class FeedUpdateHistory(models.Model):
|
||||
fetch_date = models.DateTimeField(default=datetime.datetime.now)
|
||||
|
|
|
@ -109,13 +109,13 @@ def bootstrap_classifiers():
|
|||
|
||||
def bootstrap_feedpages():
|
||||
print "Mongo DB feed_pages: %s" % MFeedPage.objects().count()
|
||||
db.feed_pages.drop()
|
||||
# db.feed_pages.drop()
|
||||
print "Dropped! Mongo DB feed_pages: %s" % MFeedPage.objects().count()
|
||||
|
||||
print "FeedPages: %s" % MFeedPage.objects().count()
|
||||
pprint(db.feed_pages.index_information())
|
||||
|
||||
feeds = Feed.objects.all().order_by('-average_stories_per_month')
|
||||
feeds = Feed.objects.filter(average_stories_per_month=0).order_by('-average_stories_per_month')
|
||||
feed_count = feeds.count()
|
||||
i = 0
|
||||
for feed in feeds:
|
||||
|
@ -127,13 +127,31 @@ def bootstrap_feedpages():
|
|||
if feed_page:
|
||||
del feed_page[0]['id']
|
||||
feed_page[0]['feed_id'] = feed.pk
|
||||
MFeedPage(**feed_page[0]).save()
|
||||
try:
|
||||
MFeedPage(**feed_page[0]).save()
|
||||
except:
|
||||
print '\n\n!\n\n'
|
||||
continue
|
||||
|
||||
|
||||
print "\nMongo DB feed_pages: %s" % MFeedPage.objects().count()
|
||||
|
||||
def compress_stories():
|
||||
count = MStory.objects().count()
|
||||
print "Mongo DB stories: %s" % count
|
||||
p = 0.0
|
||||
i = 0
|
||||
for story in MStory.objects():
|
||||
i += 1.0
|
||||
if round(i / count * 100) != p:
|
||||
p = round(i / count * 100)
|
||||
print '%s%%' % p
|
||||
story.save()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# bootstrap_stories()
|
||||
# bootstrap_userstories()
|
||||
# bootstrap_classifiers()
|
||||
bootstrap_feedpages()
|
||||
bootstrap_feedpages()
|
||||
compress_stories()
|
|
@ -93,8 +93,7 @@ class ProcessFeed:
|
|||
ENTRY_SAME:0,
|
||||
ENTRY_ERR:0}
|
||||
|
||||
logging.debug(u' ---> [%d] Processing %s' % (self.feed.id,
|
||||
self.feed.feed_title))
|
||||
# logging.debug(u' ---> [%d] Processing %s' % (self.feed.id, self.feed.feed_title))
|
||||
|
||||
if hasattr(self.fpf, 'status'):
|
||||
if self.options['verbose']:
|
||||
|
|
Loading…
Add table
Reference in a new issue