From 8d516fc493a6e652913736a7ed08ac7ee30151df Mon Sep 17 00:00:00 2001
From: Samuel Clay
Date: Mon, 29 Apr 2013 16:07:08 -0700
Subject: [PATCH] Adding story_hash to starred stories, user stories, and shared stories.

---
 apps/reader/models.py                         |   5 +-
 .../migrations/0069_story_hash_fields.py      | 103 ++++++++++++++++++
 apps/rss_feeds/models.py                      |   7 ++
 apps/social/models.py                         |   6 +-
 4 files changed, 118 insertions(+), 3 deletions(-)
 create mode 100644 apps/rss_feeds/migrations/0069_story_hash_fields.py

diff --git a/apps/reader/models.py b/apps/reader/models.py
index 548501149..8814e15ca 100644
--- a/apps/reader/models.py
+++ b/apps/reader/models.py
@@ -591,6 +591,7 @@ class MUserStory(mongo.Document):
     feed_id = mongo.IntField()
     read_date = mongo.DateTimeField()
     story_id = mongo.StringField()
+    story_hash = mongo.StringField()
     story_date = mongo.DateTimeField()
     story = mongo.ReferenceField(MStory, dbref=True)
     found_story = mongo.GenericReferenceField()
@@ -610,6 +611,8 @@ class MUserStory(mongo.Document):
     }
 
     def save(self, *args, **kwargs):
+        self.story_hash = self.feed_guid_hash
+
         self.sync_redis()
 
         super(MUserStory, self).save(*args, **kwargs)
@@ -625,7 +628,7 @@ class MUserStory(mongo.Document):
 
     @property
     def feed_guid_hash(self):
-        return "%s:%s" % (self.feed_id, self.guid_hash)
+        return "%s:%s" % (self.feed_id or "0", self.guid_hash)
 
     @classmethod
     def delete_old_stories(cls, feed_id):
diff --git a/apps/rss_feeds/migrations/0069_story_hash_fields.py b/apps/rss_feeds/migrations/0069_story_hash_fields.py
new file mode 100644
index 000000000..f692a226f
--- /dev/null
+++ b/apps/rss_feeds/migrations/0069_story_hash_fields.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+import datetime
+from south.db import db
+from south.v2 import DataMigration
+from django.db import models
+
+class Migration(DataMigration):
+
+    def forwards(self, orm):
+        """Backfill story_hash by re-saving starred and shared stories.
+
+        The model changes in this commit set story_hash while saving
+        (shown for MStarredStory.save(); assumed for MSharedStory), so
+        re-saving each existing document populates the new field.
+        """
+        from apps.rss_feeds.models import MStarredStory
+        from apps.social.models import MSharedStory
+
+        GROUPS = 1000
+
+        def resave_all(document, label):
+            total = document.objects.count()
+            print " ---> Saving %s %s stories..." % (total, label)
+            # Round up: floor division yields a group size of 0 when
+            # total < GROUPS and otherwise never reaches the trailing
+            # total % GROUPS stories.
+            group_size = (total + GROUPS - 1) // GROUPS
+            if not group_size:
+                return
+            for group, offset in enumerate(range(0, total, group_size)):
+                end = offset + group_size
+                print " ---> Group offset: %s/%s-%s" % (group, offset, end)
+                for story in document.objects.order_by('id')[offset:end]:
+                    story.save()
+
+        resave_all(MStarredStory, "starred")
+        resave_all(MSharedStory, "shared")
+
+    def backwards(self, orm):
+        "Write your backwards methods here."
+
+    models = {
+        u'rss_feeds.duplicatefeed': {
+            'Meta': {'object_name': 'DuplicateFeed'},
+            'duplicate_address': ('django.db.models.fields.CharField', [], {'max_length': '764', 'db_index': 'True'}),
+            'duplicate_feed_id': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'db_index': 'True'}),
+            'duplicate_link': ('django.db.models.fields.CharField', [], {'max_length': '764', 'null': 'True', 'db_index': 'True'}),
+            'feed': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'duplicate_addresses'", 'to': u"orm['rss_feeds.Feed']"}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
+        },
+        u'rss_feeds.feed': {
+            'Meta': {'ordering': "['feed_title']", 'object_name': 'Feed', 'db_table': "'feeds'"},
+            'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'db_index': 'True'}),
+            'active_premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
+            'active_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1', 'db_index': 'True'}),
+            'average_stories_per_month': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'branch_from_feed': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['rss_feeds.Feed']", 'null': 'True', 'blank': 'True'}),
+            'creation': ('django.db.models.fields.DateField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'days_to_trim': ('django.db.models.fields.IntegerField', [], {'default': '90'}),
+            'errors_since_good': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'etag': ('django.db.models.fields.CharField', [], {'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'exception_code': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'favicon_color': ('django.db.models.fields.CharField', [], {'max_length': '6', 'null': 'True', 'blank': 'True'}),
+            'favicon_not_found': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'feed_address': ('django.db.models.fields.URLField', [], {'max_length': '764', 'db_index': 'True'}),
+            'feed_address_locked': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
+            'feed_link': ('django.db.models.fields.URLField', [], {'default': "''", 'max_length': '1000', 'null': 'True', 'blank': 'True'}),
+            'feed_link_locked': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'feed_title': ('django.db.models.fields.CharField', [], {'default': "'[Untitled]'", 'max_length': '255', 'null': 'True', 'blank': 'True'}),
+            'fetched_once': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'has_feed_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'has_page': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'has_page_exception': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'hash_address_and_link': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_push': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
+            'known_good': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'last_load_time': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'last_modified': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
+            'last_story_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
+            'last_update': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}),
+            'min_to_decay': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'next_scheduled_update': ('django.db.models.fields.DateTimeField', [], {}),
+            'num_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
+            'premium_subscribers': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
+            's3_icon': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
+            's3_page': ('django.db.models.fields.NullBooleanField', [], {'default': 'False', 'null': 'True', 'blank': 'True'}),
+            'stories_last_month': ('django.db.models.fields.IntegerField', [], {'default': '0'})
+        },
+        u'rss_feeds.feeddata': {
+            'Meta': {'object_name': 'FeedData'},
+            'feed': ('utils.fields.AutoOneToOneField', [], {'related_name': "'data'", 'unique': 'True', 'to': u"orm['rss_feeds.Feed']"}),
+            'feed_classifier_counts': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'feed_tagline': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'popular_authors': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'popular_tags': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'story_count_history': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'})
+        }
+    }
+
+    complete_apps = ['rss_feeds']
+    symmetrical = True
diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py
index 18df93c21..20985b3fe 100644
--- a/apps/rss_feeds/models.py
+++ b/apps/rss_feeds/models.py
@@ -1720,6 +1720,7 @@ class MStarredStory(mongo.Document):
     story_author_name = mongo.StringField()
     story_permalink = mongo.StringField()
     story_guid = mongo.StringField()
+    story_hash = mongo.StringField()
     story_tags = mongo.ListField(mongo.StringField(max_length=250))
 
     meta = {
@@ -1737,6 +1738,8 @@ class MStarredStory(mongo.Document):
         if self.story_original_content:
             self.story_original_content_z = zlib.compress(self.story_original_content)
             self.story_original_content = None
+        self.story_hash = self.feed_guid_hash
+
         super(MStarredStory, self).save(*args, **kwargs)
 
         # self.index_for_search()
@@ -1755,6 +1758,10 @@ class MStarredStory(mongo.Document):
     def guid_hash(self):
         return hashlib.sha1(self.story_guid).hexdigest()[:6]
 
+    @property
+    def feed_guid_hash(self):
+        return "%s:%s" % (self.story_feed_id or "0", self.guid_hash)
+
     def fetch_original_text(self, force=False, request=None):
         original_text_z = self.original_text_z
 
diff --git a/apps/social/models.py b/apps/social/models.py
index 3a76c1bf0..440d436c8 100644
--- a/apps/social/models.py
+++ b/apps/social/models.py
@@ -1189,6 +1189,7 @@ class MSharedStory(mongo.Document):
     replies = mongo.ListField(mongo.EmbeddedDocumentField(MCommentReply))
     source_user_id = mongo.IntField()
     story_db_id = mongo.ObjectIdField()
+    story_hash = mongo.StringField()
     story_feed_id = mongo.IntField()
     story_date = mongo.DateTimeField()
     story_title = mongo.StringField(max_length=1024)
@@ -1241,7 +1242,7 @@ class MSharedStory(mongo.Document):
 
     @property
     def feed_guid_hash(self):
-        return "%s:%s" % (self.story_feed_id, self.guid_hash)
+        return "%s:%s" % (self.story_feed_id or "0", self.guid_hash)
 
     def to_json(self):
         return {
@@ -1265,7 +1266,8 @@ class MSharedStory(mongo.Document):
 
         self.story_guid_hash = hashlib.sha1(self.story_guid).hexdigest()[:6]
         self.story_title = strip_tags(self.story_title)
-
+        self.story_hash = self.feed_guid_hash
+
         self.comments = linkify(strip_tags(self.comments))
         for reply in self.replies:
             reply.comments = linkify(strip_tags(reply.comments))