From 7e52fc37efef9cdc98671bcb82e5c6408f54a65c Mon Sep 17 00:00:00 2001 From: Samuel Clay Date: Mon, 28 Jan 2013 15:43:00 -0800 Subject: [PATCH] Fixing assortment of small bugs. --- apps/analyzer/views.py | 10 +++++----- apps/rss_feeds/models.py | 15 ++++++++++++++- apps/rss_feeds/text_importer.py | 17 +++++++++++------ apps/social/models.py | 14 ++++++++++---- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/apps/analyzer/views.py b/apps/analyzer/views.py index fb309d047..664389b50 100644 --- a/apps/analyzer/views.py +++ b/apps/analyzer/views.py @@ -3,7 +3,7 @@ from utils import log as logging from django.shortcuts import get_object_or_404 from django.views.decorators.http import require_POST from django.conf import settings -# from mongoengine.queryset import OperationError +from mongoengine.queryset import NotUniqueError from apps.rss_feeds.models import Feed from apps.reader.models import UserSubscription from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag @@ -80,10 +80,10 @@ def save_classifier(request): if content_type == 'feed': if not post_content.startswith('social:'): classifier_dict['feed_id'] = post_content - # try: - classifier, created = ClassifierCls.objects.get_or_create(**classifier_dict) - # except OperationError: - # continue + try: + classifier, created = ClassifierCls.objects.get_or_create(**classifier_dict) + except NotUniqueError: + continue if score == 0: classifier.delete() elif classifier.score != score: diff --git a/apps/rss_feeds/models.py b/apps/rss_feeds/models.py index 2d1d879a3..6f4cff10c 100644 --- a/apps/rss_feeds/models.py +++ b/apps/rss_feeds/models.py @@ -1573,6 +1573,7 @@ class MStarredStory(mongo.Document): story_content_z = mongo.BinaryField() story_original_content = mongo.StringField() story_original_content_z = mongo.BinaryField() + original_text_z = mongo.BinaryField() story_content_type = mongo.StringField(max_length=255) story_author_name = mongo.StringField() story_permalink = mongo.StringField() @@ -1611,7 +1612,19 @@ class MStarredStory(mongo.Document): @property def guid_hash(self): return hashlib.sha1(self.story_guid).hexdigest()[:6] - + + def fetch_original_text(self, force=False, request=None): + original_text_z = self.original_text_z + + if not original_text_z or force: + ti = TextImporter(self, request=request) + original_text = ti.fetch() + else: + logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.") + original_text = zlib.decompress(original_text_z) + + return original_text + class MFeedFetchHistory(mongo.Document): feed_id = mongo.IntField() diff --git a/apps/rss_feeds/text_importer.py b/apps/rss_feeds/text_importer.py index 8c33b31eb..727afde2d 100644 --- a/apps/rss_feeds/text_importer.py +++ b/apps/rss_feeds/text_importer.py @@ -23,13 +23,18 @@ class TextImporter: 'Connection': 'close', } - def fetch(self): - html = requests.get(self.story.story_permalink, headers=self.headers) - original_text_doc = readability.Document(html.text, url=html.url, debug=settings.DEBUG) - content = original_text_doc.summary(html_partial=True) + def fetch(self, skip_save=False): + try: + html = requests.get(self.story.story_permalink, headers=self.headers) + original_text_doc = readability.Document(html.text, url=html.url, debug=settings.DEBUG) + content = original_text_doc.summary(html_partial=True) + except: + content = None + if content: - self.story.original_text_z = zlib.compress(content) - self.story.save() + if not skip_save: + self.story.original_text_z = zlib.compress(content) + self.story.save() logging.user(self.request, "~SN~FYFetched ~FGoriginal text~FY: now ~SB%s bytes~SN vs. was ~SB%s bytes" % ( len(unicode(content)), self.story.story_content_z and len(zlib.decompress(self.story.story_content_z)) diff --git a/apps/social/models.py b/apps/social/models.py index d42b5c450..f9597f6b8 100644 --- a/apps/social/models.py +++ b/apps/social/models.py @@ -1899,11 +1899,17 @@ class MSharedStory(mongo.Document): return image_sizes - def fetch_original_text(self): - ti = TextImporter(self) - original_text_doc = ti.fetch() + def fetch_original_text(self, force=False, request=None): + original_text_z = self.original_text_z - return original_text_doc + if not original_text_z or force: + ti = TextImporter(self, request=request) + original_text = ti.fetch() + else: + logging.user(request, "~FYFetching ~FGoriginal~FY story text, ~SBfound.") + original_text = zlib.decompress(original_text_z) + + return original_text class MSocialServices(mongo.Document): user_id = mongo.IntField()