Myriad bug fixes to reduce the number of exception e-mails I get. Forbidden JSON requests now send 403s. Adding a URL no longer uses a partial URL search on feed_link (which would link usatoday.com -> usatoday.com/sports.xml). Also adding the model for FeedPage for mongo.

This commit is contained in:
Samuel Clay 2010-08-27 18:35:33 -04:00
parent f32bc396b4
commit 060f63f8a0
6 changed files with 67 additions and 25 deletions

View file

@ -56,9 +56,13 @@ class OPMLImporter(Importer):
if not hasattr(feed, 'title'):
setattr(feed, 'title', feed.htmlUrl)
feed_address = urlnorm.normalize(feed.xmlUrl)
if len(feed_address) > 255:
continue
feed_link = urlnorm.normalize(feed.htmlUrl)
if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
continue
if len(feed_link) > Feed._meta.get_field('feed_link').max_length:
continue
if len(feed.title) > Feed._meta.get_field('feed_title').max_length:
feed.title = feed.title[:255]
logging.info(' ---> \t%s - %s - %s' % (feed.title, feed_link, feed_address,))
feed_data = dict(feed_address=feed_address, feed_link=feed_link, feed_title=feed.title)
# feeds.append(feed_data)
@ -123,7 +127,7 @@ class GoogleReaderImporter(Importer):
feed_link = urlnorm.normalize(feed_link)
feed_address = urlnorm.normalize(feed_address)
if len(feed_address) > 255:
if len(feed_address) > Feed._meta.get_field('feed_address').max_length:
return folders
# See if it exists as a duplicate first

View file

@ -11,6 +11,7 @@ from django.contrib.auth import login as login_user
from django.contrib.auth.models import User
from django.http import HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from django.conf import settings
from mongoengine.queryset import OperationError
from apps.analyzer.models import MClassifierTitle, MClassifierAuthor, MClassifierFeed, MClassifierTag
from apps.analyzer.models import apply_classifier_titles, apply_classifier_feeds, apply_classifier_authors, apply_classifier_tags
from apps.analyzer.models import get_classifiers_for_user
@ -281,7 +282,8 @@ def load_single_feed(request):
return data
def load_feed_page(request):
feed = get_object_or_404(Feed, id=request.REQUEST.get('feed_id'))
feed_id = request.GET.get('feed_id')
feed = get_object_or_404(Feed, pk=request.GET['feed_id'])
feed_page, created = FeedPage.objects.get_or_create(feed=feed)
data = None
@ -343,7 +345,7 @@ def mark_story_as_read(request):
try:
m.save()
data.update({'code': 1})
except IntegrityError:
except OperationError:
data.update({'code': -1})
return data
@ -393,8 +395,7 @@ def add_url(request):
if duplicate_feed:
feed = [duplicate_feed[0].feed]
else:
feed = Feed.objects.filter(Q(feed_address=url)
| Q(feed_link__icontains=url))
feed = Feed.objects.filter(feed_address=url)
if feed:
feed = feed[0]
@ -408,7 +409,7 @@ def add_url(request):
if not feed:
code = -1
message = "That URL does not point to a website or RSS feed."
message = "That URL does not point to an RSS feed or a website that has an RSS feed."
else:
us, _ = UserSubscription.objects.get_or_create(
feed=feed,

View file

@ -202,9 +202,34 @@ class Feed(models.Model):
if not current_counts:
current_counts = []
map_f = """
function() {
var date = (this.story_date.getFullYear()) + "-" + (this.story_date.getMonth()+1);
emit(date, 1);
}
"""
reduce_f = """
function(key, values) {
var total = 0;
for (var i=0; i < values.length; i++) {
total += values[i];
}
return total;
}
"""
dates = []
res = MStory.objects(story_feed_id=Feed.objects.all()[5].pk).map_reduce(map_f, reduce_f)
for r in res:
dates.append((r.key, r.value))
min_year = datetime.datetime.now().year
if dates:
min_year = dates[0][:4]
print dates, min_year
# Count stories, aggregate by year and month
stories = Story.objects.filter(story_feed=self).extra(select={
stories = MStory.objects(story_feed=self).extra(select={
'year': "EXTRACT(year FROM story_date)",
'month': "EXTRACT(month from story_date)"
}).values('year', 'month')
@ -238,12 +263,13 @@ class Feed(models.Model):
total += d.get(key, 0)
month_count += 1
self.story_count_history = json.encode(months)
if not total:
self.average_stories_per_month = 0
else:
self.average_stories_per_month = total / month_count
self.save(lock)
print months
# self.story_count_history = json.encode(months)
# if not total:
# self.average_stories_per_month = 0
# else:
# self.average_stories_per_month = total / month_count
# self.save(lock)
def last_updated(self):
@ -651,6 +677,19 @@ class StoryAuthor(models.Model):
class FeedPage(models.Model):
feed = models.OneToOneField(Feed, related_name="feed_page")
page_data = StoryField(null=True, blank=True)
class MFeedPage(mongo.Document):
feed_id = mongo.IntField(primary_key=True)
page_data = mongo.StringField()
meta = {
'collection': 'feed_page',
'allow_inheritance': False,
}
def save(self, *args, **kwargs):
super(MFeedPage, self).save(*args, **kwargs)
class FeedXML(models.Model):
feed = models.OneToOneField(Feed, related_name="feed_xml")
@ -687,7 +726,7 @@ class Story(models.Model):
def save(self, *args, **kwargs):
if not self.story_guid_hash and self.story_guid:
self.story_guid_hash = hashlib.md5(self.story_guid).hexdigest()
if len(self.story_title) > 255:
if len(self.story_title) > self._meta.get_field('story_title').max_length:
self.story_title = self.story_title[:255]
super(Story, self).save(*args, **kwargs)

View file

@ -53,7 +53,7 @@ def exception_change_feed_address(request):
feed = get_object_or_404(Feed, pk=feed_id)
feed_address = request.POST['feed_address']
if not feed.has_feed_exception:
if not feed.has_feed_exception and not feed.has_page_exception:
logging.info(" ***********> [%s] Incorrect feed address change: %s" % (request.user, feed))
return HttpResponseForbidden()

View file

@ -269,8 +269,6 @@ class Dispatcher:
try:
ffeed = FetchFeed(feed, self.options)
ret_feed, fetched_feed = ffeed.fetch()
delta = datetime.datetime.now() - start_time
if fetched_feed and ret_feed == FEED_OK:
pfeed = ProcessFeed(feed, fetched_feed, db, self.options)
@ -304,8 +302,7 @@ class Dispatcher:
page_importer = PageImporter(feed.feed_link, feed)
page_importer.fetch_page()
if not delta:
delta = datetime.datetime.now() - start_time
delta = datetime.datetime.now() - start_time
if delta.seconds > SLOWFEED_WARNING:
comment = u' (SLOW FEED!)'
else:

View file

@ -5,9 +5,8 @@ from django.utils.encoding import force_unicode
from django.utils import simplejson as json
from decimal import Decimal
from django.core import serializers
from django.http import HttpResponse
from django.http import HttpResponse, HttpResponseForbidden
from django.core.mail import mail_admins
from django.utils.translation import ugettext as _
from django.db.models.query import QuerySet
import sys
@ -115,7 +114,9 @@ def json_view(func):
response = {'result': 'error',
'text': unicode(e)}
if isinstance(response, HttpResponseForbidden):
return response
json = json_encode(response)
return HttpResponse(json, mimetype='application/json')
return wrap
@ -123,7 +124,7 @@ def json_view(func):
def main():
test = {1: True, 2: u"string", 3: 30}
json_test = json_encode(test)
# print test, json_test
print test, json_test
if __name__ == '__main__':
main()